library(tidyverse)
library(arrow)
library(readxl)
library(writexl)
library(fs)
source("load_core_data.R")
source("read_metadata.R")
source("shared.R")
prjs_geostatus <- read_parquet(here::here("data-processed",
                                          "projects-geo-check-groups.parquet"),
                               col_select = c("prj_id", "geostatus"))
places_geostatus <- read_parquet(here::here("data-processed",
                                            "places-geo-check.parquet"))
prj_id_nh <- prjs_geostatus %>% filter(geostatus == "více míst nehierarchicky") %>% 
  pull(prj_id)
plcnh <- places_geostatus %>% filter(prj_id %in% prj_id_nh)
file_nh_resolved_previously <- here::here("data-processed", "dtl-nehier", "all.parquet")
if(file.exists(file_nh_resolved_previously)) {
  dtl_nh_resolved_previously <- read_parquet(file_nh_resolved_previously)
} else {
  dtl_nh_resolved_previously <- tibble(prj_id = character())
}
dtl_nh_checked <- plcnh %>% 
  left_join(dts) %>% 
  inner_join(dtl %>% 
               filter(prj_id %in% prj_id_nh) %>% 
               select(prj_id, prj_radek, typ, value, level) %>% 
               spread(typ, value) %>% 
               select(prj_id, value = id, geo_id = shortid, nazev, level)) %>% 
  rename(geo_id_long = value, geo_nazev = nazev) %>% 
  mutate(vybrat = NA,
         geo_id = ifelse(is.na(geo_id), geo_id_long, geo_id),
         geo_id_long = if_else(level %in% c("obec", "zuj"), geo_id_long, NA_character_),
         resolved = if_else(prj_id %in% dtl_nh_resolved_previously$prj_id,
                            TRUE, FALSE)) %>% 
  bind_rows(dtl_nh_resolved_previously %>% 
              select(-any_of(c("rozpad_duvod", "obec_puvod", 
                               "rozpad_typ", "chunk", "radek")))) %>% 
  group_by(prj_id) %>% 
  mutate(oddelovac = if_else(row_number() == n(), "--------", NA_character_)) %>% 
  ungroup() %>% 
  relocate(op_id, prj_nazev, geo_id, geo_nazev, level, vybrat, 
           oddelovac, prj_anotace, resolved)
## Joining, by = c("op_id", "prj_id")
## Joining, by = c("prj_id", "value", "level")
writexl::write_xlsx(dtl_nh_checked %>% filter(!resolved), 
                    here::here("data-output", "prj_nehierarchicke-vse.xlsx"))
dtl_nh_resolved_all <- read_xlsx(here::here(cnf$input_resolved_xlsx)) %>% 
  mutate(level = factor(level, levels = geolevels, ordered = T), 
         vybrat = as.logical(vybrat)) 
prjs_included_by_default <- dtl_nh_resolved_all %>% 
  group_by(prj_id) %>% 
  filter(all(is.na(vybrat))) %>% distinct(prj_id) %>% pull()
prjs_with_some_dropped <- dtl_nh_resolved_all %>% 
  group_by(prj_id) %>% 
  filter(!all(is.na(vybrat))) %>% distinct(prj_id) %>% pull()
prjs_with_all_dropped <-  dtl_nh_resolved_all %>% 
  group_by(prj_id) %>% 
  filter(all(!vybrat)) %>% distinct(prj_id) %>% pull()
dtl_nh_resolved_true <- dtl_nh_resolved_all %>% 
  group_by(prj_id) %>% 
  # these are included by default - they were not touched by the user
  mutate(all_na = all(is.na(vybrat))) %>% 
  # keep all those untouched and those lines where user said to keep
  filter(all_na | vybrat)

length(unique(dtl_nh_resolved_true$prj_id))
## [1] 119
setdiff(unique(dtl_nh_resolved_true$prj_id), prj_id_nh)
## character(0)
dtl_nh_resolved_obce <- dtl_nh_resolved_true %>% 
  filter(level == "obec")
dtl_nh_resolved_zuj <- dtl_nh_resolved_true %>% filter(level == "zuj")
dtl_nh_resolved_ostatni <- dtl_nh_resolved_true %>% filter(!level %in% c("obec", "zuj"))
rozpadac_na_obce <- read_parquet(here::here("data-processed", "rozpadac-na-obce.parquet"))
dtl_nh_resolved_obce_fin <- dtl_nh_resolved_obce %>% 
  select(prj_id, geo_id, level_orig = level, op_id) %>% 
  mutate(rozpad_duvod = "manuálně vybrána obec",
         obec_puvod = "manuální řešení původně nekonzistentní geolokace",
         level = "obec",
         geo_id_orig = geo_id)
dtl_nh_resolved_ostatni_fin <- dtl_nh_resolved_ostatni %>%
  left_join(rozpadac_na_obce %>% 
              rename(level = rozpad_jak, geo_id = value)) %>% 
  select(prj_id, geo_id_orig = geo_id, level, geo_id = obec, op_id) %>% 
  mutate(level = "obec",
         rozpad_duvod = "mechanický rozpad z manuálně vybraného ORP, okresu nebo kraje",
         obec_puvod = "")
## Joining, by = c("geo_id", "level")
dtl_nh_resolved_zuj_fin <- dtl_nh_resolved_zuj %>% 
  ungroup() %>% 
  mutate(level_orig = level,
         geo_id_orig = geo_id,
         level = if_else(geo_id %in% ids$obec, 
                        "obec",
                        as.character(level_orig)) %>% 
           factor(geolevels, ordered = T)) %>% 
  select(prj_id, op_id, geo_id, geo_id_orig, level, level_orig) %>% 
  mutate(rozpad_duvod = "manuálně vybrána ZÚJ; úroveň převedena na obec kde se obec a ZÚJ kryjí")
dtl_nh_resolved_all <- bind_rows(dtl_nh_resolved_obce_fin,
                                 dtl_nh_resolved_zuj_fin,
                                 dtl_nh_resolved_ostatni_fin) %>% 
  mutate(level_orig = as.character(level)) %>% 
  group_by(prj_id) %>% 
  mutate(radek = row_number(),
         rozpad_typ = "více míst nehierarchicky",
         obec_puvod = "manuální řešení původně nekonzistentní geolokace") %>% 
  add_chunk_number() %>% 
  select(prj_id, radek, level, geo_id, level_orig, geo_id_orig, rozpad_duvod,
         obec_puvod, op_id, chunk, rozpad_typ) %>% 
  mutate(level_orig = as.character(level),
         level = factor(level, levels = geolevels, ordered = T)) %>% 
  arrange(prj_id, geo_id, desc(level), level_orig) %>% 
  distinct(prj_id, op_id, geo_id, level, .keep_all = T) %>% 
  add_long_geoid(ids) 
## Adding missing grouping variables: `op_id`
## Joining, by = c("prj_id", "op_id")
## Joining, by = c("level", "geo_id")
dir.create(here::here("data-processed", "dtl-nehier"), showWarnings = F)
write_parquet(dtl_nh_resolved_all, file_nh_resolved_previously)