dt <- read_parquet(here::here("data-processed",
"misto_fix-02-gnames.parquet"))
dtl <- read_parquet(here::here("data-processed",
"misto_fix-02-gnames_long-geo.parquet"))
source("read_metadata.R")
prjs_geostatus <- read_parquet(here::here("data-processed",
"projects-geo-check-groups.parquet"))
places_geostatus <- read_parquet(here::here("data-processed",
"places-geo-check.parquet"))
ops <- read_parquet(here::here("data-processed",
"op-codes.parquet"))
prj_meta <- read_parquet(here::here("data-processed",
"prj-esif-meta.parquet"))
head(prjs_geostatus$prj_id) # memo jak vypadá číslo projektu
## [1] "CZ.06.1.42/0.0/0.0/16_030/0005269" "CZ.06.1.42/0.0/0.0/16_030/0005330" "CZ.06.1.42/0.0/0.0/16_030/0005331" "CZ.06.1.42/0.0/0.0/16_031/0005096" "CZ.06.1.42/0.0/0.0/16_031/0005101" "CZ.06.3.33/0.0/0.0/16_036/0006175"
prjs_groupable <- prjs_geostatus %>%
separate(prj_id, sep = "/", into = c("prog", "obj", "what", "vyzva", "proj"),
remove = F) %>%
separate(vyzva, sep = "_", into = c("vyzva_rok", "vyzva_cislo"), remove = F)
## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 2 rows [37325, 37326].
prjs_groupable %>%
filter(geostatus == "více míst nehierarchicky") %>%
count(op_id, sort = T)
op_id <chr> | n <int> | |||
---|---|---|---|---|
OP VVV | 50 | |||
OP PIK | 46 | |||
OP ZP | 13 | |||
IROP | 7 | |||
OP D | 3 |
prjs_groupable %>%
filter(geostatus == "více míst nehierarchicky") %>%
count(vyzva_rok, geostatus, op_id) %>%
ggplot(aes(vyzva_rok, n, fill = op_id)) +
geom_col() +
facet_wrap(~geostatus)
prjs_groupable %>%
select(op_id, prj_id, geostatus, grp = vyzva) %>%
group_by(op_id, grp) %>%
mutate(prjs_in_grp = n()) %>%
filter(geostatus %in% c("bez místa", "více míst nehierarchicky")) %>%
group_by(op_id, grp, geostatus) %>%
summarise(pocet = n(), podil_na_skupine = pocet/mean(prjs_in_grp), .groups = "drop") %>%
mutate(podil_na_celku = pocet/sum(pocet)) %>%
arrange(desc(podil_na_celku)) %>%
mutate(cum_podil_na_celku = cumsum(podil_na_celku))
op_id <chr> | grp <chr> | geostatus <fct> | pocet <int> | podil_na_skupine <dbl> | podil_na_celku <dbl> | |
---|---|---|---|---|---|---|
OP PIK | 15_019 | více míst nehierarchicky | 23 | 0.0611702128 | 0.157534247 | |
OP VVV | 16_013 | více míst nehierarchicky | 9 | 0.1500000000 | 0.061643836 | |
OP VVV | 15_005 | více míst nehierarchicky | 6 | 0.0185758514 | 0.041095890 | |
OP VVV | 15_003 | více míst nehierarchicky | 5 | 0.0476190476 | 0.034246575 | |
OP ZP | 17_078 | bez místa | 5 | 0.1250000000 | 0.034246575 | |
OP PIK | 15_018 | více míst nehierarchicky | 4 | 0.0125391850 | 0.027397260 | |
OP VVV | 15_007 | více míst nehierarchicky | 4 | 0.0327868852 | 0.027397260 | |
OP VVV | 16_032 | více míst nehierarchicky | 4 | 0.0303030303 | 0.027397260 | |
OP VVV | 17_047 | více míst nehierarchicky | 4 | 0.0157480315 | 0.027397260 | |
OP ZP | 19_120 | bez místa | 4 | 0.2000000000 | 0.027397260 |
Co to je?
prjs_groupable %>%
separate(prog, into = c("cnt", "prg", "posa", "n2"), remove = F) %>%
distinct(op_id, n2) %>%
mutate(n = 1) %>%
spread(op_id, n) %>%
arrange(n2)
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 2 rows [37325, 37326].
n2 <chr> | IROP <dbl> | OP D <dbl> | OP PIK <dbl> | OP PPR <dbl> | OP TP <dbl> | OP VVV <dbl> | OP Z <dbl> | OP ZP <dbl> |
---|---|---|---|---|---|---|---|---|
01 | NA | NA | NA | NA | NA | 1 | NA | NA |
02 | NA | NA | 1 | 1 | NA | NA | NA | NA |
03 | NA | NA | 1 | NA | NA | NA | NA | NA |
04 | NA | NA | 1 | NA | NA | NA | NA | NA |
05 | 1 | NA | NA | NA | NA | NA | NA | NA |
06 | NA | NA | 1 | NA | NA | NA | NA | NA |
07 | NA | NA | 1 | NA | NA | NA | NA | NA |
09 | NA | NA | 1 | NA | NA | NA | NA | NA |
10 | NA | NA | 1 | NA | NA | NA | NA | NA |
11 | 1 | NA | NA | 1 | NA | NA | NA | 1 |
prj_ids_nonhierarchical <- prjs_geostatus$prj_id[prjs_geostatus$geostatus == "více míst nehierarchicky"]
library(ggupset)
gst_for_upset <- dtl %>%
filter(typ == "id" & !is.na(value),
prj_id %in% prj_ids_nonhierarchical) %>%
select(op_id, prj_id, level) %>%
distinct() %>%
mutate(level = as.character(level)) %>%
group_by(prj_id) %>%
summarise(geos = list(level)) %>%
left_join(dt %>% filter(prj_radek == 1) %>% select(prj_id, op_id)) %>%
# mutate(nr = map_int(geos, nrow)) %>%
# arrange(desc(nr)) %>%
filter(TRUE)
## `summarise()` ungrouping output (override with `.groups` argument)
## Joining, by = "prj_id"
ggplot2::update_geom_defaults("line", list(colour = "black"))
ggplot(gst_for_upset) +
geom_bar(mapping = aes(x = geos, fill = op_id), position = "stack") +
scale_x_upset(order_by = "degree", sets = geolevels) +
# scale_y_continuous(limits = c(0, 6e4)) +
# scale_y_log10(limits = c(1, 1e5), n.breaks = 10,
# labels = scales::label_number(1)) +
# scale_y_continuous(trans = scales::log_trans(), breaks = scales::pretty_breaks()) +
ptrr::theme_ptrr() +
scale_fill_brewer(type = "qual")
ggplot2::update_geom_defaults("line", list(colour = "darkblue"))
ggplot2::update_geom_defaults("line", list(colour = "black"))
ggplot(gst_for_upset) +
geom_bar(mapping = aes(x = geos)) +
scale_x_upset(order_by = "degree", sets = geolevels) +
# scale_y_log10(n.breaks = 6, labels = scales::label_number(1)) +
# scale_y_continuous(trans = scales::log_trans(), breaks = scales::pretty_breaks()) +
ptrr::theme_ptrr(multiplot = T) +
facet_wrap(~op_id, scales = "fixed")
ggplot2::update_geom_defaults("line", list(colour = "darkblue"))
dtl_with_geostatus <- dtl %>%
left_join(prjs_geostatus %>%
select(prj_id, geostatus)) %>%
select(op_id, prj_id, level, typ, geostatus, value) %>%
filter(typ %in% c("nazev", "id"), geostatus == "více míst nehierarchicky") %>%
drop_na(value)
## Joining, by = "prj_id"
dtl_with_geostatus_places <- dtl %>%
filter(typ == "id") %>%
drop_na(value) %>%
left_join(places_geostatus %>%
select(prj_id, value, level, value_has_valid_parent)) %>%
left_join(prjs_geostatus %>%
select(prj_id, geostatus)) %>%
select(op_id, prj_id, level, typ, value_has_valid_parent, value,
prj_geostatus = geostatus)
## Joining, by = c("prj_id", "level", "value")
## Joining, by = "prj_id"
data_geocheck_file <- here::here("data-processed", "dt-geohierarchy-check.parquet")
data_geocheck <- read_parquet(data_geocheck_file)
# data_geocheck %>%
# filter(prj_id %in% prj_ids_nonhierarchical) %>%
# arrange(prj_id)
data_geocheck %>%
group_by(prj_id, level, value) %>%
filter(all(!levels_ok)) %>%
# filter(str_detect(prj_id, "CZ.04")) %>%
ungroup() %>%
count(prj_id, sort = T)
prj_id <chr> | n <int> | |||
---|---|---|---|---|
CZ.02.3.68/0.0/0.0/16_010/0000507 | 225 | |||
CZ.05.1.24/0.0/0.0/16_037/0002602 | 110 | |||
CZ.05.2.32/0.0/0.0/15_017/0002616 | 42 | |||
CZ.06.3.72/0.0/0.0/15_012/0004558 | 34 | |||
CZ.02.3.61/0.0/0.0/15_007/0000253 | 14 | |||
CZ.02.3.68/0.0/0.0/16_032/0008063 | 14 | |||
CZ.04.2.40/0.0/0.0/19_069/0000467 | 11 | |||
CZ.02.3.61/0.0/0.0/19_075/0016923 | 10 | |||
CZ.02.3.68/0.0/0.0/16_032/0008083 | 6 | |||
CZ.02.3.68/0.0/0.0/17_047/0010653 | 6 |