6
6
# ' Pass the string in the `dataset_id` column to `get_czso_table()`. `dataset_iri`
7
7
# ' is the unique identifier of the dataset in the national catalogue and also the URL
8
8
# ' containing all metadata for the dataset.
9
- # '
9
+ # ' @param search_terms a regex pattern, or a vector of regex patterns, to filter the catalogue by.
10
+ # ' A case-insensitive filter is performed on the title, description and keywords.
11
+ # ' The search returns only catalogue entries where all the patterns are matched anywhere within the title, description or keywords.
10
12
# ' @return a data frame with details on all CZSO datasets available in the Czech National Open Data Catalogue.
11
13
# ' The columns are fairly well described by their names, except:
12
14
# '
23
25
# ' @examples
24
26
# ' \donttest{
25
27
# ' czso_get_catalogue()
28
+ # ' czso_get_catalogue(search_terms = c("kraj", "me?zd"))
26
29
# ' }
27
- czso_get_catalogue <- function () {
30
+ czso_get_catalogue <- function (search_terms = NULL ) {
28
31
url <- " https://vdb.czso.cz/pll/eweb/lkod_ld.seznam"
29
32
30
33
if (is_above_bigsur()) stop_on_openssl()
31
34
32
- suppressWarnings(readr :: read_csv(url ,
35
+ ctlg <- suppressWarnings(readr :: read_csv(url ,
33
36
col_types = readr :: cols(
34
37
dataset_iri = readr :: col_character(),
35
38
dataset_id = readr :: col_character(),
@@ -46,6 +49,48 @@ czso_get_catalogue <- function() {
46
49
))) %> %
47
50
dplyr :: mutate(periodicity = dplyr :: recode(.data $ periodicity , nikdy = " NEVER" ))
48
51
52
+ if (! is.null(search_terms )) {
53
+ czso_filter_catalogue(ctlg , search_terms )
54
+ } else {
55
+ ctlg
56
+ }
57
+
58
+ }
59
+
60
#' Filter the catalogue using a set of keywords
#'
#' Keeps the catalogue entries in which every search term is found in at
#' least one of the `title`, `description` or `keywords_all` columns.
#' Matching is case-insensitive. Missing values never match, and the
#' `dataset_id` column is not searched.
#'
#' @param catalogue a catalogue as returned by `czso_get_catalogue()`
#' @param search_terms A regex pattern (incl. plain text), or a vector of regex patterns, to filter the catalogue by.
#' A case-insensitive filter is performed on the title, description and keywords.
#' The search returns only catalogue entries where all the patterns are matched anywhere within the title, description or keywords.
#' An empty vector of patterns returns the catalogue unchanged.
#'
#' @return The rows of `catalogue` matching all patterns, in their original
#'   order and with the same columns as the input.
#' @export
#'
#' @examples
#' \donttest{
#' ctlg <- czso_get_catalogue()
#' czso_filter_catalogue(ctlg, search_terms = c("kraj", "me?zd"))
#' czso_filter_catalogue(ctlg, search_terms = c("úmrt", "orp"))
#' czso_filter_catalogue(ctlg, search_terms = c("kraj", "vazba", "orp"))
#' czso_filter_catalogue(ctlg, search_terms = c("ISCO", "číselník"))
#' czso_filter_catalogue(ctlg, search_terms = c("zaměstnání", "číselník"))
#' }
czso_filter_catalogue <- function(catalogue, search_terms) {
  # Only the documented text columns are searched.
  search_cols <- c("title", "description", "keywords_all")

  missing_cols <- setdiff(search_cols, names(catalogue))
  if (length(missing_cols) > 0) {
    stop(
      "`catalogue` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  if (anyNA(search_terms)) {
    stop("`search_terms` must not contain missing values.", call. = FALSE)
  }

  n_rows <- nrow(catalogue)

  # For one pattern: TRUE for each row where any searched column matches.
  # grepl() is applied to whole columns, so NA cells yield FALSE instead of
  # being coerced to the text "NA" and matched by accident.
  matches_term <- function(pattern) {
    hits_by_col <- lapply(search_cols, function(col) {
      grepl(pattern, catalogue[[col]], ignore.case = TRUE)
    })
    Reduce(`|`, hits_by_col, rep(FALSE, n_rows))
  }

  # A row is kept only when every pattern matched somewhere in it. Starting
  # from all-TRUE means an empty set of patterns keeps every row.
  keep <- Reduce(`&`, lapply(search_terms, matches_term), rep(TRUE, n_rows))

  catalogue[keep, , drop = FALSE]
}
50
95
51
96
0 commit comments