1
- # TODO: Download (cache) entire nbm_block dataset and make function to get opinionated county level summary
2
-
3
- # ' Load part of NBM counts for givent Census County from CORI S3 bucket
1
+ # ' Load NBM service counts for given Census County from CORI S3 bucket
4
2
# '
5
3
# ' Get all the data related to a county.
6
4
# '
24
22
# ' }
25
23
get_nbm_county <- function (geoid_co , release = " latest" ) {
26
24
25
+ # # Download nbm_release parquet
26
+ data_dir <- paste0(here :: here(), " /inst/ext_data/nbm" )
27
+ if (! dir.exists(data_dir )) dir.create(data_dir , recursive = TRUE , showWarnings = FALSE )
28
+
27
29
release_target <- " "
28
30
29
31
if (release %in% c(" D23" , " J24" , " D24" )) {
30
32
release_target <- paste0(" -" , release )
31
33
}
34
+ s3_bucket_name <- " cori.data.fcc"
35
+
36
+ nbm_release <- paste0(" nbm_block" , release_target )
37
+ nbm_release_dir <- paste0(data_dir , " /" , nbm_release )
38
+
39
+ if (! dir.exists(nbm_release_dir )) {
40
+ dir.create(nbm_release_dir , recursive = TRUE , showWarnings = FALSE )
41
+
42
+ s3_data_files <- (
43
+ cori.db :: list_s3_objects(bucket_name = s3_bucket_name ) | >
44
+ dplyr :: filter(grepl(nbm_release , `key` ))
45
+ )$ `key`
46
+
47
+ if (! all(s3_data_files %in% list.files(data_dir , recursive = TRUE , full.names = FALSE ))) {
48
+ s3_download_command <- paste0(" aws s3 cp --recursive s3://" , s3_bucket_name , " /" , nbm_release , " " , nbm_release_dir )
49
+
50
+ print(s3_download_command )
51
+
52
+ system(s3_download_command )
53
+ }
54
+ }
32
55
33
56
if (nchar(geoid_co ) != 5L ) stop(" geoid_co should be a 5-digit string" )
34
57
@@ -45,8 +68,10 @@ get_nbm_county <- function(geoid_co, release = "latest") {
45
68
sum(cnt_fiber_locations) as cnt_fiber_locations,
46
69
sum(cnt_100_20) as cnt_100_20,
47
70
sum(cnt_25_3) as cnt_25_3
48
- from read_parquet('inst/ext_data/nbm/nbm_block" , release_target , " /*/*.parquet')
49
- where geoid_co = '%s';" ), geoid_co )
71
+ from read_parquet('" , nbm_release_dir , " /*/*.parquet')
72
+ where geoid_co = '%s'
73
+ group by geoid
74
+ ;" ), geoid_co )
50
75
51
76
# # TODO: If no geoid_co (county FIPS) specified, create summary for all counties...
52
77
# statement <- c(
@@ -56,7 +81,7 @@ get_nbm_county <- function(geoid_co, release = "latest") {
56
81
# sum(cnt_fiber_locations) as cnt_fiber_locations,
57
82
# sum(cnt_100_20) as cnt_100_20,
58
83
# sum(cnt_25_3) as cnt_25_3
59
- # FROM read_parquet('inst/ext_data/nbm/nbm_block ", release_target , "/*/*.parquet')
84
+ # FROM read_parquet('", nbm_release_dir , "/*/*.parquet')
60
85
# GROUP BY geoid;"
61
86
# )
62
87
0 commit comments