Skip to content

Commit 6cd6b3d

Browse files
committed
cache nbm block parquet data for get nbm county
1 parent e4d5707 commit 6cd6b3d

File tree

4 files changed

+34
-9
lines changed

4 files changed

+34
-9
lines changed

R/get_nbm_bl.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#' Load part of NBM at Census Block from CORI S3 bucket
1+
#' Load NBM service counts for given Census Block from CORI S3 bucket
22
#'
33
#' Get all the data related to a state or county.
44
#'

R/get_nbm_county.R

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
# TODO: Download (cache) entire nbm_block dataset and make function to get opinionated county level summary
2-
3-
#' Load part of NBM counts for givent Census County from CORI S3 bucket
1+
#' Load NBM service counts for given Census County from CORI S3 bucket
42
#'
53
#' Get all the data related to a county.
64
#'
@@ -24,11 +22,36 @@
2422
#' }
2523
get_nbm_county <- function(geoid_co, release = "latest") {
2624

25+
## Download nbm_release parquet
26+
data_dir <- paste0(here::here(), "/inst/ext_data/nbm")
27+
if (! dir.exists(data_dir)) dir.create(data_dir, recursive = TRUE, showWarnings = FALSE)
28+
2729
release_target <- ""
2830

2931
if (release %in% c("D23", "J24", "D24")) {
3032
release_target <- paste0("-", release)
3133
}
34+
s3_bucket_name <- "cori.data.fcc"
35+
36+
nbm_release <- paste0("nbm_block", release_target)
37+
nbm_release_dir <- paste0(data_dir, "/", nbm_release)
38+
39+
if (! dir.exists(nbm_release_dir)) {
40+
dir.create(nbm_release_dir, recursive = TRUE, showWarnings = FALSE)
41+
42+
s3_data_files <- (
43+
cori.db::list_s3_objects(bucket_name = s3_bucket_name) |>
44+
dplyr::filter(grepl(nbm_release, `key`))
45+
)$`key`
46+
47+
if (!all(s3_data_files %in% list.files(data_dir, recursive = TRUE, full.names = FALSE))) {
48+
s3_download_command <- paste0("aws s3 cp --recursive s3://", s3_bucket_name, "/", nbm_release, " ", nbm_release_dir)
49+
50+
print(s3_download_command)
51+
52+
system(s3_download_command)
53+
}
54+
}
3255

3356
if (nchar(geoid_co) != 5L) stop("geoid_co should be a 5-digit string")
3457

@@ -45,8 +68,10 @@ get_nbm_county <- function(geoid_co, release = "latest") {
4568
sum(cnt_fiber_locations) as cnt_fiber_locations,
4669
sum(cnt_100_20) as cnt_100_20,
4770
sum(cnt_25_3) as cnt_25_3
48-
from read_parquet('inst/ext_data/nbm/nbm_block", release_target, "/*/*.parquet')
49-
where geoid_co = '%s';"), geoid_co)
71+
from read_parquet('", nbm_release_dir, "/*/*.parquet')
72+
where geoid_co = '%s'
73+
group by geoid
74+
;"), geoid_co)
5075

5176
## TODO: If no geoid_co (county FIPS) specified, create summary for all counties...
5277
# statement <- c(
@@ -56,7 +81,7 @@ get_nbm_county <- function(geoid_co, release = "latest") {
5681
# sum(cnt_fiber_locations) as cnt_fiber_locations,
5782
# sum(cnt_100_20) as cnt_100_20,
5883
# sum(cnt_25_3) as cnt_25_3
59-
# FROM read_parquet('inst/ext_data/nbm/nbm_block", release_target, "/*/*.parquet')
84+
# FROM read_parquet('", nbm_release_dir, "/*/*.parquet')
6085
# GROUP BY geoid;"
6186
# )
6287

man/get_nbm_bl.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_nbm_county.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)