Skip to content

Commit f7578ed

Browse files
authored
Merge pull request #26 from ruralinnovation/dev/Update_release_J24
add raw data for june 2024
2 parents fe69872 + 4fb7a72 commit f7578ed

File tree

10 files changed

+556
-12
lines changed

10 files changed

+556
-12
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: cori.data.fcc
22
Title: Process FCC data
3-
Version: 0.1.2
3+
Version: 0.2.1
44
Authors@R:
55
person(given="Olivier", family="Leroy", email="olivier.leroy@ruralinnovation.us", role = c("aut", "cre"))
66
Description: Functions to get and process FCC data.

NEWS.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# cori.data.fcc 0.2.1
2+
3+
## Major changes
4+
5+
* We updated raw data with the June 2024 FCC release
6+
7+
* The CORI opinionated version at the Census Block level now also uses that release.
8+
19
# cori.data.fcc 0.1.2
210

311
## Minor improvements

R/get_county_nbm_raw.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#'
1313
#' @param geoid_co a string matching a GEOID for a county
1414
#' @param frn a string of 10 numbers matching FCC's FRN, default is "all"
15-
#' @param release a date, set by default to be '2023-12-01'
15+
#' @param release a string giving the release date, default is '2024-06-01'
1616
#'
1717
#' @return a data frame
1818
#'
@@ -24,7 +24,7 @@
2424
#' guilford_cty <- get_county_nbm_raw(geoid_co = "37081")
2525
#'}
2626

27-
get_county_nbm_raw <- function(geoid_co, frn = "all", release = "2023-12-01") {
27+
get_county_nbm_raw <- function(geoid_co, frn = "all", release = "2024-06-01") {
2828

2929
# do I need a look up for county?
3030

R/get_frn_nbm_bl.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,12 @@ get_frn_nbm_bl <- function(frn) {
3636
DBI::dbExecute(con, "INSTALL httpfs;LOAD httpfs")
3737
statement <- sprintf(
3838
"select *
39-
from read_parquet('s3://cori.data.fcc/nbm_block/*/*.parquet')
39+
from read_parquet('s3://cori.data.fcc/nbm_block-J24/*/*.parquet')
4040
where
4141
combo_frn in (
4242
select combo_frn
4343
from
44-
read_parquet('s3://cori.data.fcc/rel_combo_frn.parquet')
44+
read_parquet('s3://cori.data.fcc/rel_combo_frn-J24.parquet')
4545
where frn = '%s'
4646
);", frn)
4747

R/get_nbm_bl.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ get_nbm_bl <- function(geoid_co) {
3131
DBI::dbExecute(con, "INSTALL httpfs;LOAD httpfs")
3232
statement <- sprintf(
3333
"select *
34-
from read_parquet('s3://cori.data.fcc/nbm_block/*/*.parquet')
34+
from read_parquet('s3://cori.data.fcc/nbm_block-J24/*/*.parquet')
3535
where geoid_co = '%s';", geoid_co)
3636

3737
DBI::dbGetQuery(con, statement)

data-raw/NBM.R

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,4 +116,73 @@ DBI::dbExecute(con, copy_stat)
116116

117117
DBI::dbDisconnect(con)
118118

119-
system("aws s3 nbm_raw s3://cori.data.fcc/nbm_raw")
119+
system("aws s3 sync nbm_raw s3://cori.data.fcc/nbm_raw")
120+
121+
## Update January 2025: adding the June 2024 release
122+
# assumes the raw CSV files are already in data_swamp
123+
124+
library(duckdb)
125+
126+
con <- DBI::dbConnect(duckdb::duckdb(), tempfile())
127+
128+
# I needed to run the renaming below because in FCC file names "J24" is ambiguous (it can mean June, January, ...)
129+
dir <- "data_swamp/10dec2024/"
130+
131+
raw_csv <- list.files(dir, pattern = "*.csv", recursive = TRUE)
132+
raw_csv <- paste0(dir, raw_csv)
133+
134+
# better_fcc_name() is defined above
135+
better_name <- vapply(raw_csv, better_fcc_name, FUN.VALUE = character(1))
136+
137+
file.rename(raw_csv, better_name)
138+
139+
140+
## I went overkill with this setting; it is probably not needed
141+
DBI::dbExecute(con, "PRAGMA max_temp_directory_size='10GiB'")
142+
143+
copy_stat <- "
144+
COPY
145+
(SELECT
146+
frn,
147+
provider_id,
148+
brand_name,
149+
location_id,
150+
technology,
151+
max_advertised_download_speed,
152+
max_advertised_upload_speed,
153+
low_latency,
154+
business_residential_code,
155+
state_usps,
156+
block_geoid as geoid_bl,
157+
substring(block_geoid, 1, 5) as geoid_co,
158+
strptime(split_part(split_part(filename, '_', 8), '.', 1), '%d%b%Y')::DATE
159+
as file_time_stamp,
160+
strptime(split_part(filename, '_', 7), '%B%Y')::DATE as release
161+
FROM
162+
read_csv(
163+
'data_swamp/10dec2024/*.csv',
164+
types = {
165+
'frn' : 'VARCHAR(10)',
166+
'provider_id': 'TEXT',
167+
'brand_name' : 'TEXT',
168+
'location_id': 'TEXT',
169+
'technology' : 'VARCHAR(2)',
170+
'max_advertised_download_speed' : INTEGER,
171+
'max_advertised_upload_speed' : INTEGER,
172+
'low_latency' : 'BOOLEAN',
173+
'business_residential_code': 'VARCHAR(1)',
174+
'state_usps' : 'VARCHAR(2)',
175+
'block_geoid': 'VARCHAR(15)'
176+
},
177+
ignore_errors = true,
178+
delim=',', quote='\"',
179+
new_line='\\n', skip=0,
180+
header=true, filename=true))
181+
TO 'nbm_raw' (FORMAT 'parquet', PARTITION_BY(release, state_usps, technology)
182+
);"
183+
184+
DBI::dbExecute(con, copy_stat)
185+
186+
DBI::dbDisconnect(con)
187+
188+
system("aws s3 sync nbm_raw/release=2024-06-01 s3://cori.data.fcc/nbm_raw/release=2024-06-01")

0 commit comments

Comments
 (0)