Skip to content

Commit b7ed9e9

Browse files
committed
fix: Added custom error message only for the 404 case and a method to automatically fix the casing of the crawl reference
1 parent 05ce83a commit b7ed9e9

File tree

2 files changed

15 additions and 9 deletions (+15 -9 lines changed)

2 files changed

15 additions and 9 deletions (+15 -9 lines changed)

src/cli.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,11 @@ fn crawl_name_format(crawl: &str) -> Result<String, String> {
9595
let main_re = Regex::new(r"^(CC\-MAIN)\-([0-9]{4})\-([0-9]{2})$").unwrap();
9696
let news_re = Regex::new(r"^(CC\-NEWS)\-([0-9]{4})\-([0-9]{2})$").unwrap();
9797

98-
if !(main_re.is_match(crawl) || news_re.is_match(crawl)) {
99-
return Err("Please use the CC-MAIN-YYYY-WW or the CC-NEWS-YYYY-MM format, make sure your input is propely capitalized".to_string());
98+
let crawl_ref = crawl.to_uppercase();
99+
100+
if !(main_re.is_match(&crawl_ref) || news_re.is_match(&crawl_ref)) {
101+
return Err("Please use the CC-MAIN-YYYY-WW or the CC-NEWS-YYYY-MM format.".to_string());
100102
} else {
101-
return Ok(crawl.to_owned());
103+
return Ok(crawl_ref);
102104
}
103105
}

src/download.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,17 +104,21 @@ pub async fn download_paths(mut options: DownloadOptions<'_>) -> Result<(), Down
104104
let resp = client.head(url.as_str()).send().await?;
105105
match resp.status() {
106106
status if status.is_success() => (),
107-
status if status.is_client_error() => {
107+
status if status.as_u16() == 404 => {
108108
return Err(format!(
109-
"\n\nThe reference combination you requested:\n\tCRAWL: {}\n\tSUBSET: {}\n\tURL: {}\n\nDoesn't seem to exist or it is currently not accessible.\n\tError Code: {} {}",
109+
"\n\nThe reference combination you requested:\n\tCRAWL: {}\n\tSUBSET: {}\n\tURL: {}\n\nDoesn't seem to exist or it is currently not accessible.\n\tError code: {} {}",
110110
snapshot_original_ref, options.data_type, url, status.as_str(), status.canonical_reason().unwrap_or("")
111111
)
112112
.into());
113113
}
114-
_ => {
115-
return Err(
116-
format!("Couldn't download URL: {}. Error: {:?}", url, resp.status()).into(),
117-
);
114+
status => {
115+
return Err(format!(
116+
"Couldn't download URL: {}. Error code: {} {}",
117+
url,
118+
status.as_str(),
119+
status.canonical_reason().unwrap_or("")
120+
)
121+
.into());
118122
}
119123
}
120124

0 commit comments

Comments
 (0)