Skip to content

Commit c30ee73

Browse files
Sanitize file_id
We were not consistently handling non-UTF-8 characters here, especially after commit e4487ba. This also fixes #25.
1 parent d3466b3 commit c30ee73

File tree

1 file changed

+2
-11
lines changed

1 file changed

+2
-11
lines changed

lib/wayback_machine_downloader.rb

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ class WaybackMachineDownloader
115115
include ArchiveAPI
116116
include SubdomainProcessor
117117

118-
VERSION = "2.3.12"
118+
VERSION = "2.3.11"
119119
DEFAULT_TIMEOUT = 30
120120
MAX_RETRIES = 3
121121
RETRY_DELAY = 2
@@ -352,16 +352,6 @@ def get_composite_snapshot_file_list(target_timestamp)
352352
file_versions.values
353353
end
354354

355-
# Returns a list of files for the composite snapshot
356-
def get_file_list_composite_snapshot(target_timestamp)
357-
file_list = get_composite_snapshot_file_list(target_timestamp)
358-
file_list = file_list.sort_by { |_,v| v[:timestamp].to_s }.reverse
359-
file_list.map do |file_remote_info|
360-
file_remote_info[1][:file_id] = file_remote_info[0]
361-
file_remote_info[1]
362-
end
363-
end
364-
365355
def get_file_list_curated
366356
file_list_curated = Hash.new
367357
get_all_snapshots_to_consider.each do |file_timestamp, file_url|
@@ -694,6 +684,7 @@ def download_file (file_remote_info, http)
694684
file_timestamp = file_remote_info[:timestamp]
695685

696686
# sanitize file_id to ensure it is a valid path component
687+
file_id = file_id.tidy_bytes if file_id
697688
raw_path_elements = file_id.split('/')
698689

699690
sanitized_path_elements = raw_path_elements.map do |element|

0 commit comments

Comments
 (0)