@@ -342,6 +342,7 @@ def get_composite_snapshot_file_list(target_timestamp)
342
342
next if file_timestamp . to_i > target_timestamp
343
343
file_id = file_url . split ( '/' ) [ 3 ..-1 ] . join ( '/' )
344
344
file_id = CGI ::unescape file_id
345
+ file_id . gsub! ( /<[^>]*>/ , '' ) # sanitize HTML tags
345
346
file_id = file_id . tidy_bytes unless file_id == ""
346
347
next if file_id . nil?
347
348
next if match_exclude_filter ( file_url )
@@ -370,9 +371,12 @@ def get_file_list_curated
370
371
next unless file_url . include? ( '/' )
371
372
file_id = file_url . split ( '/' ) [ 3 ..-1 ] . join ( '/' )
372
373
file_id = CGI ::unescape file_id
374
+ file_id . gsub! ( /<[^>]*>/ , '' ) # sanitize HTML tags
373
375
file_id = file_id . tidy_bytes unless file_id == ""
374
376
if file_id . nil?
375
377
puts "Malformed file url, ignoring: #{ file_url } "
378
+ elsif file_id . include? ( '<' ) || file_id . include? ( '>' )
379
+ puts "Invalid characters in file_id after sanitization, ignoring: #{ file_url } "
376
380
else
377
381
if match_exclude_filter ( file_url )
378
382
puts "File url matches exclude filter, ignoring: #{ file_url } "
@@ -397,9 +401,12 @@ def get_file_list_all_timestamps
397
401
file_id = file_url . split ( '/' ) [ 3 ..-1 ] . join ( '/' )
398
402
file_id_and_timestamp = [ file_timestamp , file_id ] . join ( '/' )
399
403
file_id_and_timestamp = CGI ::unescape file_id_and_timestamp
404
+ file_id_and_timestamp . gsub! ( /<[^>]*>/ , '' ) # sanitize HTML tags
400
405
file_id_and_timestamp = file_id_and_timestamp . tidy_bytes unless file_id_and_timestamp == ""
401
406
if file_id . nil?
402
407
puts "Malformed file url, ignoring: #{ file_url } "
408
+ elsif file_id_and_timestamp . include? ( '<' ) || file_id_and_timestamp . include? ( '>' )
409
+ puts "Invalid characters in file_id after sanitization, ignoring: #{ file_url } "
403
410
else
404
411
if match_exclude_filter ( file_url )
405
412
puts "File url matches exclude filter, ignoring: #{ file_url } "
0 commit comments