You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# kudos to Abhi Kumbar: https://medium.com/analytics-vidhya/the-wayback-machine-scraper-63238f6abb66
77
+
# this queries the wayback cdx api for possible instances of the saved jarchive page with the specified game id & returns the latest one
78
+
JArchive_url=f"j-archive.com/showgame.php?game_id={str(game_id)}"# use the url w/o the http:// or https:// to include both in query
79
+
url=f'http://web.archive.org/cdx/search/cdx?url={JArchive_url}&collapse=digest&limit=-2&fastLatest=true&output=json'# for some reason, using limit=-1 does not work
80
+
urls=requests.get(url).text
81
+
parse_url=json.loads(urls) # parses the JSON from urls.
82
+
if len(parse_url) == 0: # if no results, return None
83
+
logging.info("no games found in wayback")
84
+
# alternative: use fallback to get game from scraping j-archive directly
85
+
raise RetrievalException("no games found in wayback")
86
+
87
+
## Extracts timestamp and original columns from urls and compiles a url list.
88
+
url_list= []
89
+
for i in range(1, len(parse_url)): # gets the wayback url
# kudos to Abhi Kumbar: https://medium.com/analytics-vidhya/the-wayback-machine-scraper-63238f6abb66
130
-
# this queries the wayback cdx api for possible instances of the saved jarchive page with the specified game id & returns the latest one
131
-
JArchive_url=f"j-archive.com/showgame.php?game_id={str(game_id)}"# use the url w/o the http:// or https:// to include both in query
132
-
url=f'http://web.archive.org/cdx/search/cdx?url={JArchive_url}&collapse=digest&limit=-2&fastLatest=true&output=json'# for some reason, using limit=-1 does not work
133
-
urls=requests.get(url).text
134
-
parse_url=json.loads(urls) # parses the JSON from urls.
135
-
if len(parse_url) == 0: # if no results, return None
136
-
logging.info("no games found in wayback")
137
-
# alternative: use fallback to get game from scraping j-archive directly
138
-
raise Exception("no games found in wayback")
139
-
140
-
## Extracts timestamp and original columns from urls and compiles a url list.
141
-
url_list= []
142
-
for i in range(1, len(parse_url)): # gets the wayback url
0 commit comments