Skip to content

Commit 369bc08

Browse files
fix retrieve bugs
1 parent 6175900 commit 369bc08

File tree

2 files changed

+84
-54
lines changed

2 files changed

+84
-54
lines changed

jparty/retrieve.py

Lines changed: 50 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -49,33 +49,75 @@ def get_Gsheet_game(file_id):
4949
r3 = csv.reader(lines)
5050
return list_to_game(list(r3))
5151

52+
class RetrievalException(Exception):
    """Raised when a game page cannot be retrieved.

    Used in this module for request timeouts, non-200 HTTP responses, a
    missing game summary, and an empty Wayback Machine lookup.
    """
54+
55+
class IncompleteException(Exception):
    """Raised when a retrieved game contains a clue with no clue text."""
5257

5358
def get_game(game_id):
    """Retrieve the game identified by ``game_id`` from the matching source.

    An ID whose string form has 7 or more characters is passed to
    ``get_Gsheet_game``. Shorter IDs are looked up on J-Archive first; if
    that raises ``RetrievalException`` the error is logged and the Wayback
    Machine is tried instead. Any other exception propagates to the caller.
    """
    if len(str(game_id)) >= 7:
        return get_Gsheet_game(str(game_id))

    try:
        logging.info("trying jarchive")
        return get_jarchive_game(game_id)
    except RetrievalException as err:
        logging.error(err)
        logging.info("trying wayback")
        return get_wayback_game(game_id)
6269

6370

64-
def findanswer(clue):
65-
return re.findall(r'correct_response">(.*?)</em', unescape(str(clue)))[0]
6671

6772
def get_jarchive_game(game_id):
    """Fetch game ``game_id`` from its J-Archive ``showgame.php`` page."""
    jarchive_url = f"http://www.j-archive.com/showgame.php?game_id={game_id}"
    return get_generic_game(game_id, jarchive_url)
6974

75+
def get_wayback_game(game_id):
    """Fetch a game from the latest Wayback Machine capture of its J-Archive page.

    Queries the Wayback CDX API for captures of the J-Archive page for
    ``game_id`` and passes the URL of the last capture listed to
    ``get_generic_game``.

    Raises:
        RetrievalException: if the CDX request fails or times out, the reply
            is not valid JSON, or the reply lists no captures.
    """
    # kudos to Abhi Kumbar: https://medium.com/analytics-vidhya/the-wayback-machine-scraper-63238f6abb66
    # Leave the scheme off so the query matches both http:// and https:// captures.
    jarchive_url = f"j-archive.com/showgame.php?game_id={game_id}"
    # limit=-2 is deliberate: for some reason, limit=-1 does not work.
    cdx_url = f'http://web.archive.org/cdx/search/cdx?url={jarchive_url}&collapse=digest&limit=-2&fastLatest=true&output=json'

    try:
        # A timeout keeps a stalled archive.org connection from hanging the caller.
        response = requests.get(cdx_url, timeout=10)
        rows = json.loads(response.text)
    except (requests.exceptions.RequestException, ValueError) as err:
        # json.JSONDecodeError is a ValueError; report both as retrieval failures.
        raise RetrievalException(f"wayback lookup failed: {err!r}") from err

    # Row 0 is skipped as the column header; the rest are captures. An empty
    # reply and a header-only reply both mean there is nothing to load.
    captures = rows[1:]
    if not captures:
        logging.info("no games found in wayback")
        raise RetrievalException("no games found in wayback")

    # Columns 1 and 2 hold the capture timestamp and the original URL.
    latest = captures[-1]
    tstamp, orig_url = latest[1], latest[2]
    latest_url = f'http://web.archive.org/web/{tstamp}/{orig_url}'
    return get_generic_game(game_id, latest_url)
97+
98+
def findanswer(clue):
    """Return the first correct-response text found in ``clue``'s markup.

    ``clue`` is converted to a string and HTML-unescaped before searching.
    Raises ``IndexError`` when no correct-response span is present.
    """
    markup = unescape(str(clue))
    responses = re.findall(r'correct_response">(.*?)</em', markup)
    return responses[0]
100+
70101
def get_generic_game(game_id, url):
71102
logging.info(f"getting game {game_id} from url {url}")
72-
r = requests.get(url)
103+
try:
104+
r = requests.get(url, timeout=5)
105+
except requests.exceptions.ConnectTimeout as e:
106+
logging.info(repr(type(e)))
107+
raise RetrievalException(repr(e))
108+
109+
logging.info(f"returned status {r.status_code}")
110+
if r.status_code != 200:
111+
raise RetrievalException(f"{url} returned HTTP code {r.status_code}")
112+
73113
soup = BeautifulSoup(r.text, "html.parser")
74114
datesearch = re.search(
75-
r"- \w+, (.*?)$", soup.select("#game_title > h1")[0].contents[0]
115+
r"<h1>(.*?)</h1>", str(soup.select("#game_title > h1")[0])
76116
)
117+
77118
if datesearch is None:
78-
return None
119+
raise RetrievalException("Cannot get game summary")
120+
79121
date = datesearch.groups()[0]
80122
comments = soup.select("#game_comments")[0].contents
81123
comments = comments[0] if len(comments) > 0 else ""
@@ -90,8 +132,7 @@ def get_generic_game(game_id, url):
90132
for clue in ro.find_all(class_="clue"):
91133
text_obj = clue.find(class_="clue_text")
92134
if text_obj is None:
93-
logging.info("this game is incomplete")
94-
return None
135+
raise IncompleteException()
95136

96137
text = text_obj.text
97138
index_key = text_obj["id"]
@@ -114,8 +155,7 @@ def get_generic_game(game_id, url):
114155
clue = final_round_obj.find_all(class_="clue")[0]
115156
text_obj = clue.find(class_="clue_text")
116157
if text_obj is None:
117-
logging.info("this game is incomplete")
118-
return None
158+
raise IncompleteException()
119159

120160
text = text_obj.text
121161
answer = findanswer(final_round_obj)
@@ -125,29 +165,6 @@ def get_generic_game(game_id, url):
125165

126166
return GameData(boards, date, comments)
127167

128-
def get_wayback_game(game_id):
129-
# kudos to Abhi Kumbar: https://medium.com/analytics-vidhya/the-wayback-machine-scraper-63238f6abb66
130-
# this query's the wayback cdx api for possible instances of the saved jarchive page with the specified game id & returns the latest one
131-
JArchive_url = f"j-archive.com/showgame.php?game_id={str(game_id)}" # use the url w/o the http:// or https:// to include both in query
132-
url = f'http://web.archive.org/cdx/search/cdx?url={JArchive_url}&collapse=digest&limit=-2&fastLatest=true&output=json' # for some reason, using limit=-1 does not work
133-
urls = requests.get(url).text
134-
parse_url = json.loads(urls) # parses the JSON from urls.
135-
if len(parse_url) == 0: # if no results, return None
136-
logging.info("no games found in wayback")
137-
# alternative: use fallback to get game from scraping j-archive directly
138-
raise Exception("no games found in wayback")
139-
140-
## Extracts timestamp and original columns from urls and compiles a url list.
141-
url_list = []
142-
for i in range(1, len(parse_url)): # gets the wayback url
143-
orig_url = parse_url[i][2]
144-
tstamp = parse_url[i][1]
145-
waylink = tstamp + '/' + orig_url
146-
final_url = f'http://web.archive.org/web/{waylink}'
147-
url_list.append(final_url)
148-
latest_url = url_list[-1]
149-
return get_generic_game(game_id, latest_url)
150-
151168

152169
def get_game_sum(soup):
153170
date = re.search(

jparty/welcome_widget.py

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@
1313
import qrcode
1414
import time
1515
from threading import Thread
16+
import threading
1617
import logging
1718

1819
from jparty.version import version
19-
from jparty.retrieve import get_game, get_random_game
20+
from jparty.retrieve import get_game, get_random_game, IncompleteException
2021
from jparty.utils import resource_path, add_shadow, DynamicLabel, DynamicButton
2122
from jparty.helpmsg import helpmsg
2223
from jparty.style import WINDOWPAL
@@ -117,7 +118,7 @@ def __init__(self, game, parent=None):
117118
self.gameid_label.setOpenExternalLinks(True)
118119

119120
self.textbox = QLineEdit(self)
120-
self.textbox.textChanged.connect(self.show_summary)
121+
self.textbox.textEdited.connect(self.load_game)
121122
f = self.textbox.font()
122123
self.textbox.setFont(f)
123124

@@ -204,50 +205,62 @@ def __random(self):
204205
while True:
205206
game_id = get_random_game()
206207
logging.info(f"GAMEID {game_id}")
207-
self.game.data = get_game(game_id)
208-
if self.game.valid_game():
208+
try:
209+
self.game.data = get_game(game_id)
209210
break
210-
else:
211+
except IncompleteException as e:
212+
logging.error("this game is incomplete")
211213
time.sleep(0.25)
212214

213215
self.gameid_trigger.emit(str(game_id))
214-
self.summary_trigger.emit(self.game.data.date + "\n" + self.game.data.comments)
216+
summary_string = self.game.data.date + "\n" + self.game.data.comments
217+
215218

216219
except Exception as e:
217220
logging.error(e)
218-
self.summary_trigger.emit("Cannot get game")
221+
summary_string = "Cannot get game"
222+
223+
self.summary_trigger.emit(summary_string)
224+
self.check_start()
219225

220226
def random(self, checked):
    """Show a loading message and start the random-game lookup in a thread.

    ``checked`` is accepted but not used.
    """
    self.summary_trigger.emit("Loading...")
    Thread(target=self.__random).start()
224230

225-
def __show_summary(self):
231+
def __load_game(self):
232+
logging.info("textbox changed, reading ID")
226233
game_id = self.textbox.text()
227234
try:
228-
self.game.data = get_game(game_id)
229-
if self.game.valid_game():
230-
self.summary_trigger.emit(
231-
self.game.data.date + "\n" + self.game.data.comments
232-
)
233-
else:
234-
self.summary_trigger.emit("Game has blank questions")
235+
game_data = get_game(game_id)
236+
summary_string = game_data.date + "\n" + game_data.comments
237+
238+
except IncompleteException as e:
239+
game_data = None
240+
summary_string = "Game is incomplete"
235241

236242
except Exception as e:
237-
logging.error(e)
238-
self.summary_trigger.emit("Cannot get game")
243+
logging.error(repr(e))
244+
game_data = None
245+
summary_string = "Cannot get game"
246+
247+
# make sure the game is the one in the textbox
248+
if self.textbox.text() == game_id:
249+
self.game.data = game_data
250+
self.summary_trigger.emit(summary_string)
251+
self.check_start()
252+
239253

240-
self.check_start()
241254

242255
def set_summary(self, text):
    """Display ``text`` in the summary label."""
    self.summary_label.setText(text)
244257

245258
def set_gameid(self, text):
    """Put ``text`` into the game-ID textbox."""
    self.textbox.setText(text)
247260

248-
def show_summary(self, text=None):
261+
def load_game(self, text=None):
    """Show a loading message and load the textbox's game in a thread.

    ``text`` is accepted so the method can be used as a signal slot; it is
    not read here. ``check_start`` is called right after the thread starts.
    """
    self.summary_trigger.emit("Loading...")
    worker = Thread(target=self.__load_game, name="retrieve")
    worker.start()

    self.check_start()
@@ -259,7 +272,7 @@ def check_start(self):
259272
self.start_button.setEnabled(False)
260273

261274
def restart(self):
    """Reload the game currently entered in the textbox."""
    # NOTE(review): the widget itself is passed as load_game's ``text``
    # argument; confirm whether a plain ``self.load_game()`` was intended.
    self.load_game(self)
263276

264277

265278
class QRWidget(StartWidget):

0 commit comments

Comments
 (0)