Skip to content

Commit 894466d

Browse files
committed
[solve_stats] Allow running in parallel
I had to use multiprocessing here (see comments for explanation), but I'd say it's worth it, since it's only for plotting the solve stats 😄 The speedup on my machine (with 6 jobs) is from 14s to 4s 🚀 Before: ``` real 0m14.396s user 0m12.356s sys 0m0.214s ``` After: ``` real 0m4.200s user 0m12.138s sys 0m0.493s ```
1 parent c7aeb11 commit 894466d

File tree

2 files changed

+126
-68
lines changed

2 files changed

+126
-68
lines changed

bin/solve_stats.py

Lines changed: 125 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,124 @@
11
from os import makedirs
2+
from multiprocessing import Pool
23
from pathlib import Path
34

5+
import config
46
from contest import call_api, get_contest_id
57
from util import ProgressBar
68

9+
# Note on multiprocessing:
10+
# Our custom parallel module uses light-weight threads, which all compete for the global interpreter lock:
11+
# https://docs.python.org/3.10/glossary.html#term-global-interpreter-lock
12+
# But matplotlib.pyplot almost exclusively uses the interpreter, so light-weight threads would simply
13+
# wait on each other until they can obtain the lock.
14+
# Instead, multiprocessing spawns full-fledged Python processes and pickles the arguments and return values.
15+
# This means we cannot use closures or share global data, e.g. we cannot share `bar` instance between the processes.
16+
17+
bins = 120
18+
judgement_colors = {'AC': 'lime', 'WA': 'red', 'TLE': '#c0f', 'RTE': 'orange', '': 'skyblue'}
19+
20+
21+
def req(url: str):
22+
r = call_api('GET', url)
23+
r.raise_for_status()
24+
try:
25+
return r.json()
26+
except Exception as e:
27+
print(f'\nError in decoding JSON:\n{e}\n{r.text}')
28+
29+
30+
# Turns an endpoint list result into an object, mapped by 'id'
31+
def req_assoc(url: str) -> dict[str, dict]:
32+
return {o['id']: o for o in req(url)}
33+
34+
35+
def time_string_to_minutes(time_string: str) -> float:
36+
hours, minutes, seconds = (time_string or '0:0:0').split(':')
37+
return int(hours) * 60 + int(minutes) + float(seconds) / 60
38+
39+
40+
def plot_problem(
41+
problem_id: str, minutes: list[dict[str, int]], label: str, judgement_types: dict[str, dict]
42+
):
43+
import matplotlib.pyplot as plt # Have to import it separately in multiprocessing worker.
44+
45+
fig, ax = plt.subplots(figsize=(12, 2))
46+
# Ugly accumulator. Matplotlib doesn't support negative stacked bars properly: https://stackoverflow.com/a/38900035
47+
neg_acc = [0 for m in minutes]
48+
# Reverse order, so that the order at the bottom is WA-TLE-RTE
49+
for jt in sorted(judgement_types, reverse=True):
50+
if jt == 'CE':
51+
continue
52+
is_neg = any(m[jt] < 0 for m in minutes)
53+
ax.bar(
54+
range(bins),
55+
[m[jt] for m in minutes],
56+
1,
57+
color=judgement_colors.get(jt) or judgement_colors['RTE'],
58+
bottom=neg_acc if is_neg else None,
59+
)
60+
if is_neg:
61+
neg_acc = [a + b for a, b in zip(neg_acc, (m[jt] for m in minutes))]
62+
ax.axhline(y=0, linewidth=1, color='gray')
63+
ax.autoscale(enable=True, axis='both', tight=True)
64+
fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
65+
ax.axis('off')
66+
fig.tight_layout(pad=0)
67+
fig.savefig(f'solve_stats/activity/{label}.pdf', bbox_inches='tight', transparent=True)
68+
769

870
def generate_solve_stats(post_freeze: bool):
971
# Import takes more than 1000 ms to evaluate, so only import inside function (when it is actually needed)
72+
import matplotlib
1073
import matplotlib.pyplot as plt
1174

75+
# Default back-end uses Qt, which cannot run in parallel threads
76+
# See: https://github.com/matplotlib/matplotlib/issues/13296
77+
matplotlib.use('pdf')
78+
79+
num_jobs = max(1, config.args.jobs)
80+
1281
contest_id = get_contest_id()
82+
url_prefix = f'/contests/{contest_id}/'
83+
84+
bar = ProgressBar('Fetching', count=3, max_len=len('Contest data'))
85+
86+
bar.start('Contest')
87+
contest = req(url_prefix)
88+
bar.done()
1389

14-
# The endpoint should not start with a slash
15-
def req(endpoint):
16-
url = f'/contests/{contest_id}/{endpoint}'
17-
bar.start(url)
18-
r = call_api('GET', url)
19-
r.raise_for_status()
20-
bar.done()
21-
try:
22-
return r.json()
23-
except Exception as e:
24-
print(f'\nError in decoding JSON:\n{e}\n{r.text()}')
25-
26-
# Turns an endpoint list result into an object, mapped by 'id'
27-
def req_assoc(endpoint) -> dict[str, dict]:
28-
return {o['id']: o for o in req(endpoint)}
29-
30-
def time_string_to_minutes(time_string: str) -> float:
31-
hours, minutes, seconds = (time_string or '0:0:0').split(':')
32-
return int(hours) * 60 + int(minutes) + float(seconds) / 60
33-
34-
bar = ProgressBar('Fetching', count=7, max_len=28 + len(contest_id))
35-
36-
contest = req('')
3790
freeze_duration = time_string_to_minutes(contest['scoreboard_freeze_duration'])
3891
contest_duration = time_string_to_minutes(contest['duration'])
39-
bins = 120
4092
scale = contest_duration / bins
4193

42-
problems = req_assoc('problems')
43-
submissions = req_assoc('submissions')
44-
teams = req_assoc(f'teams?public=1')
45-
languages = req_assoc('languages')
46-
judgement_types = req_assoc('judgement-types')
94+
bar.start('Contest data')
95+
with Pool(num_jobs) as p:
96+
problems, submissions, teams, languages, judgement_types = p.map(
97+
req_assoc,
98+
[
99+
url_prefix + endpoint
100+
for endpoint in [
101+
'problems',
102+
'submissions',
103+
'teams?public=1',
104+
'languages',
105+
'judgement-types',
106+
]
107+
],
108+
)
109+
bar.done()
110+
47111
judgement_types[''] = {'id': '', 'name': 'pending'}
48-
judgement_colors = {'AC': 'lime', 'WA': 'red', 'TLE': '#c0f', 'RTE': 'orange', '': 'skyblue'}
49112

50-
for j in req('judgements'):
113+
bar.start('Judgements')
114+
for j in req(url_prefix + 'judgements'):
51115
# Firstly, only one judgement should be 'valid': in case of rejudgings, this should be the "active" judgement.
52116
# Secondly, note that the submissions list only contains submissions that were submitted on time,
53117
# while the judgements list contains all judgements, therefore the submission might not exist.
54118
if j['valid'] and j['submission_id'] in submissions:
55119
# Add judgement to submission.
56120
submissions[j['submission_id']]['judgement'] = j
121+
bar.done()
57122

58123
bar.finalize()
59124

@@ -80,52 +145,44 @@ def time_string_to_minutes(time_string: str) -> float:
80145
stats_sum[s['problem_id']][jt] += 1
81146
language_stats[s['language_id']][jt] += 1
82147

83-
problem_stats = ''
84-
85-
bar = ProgressBar('Plotting', items=[*stats.keys(), 'Language Stats'])
148+
problem_stats = dict[str, str]()
86149

150+
bar = ProgressBar('Plotting', items=['Problem activity', 'Language stats'])
87151
makedirs(f'solve_stats/activity', exist_ok=True)
88-
for p, minutes in stats.items():
89-
bar.start(p)
90-
label = problems[p]['label']
91-
fig, ax = plt.subplots(figsize=(12, 2))
92-
# Ugly accumulator. Matplotlib doesn't support negative stacked bars properly: https://stackoverflow.com/a/38900035
93-
neg_acc = [0 for m in minutes]
94-
# Reverse order, so that the order at the bottom is WA-TLE-RTE
95-
for jt in sorted(judgement_types, reverse=True):
96-
if jt == 'CE':
97-
continue
98-
is_neg = any(m[jt] < 0 for m in minutes)
99-
ax.bar(
100-
range(bins),
101-
[m[jt] for m in minutes],
102-
1,
103-
color=judgement_colors.get(jt) or judgement_colors['RTE'],
104-
bottom=neg_acc if is_neg else None,
105-
)
106-
if is_neg:
107-
neg_acc = [a + b for a, b in zip(neg_acc, (m[jt] for m in minutes))]
108-
plt.axhline(y=0, linewidth=1, color='gray')
109-
ax.autoscale(enable=True, axis='both', tight=True)
110-
fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
111-
plt.axis('off')
112-
plt.tight_layout(pad=0)
113-
plt.savefig(f'solve_stats/activity/{label}.pdf', bbox_inches='tight', transparent=True)
114-
115-
problem_stats += (
152+
153+
bar.start('Problem activity')
154+
with Pool(num_jobs) as p:
155+
p.starmap(
156+
plot_problem,
157+
[
158+
# Passing all required data to plot_problem, because we dan't use closures (see comment at top of file)
159+
[problem_id, stats[problem_id], problems[problem_id]['label'], judgement_types]
160+
for problem_id in stats
161+
],
162+
)
163+
bar.done()
164+
165+
for problem_id in stats:
166+
problem_stats[problem_id] = (
116167
r'\providecommand{\solvestats'
117-
+ label
168+
+ problems[problem_id]['label']
118169
+ r'}{\printsolvestats{'
119170
+ '}{'.join(
120-
str(x) for x in [sum(stats_sum[p].values()), len(ac_teams[p]), stats_sum[p]['']]
171+
str(x)
172+
for x in [
173+
sum(stats_sum[problem_id].values()),
174+
len(ac_teams[problem_id]),
175+
stats_sum[problem_id][''],
176+
]
121177
)
122178
+ '}}\n'
123179
)
124-
bar.done()
125180

126-
Path('solve_stats/problem_stats.tex').write_text(problem_stats)
181+
Path('solve_stats/problem_stats.tex').write_text(
182+
''.join(problem_stats[p] for p in sorted(problem_stats.keys()))
183+
)
127184

128-
bar.start('Language Stats')
185+
bar.start('Language stats')
129186
fig, ax = plt.subplots(figsize=(8, 4))
130187
for j, (jt, color) in enumerate(judgement_colors.items()):
131188
ax.bar(
@@ -138,7 +195,7 @@ def time_string_to_minutes(time_string: str) -> float:
138195
ax.set_xticks(range(len(languages)), [l['name'] for l in languages.values()])
139196
ax.legend()
140197
fig.tight_layout()
141-
plt.savefig(f'solve_stats/language_stats.pdf', bbox_inches='tight', transparent=True)
198+
fig.savefig(f'solve_stats/language_stats.pdf', bbox_inches='tight', transparent=True)
142199
bar.done()
143200

144201
bar.finalize()

bin/tools.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -895,6 +895,7 @@ def run_parsed_arguments(args):
895895
if action == 'solve_stats':
896896
if level == 'problem':
897897
fatal('solve_stats only works for a contest')
898+
config.args.jobs = (os.cpu_count() or 1) // 2
898899
solve_stats.generate_solve_stats(config.args.post_freeze)
899900
return
900901

0 commit comments

Comments
 (0)