Skip to content

Commit 82b011c

Browse files
committed
Modularize eval results logging logic.
1 parent 6dd92f3 commit 82b011c

File tree

1 file changed

+40
-25
lines changed

1 file changed

+40
-25
lines changed

judgeval/run_evaluation.py

Lines changed: 40 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,45 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
167167
error(f"Failed to check if eval run name exists: {str(e)}")
168168
raise JudgmentAPIError(f"Failed to check if eval run name exists: {str(e)}")
169169

170+
def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run: EvaluationRun) -> None:
    """
    Logs evaluation results to the Judgment API database.

    Sends the serialized results, together with the run's API key, project
    name and eval name, in a single POST to JUDGMENT_EVAL_LOG_API_URL. If the
    response payload contains a "ui_results_url" entry, a link to it is printed.

    Args:
        merged_results (List[ScoringResult]): The results to log
        evaluation_run (EvaluationRun): The evaluation run containing project info and API key

    Raises:
        JudgmentAPIError: If the request fails or the API answers with a non-OK status
        ValueError: If any other error occurs while preparing or logging the results
    """
    try:
        res = requests.post(
            JUDGMENT_EVAL_LOG_API_URL,
            json={
                "results": [result.to_dict() for result in merged_results],
                "judgment_api_key": evaluation_run.judgment_api_key,
                "project_name": evaluation_run.project_name,
                "eval_name": evaluation_run.eval_name,
            },
        )

        if not res.ok:
            # Error bodies are not guaranteed to be a JSON object (e.g. an HTML
            # page from a proxy); fall back to the raw text so the real failure
            # is reported instead of a JSON decoding error.
            try:
                error_message = res.json().get('detail', 'An unknown error occurred.')
            except (ValueError, AttributeError):
                error_message = res.text or 'An unknown error occurred.'
            error(f"Error {res.status_code}: {error_message}")
            raise JudgmentAPIError(error_message)

        # Parse the body once instead of re-decoding it for every access.
        response_data = res.json()
        if "ui_results_url" in response_data:
            rprint(f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)]{response_data['ui_results_url']}[/]\n")

    except JudgmentAPIError:
        # Already logged above; re-raise untouched so the generic handler
        # below does not convert an API error into a ValueError.
        raise
    except requests.exceptions.RequestException as e:
        error(f"Request failed while saving evaluation results to DB: {str(e)}")
        raise JudgmentAPIError(f"Request failed while saving evaluation results to DB: {str(e)}") from e
    except Exception as e:
        error(f"Failed to save evaluation results to DB: {str(e)}")
        raise ValueError(f"Failed to save evaluation results to DB: {str(e)}") from e
208+
170209
def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[ScoringResult]:
171210
"""
172211
Executes an evaluation of `Example`s using one or more `Scorer`s
@@ -311,31 +350,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
311350
info(f"Successfully merged {len(merged_results)} results")
312351

313352
if evaluation_run.log_results:
314-
try:
315-
res = requests.post(
316-
JUDGMENT_EVAL_LOG_API_URL,
317-
json={
318-
"results": [result.to_dict() for result in merged_results],
319-
"judgment_api_key": evaluation_run.judgment_api_key,
320-
"project_name": evaluation_run.project_name,
321-
"eval_name": evaluation_run.eval_name,
322-
}
323-
)
324-
if not res.ok:
325-
response_data = res.json()
326-
error_message = response_data.get('detail', 'An unknown error occurred.')
327-
error(f"Error {res.status_code}: {error_message}")
328-
raise Exception(f"Error {res.status_code}: {error_message}")
329-
else:
330-
if "ui_results_url" in res.json():
331-
rprint(f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)]{res.json()['ui_results_url']}[/]\n")
332-
333-
except requests.exceptions.RequestException as e:
334-
error(f"Request failed while saving evaluation results to DB: {str(e)}")
335-
raise JudgmentAPIError(f"Request failed while saving evaluation results to DB: {str(e)}")
336-
except Exception as e:
337-
error(f"Failed to save evaluation results to DB: {str(e)}")
338-
raise ValueError(f"Failed to save evaluation results to DB: {str(e)}")
353+
log_evaluation_results(merged_results, evaluation_run)
339354

340355
for i, result in enumerate(merged_results):
341356
if not result.scorers_data: # none of the scorers could be executed on this example

0 commit comments

Comments
 (0)