@@ -167,6 +167,45 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
167
167
error (f"Failed to check if eval run name exists: { str (e )} " )
168
168
raise JudgmentAPIError (f"Failed to check if eval run name exists: { str (e )} " )
169
169
170
def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run: EvaluationRun) -> None:
    """
    Logs evaluation results to the Judgment API database.

    Args:
        merged_results (List[ScoringResult]): The results to log
        evaluation_run (EvaluationRun): The evaluation run containing project info and API key

    Raises:
        JudgmentAPIError: If the server rejects the results or the request itself fails
        ValueError: If an unexpected (non-API, non-request) error occurs while logging
    """
    try:
        res = requests.post(
            JUDGMENT_EVAL_LOG_API_URL,
            json={
                "results": [result.to_dict() for result in merged_results],
                "judgment_api_key": evaluation_run.judgment_api_key,
                "project_name": evaluation_run.project_name,
                "eval_name": evaluation_run.eval_name,
            }
        )

        if not res.ok:
            response_data = res.json()
            error_message = response_data.get('detail', 'An unknown error occurred.')
            error(f"Error {res.status_code}: {error_message}")
            raise JudgmentAPIError(error_message)

        # Parse the success payload once instead of calling res.json() twice.
        response_data = res.json()
        if "ui_results_url" in response_data:
            rprint(f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)]{response_data['ui_results_url']}[/]\n")

    except JudgmentAPIError:
        # Bug fix: without this clause, the JudgmentAPIError raised on the
        # non-ok branch above fell through to the generic `except Exception`
        # below and was re-raised as ValueError, so callers catching
        # JudgmentAPIError never saw it and the server's message was wrapped.
        raise
    except requests.exceptions.RequestException as e:
        error(f"Request failed while saving evaluation results to DB: {str(e)}")
        raise JudgmentAPIError(f"Request failed while saving evaluation results to DB: {str(e)}") from e
    except Exception as e:
        error(f"Failed to save evaluation results to DB: {str(e)}")
        raise ValueError(f"Failed to save evaluation results to DB: {str(e)}") from e
170
209
def run_eval (evaluation_run : EvaluationRun , override : bool = False ) -> List [ScoringResult ]:
171
210
"""
172
211
Executes an evaluation of `Example`s using one or more `Scorer`s
@@ -311,31 +350,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
311
350
info (f"Successfully merged { len (merged_results )} results" )
312
351
313
352
if evaluation_run .log_results :
314
- try :
315
- res = requests .post (
316
- JUDGMENT_EVAL_LOG_API_URL ,
317
- json = {
318
- "results" : [result .to_dict () for result in merged_results ],
319
- "judgment_api_key" : evaluation_run .judgment_api_key ,
320
- "project_name" : evaluation_run .project_name ,
321
- "eval_name" : evaluation_run .eval_name ,
322
- }
323
- )
324
- if not res .ok :
325
- response_data = res .json ()
326
- error_message = response_data .get ('detail' , 'An unknown error occurred.' )
327
- error (f"Error { res .status_code } : { error_message } " )
328
- raise Exception (f"Error { res .status_code } : { error_message } " )
329
- else :
330
- if "ui_results_url" in res .json ():
331
- rprint (f"\n 🔍 You can view your evaluation results here: [rgb(106,0,255)]{ res .json ()['ui_results_url' ]} [/]\n " )
332
-
333
- except requests .exceptions .RequestException as e :
334
- error (f"Request failed while saving evaluation results to DB: { str (e )} " )
335
- raise JudgmentAPIError (f"Request failed while saving evaluation results to DB: { str (e )} " )
336
- except Exception as e :
337
- error (f"Failed to save evaluation results to DB: { str (e )} " )
338
- raise ValueError (f"Failed to save evaluation results to DB: { str (e )} " )
353
+ log_evaluation_results (merged_results , evaluation_run )
339
354
340
355
for i , result in enumerate (merged_results ):
341
356
if not result .scorers_data : # none of the scorers could be executed on this example
0 commit comments