18
18
from judgeval .scorers .score import a_execute_scoring
19
19
20
20
from judgeval .constants import (
21
+ ROOT_API ,
21
22
JUDGMENT_EVAL_API_URL ,
22
23
JUDGMENT_EVAL_LOG_API_URL ,
23
24
APIScorer ,
@@ -56,6 +57,7 @@ def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
56
57
details = response .json ().get ("detail" , "No details provided" )
57
58
raise JudgmentAPIError ("An error occurred while executing the Judgment API request: " + details )
58
59
# Check if the response status code is not 2XX
60
+ # Add check for the duplicate eval run name
59
61
if not response .ok :
60
62
error_message = response_data .get ('detail' , 'An unknown error occurred.' )
61
63
error (f"Error: { error_message = } " )
@@ -128,7 +130,83 @@ def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResul
128
130
)
129
131
return results
130
132
131
- def run_eval (evaluation_run : EvaluationRun ):
133
def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str) -> None:
    """
    Checks if an evaluation run name already exists for a given project.

    Args:
        eval_name (str): Name of the evaluation run
        project_name (str): Name of the project
        judgment_api_key (str): API key for authentication

    Raises:
        ValueError: If the evaluation run name already exists
        JudgmentAPIError: If there's an API error during the check
    """
    try:
        response = requests.post(
            f"{ROOT_API}/eval-run-name-exists/",
            json={
                "eval_name": eval_name,
                "project_name": project_name,
                "judgment_api_key": judgment_api_key,
            }
        )

        # 409 Conflict is the server's signal that the run name is already taken.
        if response.status_code == 409:
            message = f"Evaluation run name '{eval_name}' already exists for this project"
            error(message)
            raise ValueError(message)

        if not response.ok:
            response_data = response.json()
            error_message = response_data.get('detail', 'An unknown error occurred.')
            error(f"Error checking eval run name: {error_message}")
            raise JudgmentAPIError(error_message)

    except requests.exceptions.RequestException as e:
        # Chain the original exception so the network-level cause stays in the traceback.
        error(f"Failed to check if eval run name exists: {str(e)}")
        raise JudgmentAPIError(f"Failed to check if eval run name exists: {str(e)}") from e

170
def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run: EvaluationRun) -> None:
    """
    Logs evaluation results to the Judgment API database.

    Args:
        merged_results (List[ScoringResult]): The results to log
        evaluation_run (EvaluationRun): The evaluation run containing project info and API key

    Raises:
        JudgmentAPIError: If there's an API error during logging
        ValueError: If there's a validation error with the results
    """
    try:
        res = requests.post(
            JUDGMENT_EVAL_LOG_API_URL,
            json={
                "results": [result.to_dict() for result in merged_results],
                "judgment_api_key": evaluation_run.judgment_api_key,
                "project_name": evaluation_run.project_name,
                "eval_name": evaluation_run.eval_name,
            }
        )

        if not res.ok:
            response_data = res.json()
            error_message = response_data.get('detail', 'An unknown error occurred.')
            error(f"Error {res.status_code}: {error_message}")
            raise JudgmentAPIError(error_message)

        # Parse the body once instead of calling res.json() per access.
        response_body = res.json()
        if "ui_results_url" in response_body:
            rprint(f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)]{response_body['ui_results_url']}[/]\n")

    except requests.exceptions.RequestException as e:
        error(f"Request failed while saving evaluation results to DB: {str(e)}")
        raise JudgmentAPIError(f"Request failed while saving evaluation results to DB: {str(e)}") from e
    except JudgmentAPIError:
        # Re-raise unchanged: without this clause the broad Exception handler below
        # would catch the API error raised above and re-raise it as a misleading
        # ValueError("Failed to save evaluation results to DB: ..."), breaking callers
        # that catch JudgmentAPIError.
        raise
    except Exception as e:
        error(f"Failed to save evaluation results to DB: {str(e)}")
        raise ValueError(f"Failed to save evaluation results to DB: {str(e)}") from e

209
+ def run_eval (evaluation_run : EvaluationRun , override : bool = False ) -> List [ScoringResult ]:
132
210
"""
133
211
Executes an evaluation of `Example`s using one or more `Scorer`s
134
212
@@ -150,6 +228,15 @@ def run_eval(evaluation_run: EvaluationRun):
150
228
Returns:
151
229
List[ScoringResult]: The results of the evaluation. Each result is a dictionary containing the fields of a `ScoringResult` object.
152
230
"""
231
+
232
+ # Call endpoint to check to see if eval run name exists (if we DON'T want to override and DO want to log results)
233
+ if not override and evaluation_run .log_results :
234
+ check_eval_run_name_exists (
235
+ evaluation_run .eval_name ,
236
+ evaluation_run .project_name ,
237
+ evaluation_run .judgment_api_key
238
+ )
239
+
153
240
# Set example IDs if not already set
154
241
debug ("Initializing examples with IDs and timestamps" )
155
242
for idx , example in enumerate (evaluation_run .examples ):
@@ -262,39 +349,13 @@ def run_eval(evaluation_run: EvaluationRun):
262
349
263
350
info (f"Successfully merged { len (merged_results )} results" )
264
351
265
- actual_eval_run_name = evaluation_run .eval_name
266
352
if evaluation_run .log_results :
267
- try :
268
- res = requests .post (
269
- JUDGMENT_EVAL_LOG_API_URL ,
270
- json = {
271
- "results" : [result .to_dict () for result in merged_results ],
272
- "judgment_api_key" : evaluation_run .judgment_api_key ,
273
- "project_name" : evaluation_run .project_name ,
274
- "eval_name" : evaluation_run .eval_name ,
275
- }
276
- )
277
- if not res .ok :
278
- response_data = res .json ()
279
- error_message = response_data .get ('detail' , 'An unknown error occurred.' )
280
- error (f"Error { res .status_code } : { error_message } " )
281
- raise Exception (f"Error { res .status_code } : { error_message } " )
282
- else :
283
- actual_eval_run_name = res .json ()["eval_results_name" ]
284
- if "ui_results_url" in res .json ():
285
- rprint (f"\n 🔍 You can view your evaluation results here: [rgb(106,0,255)]{ res .json ()['ui_results_url' ]} [/]\n " )
286
-
287
- except requests .exceptions .RequestException as e :
288
- error (f"Request failed while saving evaluation results to DB: { str (e )} " )
289
- raise JudgmentAPIError (f"Request failed while saving evaluation results to DB: { str (e )} " )
290
- except Exception as e :
291
- error (f"Failed to save evaluation results to DB: { str (e )} " )
292
- raise ValueError (f"Failed to save evaluation results to DB: { str (e )} " )
353
+ log_evaluation_results (merged_results , evaluation_run )
293
354
294
355
for i , result in enumerate (merged_results ):
295
356
if not result .scorers_data : # none of the scorers could be executed on this example
296
357
info (f"None of the scorers could be executed on example { i } . This is usually because the Example is missing the fields needed by the scorers. Try checking that the Example has the necessary fields for your scorers." )
297
- return actual_eval_run_name , merged_results
358
+ return merged_results
298
359
299
360
300
361
if __name__ == "__main__" :
0 commit comments