@@ -471,77 +471,6 @@ async def get_evaluation_status(eval_name: str, project_name: str, judgment_api_
471
471
error (f"Failed to check evaluation status: { str (e )} " )
472
472
raise JudgmentAPIError (f"Failed to check evaluation status: { str (e )} " )
473
473
474
async def wait_for_evaluation(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str, timeout_seconds: int = 3600, poll_interval_seconds: int = 5) -> List[ScoringResult]:
    """
    Wait for an asynchronous evaluation to complete by polling the status endpoint.

    Args:
        eval_name (str): Name of the evaluation run
        project_name (str): Name of the project
        judgment_api_key (str): API key for authentication
        organization_id (str): Organization ID for the evaluation
        timeout_seconds (int, optional): Maximum time to wait in seconds. Defaults to 3600 (1 hour).
        poll_interval_seconds (int, optional): Time between status checks in seconds. Defaults to 5.

    Returns:
        List[ScoringResult]: The evaluation results when complete

    Raises:
        TimeoutError: If the evaluation doesn't complete within the timeout period
        JudgmentAPIError: If there's an API error or the evaluation fails
    """
    # Use a monotonic clock for the deadline: time.time() is wall-clock and can
    # jump (NTP sync, DST, manual changes), which would corrupt the timeout.
    deadline = time.monotonic() + timeout_seconds

    while time.monotonic() < deadline:
        status_response = await get_evaluation_status(
            eval_name=eval_name,
            project_name=project_name,
            judgment_api_key=judgment_api_key,
            organization_id=organization_id
        )

        status = status_response.get("status")

        if status == "completed":
            # Evaluation is complete, extract and convert results
            results_data = status_response.get("results", {})
            examples_data = results_data.get("examples", [])

            # Create ScoringResult objects from the raw data
            scoring_results = []
            for example_data in examples_data:
                scorer_data_list = [
                    ScorerData(**raw_scorer_data)
                    for raw_scorer_data in example_data.get("scorer_data", [])
                ]

                # Create Example from example data (excluding scorer_data,
                # which lives on the ScoringResult instead)
                example_dict = {k: v for k, v in example_data.items() if k != "scorer_data"}
                example = Example(**example_dict)

                # Create ScoringResult
                scoring_result = ScoringResult(
                    success=True,  # Assume success if we have results
                    scorers_data=scorer_data_list,
                    data_object=example
                )
                scoring_results.append(scoring_result)

            return scoring_results

        elif status == "failed":
            # Evaluation failed server-side: surface the server's error message
            error_message = status_response.get("error", "Unknown error")
            error(f"Evaluation failed: {error_message}")
            raise JudgmentAPIError(f"Evaluation failed: {error_message}")

        # Status is either "pending" or "running", continue polling
        info(f"Evaluation status: {status}. Waiting for completion...")
        await asyncio.sleep(poll_interval_seconds)

    # If we get here, we've timed out
    error(f"Evaluation timed out after {timeout_seconds} seconds")
    raise TimeoutError(f"Evaluation timed out after {timeout_seconds} seconds")
544
-
545
474
async def _poll_evaluation_until_complete (eval_name : str , project_name : str , judgment_api_key : str , organization_id : str , poll_interval_seconds : int = 5 , original_examples : Optional [List [Example ]] = None ) -> List [ScoringResult ]:
546
475
"""
547
476
Polls until the evaluation is complete and returns the results.
0 commit comments