@@ -471,77 +471,6 @@ async def get_evaluation_status(eval_name: str, project_name: str, judgment_api_
471
471
error (f"Failed to check evaluation status: { str (e )} " )
472
472
raise JudgmentAPIError (f"Failed to check evaluation status: { str (e )} " )
473
473
474
async def wait_for_evaluation(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str, timeout_seconds: int = 3600, poll_interval_seconds: int = 5) -> List[ScoringResult]:
    """
    Wait for an asynchronous evaluation to complete by polling the status endpoint.

    Args:
        eval_name (str): Name of the evaluation run
        project_name (str): Name of the project
        judgment_api_key (str): API key for authentication
        organization_id (str): Organization ID for the evaluation
        timeout_seconds (int, optional): Maximum time to wait in seconds. Defaults to 3600 (1 hour).
        poll_interval_seconds (int, optional): Time between status checks in seconds. Defaults to 5.

    Returns:
        List[ScoringResult]: The evaluation results when complete

    Raises:
        TimeoutError: If the evaluation doesn't complete within the timeout period
        JudgmentAPIError: If there's an API error or the evaluation fails
    """
    # Use a monotonic clock for the deadline: time.time() is wall-clock and can
    # jump (NTP sync, DST, manual changes), which would corrupt the timeout.
    deadline = time.monotonic() + timeout_seconds

    while time.monotonic() < deadline:
        status_response = await get_evaluation_status(
            eval_name=eval_name,
            project_name=project_name,
            judgment_api_key=judgment_api_key,
            organization_id=organization_id
        )

        status = status_response.get("status")

        if status == "completed":
            # Evaluation is complete, extract and convert results
            results_data = status_response.get("results", {})
            examples_data = results_data.get("examples", [])

            # Create ScoringResult objects from the raw data
            scoring_results = []
            for example_data in examples_data:
                scorer_data_list = [
                    ScorerData(**raw_scorer_data)
                    for raw_scorer_data in example_data.get("scorer_data", [])
                ]

                # Create Example from example data (excluding scorer_data,
                # which lives on the ScoringResult instead)
                example_dict = {k: v for k, v in example_data.items() if k != "scorer_data"}
                example = Example(**example_dict)

                # Create ScoringResult
                scoring_result = ScoringResult(
                    success=True,  # Assume success if we have results
                    scorers_data=scorer_data_list,
                    data_object=example
                )
                scoring_results.append(scoring_result)

            return scoring_results

        elif status == "failed":
            # Evaluation failed server-side: surface the server's error message
            error_message = status_response.get("error", "Unknown error")
            error(f"Evaluation failed: {error_message}")
            raise JudgmentAPIError(f"Evaluation failed: {error_message}")

        # Status is either "pending" or "running", continue polling
        info(f"Evaluation status: {status}. Waiting for completion...")
        await asyncio.sleep(poll_interval_seconds)

    # If we get here, we've timed out
    error(f"Evaluation timed out after {timeout_seconds} seconds")
    raise TimeoutError(f"Evaluation timed out after {timeout_seconds} seconds")
544
-
545
474
async def _poll_evaluation_until_complete (eval_name : str , project_name : str , judgment_api_key : str , organization_id : str , poll_interval_seconds : int = 5 , original_examples : Optional [List [Example ]] = None ) -> List [ScoringResult ]:
546
475
"""
547
476
Polls until the evaluation is complete and returns the results.
0 commit comments