diff --git a/src/demo/sequence_test.py b/src/demo/sequence_test.py index 89c8d3d8..e1dfb9f5 100644 --- a/src/demo/sequence_test.py +++ b/src/demo/sequence_test.py @@ -146,24 +146,12 @@ def generate_itinerary(destination, start_date, end_date): } ] ) - example2 = Example( - input={"destination": "Tokyo", "start_date": "2025-06-01", "end_date": "2025-06-02"}, - expected_tools=[ - {"tool_name": "search_tavily", "parameters": {"query": "Best tourist attractions in Tokyo"}}, - {"tool_name": "search_tavily", "parameters": {"query": "Best hotels in Tokyo"}}, - {"tool_name": "search_tavily", "parameters": {"query": "Flights to Tokyo from major cities"}}, - {"tool_name": "search_tavily", "parameters": {"query": "Weather forecast for Tokyo from 2025-06-01 to 2025-06-03"}} - ] - ) judgment.assert_test( - project_name="travel_agent_demo", examples=[example], scorers=[ToolOrderScorer()], - model="gpt-4.1-mini", function=generate_itinerary, tracer=tracer, - override=True ) diff --git a/src/judgeval/run_evaluation.py b/src/judgeval/run_evaluation.py index 92e282a4..d8cf2f45 100644 --- a/src/judgeval/run_evaluation.py +++ b/src/judgeval/run_evaluation.py @@ -204,9 +204,9 @@ def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResul ) return results -def check_experiment_type(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str, is_sequence: bool) -> None: +def check_experiment_type(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str, is_trace: bool) -> None: """ - Checks if the current experiment, if one exists, has the same type (examples of sequences) + Checks if the current experiment, if one exists, has the same type (examples or traces) """ try: response = requests.post( @@ -220,7 +220,7 @@ def check_experiment_type(eval_name: str, project_name: str, judgment_api_key: s "eval_name": eval_name, "project_name": project_name, "judgment_api_key": judgment_api_key, - "is_sequence": is_sequence + "is_trace": 
is_trace }, verify=True ) @@ -382,7 +382,7 @@ def run_trace_eval(trace_run: TraceRun, override: bool = False, ignore_errors: b ) if trace_run.append: - # Check that the current experiment, if one exists, has the same type (examples of sequences) + # Check that the current experiment, if one exists, has the same type (examples or traces) check_experiment_type( trace_run.eval_name, trace_run.project_name, @@ -756,7 +756,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False, ignore_error ) if evaluation_run.append: - # Check that the current experiment, if one exists, has the same type (examples of sequences) + # Check that the current experiment, if one exists, has the same type (examples or traces) check_experiment_type( evaluation_run.eval_name, evaluation_run.project_name,