Skip to content

Commit e72e440

Browse files
alanzhang25 and JCamyre authored
Sequence Removal (#267)
Co-authored-by: Joseph S Camyre <68767176+JCamyre@users.noreply.github.com>
1 parent 0feb1cf commit e72e440

File tree

2 files changed

+5
-17
lines changed

2 files changed

+5
-17
lines changed

src/demo/sequence_test.py

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -146,24 +146,12 @@ def generate_itinerary(destination, start_date, end_date):
146146
}
147147
]
148148
)
149-
example2 = Example(
150-
input={"destination": "Tokyo", "start_date": "2025-06-01", "end_date": "2025-06-02"},
151-
expected_tools=[
152-
{"tool_name": "search_tavily", "parameters": {"query": "Best tourist attractions in Tokyo"}},
153-
{"tool_name": "search_tavily", "parameters": {"query": "Best hotels in Tokyo"}},
154-
{"tool_name": "search_tavily", "parameters": {"query": "Flights to Tokyo from major cities"}},
155-
{"tool_name": "search_tavily", "parameters": {"query": "Weather forecast for Tokyo from 2025-06-01 to 2025-06-03"}}
156-
]
157-
)
158149

159150
judgment.assert_test(
160-
project_name="travel_agent_demo",
161151
examples=[example],
162152
scorers=[ToolOrderScorer()],
163-
model="gpt-4.1-mini",
164153
function=generate_itinerary,
165154
tracer=tracer,
166-
override=True
167155
)
168156

169157

src/judgeval/run_evaluation.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -204,9 +204,9 @@ def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResul
204204
)
205205
return results
206206

207-
def check_experiment_type(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str, is_sequence: bool) -> None:
207+
def check_experiment_type(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str, is_trace: bool) -> None:
208208
"""
209-
Checks if the current experiment, if one exists, has the same type (examples of sequences)
209+
Checks if the current experiment, if one exists, has the same type (examples or traces)
210210
"""
211211
try:
212212
response = requests.post(
@@ -220,7 +220,7 @@ def check_experiment_type(eval_name: str, project_name: str, judgment_api_key: s
220220
"eval_name": eval_name,
221221
"project_name": project_name,
222222
"judgment_api_key": judgment_api_key,
223-
"is_sequence": is_sequence
223+
"is_trace": is_trace
224224
},
225225
verify=True
226226
)
@@ -382,7 +382,7 @@ def run_trace_eval(trace_run: TraceRun, override: bool = False, ignore_errors: b
382382
)
383383

384384
if trace_run.append:
385-
# Check that the current experiment, if one exists, has the same type (examples of sequences)
385+
# Check that the current experiment, if one exists, has the same type (examples or traces)
386386
check_experiment_type(
387387
trace_run.eval_name,
388388
trace_run.project_name,
@@ -756,7 +756,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False, ignore_error
756756
)
757757

758758
if evaluation_run.append:
759-
# Check that the current experiment, if one exists, has the same type (examples of sequences)
759+
# Check that the current experiment, if one exists, has the same type (examples or traces)
760760
check_experiment_type(
761761
evaluation_run.eval_name,
762762
evaluation_run.project_name,

0 commit comments

Comments
 (0)