Skip to content

Commit 9f6480d

Browse files
committed
updates
1 parent 848766a commit 9f6480d

File tree

2 files changed

+20
-16
lines changed

2 files changed

+20
-16
lines changed

src/demo/sequence_test.py

Lines changed: 16 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
tracer = Tracer(api_key=os.getenv("JUDGMENT_API_KEY"), project_name="travel_agent_demo")
1616

1717

18-
@tracer.observe(span_type="tool")
18+
# @tracer.observe(span_type="tool")
1919
def search_tavily(query):
2020
"""Fetch travel data using Tavily API."""
2121
# API_KEY = os.getenv("TAVILY_API_KEY")
@@ -24,28 +24,28 @@ def search_tavily(query):
2424
# return results
2525
return "The weather in Tokyo is sunny with a high of 75°F."
2626

27-
# @judgment.observe(span_type="tool")
27+
@tracer.observe(span_type="tool")
2828
def get_attractions(destination):
2929
"""Search for top attractions in the destination."""
3030
prompt = f"Best tourist attractions in {destination}"
3131
attractions_search = search_tavily(prompt)
3232
return attractions_search
3333

34-
# @judgment.observe(span_type="tool")
34+
@tracer.observe(span_type="tool")
3535
def get_hotels(destination):
3636
"""Search for hotels in the destination."""
3737
prompt = f"Best hotels in {destination}"
3838
hotels_search = search_tavily(prompt)
3939
return hotels_search
4040

41-
# @judgment.observe(span_type="tool")
41+
@tracer.observe(span_type="tool")
4242
def get_flights(destination):
4343
"""Search for flights to the destination."""
4444
prompt = f"Flights to {destination} from major cities"
4545
flights_search = search_tavily(prompt)
4646
return flights_search
4747

48-
# @judgment.observe(span_type="tool")
48+
@tracer.observe(span_type="tool")
4949
def get_weather(destination, start_date, end_date):
5050
"""Search for weather information."""
5151
prompt = f"Weather forecast for {destination} from {start_date} to {end_date}"
@@ -119,27 +119,29 @@ def generate_itinerary(destination, start_date, end_date):
119119
input={"destination": "Paris", "start_date": "2025-06-01", "end_date": "2025-06-02"},
120120
expected_tools=[
121121
{
122-
"tool_name": "search_tavily",
122+
"tool_name": "get_attractions",
123123
"parameters": {
124-
"query": "Best tourist attractions in Paris"
124+
"destination": "Paris"
125125
}
126126
},
127127
{
128-
"tool_name": "search_tavily",
128+
"tool_name": "get_hotels",
129129
"parameters": {
130-
"query": "Best hotels in Paris"
130+
"destination": "Paris"
131131
}
132132
},
133133
{
134-
"tool_name": "search_tavily",
134+
"tool_name": "get_flights",
135135
"parameters": {
136-
"query": "Flights to Paris from major cities"
136+
"destination": "Paris"
137137
}
138138
},
139139
{
140-
"tool_name": "search_tavily",
140+
"tool_name": "get_weather",
141141
"parameters": {
142-
"query": "Weather forecast for Paris from 2025-06-01 to 2025-06-02"
142+
"destination": "Paris",
143+
"start_date": "2025-06-01",
144+
"end_date": "2025-06-02"
143145
}
144146
}
145147
]
@@ -156,7 +158,7 @@ def generate_itinerary(destination, start_date, end_date):
156158

157159
judgment.assert_test(
158160
project_name="travel_agent_demo",
159-
examples=[example, example2],
161+
examples=[example],
160162
scorers=[ToolOrderScorer(threshold=0.5)],
161163
model="gpt-4.1-mini",
162164
function=generate_itinerary,

src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
# Internal imports
66
from judgeval.scorers.api_scorer import APIJudgmentScorer
77
from judgeval.constants import APIScorer
8-
8+
from typing import Optional, Dict
99
class ToolOrderScorer(APIJudgmentScorer):
10-
def __init__(self, threshold: float=1.0):
10+
kwargs: Optional[Dict] = None
11+
def __init__(self, threshold: float=1.0, exact_match: bool=False):
1112
super().__init__(
1213
threshold=threshold,
1314
score_type=APIScorer.TOOL_ORDER,
1415
)
16+
self.kwargs = {"exact_match": exact_match}
1517

1618
@property
1719
def __name__(self):

0 commit comments

Comments
 (0)