Save traces in offline mode too; only skip printing the UI trace link

alanzhang25 · alanzhang25 · commit eb78eb3784f8 · 2025-05-18T11:18:38.000-07:00
diff --git a/src/demo/sequence_test.py b/src/demo/sequence_test.py
@@ -18,7 +18,7 @@
 @tracer.observe(span_type="tool")
 def search_tavily(query):
     """Fetch travel data using Tavily API."""
-    return "results"
+    return "The weather in Tokyo is sunny with a high of 75°F."
 
 # @judgment.observe(span_type="tool")
 def get_attractions(destination):
@@ -46,6 +46,15 @@ def get_weather(destination, start_date, end_date):
     """Search for weather information."""
     prompt = f"Weather forecast for {destination} from {start_date} to {end_date}"
     weather_search = search_tavily(prompt)
+    example = Example(
+        input="What is the weather in Tokyo?",
+        actual_output=weather_search
+    )
+    tracer.async_evaluate(
+        scorers=[AnswerRelevancyScorer(threshold=0.5)],
+        example=example,
+        model="gpt-4o-mini",
+    )
     return weather_search
 
 def research_destination(destination, start_date, end_date):
@@ -137,13 +146,13 @@ def generate_itinerary(destination, start_date, end_date):
             {"tool_name": "search_tavily", "parameters": {"query": "Best tourist attractions in Tokyo"}},
             {"tool_name": "search_tavily", "parameters": {"query": "Best hotels in Tokyo"}},
             {"tool_name": "search_tavily", "parameters": {"query": "Flights to Tokyo from major cities"}},
-            {"tool_name": "search_tavily", "parameters": {"query": "Weather forecast for Tokyo from 2025-06-01 to 2025-06-02"}}
+            {"tool_name": "search_tavily", "parameters": {"query": "Weather forecast for Tokyo from 2025-06-01 to 2025-06-03"}}
         ]
     )
 
     judgment.assert_test(
         project_name="travel_agent_demo",
-        examples=[example],
+        examples=[example, example2],
         scorers=[ToolOrderScorer(threshold=0.5)],
         model="gpt-4.1-mini",
         function=generate_itinerary,
diff --git a/src/judgeval/common/tracer.py b/src/judgeval/common/tracer.py
@@ -146,7 +146,7 @@ def fetch_trace(self, trace_id: str):
         
         return response.json()
 
-    def save_trace(self, trace_data: dict):
+    def save_trace(self, trace_data: dict, offline_mode: bool = False):
         """
         Saves a trace to the Judgment Supabase and optionally to S3 if configured.
 
@@ -183,7 +183,7 @@ def save_trace(self, trace_data: dict):
             except Exception as e:
                 warnings.warn(f"Failed to save trace to S3: {str(e)}")
         
-        if "ui_results_url" in response.json():
+        if not offline_mode and "ui_results_url" in response.json():
             pretty_str = f"\n🔍 You can view your trace data here: [rgb(106,0,255)][link={response.json()['ui_results_url']}]View Trace[/link]\n"
             rprint(pretty_str)
 
@@ -665,8 +665,7 @@ def save(self, overwrite: bool = False) -> Tuple[str, dict]:
             "parent_name": self.parent_name
         }        
         # --- Log trace data before saving ---
-        if not self.tracer.offline_mode:
-            self.trace_manager_client.save_trace(trace_data)
+        self.trace_manager_client.save_trace(trace_data, offline_mode=self.tracer.offline_mode)
 
         # upload annotations
         # TODO: batch to the log endpoint
diff --git a/src/judgeval/data/trace.py b/src/judgeval/data/trace.py
@@ -126,6 +126,7 @@ class Trace(BaseModel):
     duration: float
     entries: List[TraceSpan]
     overwrite: bool = False
+    offline_mode: bool = False
     rules: Optional[Dict[str, Any]] = None
     has_notification: Optional[bool] = False