diff --git a/docs/monitoring/tracing.mdx b/docs/monitoring/tracing.mdx index 7da12a71..a7ff2901 100644 --- a/docs/monitoring/tracing.mdx +++ b/docs/monitoring/tracing.mdx @@ -87,7 +87,7 @@ const client = wrap(new OpenAI()); #### `@observe` (Python) / `observe()` (Typescript) #### The `@observe` decorator (Python) or the `observe()` higher-order function (Typescript) wraps your functions/tools and captures metadata surrounding your function calls, such as: - Latency -- Input/Output +- Input/Output/Error - Span type (e.g. `retriever`, `tool`, `LLM call`, etc.) Here's an example of using the observer mechanism: diff --git a/src/judgeval/common/tracer.py b/src/judgeval/common/tracer.py index 5984a2cb..c58193b0 100644 --- a/src/judgeval/common/tracer.py +++ b/src/judgeval/common/tracer.py @@ -498,7 +498,15 @@ def record_output(self, output: Any): return span # Return the created entry # Removed else block - original didn't have one return None # Return None if no span_id found - + + def record_error(self, error: Any): + current_span_id = current_span_var.get() + if current_span_id: + span = self.span_id_to_span[current_span_id] + span.error = error + return span + return None + def add_span(self, span: TraceSpan): """Add a trace span to this trace context""" self.trace_spans.append(span) @@ -677,7 +685,17 @@ def save(self, overwrite: bool = False) -> Tuple[str, dict]: def delete(self): return self.trace_manager_client.delete_trace(self.trace_id) - +def _capture_exception_for_trace(current_trace: Optional['TraceClient'], exc_info: Tuple[Optional[type], Optional[BaseException], Optional[types.TracebackType]]): + if not current_trace: + return + + exc_type, exc_value, exc_traceback_obj = exc_info + formatted_exception = { + "type": exc_type.__name__ if exc_type else "UnknownExceptionType", + "message": str(exc_value) if exc_value else "No exception message", + "traceback": traceback.format_tb(exc_traceback_obj) if exc_traceback_obj else [] + } + current_trace.record_error(formatted_exception) class _DeepTracer: _instance: Optional["_DeepTracer"] = None _lock: threading.Lock = threading.Lock() @@ -869,16 +887,11 @@ def _trace(self, frame: types.FrameType, event: str, arg: Any): current_span_var.reset(frame.f_locals["_judgment_span_token"]) elif event == "exception": - exc_type, exc_value, exc_traceback = arg - formatted_exception = { - "type": exc_type.__name__, - "message": str(exc_value), - "traceback": traceback.format_tb(exc_traceback) - } - current_trace = current_trace_var.get() - current_trace.record_output({ - "error": formatted_exception - }) + exc_type = arg[0] + if issubclass(exc_type, (StopIteration, StopAsyncIteration, GeneratorExit)): + return + _capture_exception_for_trace(current_trace, arg) + return self._trace @@ -1154,8 +1167,12 @@ async def async_wrapper(*args, **kwargs): with _DeepTracer(): result = await func(*args, **kwargs) else: - result = await func(*args, **kwargs) - + try: + result = await func(*args, **kwargs) + except Exception as e: + _capture_exception_for_trace(current_trace, sys.exc_info()) + raise e + # Record output span.record_output(result) return result @@ -1175,7 +1192,11 @@ async def async_wrapper(*args, **kwargs): with _DeepTracer(): result = await func(*args, **kwargs) else: - result = await func(*args, **kwargs) + try: + result = await func(*args, **kwargs) + except Exception as e: + _capture_exception_for_trace(current_trace, sys.exc_info()) + raise e span.record_output(result) return result @@ -1221,7 +1242,11 @@ def wrapper(*args, **kwargs): with _DeepTracer(): result = func(*args, **kwargs) else: - result = func(*args, **kwargs) + try: + result = func(*args, **kwargs) + except Exception as e: + _capture_exception_for_trace(current_trace, sys.exc_info()) + raise e # Record output span.record_output(result) @@ -1243,7 +1268,11 @@ def wrapper(*args, **kwargs): with _DeepTracer(): result = func(*args, **kwargs) else: - result = func(*args, **kwargs) + try: + result = func(*args, **kwargs) + except Exception as e: + _capture_exception_for_trace(current_trace, sys.exc_info()) + raise e span.record_output(result) return result diff --git a/src/judgeval/data/trace.py b/src/judgeval/data/trace.py index 1c150481..c505e3bd 100644 --- a/src/judgeval/data/trace.py +++ b/src/judgeval/data/trace.py @@ -14,6 +14,7 @@ class TraceSpan(BaseModel): parent_span_id: Optional[str] = None span_type: Optional[str] = "span" inputs: Optional[Dict[str, Any]] = None + error: Optional[Dict[str, Any]] = None output: Optional[Any] = None duration: Optional[float] = None annotation: Optional[List[Dict[str, Any]]] = None @@ -26,10 +27,10 @@ def model_dump(self, **kwargs): "span_id": self.span_id, "trace_id": self.trace_id, "depth": self.depth, -# "created_at": datetime.fromtimestamp(self.created_at).isoformat(), "created_at": datetime.fromtimestamp(self.created_at, tz=timezone.utc).isoformat(), - "inputs": self._serialize_inputs(), - "output": self._serialize_output(), + "inputs": self._serialize_value(self.inputs), + "output": self._serialize_value(self.output), + "error": self._serialize_value(self.error), "evaluation_runs": [run.model_dump() for run in self.evaluation_runs] if self.evaluation_runs else [], "parent_span_id": self.parent_span_id, "function": self.function, @@ -42,30 +43,6 @@ def print_span(self): indent = " " * self.depth parent_info = f" (parent_id: {self.parent_span_id})" if self.parent_span_id else "" print(f"{indent}→ {self.function} (id: {self.span_id}){parent_info}") - - def _serialize_inputs(self) -> dict: - """Helper method to serialize input data safely.""" - if self.inputs is None: - return {} - - serialized_inputs = {} - for key, value in self.inputs.items(): - if isinstance(value, BaseModel): - serialized_inputs[key] = value.model_dump() - elif isinstance(value, (list, tuple)): - # Handle lists/tuples of arguments - serialized_inputs[key] = [ - item.model_dump() if isinstance(item, BaseModel) - else None if not self._is_json_serializable(item) - else item - for item in value - ] - else: - if self._is_json_serializable(value): - serialized_inputs[key] = value - else: - serialized_inputs[key] = self.safe_stringify(value, self.function) - return serialized_inputs def _is_json_serializable(self, obj: Any) -> bool: """Helper method to check if an object is JSON serializable.""" @@ -94,9 +71,9 @@ def safe_stringify(self, output, function_name): ) return None - def _serialize_output(self) -> Any: - """Helper method to serialize output data safely.""" - if self.output is None: + def _serialize_value(self, value: Any) -> Any: + """Helper method to deep serialize a value safely supporting Pydantic Models / regular PyObjects.""" + if value is None: return None def serialize_value(value): @@ -117,8 +94,8 @@ def serialize_value(value): # Fallback to safe stringification return self.safe_stringify(value, self.function) - # Start serialization with the top-level output - return serialize_value(self.output) + # Start serialization with the top-level value + return serialize_value(value) class Trace(BaseModel): trace_id: str