From 4d8484e6dea6bdd09d30696960af6fd78206de8c Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Tue, 20 May 2025 19:43:25 -0700 Subject: [PATCH 1/5] feat: start of new record_error --- src/judgeval/common/tracer.py | 10 ++++++++- src/judgeval/data/trace.py | 39 +++++++---------------------------- 2 files changed, 17 insertions(+), 32 deletions(-) diff --git a/src/judgeval/common/tracer.py b/src/judgeval/common/tracer.py index 5984a2cb..6eb2eff1 100644 --- a/src/judgeval/common/tracer.py +++ b/src/judgeval/common/tracer.py @@ -498,7 +498,15 @@ def record_output(self, output: Any): return span # Return the created entry # Removed else block - original didn't have one return None # Return None if no span_id found - + + def record_error(self, error: Any): + current_span_id = current_span_var.get() + if current_span_id: + span = self.span_id_to_span[current_span_id] + span.error = error + return span + return None + def add_span(self, span: TraceSpan): """Add a trace span to this trace context""" self.trace_spans.append(span) diff --git a/src/judgeval/data/trace.py b/src/judgeval/data/trace.py index 1c150481..277988f0 100644 --- a/src/judgeval/data/trace.py +++ b/src/judgeval/data/trace.py @@ -14,6 +14,7 @@ class TraceSpan(BaseModel): parent_span_id: Optional[str] = None span_type: Optional[str] = "span" inputs: Optional[Dict[str, Any]] = None + error: Optional[Dict[str, Any]] = None output: Optional[Any] = None duration: Optional[float] = None annotation: Optional[List[Dict[str, Any]]] = None @@ -26,10 +27,10 @@ def model_dump(self, **kwargs): "span_id": self.span_id, "trace_id": self.trace_id, "depth": self.depth, -# "created_at": datetime.fromtimestamp(self.created_at).isoformat(), "created_at": datetime.fromtimestamp(self.created_at, tz=timezone.utc).isoformat(), - "inputs": self._serialize_inputs(), - "output": self._serialize_output(), + "inputs": self._serialize_value(self.inputs), + "output": self._serialize_value(self.output), + "error": self._serialize_value(self.error), "evaluation_runs": [run.model_dump() for run in self.evaluation_runs] if self.evaluation_runs else [], "parent_span_id": self.parent_span_id, "function": self.function, @@ -42,30 +43,6 @@ def print_span(self): indent = " " * self.depth parent_info = f" (parent_id: {self.parent_span_id})" if self.parent_span_id else "" print(f"{indent}→ {self.function} (id: {self.span_id}){parent_info}") - - def _serialize_inputs(self) -> dict: - """Helper method to serialize input data safely.""" - if self.inputs is None: - return {} - - serialized_inputs = {} - for key, value in self.inputs.items(): - if isinstance(value, BaseModel): - serialized_inputs[key] = value.model_dump() - elif isinstance(value, (list, tuple)): - # Handle lists/tuples of arguments - serialized_inputs[key] = [ - item.model_dump() if isinstance(item, BaseModel) - else None if not self._is_json_serializable(item) - else item - for item in value - ] - else: - if self._is_json_serializable(value): - serialized_inputs[key] = value - else: - serialized_inputs[key] = self.safe_stringify(value, self.function) - return serialized_inputs def _is_json_serializable(self, obj: Any) -> bool: """Helper method to check if an object is JSON serializable.""" @@ -94,9 +71,9 @@ def safe_stringify(self, output, function_name): ) return None - def _serialize_output(self) -> Any: + def _serialize_value(self, value: Any) -> Any: """Helper method to serialize output data safely.""" - if self.output is None: + if value is None: return None def serialize_value(value): @@ -117,8 +94,8 @@ def serialize_value(value): # Fallback to safe stringification return self.safe_stringify(value, self.function) - # Start serialization with the top-level output - return serialize_value(self.output) + # Start serialization with the top-level value + return serialize_value(value) class Trace(BaseModel): trace_id: str From b93b65fe838f6560192c58bc6f4fb9d3fa0c9935 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Wed, 21 May 2025 20:44:40 -0700 Subject: [PATCH 2/5] feat: error property on TraceSpan --- src/judgeval/common/tracer.py | 58 ++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/src/judgeval/common/tracer.py b/src/judgeval/common/tracer.py index 6eb2eff1..274fe6db 100644 --- a/src/judgeval/common/tracer.py +++ b/src/judgeval/common/tracer.py @@ -884,9 +884,7 @@ def _trace(self, frame: types.FrameType, event: str, arg: Any): "traceback": traceback.format_tb(exc_traceback) } current_trace = current_trace_var.get() - current_trace.record_output({ - "error": formatted_exception - }) + current_trace.record_error(formatted_exception) return self._trace @@ -1162,8 +1160,19 @@ async def async_wrapper(*args, **kwargs): with _DeepTracer(): result = await func(*args, **kwargs) else: - result = await func(*args, **kwargs) - + try: + result = await func(*args, **kwargs) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + formatted_exception = { + "type": exc_type.__name__, + "message": str(exc_value), + "traceback": traceback.format_tb(exc_traceback) + } + current_trace = current_trace_var.get() + current_trace.record_error(formatted_exception) + raise e + # Record output span.record_output(result) return result @@ -1183,7 +1192,18 @@ async def async_wrapper(*args, **kwargs): with _DeepTracer(): result = await func(*args, **kwargs) else: - result = await func(*args, **kwargs) + try: + result = await func(*args, **kwargs) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + formatted_exception = { + "type": exc_type.__name__, + "message": str(exc_value), + "traceback": traceback.format_tb(exc_traceback) + } + current_trace = current_trace_var.get() + current_trace.record_error(formatted_exception) + raise e span.record_output(result) return result @@ -1229,7 +1249,18 @@ def wrapper(*args, **kwargs): with _DeepTracer(): result = func(*args, **kwargs) else: - result = func(*args, **kwargs) + try: + result = func(*args, **kwargs) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + formatted_exception = { + "type": exc_type.__name__, + "message": str(exc_value), + "traceback": traceback.format_tb(exc_traceback) + } + current_trace = current_trace_var.get() + current_trace.record_error(formatted_exception) + raise e # Record output span.record_output(result) @@ -1251,7 +1282,18 @@ def wrapper(*args, **kwargs): with _DeepTracer(): result = func(*args, **kwargs) else: - result = func(*args, **kwargs) + try: + result = func(*args, **kwargs) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + formatted_exception = { + "type": exc_type.__name__, + "message": str(exc_value), + "traceback": traceback.format_tb(exc_traceback) + } + current_trace = current_trace_var.get() + current_trace.record_error(formatted_exception) + raise e span.record_output(result) return result From 0adb53418dcd24ff03f9eacabd59f671b755f10a Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Fri, 23 May 2025 17:24:47 -0700 Subject: [PATCH 3/5] chore(docs): update observe also records Error --- docs/monitoring/tracing.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/monitoring/tracing.mdx b/docs/monitoring/tracing.mdx index 7da12a71..a7ff2901 100644 --- a/docs/monitoring/tracing.mdx +++ b/docs/monitoring/tracing.mdx @@ -87,7 +87,7 @@ const client = wrap(new OpenAI()); #### `@observe` (Python) / `observe()` (Typescript) #### The `@observe` decorator (Python) or the `observe()` higher-order function (Typescript) wraps your functions/tools and captures metadata surrounding your function calls, such as: - Latency -- Input/Output +- Input/Output/Error - Span type (e.g. `retriever`, `tool`, `LLM call`, etc.) Here's an example of using the observer mechanism: From 1918bf7b662985f664a9761f041fac61b3602ba1 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Fri, 23 May 2025 17:39:35 -0700 Subject: [PATCH 4/5] chore: cleanup capture exception impl --- src/judgeval/common/tracer.py | 61 ++++++++++++----------------------- 1 file changed, 20 insertions(+), 41 deletions(-) diff --git a/src/judgeval/common/tracer.py b/src/judgeval/common/tracer.py index 274fe6db..c58193b0 100644 --- a/src/judgeval/common/tracer.py +++ b/src/judgeval/common/tracer.py @@ -685,7 +685,17 @@ def save(self, overwrite: bool = False) -> Tuple[str, dict]: def delete(self): return self.trace_manager_client.delete_trace(self.trace_id) - +def _capture_exception_for_trace(current_trace: Optional['TraceClient'], exc_info: Tuple[Optional[type], Optional[BaseException], Optional[types.TracebackType]]): + if not current_trace: + return + + exc_type, exc_value, exc_traceback_obj = exc_info + formatted_exception = { + "type": exc_type.__name__ if exc_type else "UnknownExceptionType", + "message": str(exc_value) if exc_value else "No exception message", + "traceback": traceback.format_tb(exc_traceback_obj) if exc_traceback_obj else [] + } + current_trace.record_error(formatted_exception) class _DeepTracer: _instance: Optional["_DeepTracer"] = None _lock: threading.Lock = threading.Lock() @@ -877,14 +887,11 @@ def _trace(self, frame: types.FrameType, event: str, arg: Any): current_span_var.reset(frame.f_locals["_judgment_span_token"]) elif event == "exception": - exc_type, exc_value, exc_traceback = arg - formatted_exception = { - "type": exc_type.__name__, - "message": str(exc_value), - "traceback": traceback.format_tb(exc_traceback) - } - current_trace = current_trace_var.get() - current_trace.record_error(formatted_exception) + exc_type = arg[0] + if issubclass(exc_type, (StopIteration, StopAsyncIteration, GeneratorExit)): + return + _capture_exception_for_trace(current_trace, arg) + return self._trace @@ -1163,14 +1170,7 @@ async def async_wrapper(*args, **kwargs): try: result = await func(*args, **kwargs) except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - formatted_exception = { - "type": exc_type.__name__, - "message": str(exc_value), - "traceback": traceback.format_tb(exc_traceback) - } - current_trace = current_trace_var.get() - current_trace.record_error(formatted_exception) + _capture_exception_for_trace(current_trace, sys.exc_info()) raise e # Record output @@ -1195,14 +1195,7 @@ async def async_wrapper(*args, **kwargs): try: result = await func(*args, **kwargs) except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - formatted_exception = { - "type": exc_type.__name__, - "message": str(exc_value), - "traceback": traceback.format_tb(exc_traceback) - } - current_trace = current_trace_var.get() - current_trace.record_error(formatted_exception) + _capture_exception_for_trace(current_trace, sys.exc_info()) raise e span.record_output(result) @@ -1252,14 +1245,7 @@ def wrapper(*args, **kwargs): try: result = func(*args, **kwargs) except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - formatted_exception = { - "type": exc_type.__name__, - "message": str(exc_value), - "traceback": traceback.format_tb(exc_traceback) - } - current_trace = current_trace_var.get() - current_trace.record_error(formatted_exception) + _capture_exception_for_trace(current_trace, sys.exc_info()) raise e # Record output @@ -1285,14 +1271,7 @@ def wrapper(*args, **kwargs): try: result = func(*args, **kwargs) except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - formatted_exception = { - "type": exc_type.__name__, - "message": str(exc_value), - "traceback": traceback.format_tb(exc_traceback) - } - current_trace = current_trace_var.get() - current_trace.record_error(formatted_exception) + _capture_exception_for_trace(current_trace, sys.exc_info()) raise e span.record_output(result) From 781644fd0a86e7021cd9f9f98084042b772814b6 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Fri, 23 May 2025 17:42:02 -0700 Subject: [PATCH 5/5] chore: better comment on _serialize_value --- src/judgeval/data/trace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/judgeval/data/trace.py b/src/judgeval/data/trace.py index 277988f0..c505e3bd 100644 --- a/src/judgeval/data/trace.py +++ b/src/judgeval/data/trace.py @@ -72,7 +72,7 @@ def safe_stringify(self, output, function_name): return None def _serialize_value(self, value: Any) -> Any: - """Helper method to serialize output data safely.""" + """Helper method to deep serialize a value safely supporting Pydantic Models / regular PyObjects.""" if value is None: return None