Skip to content

Ahh/new error format #273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/monitoring/tracing.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ const client = wrap(new OpenAI());
#### `@observe` (Python) / `observe()` (Typescript) ####
The `@observe` decorator (Python) or the `observe()` higher-order function (Typescript) wraps your functions/tools and captures metadata surrounding your function calls, such as:
- Latency
- Input/Output
- Input/Output/Error
- Span type (e.g. `retriever`, `tool`, `LLM call`, etc.)

Here's an example of using the observer mechanism:
Expand Down
63 changes: 46 additions & 17 deletions src/judgeval/common/tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,15 @@ def record_output(self, output: Any):
return span # Return the created entry
# Removed else block - original didn't have one
return None # Return None if no span_id found


def record_error(self, error: Any):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The new record_error method is a good addition for specifically handling errors within a span. Could we add a docstring to explain its purpose, parameters, and what it returns? This would improve code clarity and maintainability for other developers.

current_span_id = current_span_var.get()
if current_span_id:
span = self.span_id_to_span[current_span_id]
span.error = error
return span
return None

def add_span(self, span: TraceSpan):
"""Add a trace span to this trace context"""
self.trace_spans.append(span)
Expand Down Expand Up @@ -677,7 +685,17 @@ def save(self, overwrite: bool = False) -> Tuple[str, dict]:
def delete(self):
return self.trace_manager_client.delete_trace(self.trace_id)


def _capture_exception_for_trace(current_trace: Optional['TraceClient'], exc_info: Tuple[Optional[type], Optional[BaseException], Optional[types.TracebackType]]):
    """Format an exception triple and record it on the given trace.

    Args:
        current_trace: Trace to record the error on. When it is falsy
            (for example ``None``) the call is a no-op.
        exc_info: ``(type, value, traceback)`` triple in the shape returned
            by ``sys.exc_info()``; any element may be ``None``.

    Returns:
        None. The formatted error is handed to ``current_trace.record_error``
        as a dict with the keys ``"type"``, ``"message"`` and ``"traceback"``.
    """
    if not current_trace:
        return

    exc_type, exc_value, exc_traceback_obj = exc_info
    # Compare against None explicitly: an exception instance that defines
    # __len__ or __bool__ can be falsy, and a truthiness test would then
    # discard its real message in favour of the placeholder.
    formatted_exception = {
        "type": exc_type.__name__ if exc_type is not None else "UnknownExceptionType",
        "message": str(exc_value) if exc_value is not None else "No exception message",
        "traceback": traceback.format_tb(exc_traceback_obj) if exc_traceback_obj is not None else [],
    }
    current_trace.record_error(formatted_exception)
class _DeepTracer:
_instance: Optional["_DeepTracer"] = None
_lock: threading.Lock = threading.Lock()
Expand Down Expand Up @@ -869,16 +887,11 @@ def _trace(self, frame: types.FrameType, event: str, arg: Any):
current_span_var.reset(frame.f_locals["_judgment_span_token"])

elif event == "exception":
exc_type, exc_value, exc_traceback = arg
formatted_exception = {
"type": exc_type.__name__,
"message": str(exc_value),
"traceback": traceback.format_tb(exc_traceback)
}
current_trace = current_trace_var.get()
current_trace.record_output({
"error": formatted_exception
})
exc_type = arg[0]
if issubclass(exc_type, (StopIteration, StopAsyncIteration, GeneratorExit)):
return
_capture_exception_for_trace(current_trace, arg)


return self._trace

Expand Down Expand Up @@ -1154,8 +1167,12 @@ async def async_wrapper(*args, **kwargs):
with _DeepTracer():
result = await func(*args, **kwargs)
else:
result = await func(*args, **kwargs)

try:
result = await func(*args, **kwargs)
except Exception as e:
_capture_exception_for_trace(current_trace, sys.exc_info())
raise e

# Record output
span.record_output(result)
return result
Expand All @@ -1175,7 +1192,11 @@ async def async_wrapper(*args, **kwargs):
with _DeepTracer():
result = await func(*args, **kwargs)
else:
result = await func(*args, **kwargs)
try:
result = await func(*args, **kwargs)
except Exception as e:
_capture_exception_for_trace(current_trace, sys.exc_info())
raise e

span.record_output(result)
return result
Expand Down Expand Up @@ -1221,7 +1242,11 @@ def wrapper(*args, **kwargs):
with _DeepTracer():
result = func(*args, **kwargs)
else:
result = func(*args, **kwargs)
try:
result = func(*args, **kwargs)
except Exception as e:
_capture_exception_for_trace(current_trace, sys.exc_info())
raise e

# Record output
span.record_output(result)
Expand All @@ -1243,7 +1268,11 @@ def wrapper(*args, **kwargs):
with _DeepTracer():
result = func(*args, **kwargs)
else:
result = func(*args, **kwargs)
try:
result = func(*args, **kwargs)
except Exception as e:
_capture_exception_for_trace(current_trace, sys.exc_info())
raise e

span.record_output(result)
return result
Expand Down
41 changes: 9 additions & 32 deletions src/judgeval/data/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class TraceSpan(BaseModel):
parent_span_id: Optional[str] = None
span_type: Optional[str] = "span"
inputs: Optional[Dict[str, Any]] = None
error: Optional[Dict[str, Any]] = None
output: Optional[Any] = None
duration: Optional[float] = None
annotation: Optional[List[Dict[str, Any]]] = None
Expand All @@ -26,10 +27,10 @@ def model_dump(self, **kwargs):
"span_id": self.span_id,
"trace_id": self.trace_id,
"depth": self.depth,
# "created_at": datetime.fromtimestamp(self.created_at).isoformat(),
"created_at": datetime.fromtimestamp(self.created_at, tz=timezone.utc).isoformat(),
"inputs": self._serialize_inputs(),
"output": self._serialize_output(),
"inputs": self._serialize_value(self.inputs),
"output": self._serialize_value(self.output),
"error": self._serialize_value(self.error),
"evaluation_runs": [run.model_dump() for run in self.evaluation_runs] if self.evaluation_runs else [],
"parent_span_id": self.parent_span_id,
"function": self.function,
Expand All @@ -42,30 +43,6 @@ def print_span(self):
indent = " " * self.depth
parent_info = f" (parent_id: {self.parent_span_id})" if self.parent_span_id else ""
print(f"{indent}→ {self.function} (id: {self.span_id}){parent_info}")

def _serialize_inputs(self) -> dict:
"""Helper method to serialize input data safely."""
if self.inputs is None:
return {}

serialized_inputs = {}
for key, value in self.inputs.items():
if isinstance(value, BaseModel):
serialized_inputs[key] = value.model_dump()
elif isinstance(value, (list, tuple)):
# Handle lists/tuples of arguments
serialized_inputs[key] = [
item.model_dump() if isinstance(item, BaseModel)
else None if not self._is_json_serializable(item)
else item
for item in value
]
else:
if self._is_json_serializable(value):
serialized_inputs[key] = value
else:
serialized_inputs[key] = self.safe_stringify(value, self.function)
return serialized_inputs

def _is_json_serializable(self, obj: Any) -> bool:
"""Helper method to check if an object is JSON serializable."""
Expand Down Expand Up @@ -94,9 +71,9 @@ def safe_stringify(self, output, function_name):
)
return None

def _serialize_output(self) -> Any:
"""Helper method to serialize output data safely."""
if self.output is None:
def _serialize_value(self, value: Any) -> Any:
"""Helper method to deep serialize a value safely supporting Pydantic Models / regular PyObjects."""
if value is None:
return None

def serialize_value(value):
Expand All @@ -117,8 +94,8 @@ def serialize_value(value):
# Fallback to safe stringification
return self.safe_stringify(value, self.function)

# Start serialization with the top-level output
return serialize_value(self.output)
# Start serialization with the top-level value
return serialize_value(value)

class Trace(BaseModel):
trace_id: str
Expand Down
Loading