diff --git a/src/judgeval/common/tracer.py b/src/judgeval/common/tracer.py index d64b62c1..40dfac00 100644 --- a/src/judgeval/common/tracer.py +++ b/src/judgeval/common/tracer.py @@ -18,7 +18,7 @@ import json from contextlib import contextmanager, asynccontextmanager, AbstractAsyncContextManager, AbstractContextManager # Import context manager bases from dataclasses import dataclass, field -from datetime import datetime +from datetime import datetime, timezone from http import HTTPStatus from typing import ( Any, @@ -814,7 +814,7 @@ def save(self, overwrite: bool = False) -> Tuple[str, dict]: "trace_id": self.trace_id, "name": self.name, "project_name": self.project_name, - "created_at": datetime.utcfromtimestamp(self.start_time).isoformat(), + "created_at": datetime.fromtimestamp(self.start_time, timezone.utc).isoformat(), "duration": total_duration, "trace_spans": [span.model_dump() for span in self.trace_spans], "evaluation_runs": [run.model_dump() for run in self.evaluation_runs], diff --git a/src/judgeval/rules.py b/src/judgeval/rules.py index 91c8ed40..a03b4967 100644 --- a/src/judgeval/rules.py +++ b/src/judgeval/rules.py @@ -9,13 +9,13 @@ from concurrent.futures import ThreadPoolExecutor import time import uuid +import os +import re +import json +from datetime import datetime from judgeval.scorers import APIJudgmentScorer, JudgevalScorer - -class AlertStatus(str, Enum): - """Status of an alert evaluation.""" - TRIGGERED = "triggered" - NOT_TRIGGERED = "not_triggered" +from judgeval.utils.alerts import AlertStatus, AlertResult class Condition(BaseModel): """ @@ -144,7 +144,8 @@ def model_dump(self, **kwargs): # Create standardized metric representation needed by server API metric_data = { "score_type": "", - "threshold": 0.0 + "threshold": 0.0, + "name": "" } # First try to use object's own serialization methods @@ -182,6 +183,16 @@ def model_dump(self, **kwargs): # Use condition threshold if metric doesn't have one metric_data['threshold'] = 
self.conditions[i].threshold + # Make sure name is set + if not metric_data.get('name'): + if hasattr(metric_obj, '__name__'): + metric_data['name'] = metric_obj.__name__ + elif hasattr(metric_obj, 'name'): + metric_data['name'] = metric_obj.name + else: + # Fall back to score_type; use str(metric_obj) when score_type is missing or empty (the key is pre-seeded with "") + metric_data['name'] = metric_data.get('score_type') or str(metric_obj) + # Update the condition with our properly serialized metric condition["metric"] = metric_data @@ -199,47 +210,6 @@ def validate_combine_type(cls, v): raise ValueError(f"combine_type must be 'all' or 'any', got: {v}") return v -class AlertResult(BaseModel): - """ - Result of evaluating a rule. - - Example: - { - "status": "triggered", - "rule_name": "Quality Check", - "conditions_result": [ - {"metric": "faithfulness", "value": 0.6, "threshold": 0.7, "passed": False}, - {"metric": "relevancy", "value": 0.9, "threshold": 0.8, "passed": True} - ], - "rule_id": "123e4567-e89b-12d3-a456-426614174000", - "metadata": { - "example_id": "example_123", - "timestamp": "20240321_123456" - }, - "notification": { - "enabled": true, - "communication_methods": ["slack", "email"], - "email_addresses": ["user1@example.com", "user2@example.com"] - } - } - """ - status: AlertStatus - rule_id: Optional[str] = None # The unique identifier of the rule - rule_name: str - conditions_result: List[Dict[str, Any]] - metadata: Dict[str, Any] = {} - notification: Optional[NotificationConfig] = None # Configuration for notifications - - @property - def example_id(self) -> Optional[str]: - """Get example_id from metadata for backward compatibility""" - return self.metadata.get("example_id") - - @property - def timestamp(self) -> Optional[str]: - """Get timestamp from metadata for backward compatibility""" - return self.metadata.get("timestamp") - class RulesEngine: """ Engine for creating and evaluating rules against metrics.
@@ -406,7 +376,7 @@ def evaluate_rules(self, scores: Dict[str, float], example_metadata: Optional[Di # If rule has a notification config and the alert is triggered, include it in the result notification_config = rule.notification - # Set the alert status based on whether the rule was triggered + # Set the alert status based on whether the rule was triggered using proper enum values status = AlertStatus.TRIGGERED if triggered else AlertStatus.NOT_TRIGGERED # Create the alert result @@ -416,7 +386,10 @@ def evaluate_rules(self, scores: Dict[str, float], example_metadata: Optional[Di rule_name=rule.name, conditions_result=condition_results, notification=notification_config, - metadata=example_metadata or {} + metadata=example_metadata or {}, + combine_type=rule.combine_type, + project_id=example_metadata.get("project_id") if example_metadata else None, + trace_span_id=example_metadata.get("trace_span_id") if example_metadata else None ) results[rule_id] = alert_result diff --git a/src/judgeval/utils/alerts.py b/src/judgeval/utils/alerts.py index 53a9fb50..515ae286 100644 --- a/src/judgeval/utils/alerts.py +++ b/src/judgeval/utils/alerts.py @@ -20,12 +20,20 @@ class AlertResult(BaseModel): status: Status of the alert (triggered or not) conditions_result: List of condition evaluation results metadata: Dictionary containing example_id, timestamp, and other metadata + notification: Optional notification configuration for triggered alerts + combine_type: The combination type used ("all" or "any") + project_id: Optional project identifier + trace_span_id: Optional trace span identifier """ rule_name: str rule_id: Optional[str] = None # The unique identifier of the rule status: AlertStatus conditions_result: List[Dict[str, Any]] = [] metadata: Dict[str, Any] = {} + notification: Optional[Any] = None # NotificationConfig when triggered, None otherwise + combine_type: Optional[str] = None # "all" or "any" + project_id: Optional[str] = None # Project identifier + trace_span_id: 
Optional[str] = None # Trace span identifier @property def example_id(self) -> Optional[str]: diff --git a/src/tests/notification/test_notification_integration.py b/src/tests/notification/test_notification_integration.py index dfb979cc..e199f1f0 100644 --- a/src/tests/notification/test_notification_integration.py +++ b/src/tests/notification/test_notification_integration.py @@ -7,7 +7,8 @@ import json from typing import Dict, List, Optional -from judgeval.rules import Rule, Condition, NotificationConfig, AlertStatus, RulesEngine +from judgeval.rules import Rule, Condition, NotificationConfig, RulesEngine +from judgeval.utils.alerts import AlertStatus from judgeval.scorers import AnswerRelevancyScorer, FaithfulnessScorer, AnswerCorrectnessScorer from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer diff --git a/src/tests/notification/test_notification_serialization.py b/src/tests/notification/test_notification_serialization.py index 62e475cf..7d18cd31 100644 --- a/src/tests/notification/test_notification_serialization.py +++ b/src/tests/notification/test_notification_serialization.py @@ -8,10 +8,9 @@ from judgeval.rules import ( Rule, Condition, - NotificationConfig, - AlertStatus, - AlertResult + NotificationConfig ) +from judgeval.utils.alerts import AlertStatus, AlertResult from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer diff --git a/src/tests/notification/test_notification_unit.py b/src/tests/notification/test_notification_unit.py index dd599c02..5cbf8ef5 100644 --- a/src/tests/notification/test_notification_unit.py +++ b/src/tests/notification/test_notification_unit.py @@ -9,9 +9,9 @@ Rule, Condition, RulesEngine, - AlertStatus, NotificationConfig ) +from judgeval.utils.alerts import 
AlertStatus from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer from judgeval.judgment_client import JudgmentClient diff --git a/src/tests/test_rules_skip.py b/src/tests/test_rules_skip.py index a05e4207..c8f0daed 100644 --- a/src/tests/test_rules_skip.py +++ b/src/tests/test_rules_skip.py @@ -4,7 +4,8 @@ import pytest from uuid import uuid4 -from judgeval.rules import Rule, Condition, AlertStatus, RulesEngine +from judgeval.rules import Rule, Condition, RulesEngine +from judgeval.utils.alerts import AlertStatus from judgeval.scorers import APIJudgmentScorer diff --git a/src/tests/test_rules_with_api_scorers.py b/src/tests/test_rules_with_api_scorers.py index 4d7d3d95..d7b5757a 100644 --- a/src/tests/test_rules_with_api_scorers.py +++ b/src/tests/test_rules_with_api_scorers.py @@ -3,7 +3,8 @@ import pytest from unittest.mock import MagicMock, patch -from judgeval.rules import Rule, Condition, RulesEngine, AlertStatus +from judgeval.rules import Rule, Condition, RulesEngine +from judgeval.utils.alerts import AlertStatus from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer from judgeval.judgment_client import JudgmentClient