Alert refactor #305

Merged 5 commits on Jun 11, 2025

Changes from all commits:

4 changes: 2 additions & 2 deletions src/judgeval/common/tracer.py
@@ -18,7 +18,7 @@
 import json
 from contextlib import contextmanager, asynccontextmanager, AbstractAsyncContextManager, AbstractContextManager # Import context manager bases
 from dataclasses import dataclass, field
-from datetime import datetime
+from datetime import datetime, timezone
 from http import HTTPStatus
 from typing import (
     Any,
@@ -814,7 +814,7 @@ def save(self, overwrite: bool = False) -> Tuple[str, dict]:
             "trace_id": self.trace_id,
             "name": self.name,
             "project_name": self.project_name,
-            "created_at": datetime.utcfromtimestamp(self.start_time).isoformat(),
+            "created_at": datetime.fromtimestamp(self.start_time, timezone.utc).isoformat(),
             "duration": total_duration,
             "trace_spans": [span.model_dump() for span in self.trace_spans],
             "evaluation_runs": [run.model_dump() for run in self.evaluation_runs],
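For context on the tracer change: `datetime.utcfromtimestamp()` returns a naive datetime and is deprecated since Python 3.12, while `datetime.fromtimestamp(ts, timezone.utc)` yields a timezone-aware value whose ISO string carries the UTC offset. A minimal sketch of the difference (the timestamp value is illustrative, not from the PR):

```python
from datetime import datetime, timezone

start_time = 1718064000.0  # illustrative epoch seconds

# Old, deprecated since Python 3.12; naive result with no offset:
#   datetime.utcfromtimestamp(start_time).isoformat()
#   -> '2024-06-11T00:00:00'

# New; timezone-aware result, offset included in the ISO string:
print(datetime.fromtimestamp(start_time, timezone.utc).isoformat())
# -> '2024-06-11T00:00:00+00:00'
```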
71 changes: 22 additions & 49 deletions src/judgeval/rules.py
@@ -9,13 +9,13 @@
 from concurrent.futures import ThreadPoolExecutor
 import time
 import uuid
+import os
+import re
+import json
+from datetime import datetime

 from judgeval.scorers import APIJudgmentScorer, JudgevalScorer

-class AlertStatus(str, Enum):
-    """Status of an alert evaluation."""
-    TRIGGERED = "triggered"
-    NOT_TRIGGERED = "not_triggered"
+from judgeval.utils.alerts import AlertStatus, AlertResult

 class Condition(BaseModel):
     """
@@ -144,7 +144,8 @@ def model_dump(self, **kwargs):
         # Create standardized metric representation needed by server API
         metric_data = {
             "score_type": "",
-            "threshold": 0.0
+            "threshold": 0.0,
+            "name": ""
         }

         # First try to use object's own serialization methods
@@ -182,6 +183,16 @@ def model_dump(self, **kwargs):
                 # Use condition threshold if metric doesn't have one
                 metric_data['threshold'] = self.conditions[i].threshold

+            # Make sure name is set
+            if not metric_data.get('name'):
+                if hasattr(metric_obj, '__name__'):
+                    metric_data['name'] = metric_obj.__name__
+                elif hasattr(metric_obj, 'name'):
+                    metric_data['name'] = metric_obj.name
+                else:
+                    # Fallback to score_type if available
+                    metric_data['name'] = metric_data.get('score_type', str(metric_obj))
+
             # Update the condition with our properly serialized metric
             condition["metric"] = metric_data

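The new fallback chain prefers a `__name__` attribute (functions, classes), then an instance `name` attribute, then the serialized `score_type`. A hedged sketch of the behavior; `FakeScorer` is an illustrative stand-in, not a judgeval class:

```python
class FakeScorer:
    """Illustrative stand-in for a scorer instance (not part of judgeval)."""
    name = "faithfulness"

metric_obj = FakeScorer()
metric_data = {"score_type": "faithfulness", "threshold": 0.7, "name": ""}

if not metric_data.get('name'):
    if hasattr(metric_obj, '__name__'):      # true for functions and classes
        metric_data['name'] = metric_obj.__name__
    elif hasattr(metric_obj, 'name'):        # true for instances with a name field
        metric_data['name'] = metric_obj.name
    else:                                    # fall back to score_type if available
        metric_data['name'] = metric_data.get('score_type', str(metric_obj))

print(metric_data['name'])  # -> 'faithfulness' (instances lack __name__, so name wins)
```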
@@ -199,47 +210,6 @@ def validate_combine_type(cls, v):
             raise ValueError(f"combine_type must be 'all' or 'any', got: {v}")
         return v

-class AlertResult(BaseModel):
-    """
-    Result of evaluating a rule.
-
-    Example:
-        {
-            "status": "triggered",
-            "rule_name": "Quality Check",
-            "conditions_result": [
-                {"metric": "faithfulness", "value": 0.6, "threshold": 0.7, "passed": False},
-                {"metric": "relevancy", "value": 0.9, "threshold": 0.8, "passed": True}
-            ],
-            "rule_id": "123e4567-e89b-12d3-a456-426614174000",
-            "metadata": {
-                "example_id": "example_123",
-                "timestamp": "20240321_123456"
-            },
-            "notification": {
-                "enabled": true,
-                "communication_methods": ["slack", "email"],
-                "email_addresses": ["user1@example.com", "user2@example.com"]
-            }
-        }
-    """
-    status: AlertStatus
-    rule_id: Optional[str] = None # The unique identifier of the rule
-    rule_name: str
-    conditions_result: List[Dict[str, Any]]
-    metadata: Dict[str, Any] = {}
-    notification: Optional[NotificationConfig] = None # Configuration for notifications
-
-    @property
-    def example_id(self) -> Optional[str]:
-        """Get example_id from metadata for backward compatibility"""
-        return self.metadata.get("example_id")
-
-    @property
-    def timestamp(self) -> Optional[str]:
-        """Get timestamp from metadata for backward compatibility"""
-        return self.metadata.get("timestamp")
-
 class RulesEngine:
     """
     Engine for creating and evaluating rules against metrics.
@@ -406,7 +376,7 @@ def evaluate_rules(self, scores: Dict[str, float], example_metadata: Optional[Di
             # If rule has a notification config and the alert is triggered, include it in the result
             notification_config = rule.notification

-            # Set the alert status based on whether the rule was triggered
+            # Set the alert status based on whether the rule was triggered using proper enum values
             status = AlertStatus.TRIGGERED if triggered else AlertStatus.NOT_TRIGGERED

             # Create the alert result
@@ -416,7 +386,10 @@ def evaluate_rules(self, scores: Dict[str, float], example_metadata: Optional[Di
                 rule_name=rule.name,
                 conditions_result=condition_results,
                 notification=notification_config,
-                metadata=example_metadata or {}
+                metadata=example_metadata or {},
+                combine_type=rule.combine_type,
+                project_id=example_metadata.get("project_id") if example_metadata else None,
+                trace_span_id=example_metadata.get("trace_span_id") if example_metadata else None
             )

             results[rule_id] = alert_result
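Net effect for callers: `evaluate_rules` now forwards `combine_type` from the rule and picks `project_id`/`trace_span_id` out of the metadata dict. A hedged usage sketch, assuming a `RulesEngine` already constructed with rules; the metadata keys and values here are illustrative:

```python
results = engine.evaluate_rules(
    scores={"faithfulness": 0.6, "relevancy": 0.9},
    example_metadata={
        "example_id": "example_123",    # still surfaced via AlertResult.example_id
        "project_id": "proj_abc",       # new: copied onto AlertResult.project_id
        "trace_span_id": "span_xyz",    # new: copied onto AlertResult.trace_span_id
    },
)
for rule_id, alert in results.items():
    print(alert.status, alert.combine_type, alert.project_id, alert.trace_span_id)
```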
8 changes: 8 additions & 0 deletions src/judgeval/utils/alerts.py
@@ -20,12 +20,20 @@ class AlertResult(BaseModel):
         status: Status of the alert (triggered or not)
         conditions_result: List of condition evaluation results
         metadata: Dictionary containing example_id, timestamp, and other metadata
+        notification: Optional notification configuration for triggered alerts
+        combine_type: The combination type used ("all" or "any")
+        project_id: Optional project identifier
+        trace_span_id: Optional trace span identifier
     """
     rule_name: str
     rule_id: Optional[str] = None # The unique identifier of the rule
     status: AlertStatus
     conditions_result: List[Dict[str, Any]] = []
     metadata: Dict[str, Any] = {}
+    notification: Optional[Any] = None # NotificationConfig when triggered, None otherwise
+    combine_type: Optional[str] = None # "all" or "any"
+    project_id: Optional[str] = None # Project identifier
+    trace_span_id: Optional[str] = None # Trace span identifier

     @property
     def example_id(self) -> Optional[str]:
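With the model now canonical in `judgeval/utils/alerts.py`, a minimal construction sketch showing the new optional fields defaulting to `None` and the backward-compatible `example_id` property (field values are illustrative):

```python
from judgeval.utils.alerts import AlertStatus, AlertResult

result = AlertResult(
    rule_name="Quality Check",
    status=AlertStatus.NOT_TRIGGERED,
    metadata={"example_id": "example_123"},
)
assert result.combine_type is None         # new field, optional
assert result.project_id is None           # new field, optional
assert result.trace_span_id is None        # new field, optional
assert result.example_id == "example_123"  # property reads from metadata
```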
3 changes: 2 additions & 1 deletion src/tests/notification/test_notification_integration.py
@@ -7,7 +7,8 @@
 import json
 from typing import Dict, List, Optional

-from judgeval.rules import Rule, Condition, NotificationConfig, AlertStatus, RulesEngine
+from judgeval.rules import Rule, Condition, NotificationConfig, RulesEngine
+from judgeval.utils.alerts import AlertStatus
 from judgeval.scorers import AnswerRelevancyScorer, FaithfulnessScorer, AnswerCorrectnessScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer
5 changes: 2 additions & 3 deletions src/tests/notification/test_notification_serialization.py
@@ -8,10 +8,9 @@
 from judgeval.rules import (
     Rule,
     Condition,
-    NotificationConfig,
-    AlertStatus,
-    AlertResult
+    NotificationConfig
 )
+from judgeval.utils.alerts import AlertStatus, AlertResult
 from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer

2 changes: 1 addition & 1 deletion src/tests/notification/test_notification_unit.py
@@ -9,9 +9,9 @@
     Rule,
     Condition,
     RulesEngine,
-    AlertStatus,
     NotificationConfig
 )
+from judgeval.utils.alerts import AlertStatus
 from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer
 from judgeval.judgment_client import JudgmentClient
3 changes: 2 additions & 1 deletion src/tests/test_rules_skip.py
@@ -4,7 +4,8 @@

 import pytest
 from uuid import uuid4
-from judgeval.rules import Rule, Condition, AlertStatus, RulesEngine
+from judgeval.rules import Rule, Condition, RulesEngine
+from judgeval.utils.alerts import AlertStatus
 from judgeval.scorers import APIJudgmentScorer


3 changes: 2 additions & 1 deletion src/tests/test_rules_with_api_scorers.py
@@ -3,7 +3,8 @@
 import pytest
 from unittest.mock import MagicMock, patch

-from judgeval.rules import Rule, Condition, RulesEngine, AlertStatus
+from judgeval.rules import Rule, Condition, RulesEngine
+from judgeval.utils.alerts import AlertStatus
 from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer
 from judgeval.judgment_client import JudgmentClient