Skip to content

Commit 10597eb

Browse files
Mandolaro and JCamyre authored
Alert refactor (#305)
* fix the rules
* fix alert types
* fix tests
* fixing naming

---------

Co-authored-by: Joseph S Camyre <68767176+JCamyre@users.noreply.github.com>
1 parent 56869bf commit 10597eb

File tree

8 files changed

+41
-58
lines changed

8 files changed

+41
-58
lines changed

src/judgeval/common/tracer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import json
1919
from contextlib import contextmanager, asynccontextmanager, AbstractAsyncContextManager, AbstractContextManager # Import context manager bases
2020
from dataclasses import dataclass, field
21-
from datetime import datetime
21+
from datetime import datetime, timezone
2222
from http import HTTPStatus
2323
from typing import (
2424
Any,
@@ -814,7 +814,7 @@ def save(self, overwrite: bool = False) -> Tuple[str, dict]:
814814
"trace_id": self.trace_id,
815815
"name": self.name,
816816
"project_name": self.project_name,
817-
"created_at": datetime.utcfromtimestamp(self.start_time).isoformat(),
817+
"created_at": datetime.fromtimestamp(self.start_time, timezone.utc).isoformat(),
818818
"duration": total_duration,
819819
"trace_spans": [span.model_dump() for span in self.trace_spans],
820820
"evaluation_runs": [run.model_dump() for run in self.evaluation_runs],

src/judgeval/rules.py

Lines changed: 22 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
from concurrent.futures import ThreadPoolExecutor
1010
import time
1111
import uuid
12+
import os
13+
import re
14+
import json
15+
from datetime import datetime
1216

1317
from judgeval.scorers import APIJudgmentScorer, JudgevalScorer
14-
15-
class AlertStatus(str, Enum):
16-
"""Status of an alert evaluation."""
17-
TRIGGERED = "triggered"
18-
NOT_TRIGGERED = "not_triggered"
18+
from judgeval.utils.alerts import AlertStatus, AlertResult
1919

2020
class Condition(BaseModel):
2121
"""
@@ -144,7 +144,8 @@ def model_dump(self, **kwargs):
144144
# Create standardized metric representation needed by server API
145145
metric_data = {
146146
"score_type": "",
147-
"threshold": 0.0
147+
"threshold": 0.0,
148+
"name": ""
148149
}
149150

150151
# First try to use object's own serialization methods
@@ -182,6 +183,16 @@ def model_dump(self, **kwargs):
182183
# Use condition threshold if metric doesn't have one
183184
metric_data['threshold'] = self.conditions[i].threshold
184185

186+
# Make sure name is set
187+
if not metric_data.get('name'):
188+
if hasattr(metric_obj, '__name__'):
189+
metric_data['name'] = metric_obj.__name__
190+
elif hasattr(metric_obj, 'name'):
191+
metric_data['name'] = metric_obj.name
192+
else:
193+
# Fallback to score_type if available
194+
metric_data['name'] = metric_data.get('score_type', str(metric_obj))
195+
185196
# Update the condition with our properly serialized metric
186197
condition["metric"] = metric_data
187198

@@ -199,47 +210,6 @@ def validate_combine_type(cls, v):
199210
raise ValueError(f"combine_type must be 'all' or 'any', got: {v}")
200211
return v
201212

202-
class AlertResult(BaseModel):
203-
"""
204-
Result of evaluating a rule.
205-
206-
Example:
207-
{
208-
"status": "triggered",
209-
"rule_name": "Quality Check",
210-
"conditions_result": [
211-
{"metric": "faithfulness", "value": 0.6, "threshold": 0.7, "passed": False},
212-
{"metric": "relevancy", "value": 0.9, "threshold": 0.8, "passed": True}
213-
],
214-
"rule_id": "123e4567-e89b-12d3-a456-426614174000",
215-
"metadata": {
216-
"example_id": "example_123",
217-
"timestamp": "20240321_123456"
218-
},
219-
"notification": {
220-
"enabled": true,
221-
"communication_methods": ["slack", "email"],
222-
"email_addresses": ["user1@example.com", "user2@example.com"]
223-
}
224-
}
225-
"""
226-
status: AlertStatus
227-
rule_id: Optional[str] = None # The unique identifier of the rule
228-
rule_name: str
229-
conditions_result: List[Dict[str, Any]]
230-
metadata: Dict[str, Any] = {}
231-
notification: Optional[NotificationConfig] = None # Configuration for notifications
232-
233-
@property
234-
def example_id(self) -> Optional[str]:
235-
"""Get example_id from metadata for backward compatibility"""
236-
return self.metadata.get("example_id")
237-
238-
@property
239-
def timestamp(self) -> Optional[str]:
240-
"""Get timestamp from metadata for backward compatibility"""
241-
return self.metadata.get("timestamp")
242-
243213
class RulesEngine:
244214
"""
245215
Engine for creating and evaluating rules against metrics.
@@ -406,7 +376,7 @@ def evaluate_rules(self, scores: Dict[str, float], example_metadata: Optional[Di
406376
# If rule has a notification config and the alert is triggered, include it in the result
407377
notification_config = rule.notification
408378

409-
# Set the alert status based on whether the rule was triggered
379+
# Set the alert status based on whether the rule was triggered using proper enum values
410380
status = AlertStatus.TRIGGERED if triggered else AlertStatus.NOT_TRIGGERED
411381

412382
# Create the alert result
@@ -416,7 +386,10 @@ def evaluate_rules(self, scores: Dict[str, float], example_metadata: Optional[Di
416386
rule_name=rule.name,
417387
conditions_result=condition_results,
418388
notification=notification_config,
419-
metadata=example_metadata or {}
389+
metadata=example_metadata or {},
390+
combine_type=rule.combine_type,
391+
project_id=example_metadata.get("project_id") if example_metadata else None,
392+
trace_span_id=example_metadata.get("trace_span_id") if example_metadata else None
420393
)
421394

422395
results[rule_id] = alert_result

src/judgeval/utils/alerts.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,20 @@ class AlertResult(BaseModel):
2020
status: Status of the alert (triggered or not)
2121
conditions_result: List of condition evaluation results
2222
metadata: Dictionary containing example_id, timestamp, and other metadata
23+
notification: Optional notification configuration for triggered alerts
24+
combine_type: The combination type used ("all" or "any")
25+
project_id: Optional project identifier
26+
trace_span_id: Optional trace span identifier
2327
"""
2428
rule_name: str
2529
rule_id: Optional[str] = None # The unique identifier of the rule
2630
status: AlertStatus
2731
conditions_result: List[Dict[str, Any]] = []
2832
metadata: Dict[str, Any] = {}
33+
notification: Optional[Any] = None # NotificationConfig when triggered, None otherwise
34+
combine_type: Optional[str] = None # "all" or "any"
35+
project_id: Optional[str] = None # Project identifier
36+
trace_span_id: Optional[str] = None # Trace span identifier
2937

3038
@property
3139
def example_id(self) -> Optional[str]:

src/tests/notification/test_notification_integration.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
import json
88
from typing import Dict, List, Optional
99

10-
from judgeval.rules import Rule, Condition, NotificationConfig, AlertStatus, RulesEngine
10+
from judgeval.rules import Rule, Condition, NotificationConfig, RulesEngine
11+
from judgeval.utils.alerts import AlertStatus
1112
from judgeval.scorers import AnswerRelevancyScorer, FaithfulnessScorer, AnswerCorrectnessScorer
1213
from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer
1314
from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer

src/tests/notification/test_notification_serialization.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@
88
from judgeval.rules import (
99
Rule,
1010
Condition,
11-
NotificationConfig,
12-
AlertStatus,
13-
AlertResult
11+
NotificationConfig
1412
)
13+
from judgeval.utils.alerts import AlertStatus, AlertResult
1514
from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer
1615
from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer
1716

src/tests/notification/test_notification_unit.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
Rule,
1010
Condition,
1111
RulesEngine,
12-
AlertStatus,
1312
NotificationConfig
1413
)
14+
from judgeval.utils.alerts import AlertStatus
1515
from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer
1616
from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer
1717
from judgeval.judgment_client import JudgmentClient

src/tests/test_rules_skip.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
import pytest
66
from uuid import uuid4
7-
from judgeval.rules import Rule, Condition, AlertStatus, RulesEngine
7+
from judgeval.rules import Rule, Condition, RulesEngine
8+
from judgeval.utils.alerts import AlertStatus
89
from judgeval.scorers import APIJudgmentScorer
910

1011

src/tests/test_rules_with_api_scorers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
import pytest
44
from unittest.mock import MagicMock, patch
55

6-
from judgeval.rules import Rule, Condition, RulesEngine, AlertStatus
6+
from judgeval.rules import Rule, Condition, RulesEngine
7+
from judgeval.utils.alerts import AlertStatus
78
from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer
89
from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer
910
from judgeval.judgment_client import JudgmentClient

0 commit comments

Comments
 (0)