#!/usr/bin/env python3
"""
Examples demonstrating how to use async evaluation in multiple ways.
"""

import asyncio
import os
import time
from typing import List

from judgeval.data import Example, ScoringResult
from judgeval.judgment_client import JudgmentClient

# Get Judgment API key from environment (replace with your actual API key)
JUDGMENT_API_KEY = os.environ.get("JUDGMENT_API_KEY", "your_api_key_here")
ORGANIZATION_ID = os.environ.get("ORGANIZATION_ID", "your_organization_id_here")

# Initialize the JudgmentClient
judgment_client = JudgmentClient(judgment_api_key=JUDGMENT_API_KEY, organization_id=ORGANIZATION_ID)


async def example_direct_await():
    """
    Example of directly awaiting the Task returned by run_evaluation with async_execution=True.
    This is the simplest approach and blocks until evaluation is complete.
    """
    print("\n=== Example: Direct Await ===")

    # Create example list
    examples = [
        Example(
            input="What is the capital of France?",
            actual_output="The capital of France is Paris.",
            expected_output="Paris"
        ),
        Example(
            input="What is the capital of Italy?",
            actual_output="Rome is the capital of Italy.",
            expected_output="Rome"
        )
    ]

    # Set up scorers
    from judgeval.scorers import AnswerCorrectnessScorer
    scorers = [AnswerCorrectnessScorer(threshold=0.9)]

    # Start evaluation asynchronously and get a Task object
    print("Starting evaluation...")
    task = judgment_client.run_evaluation(
        examples=examples,
        scorers=scorers,
        model="gpt-4o-mini",
        project_name="async-examples",
        eval_run_name="async-example-direct",
        override=True,
        async_execution=True
    )

    # Directly await the task - this will block until the evaluation is done
    print("Awaiting results...")
    results = await task

    print(f"Evaluation completed! Received {len(results)} results")

    # Process the results
    print(results)


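# --- Additional sketch (not part of the original examples) ---
# A minimal sketch of running several evaluations concurrently with asyncio.gather,
# assuming run_evaluation(..., async_execution=True) returns an awaitable Task as in
# the examples above. The example inputs and run names here are illustrative only,
# and this function is not invoked by main() below.
async def example_concurrent_evaluations():
    """Sketch: start two evaluations and await both with asyncio.gather."""
    from judgeval.scorers import AnswerCorrectnessScorer
    scorers = [AnswerCorrectnessScorer(threshold=0.9)]

    batch_one = [
        Example(
            input="What is 2 + 2?",
            actual_output="2 + 2 equals 4.",
            expected_output="4"
        )
    ]
    batch_two = [
        Example(
            input="Who wrote Hamlet?",
            actual_output="Hamlet was written by William Shakespeare.",
            expected_output="William Shakespeare"
        )
    ]

    # Start both evaluations without awaiting them yet
    task_one = judgment_client.run_evaluation(
        examples=batch_one,
        scorers=scorers,
        model="gpt-4o-mini",
        project_name="async-examples",
        eval_run_name="async-example-concurrent-1",  # illustrative run name
        override=True,
        async_execution=True
    )
    task_two = judgment_client.run_evaluation(
        examples=batch_two,
        scorers=scorers,
        model="gpt-4o-mini",
        project_name="async-examples",
        eval_run_name="async-example-concurrent-2",  # illustrative run name
        override=True,
        async_execution=True
    )

    # gather awaits both tasks and returns their results in argument order
    results_one, results_two = await asyncio.gather(task_one, task_two)
    print(f"Concurrent runs finished with {len(results_one)} and {len(results_two)} results")

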
async def example_with_other_work():
    """
    Example of running other work while evaluation is in progress.
    Shows how to check task status and get results when ready.
    """
    print("\n=== Example: Do Other Work While Evaluating ===")

    # Create example list
    examples = [
        Example(
            input="What is the tallest mountain in the world?",
            actual_output="Mount Everest is the tallest mountain in the world.",
            expected_output="Mount Everest"
        ),
        Example(
            input="What is the largest ocean?",
            actual_output="The Pacific Ocean is the largest ocean on Earth.",
            expected_output="Pacific Ocean"
        )
    ]

    # Set up scorers
    from judgeval.scorers import AnswerCorrectnessScorer
    scorers = [AnswerCorrectnessScorer(threshold=0.9)]

    # Start evaluation asynchronously and get a Task object
    print("Starting evaluation...")
    task = judgment_client.run_evaluation(
        examples=examples,
        scorers=scorers,
        model="gpt-4o-mini",
        project_name="async-examples",
        eval_run_name="async-example-other-work",
        override=True,
        async_execution=True
    )

    # Do other work while evaluation is running
    print("Doing other work while evaluation runs in the background...")

    # Simulate other work with a few iterations
    for i in range(1, 4):
        print(f" Doing work iteration {i}...")
        await asyncio.sleep(2)  # Simulate work with a delay

        # Check if the evaluation is done
        if task.done():
            print(" Evaluation completed during other work!")
            break
        else:
            print(" Evaluation still running...")

    # Get the results when ready
    try:
        if not task.done():
            print("Waiting for evaluation to complete...")

        results = await task  # Will return immediately if already done

        print(results)

    except Exception as e:
        print(f"Error in evaluation: {str(e)}")
        if task.exception():
            print(f"Task exception: {task.exception()}")


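# --- Additional sketch (not part of the original examples) ---
# A minimal sketch of bounding how long we wait, using asyncio.wait_for on the Task
# returned by run_evaluation(..., async_execution=True) as shown above. Note that
# wait_for cancels the underlying Task if the timeout expires. Example data, run name,
# and the 120-second timeout are illustrative; this function is not invoked by main().
async def example_with_timeout():
    """Sketch: await an evaluation but give up after a fixed number of seconds."""
    from judgeval.scorers import AnswerCorrectnessScorer

    examples = [
        Example(
            input="What is the longest river in the world?",
            actual_output="The Nile is often cited as the longest river in the world.",
            expected_output="Nile"
        )
    ]

    task = judgment_client.run_evaluation(
        examples=examples,
        scorers=[AnswerCorrectnessScorer(threshold=0.9)],
        model="gpt-4o-mini",
        project_name="async-examples",
        eval_run_name="async-example-timeout",  # illustrative run name
        override=True,
        async_execution=True
    )

    try:
        # Wait at most 120 seconds for the evaluation to finish
        results = await asyncio.wait_for(task, timeout=120)
        print(f"Evaluation finished within the timeout: {len(results)} results")
    except asyncio.TimeoutError:
        # wait_for cancels the task on timeout, so it cannot be awaited again here
        print("Evaluation did not finish within 120 seconds and was cancelled")

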
async def main():
    """Run the examples."""
    # Run the first example: direct await
    await example_direct_await()

    # Run the second example: do other work while evaluating
    await example_with_other_work()


if __name__ == "__main__":
    asyncio.run(main())
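
# Usage sketch (the filename is illustrative; both environment variables are read at the top of this file):
#   JUDGMENT_API_KEY=... ORGANIZATION_ID=... python async_evaluation_examples.py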