|
| 1 | +from openai import OpenAI |
| 2 | +from together import Together |
| 3 | +from anthropic import Anthropic |
| 4 | +from judgeval.common.tracer import Tracer, wrap |
1 | 5 |
|
2 |
| -from judgeval.common.tracer import tracer |
| 6 | +import time |
3 | 7 |
|
| 8 | +# Initialize the tracer and clients |
| 9 | +judgment = Tracer(api_key=os.getenv("JUDGMENT_API_KEY")) |
| 10 | +openai_client = wrap(OpenAI()) |
| 11 | +anthropic_client = wrap(Anthropic()) |
4 | 12 |
|
5 |
| -# @tracer.observe(name="generate_movie_review", top_level=True) |
6 |
| -def generate_movie_review(summary: str) -> str: |
| 13 | +@judgment.observe |
| 14 | +def make_upper(input): |
| 15 | + return input.upper() |
7 | 16 |
|
8 |
| - trace = tracer.start_trace() |
9 |
| - # Analyze key elements |
10 |
| - plot_quality = analyze_plot(summary) |
11 |
| - trace.print_trace() |
12 |
| - engagement = analyze_engagement(summary) |
13 |
| - originality = analyze_originality(summary) |
14 |
| - trace.print_trace() |
15 |
| - |
16 |
| - # Generate final review |
17 |
| - review = compose_review(plot_quality, engagement, originality) |
18 |
| - return review |
19 |
| - |
20 |
| -@tracer.observe(name="analyze_plot") |
21 |
| -def analyze_plot(summary: str) -> dict: |
22 |
| - # Analyze plot elements like structure, pacing, coherence |
23 |
| - return { |
24 |
| - "structure": 8, # 1-10 rating |
25 |
| - "pacing": 7, |
26 |
| - "coherence": 9, |
27 |
| - "notes": "Well structured plot with good pacing" |
28 |
| - } |
29 |
| - |
30 |
| -@tracer.observe(name="analyze_engagement") |
31 |
| -def analyze_engagement(summary: str) -> dict: |
32 |
| - # Analyze how engaging/interesting the story seems |
33 |
| - return { |
34 |
| - "interest_level": 8, |
35 |
| - "emotional_impact": 7, |
36 |
| - "memorability": 8, |
37 |
| - "notes": "Engaging story with emotional resonance" |
38 |
| - } |
39 |
| - |
40 |
| -@tracer.observe(name="analyze_originality") |
41 |
| -def analyze_originality(summary: str) -> dict: |
42 |
| - # Analyze uniqueness and creativity |
43 |
| - return { |
44 |
| - "uniqueness": 6, |
45 |
| - "creativity": 7, |
46 |
| - "innovation": 5, |
47 |
| - "notes": "Some fresh elements but follows familiar patterns" |
48 |
| - } |
| 17 | +@judgment.observe |
| 18 | +def make_lower(input): |
| 19 | + return input.lower() |
49 | 20 |
|
50 |
| -@tracer.observe(name="compose_review") |
51 |
| -def compose_review(plot: dict, engagement: dict, originality: dict) -> str: |
52 |
| - # Calculate overall score |
53 |
| - plot_score = sum([plot["structure"], plot["pacing"], plot["coherence"]]) / 3 |
54 |
| - engagement_score = sum([engagement["interest_level"], |
55 |
| - engagement["emotional_impact"], |
56 |
| - engagement["memorability"]]) / 3 |
57 |
| - originality_score = sum([originality["uniqueness"], |
58 |
| - originality["creativity"], |
59 |
| - originality["innovation"]]) / 3 |
| 21 | +@judgment.observe |
| 22 | +def make_poem(input): |
60 | 23 |
|
61 |
| - overall_score = (plot_score + engagement_score + originality_score) / 3 |
| 24 | + # Using Anthropic API |
| 25 | + anthropic_response = anthropic_client.messages.create( |
| 26 | + model="claude-3-sonnet-20240229", |
| 27 | + messages=[{ |
| 28 | + "role": "user", |
| 29 | + "content": input |
| 30 | + }], |
| 31 | + max_tokens=30 |
| 32 | + ) |
| 33 | + anthropic_result = anthropic_response.content[0].text |
62 | 34 |
|
63 |
| - # Generate review text |
64 |
| - review = f"""Movie Review: |
65 |
| -Plot: {plot['notes']} ({plot_score:.1f}/10) |
66 |
| -Engagement: {engagement['notes']} ({engagement_score:.1f}/10) |
67 |
| -Originality: {originality['notes']} ({originality_score:.1f}/10) |
68 |
| -
|
69 |
| -Overall Score: {overall_score:.1f}/10 |
70 |
| -""" |
71 |
| - return review |
72 |
| - |
73 |
| -# Test the workflow |
74 |
| -summary = """ |
75 |
| -A brilliant mathematician discovers a pattern that could predict global catastrophes. |
76 |
| -As she races to convince authorities of the impending doom, she must confront her own |
77 |
| -past traumas and decide whether to trust the pattern or her instincts. The fate of |
78 |
| -millions hangs in the balance as time runs out. |
79 |
| -""" |
| 35 | + # Using OpenAI API |
| 36 | + openai_response = openai_client.chat.completions.create( |
| 37 | + model="gpt-4o-mini", |
| 38 | + messages=[ |
| 39 | + {"role": "system", "content": "Make a short sentence with the input."}, |
| 40 | + {"role": "user", "content": input} |
| 41 | + ] |
| 42 | + ) |
| 43 | + openai_result = openai_response.choices[0].message.content |
| 44 | + print(openai_result) |
| 45 | + |
| 46 | + return make_lower(anthropic_result + openai_result) |
80 | 47 |
|
81 |
| -result = generate_movie_review(summary) |
| 48 | +def test_evaluation_mixed(input): |
| 49 | + with judgment.trace("test_evaluation") as trace: |
| 50 | + result = make_poem(make_upper(input)) |
82 | 51 |
|
83 |
| -print(type(result)) |
84 |
| -assert isinstance(result, str) |
85 |
| -# assert "Movie Review:" in result |
86 |
| -# assert "Overall Score:" in result |
| 52 | + trace.print() |
| 53 | + trace.save() |
| 54 | + return result |
87 | 55 |
|
88 |
| -# Print the trace |
89 |
| -# result.print_trace() |
| 56 | +result3 = test_evaluation_mixed("hello the world is flat") |
0 commit comments