|
1 | 1 | """
|
| 2 | +** WARNING **: This script requires a ClassifierScorer that has already been created on the Judgment platform. |
| 3 | +If you haven't created one yet, follow the instructions in this YouTube guide: https://www.youtube.com/watch?v=LNEX-RkeQQI |
| 4 | +
|
2 | 5 | This script implements a custom scorer to evaluate customer support responses.
|
3 | 6 |
|
4 | 7 | It checks if a support response mentions competitors (like Adidas, Reebok, etc.) in a positive way.
|
|
8 | 11 |
|
9 | 12 |
|
10 | 13 | from judgeval import JudgmentClient
|
11 |
| -from judgeval.scorers import ClassifierScorer |
12 | 14 | from judgeval.data import Example
|
13 | 15 |
|
14 | 16 |
|
15 |
| -competitor_mentions_scorer = ClassifierScorer( |
16 |
| - "Competitor Mentions", |
17 |
| - slug="competitor_mentions-487126418", |
18 |
| - threshold=1.0, |
19 |
| - conversation=[{ |
20 |
| - "role": "system", |
21 |
| - "content": """Does the following customer support response discuss any of the following competitors in a positive way? (Y/N). |
22 |
| - |
23 |
| - Competitors: Adidas, Reebok, Hoka, ON, Converse |
24 |
| - |
25 |
| - Customer Question: {{input}} |
26 |
| - Customer Support Response: {{actual_output}} |
27 |
| - """ |
28 |
| - }], |
29 |
| - options={ |
30 |
| - "Y": 0.0, |
31 |
| - "N": 1.0 |
32 |
| - } |
33 |
| -) |
34 |
| - |
35 |
| - |
36 | 17 | if __name__ == "__main__":
|
37 | 18 | client = JudgmentClient()
|
38 | 19 |
|
39 | 20 | positive_example = Example(
|
40 | 21 | input="What are the best shoes for running priced under $130?",
|
41 |
| - actual_output="You'd want to check out the newest Nike Vaporfly, it's only $120 and built for performance. " |
| 22 | + actual_output="You'd want to check out the newest Nike Vaporfly, it's only $120 and built for performance." |
42 | 23 | )
|
43 | 24 |
|
44 | 25 | negative_example = Example(
|
45 | 26 | input="What are the best shoes for running priced under $130?",
|
46 | 27 | actual_output="The Nike Vaporfly is a great shoe built for performance. Other great options include the Adidas Ultraboost and the Reebok Nano X which are affordable and speedy."
|
47 | 28 | )
|
48 | 29 |
|
| 30 | + competitor_mentions_scorer = client.fetch_classifier_scorer("<YOUR_SLUG_HERE>") # replace with slug, see video guide above |
| 31 | + |
49 | 32 | client.run_evaluation(
|
50 | 33 | examples=[positive_example, negative_example],
|
51 | 34 | scorers=[competitor_mentions_scorer],
|
52 | 35 | model="gpt-4o-mini",
|
53 | 36 | project_name="competitor_mentions",
|
54 |
| - eval_run_name="competitor_mentions_test", |
| 37 | + eval_run_name="competitor_brand_demo", |
55 | 38 | )
|
56 | 39 |
|
57 | 40 |
|
|
0 commit comments