Add trace ID to datasets, update UTs accordingly #31

Merged: 11 commits, Jan 2, 2025
4 changes: 0 additions & 4 deletions judgeval/data/datasets/20241111_175859.csv

This file was deleted.

44 changes: 0 additions & 44 deletions judgeval/data/datasets/20241111_175859.json

This file was deleted.

76 changes: 71 additions & 5 deletions judgeval/data/datasets/dataset.py
@@ -158,7 +158,64 @@ def add_from_json(self, file_path: str) -> None:
Adds examples and ground truths from a JSON file.

The format of the JSON file is expected to be a dictionary with two keys: "examples" and "ground_truths".
The value of each key is a list of dictionaries, where each dictionary represents an example or ground truth.

The JSON file is expected to have the following format:
{
"ground_truths": [
Collaborator: What does "ground truths" mean in relation to the examples? I thought the examples have the ground truth (with the actual_output field).

Collaborator: Thanks

{
"input": "test input",
"actual_output": null,
"expected_output": "expected output",
"context": [
"context1"
],
"retrieval_context": [
"retrieval1"
],
"additional_metadata": {
"key": "value"
},
"comments": "test comment",
"tools_called": [
"tool1"
],
"expected_tools": [
"tool1"
],
"source_file": "test.py",
"trace_id": "094121"
}
],
"examples": [
{
"input": "test input",
"actual_output": "test output",
"expected_output": "expected output",
"context": [
"context1",
"context2"
],
"retrieval_context": [
"retrieval1"
],
"additional_metadata": {
"key": "value"
},
"tools_called": [
"tool1"
],
"expected_tools": [
"tool1",
"tool2"
],
"name": "test example",
"example_id": null,
"timestamp": "20241230_160117",
"trace_id": "123"
}
]
}
"""
try:
with open(file_path, "r") as file:
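To make the layout documented in the docstring above concrete, here is a minimal, self-contained sketch of reading a file in that format with the standard json module; the file name is hypothetical, and the only assumptions are the two top-level keys and the fields shown in the sample data.

```python
# Minimal sketch of consuming the documented JSON layout; "dataset.json" is a
# hypothetical path, and the keys follow the docstring above.
import json

with open("dataset.json", "r") as f:
    payload = json.load(f)

# Ground truths carry no actual_output yet (the LLM system produces it later),
# but they do carry the new trace_id field.
for gt in payload.get("ground_truths", []):
    print(gt["input"], gt.get("trace_id"), gt.get("actual_output"))

# Examples carry a populated actual_output alongside trace_id.
for ex in payload.get("examples", []):
    print(ex["input"], ex["actual_output"], ex.get("trace_id"))
```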
@@ -195,17 +252,22 @@ def add_from_csv(
"Please install pandas to use this method. 'pip install pandas'"
)

df = pd.read_csv(file_path)
# Pandas naturally reads numbers in data files as ints, not strings (can lead to unexpected behavior)
df = pd.read_csv(file_path, dtype={'trace_id': str})
Collaborator: What else can dtype be? A trace_id or trace_file?

Collaborator: Pandas naturally reads numbers in data files as ints, not strings (can lead to unexpected behavior).
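As a quick illustration of the behavior described above, a minimal sketch with hypothetical CSV content: default dtype inference drops the leading zero of a trace ID, while dtype={'trace_id': str} preserves it exactly.

```python
# Minimal sketch of the dtype issue noted above; the CSV content is hypothetical.
import io
import pandas as pd

csv_text = "input,trace_id\ntest input,094121\n"

# Default inference turns the trace ID into an int and drops the leading zero.
df_default = pd.read_csv(io.StringIO(csv_text))
print(df_default["trace_id"].iloc[0])  # 94121

# Forcing the column to str keeps the trace ID exactly as written in the file.
df_str = pd.read_csv(io.StringIO(csv_text), dtype={"trace_id": str})
print(df_str["trace_id"].iloc[0])  # 094121
```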

"""
Expect the CSV to have headers

"input", "actual_output", "expected_output", "context", \
"retrieval_context", "additional_metadata", "tools_called", \
"expected_tools", "name", "comments", "source_file", "example"
"expected_tools", "name", "comments", "source_file", "example", \
"trace_id"

We want to collect the examples and ground truths separately which can
be determined by the "example" column. If the value is True, then it is an
example, otherwise it is a ground truth.

We also assume that if there are multiple retrieval contexts or contexts, they are separated by semicolons.
This can be adjusted using the `context_delimiter` and `retrieval_context_delimiter` parameters.
"""
examples, ground_truths = [], []

@@ -219,8 +281,8 @@ def add_from_csv(
"additional_metadata": ast.literal_eval(row["additional_metadata"]) if pd.notna(row["additional_metadata"]) else dict(),
"tools_called": row["tools_called"].split(";") if pd.notna(row["tools_called"]) else [],
"expected_tools": row["expected_tools"].split(";") if pd.notna(row["expected_tools"]) else [],
"trace_id": row["trace_id"] if pd.notna(row["trace_id"]) else None
}

if row["example"]:
data["name"] = row["name"] if pd.notna(row["name"]) else None
# every Example has `input` and `actual_output` fields
@@ -230,6 +292,7 @@
else:
raise ValueError("Every example must have an 'input' and 'actual_output' field.")
else:
# GroundTruthExample has `comments` and `source_file` fields
data["comments"] = row["comments"] if pd.notna(row["comments"]) else None
data["source_file"] = row["source_file"] if pd.notna(row["source_file"]) else None
# every GroundTruthExample has `input` field
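For reference, a small standalone sketch of how one CSV row with the new trace_id column maps onto the data dict built in the loop above; the row values are hypothetical and the semicolon delimiters follow the docstring defaults.

```python
# Standalone sketch mirroring the row-parsing logic above; the row content is hypothetical.
import ast
import pandas as pd

# One hypothetical row of the new CSV layout (an Example row: "example" is True).
row = pd.Series({
    "input": "test input",
    "actual_output": "test output",
    "context": "context1;context2",
    "additional_metadata": "{'key': 'value'}",
    "tools_called": "tool1",
    "expected_tools": "tool1;tool2",
    "name": "test example",
    "example": True,
    "trace_id": "123",
})

data = {
    "input": row["input"],
    "actual_output": row["actual_output"],
    "context": row["context"].split(";") if pd.notna(row["context"]) else [],
    "additional_metadata": ast.literal_eval(row["additional_metadata"]) if pd.notna(row["additional_metadata"]) else dict(),
    "tools_called": row["tools_called"].split(";") if pd.notna(row["tools_called"]) else [],
    "expected_tools": row["expected_tools"].split(";") if pd.notna(row["expected_tools"]) else [],
    "trace_id": row["trace_id"] if pd.notna(row["trace_id"]) else None,
}

if row["example"]:
    # Example rows also carry a name; GroundTruthExample rows carry comments/source_file instead.
    data["name"] = row["name"] if pd.notna(row["name"]) else None

print(data["context"])   # ['context1', 'context2']
print(data["trace_id"])  # 123 (kept as a string)
```

Reading the file with dtype={'trace_id': str}, as in the change above, is what keeps a zero-padded trace ID intact before this mapping runs.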
@@ -281,7 +344,8 @@ def save_as(self, file_type: Literal["json", "csv"], dir_path: str, save_name: s
writer.writerow([
"input", "actual_output", "expected_output", "context", \
"retrieval_context", "additional_metadata", "tools_called", \
"expected_tools", "name", "comments", "source_file", "example"
"expected_tools", "name", "comments", "source_file", "example", \
"trace_id"
])
for e in self.examples:
writer.writerow(
Expand All @@ -298,6 +362,7 @@ def save_as(self, file_type: Literal["json", "csv"], dir_path: str, save_name: s
None, # Example does not have comments
None, # Example does not have source file
True, # Adding an Example
e.trace_id
]
)

@@ -316,6 +381,7 @@ def save_as(self, file_type: Literal["json", "csv"], dir_path: str, save_name: s
g.comments,
g.source_file,
False, # Adding a GroundTruthExample, not an Example
g.trace_id
]
)
else:
7 changes: 5 additions & 2 deletions judgeval/data/datasets/ground_truth.py
@@ -19,6 +19,7 @@ class GroundTruthExample(BaseModel):
tools_called: Optional[List[str]] = None
expected_tools: Optional[List[str]] = None
source_file: Optional[str] = None
trace_id: Optional[str] = None

def to_dict(self):
return {
@@ -32,6 +33,7 @@ def to_dict(self):
"tools_called": self.tools_called,
"expected_tools": self.expected_tools,
"source_file": self.source_file,
"trace_id": self.trace_id,
}

def __str__(self):
@@ -46,6 +48,7 @@ def __str__(self):
f"comments={self.comments}, "
f"tools_called={self.tools_called}, "
f"expected_tools={self.expected_tools}, "
f"source_file={self.source_file}"
f"source_file={self.source_file}, "
f"trace_id={self.trace_id}"
f")"
)
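A minimal construction sketch for the updated model; the import path is inferred from this file's location in the diff, the field names follow the sample JSON above, and which fields are required depends on the parts of the model not shown here.

```python
# Sketch of the new trace_id field on GroundTruthExample; the import path and
# field set are assumptions based on this diff, not a definitive usage example.
from judgeval.data.datasets.ground_truth import GroundTruthExample

gt = GroundTruthExample(
    input="test input",
    expected_output="expected output",
    comments="test comment",
    source_file="test.py",
    trace_id="094121",  # new Optional[str] field added in this PR
)

print(gt.to_dict()["trace_id"])  # 094121
print(gt)                        # __str__ now includes trace_id=094121
```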
4 changes: 0 additions & 4 deletions judgeval/data/datasets/sample.csv

This file was deleted.

1 change: 1 addition & 0 deletions judgeval/data/example.py
@@ -56,6 +56,7 @@ def to_dict(self):
"name": self.name,
"example_id": self.example_id,
"timestamp": self.timestamp,
"trace_id": self.trace_id
}

def __str__(self):
5 changes: 5 additions & 0 deletions judgeval/scorers/prompt_scorer.py
@@ -47,6 +47,10 @@ class PromptScorer(CustomScorer, BaseModel):
score_type: str
threshold: float = Field(default=0.5)
using_native_model: bool = Field(default=True)

# DO NOT SET THESE FIELDS MANUALLY, THEY ARE SET BY THE SCORE_EXAMPLE METHOD
response: Optional[dict] = None
result: Optional[float] = None

def __init__(
self,
@@ -295,6 +299,7 @@ def __init__(self, name: str, slug: str, conversation: List[dict], options: Mapp
BaseModel.__init__(
self,
name=name,
slug=slug,
score_type=name,
conversation=conversation,
options=options,
3 changes: 3 additions & 0 deletions tests/data/datasets/sample_data/dataset.csv
@@ -0,0 +1,3 @@
input,actual_output,expected_output,context,retrieval_context,additional_metadata,tools_called,expected_tools,name,comments,source_file,example,trace_id
test input,test output,expected output,context1;context2,retrieval1,{'key': 'value'},tool1,tool1;tool2,test example,,,True,123
test input,,expected output,context1,retrieval1,{'key': 'value'},tool1,tool1,,test comment,test.py,False,094121
55 changes: 55 additions & 0 deletions tests/data/datasets/sample_data/dataset.json
@@ -0,0 +1,55 @@
{
"ground_truths": [
{
"input": "test input",
"actual_output": null,
Collaborator: Is the difference between ground_truths and examples that ground_truths don't have an actual_output (this is for the LLM system to produce)?

Collaborator: Yep.

"expected_output": "expected output",
"context": [
"context1"
],
"retrieval_context": [
"retrieval1"
],
"additional_metadata": {
"key": "value"
},
"comments": "test comment",
"tools_called": [
"tool1"
],
"expected_tools": [
"tool1"
],
"source_file": "test.py",
"trace_id": "094121"
}
],
"examples": [
{
"input": "test input",
"actual_output": "test output",
"expected_output": "expected output",
"context": [
"context1",
"context2"
],
"retrieval_context": [
"retrieval1"
],
"additional_metadata": {
"key": "value"
},
"tools_called": [
"tool1"
],
"expected_tools": [
"tool1",
"tool2"
],
"name": "test example",
"example_id": null,
"timestamp": "20241230_160117",
"trace_id": "123"
}
]
}
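To tie the fixture above to the review thread, a hypothetical pytest-style check (not necessarily the PR's actual unit test) that the ground truth entry has no actual_output while both entries carry the new trace_id; the path assumes the repository root as the working directory.

```python
# Hypothetical test sketch over the fixture above; path and test name are assumptions.
import json

def test_sample_dataset_trace_ids():
    with open("tests/data/datasets/sample_data/dataset.json") as f:
        payload = json.load(f)

    # Ground truths have no actual_output yet, but do carry a trace_id.
    assert payload["ground_truths"][0]["actual_output"] is None
    assert payload["ground_truths"][0]["trace_id"] == "094121"

    # Examples carry both an actual_output and a trace_id.
    assert payload["examples"][0]["actual_output"] == "test output"
    assert payload["examples"][0]["trace_id"] == "123"
```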