-
Notifications
You must be signed in to change notification settings - Fork 83
Add trace ID to datasets, update UTs accordingly #31
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3d8f2db
0e42284
4559393
75fe8e2
5bed97f
07f06f3
b20a058
bc372ab
5322400
b64e0e8
caeaa0b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -158,7 +158,64 @@ def add_from_json(self, file_path: str) -> None: | |
Adds examples and ground truths from a JSON file. | ||
The format of the JSON file is expected to be a dictionary with two keys: "examples" and "ground_truths". | ||
The value of each key is a list of dictionaries, where each dictionary represents an example or ground truth. | ||
The value of each key is a list of dictionaries, where each dictionary represents an example or ground truth. | ||
The JSON file is expected to have the following format: | ||
{ | ||
"ground_truths": [ | ||
{ | ||
"input": "test input", | ||
"actual_output": null, | ||
"expected_output": "expected output", | ||
"context": [ | ||
"context1" | ||
], | ||
"retrieval_context": [ | ||
"retrieval1" | ||
], | ||
"additional_metadata": { | ||
"key": "value" | ||
}, | ||
"comments": "test comment", | ||
"tools_called": [ | ||
"tool1" | ||
], | ||
"expected_tools": [ | ||
"tool1" | ||
], | ||
"source_file": "test.py", | ||
"trace_id": "094121" | ||
} | ||
], | ||
"examples": [ | ||
{ | ||
"input": "test input", | ||
"actual_output": "test output", | ||
"expected_output": "expected output", | ||
"context": [ | ||
"context1", | ||
"context2" | ||
], | ||
"retrieval_context": [ | ||
"retrieval1" | ||
], | ||
"additional_metadata": { | ||
"key": "value" | ||
}, | ||
"tools_called": [ | ||
"tool1" | ||
], | ||
"expected_tools": [ | ||
"tool1", | ||
"tool2" | ||
], | ||
"name": "test example", | ||
"example_id": null, | ||
"timestamp": "20241230_160117", | ||
"trace_id": "123" | ||
} | ||
] | ||
} | ||
""" | ||
try: | ||
with open(file_path, "r") as file: | ||
|
@@ -195,17 +252,22 @@ def add_from_csv( | |
"Please install pandas to use this method. 'pip install pandas'" | ||
) | ||
|
||
df = pd.read_csv(file_path) | ||
# Pandas naturally reads numbers in data files as ints, not strings (can lead to unexpected behavior) | ||
df = pd.read_csv(file_path, dtype={'trace_id': str}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What else can There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pandas naturally reads numbers in data files as ints, not strings (can lead to unexpected behavior) |
||
""" | ||
Expect the CSV to have headers | ||
"input", "actual_output", "expected_output", "context", \ | ||
"retrieval_context", "additional_metadata", "tools_called", \ | ||
"expected_tools", "name", "comments", "source_file", "example" | ||
"expected_tools", "name", "comments", "source_file", "example", \ | ||
"trace_id" | ||
We want to collect the examples and ground truths separately which can | ||
be determined by the "example" column. If the value is True, then it is an | ||
example, otherwise it is a ground truth. | ||
We also assume that if there are multiple retrieval contexts or contexts, they are separated by semicolons. | ||
This can be adjusted using the `context_delimiter` and `retrieval_context_delimiter` parameters. | ||
""" | ||
examples, ground_truths = [], [] | ||
|
||
|
@@ -219,8 +281,8 @@ def add_from_csv( | |
"additional_metadata": ast.literal_eval(row["additional_metadata"]) if pd.notna(row["additional_metadata"]) else dict(), | ||
"tools_called": row["tools_called"].split(";") if pd.notna(row["tools_called"]) else [], | ||
"expected_tools": row["expected_tools"].split(";") if pd.notna(row["expected_tools"]) else [], | ||
"trace_id": row["trace_id"] if pd.notna(row["trace_id"]) else None | ||
} | ||
|
||
if row["example"]: | ||
data["name"] = row["name"] if pd.notna(row["name"]) else None | ||
# every Example has `input` and `actual_output` fields | ||
|
@@ -230,6 +292,7 @@ def add_from_csv( | |
else: | ||
raise ValueError("Every example must have an 'input' and 'actual_output' field.") | ||
else: | ||
# GroundTruthExample has `comments` and `source_file` fields | ||
data["comments"] = row["comments"] if pd.notna(row["comments"]) else None | ||
data["source_file"] = row["source_file"] if pd.notna(row["source_file"]) else None | ||
# every GroundTruthExample has `input` field | ||
|
@@ -281,7 +344,8 @@ def save_as(self, file_type: Literal["json", "csv"], dir_path: str, save_name: s | |
writer.writerow([ | ||
"input", "actual_output", "expected_output", "context", \ | ||
"retrieval_context", "additional_metadata", "tools_called", \ | ||
"expected_tools", "name", "comments", "source_file", "example" | ||
"expected_tools", "name", "comments", "source_file", "example", \ | ||
"trace_id" | ||
]) | ||
for e in self.examples: | ||
writer.writerow( | ||
|
@@ -298,6 +362,7 @@ def save_as(self, file_type: Literal["json", "csv"], dir_path: str, save_name: s | |
None, # Example does not have comments | ||
None, # Example does not have source file | ||
True, # Adding an Example | ||
e.trace_id | ||
] | ||
) | ||
|
||
|
@@ -316,6 +381,7 @@ def save_as(self, file_type: Literal["json", "csv"], dir_path: str, save_name: s | |
g.comments, | ||
g.source_file, | ||
False, # Adding a GroundTruthExample, not an Example | ||
g.trace_id | ||
] | ||
) | ||
else: | ||
|
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
input,actual_output,expected_output,context,retrieval_context,additional_metadata,tools_called,expected_tools,name,comments,source_file,example,trace_id | ||
test input,test output,expected output,context1;context2,retrieval1,{'key': 'value'},tool1,tool1;tool2,test example,,,True,123 | ||
test input,,expected output,context1,retrieval1,{'key': 'value'},tool1,tool1,,test comment,test.py,False,094121 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
{ | ||
"ground_truths": [ | ||
{ | ||
"input": "test input", | ||
"actual_output": null, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the difference between There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep |
||
"expected_output": "expected output", | ||
"context": [ | ||
"context1" | ||
], | ||
"retrieval_context": [ | ||
"retrieval1" | ||
], | ||
"additional_metadata": { | ||
"key": "value" | ||
}, | ||
"comments": "test comment", | ||
"tools_called": [ | ||
"tool1" | ||
], | ||
"expected_tools": [ | ||
"tool1" | ||
], | ||
"source_file": "test.py", | ||
"trace_id": "094121" | ||
} | ||
], | ||
"examples": [ | ||
{ | ||
"input": "test input", | ||
"actual_output": "test output", | ||
"expected_output": "expected output", | ||
"context": [ | ||
"context1", | ||
"context2" | ||
], | ||
"retrieval_context": [ | ||
"retrieval1" | ||
], | ||
"additional_metadata": { | ||
"key": "value" | ||
}, | ||
"tools_called": [ | ||
"tool1" | ||
], | ||
"expected_tools": [ | ||
"tool1", | ||
"tool2" | ||
], | ||
"name": "test example", | ||
"example_id": null, | ||
"timestamp": "20241230_160117", | ||
"trace_id": "123" | ||
} | ||
] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do ground truths mean in relation to the examples? I thought the examples have the ground truth (with the
actual_output
field). There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks