@@ -90,15 +90,23 @@ def test_pull_all_user_dataset_stats(self, client: JudgmentClient):
90
90
91
91
def test_edit_dataset(self, client: JudgmentClient):
    """Verify that editing a stored dataset grows it by one example and one ground truth.

    The test pulls the dataset stored under the alias "test_dataset_7",
    records how many examples and ground truths it holds, submits one new
    example and one new ground truth through ``client.edit_dataset``, then
    pulls the dataset again and checks that both counts went up by exactly one.

    NOTE(review): nothing here creates "test_dataset_7"; it is presumably
    pushed by another test or fixture -- confirm before running in isolation.
    """
    alias = "test_dataset_7"

    # Snapshot the sizes before the edit so the check is relative, not absolute.
    before = client.pull_dataset(alias=alias)
    assert before, "Failed to pull dataset"
    expected_examples = len(before.examples) + 1
    expected_ground_truths = len(before.ground_truths) + 1

    extra_example = Example(input="input 3", actual_output="output 3")
    extra_ground_truth = GroundTruthExample(input="input 3", actual_output="output 3")
    client.edit_dataset(
        alias=alias,
        examples=[extra_example],
        ground_truths=[extra_ground_truth],
    )

    # Pull again so the assertions see the stored state after the edit.
    after = client.pull_dataset(alias=alias)
    assert after, "Failed to pull dataset"

    actual_examples = len(after.examples)
    assert actual_examples == expected_examples, (
        f"Dataset should have {expected_examples} examples, but has {actual_examples} "
    )

    actual_ground_truths = len(after.ground_truths)
    assert actual_ground_truths == expected_ground_truths, (
        f"Dataset should have {expected_ground_truths} ground truths, but has {actual_ground_truths} "
    )
102
110
103
111
104
112
def run_eval_helper (self , client : JudgmentClient , project_name : str , eval_run_name : str ):
0 commit comments