1
+ #!/usr/bin/env python3
2
+
3
+ import os
4
+ import re
5
+ import csv
6
+ import pandas as pd
7
+ from collections import defaultdict
8
+ import argparse
9
+
10
def normalize_action(action_input):
    """Extract the base action verb from a full action string.

    Returns None when the value is NaN, empty, the literal string "None"
    (case-insensitive), or not in identifier-followed-by-'(' form.
    """
    # NaN must be checked before str() — str(nan) would become "nan".
    if pd.isna(action_input):
        return None

    text = str(action_input).strip()
    if not text or text.lower() == 'none':
        return None

    # A real action looks like verb(...) — anything else is rejected.
    m = re.match(r'^([a-zA-Z_][a-zA-Z0-9_]*)\s*\(', text)
    return m.group(1) if m else None
def process_log_file(file_path):
    """Process a single task_stepwise_log.csv file and return action counts by shop.

    Args:
        file_path (str): Path to a tab-separated task_stepwise_log.csv file.

    Returns:
        dict: Mapping shop_id -> {action name -> count}. Returns an empty
        dict when required columns are missing or the file cannot be read.
    """
    try:
        # Fix: sep was '\t ' (tab plus space) — pandas treats a multi-char
        # separator as a regex, which never matches a plain TSV file, so
        # the column check below would always fail.
        df = pd.read_csv(file_path, sep='\t')

        # Make sure required columns exist before touching any rows.
        if 'shop_id' not in df.columns or 'action' not in df.columns:
            print(f"Warning: Missing required columns in {file_path}")
            return {}

        shop_action_counts = defaultdict(lambda: defaultdict(int))

        # Iterating zipped columns is much faster than DataFrame.iterrows().
        for shop_id_val, action_input in zip(df['shop_id'], df['action']):
            normalized_action = normalize_action(action_input)
            # None means the value is not a valid/countable action.
            if normalized_action is None:
                continue

            # Missing shop ids are grouped under the literal key 'none'.
            shop_id_str = 'none' if pd.isna(shop_id_val) else str(shop_id_val)
            shop_action_counts[shop_id_str][normalized_action] += 1

        return shop_action_counts

    except Exception as e:
        # Best-effort: report the failure and let the caller skip this file.
        print(f"Error processing {file_path}: {e}")
        return {}
def aggregate_statistics(study_dir):
    """Aggregate action statistics from all task_stepwise_log.csv files in a study directory.

    Args:
        study_dir (str): Path to the study directory.

    Returns:
        tuple: (shop_id -> {action -> count} mapping,
                sorted list of shop ids,
                sorted list of action names)
    """
    totals = defaultdict(lambda: defaultdict(int))
    shop_ids = set()
    action_names = set()

    # Recursively collect every task_stepwise_log.csv under study_dir.
    log_paths = [
        os.path.join(root, fname)
        for root, _, files in os.walk(study_dir)
        for fname in files
        if fname == 'task_stepwise_log.csv'
    ]

    # Merge per-file counts into the study-wide totals.
    for path in log_paths:
        for shop, actions in process_log_file(path).items():
            shop_ids.add(shop)
            for action, count in actions.items():
                action_names.add(action)
                totals[shop][action] += count

    return totals, sorted(shop_ids), sorted(action_names)
def save_results_to_csv(shop_action_counts, all_shops, all_actions, output_path):
    """Write the aggregated per-shop action counts to a CSV file.

    One row per shop, one column per action; action columns are
    de-duplicated and sorted alphabetically. Missing counts are written
    as 0.
    """
    # Build the column order once, outside the per-shop loop.
    action_columns = sorted(set(all_actions))

    with open(output_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['shop_id'] + action_columns)

        # all_shops is expected to arrive pre-sorted from aggregate_statistics.
        for shop in all_shops:
            counts = shop_action_counts[shop]
            # .get() defaults to 0 for actions this shop never performed.
            writer.writerow([shop] + [counts.get(name, 0) for name in action_columns])
def process_study_directory(study_dir):
    """Aggregate and persist action statistics for one study directory.

    Writes action_statistics.csv into study_dir itself. Returns True when
    results were written, False when the logs held no usable data.
    """
    print(f"Processing study directory: {study_dir}")

    # Results land next to the logs they summarize.
    output_path = os.path.join(study_dir, 'action_statistics.csv')

    shop_action_counts, all_shops, all_actions = aggregate_statistics(study_dir)

    # Nothing to write if either axis of the table is empty.
    if not (all_shops and all_actions):
        print(f"No valid data found in log files for study: {os.path.basename(study_dir)}")
        return False

    save_results_to_csv(shop_action_counts, all_shops, all_actions, output_path)
    print(f"Results saved to {output_path}")
    return True
def main(base_dir="../AgentLab/study_results"):
    """Process every study subdirectory under the results directory.

    Args:
        base_dir (str): Directory containing one subdirectory per study.
            Generalized from a hard-coded local variable into a defaulted
            parameter so the function is reusable; the default preserves
            the original behavior for `exit(main())`.

    Returns:
        int: Process exit code — 0 on success, 1 when base_dir is missing
        or contains no study subdirectories.
    """
    if not os.path.isdir(base_dir):
        print(f"Error: Base directory {base_dir} does not exist")
        return 1

    # Each immediate subdirectory is treated as one study.
    study_dirs = [
        os.path.join(base_dir, entry)
        for entry in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, entry))
    ]

    if not study_dirs:
        print(f"No study directories found in {base_dir}")
        return 1

    # Count how many studies produced a statistics file.
    successful = sum(1 for study_dir in study_dirs if process_study_directory(study_dir))

    print(f"Completed processing {successful} out of {len(study_dirs)} study directories")
    return 0
+ if __name__ == "__main__" :
163
+ exit (main ())
0 commit comments