6
6
import pandas as pd
7
7
8
8
9
- DEFAULT_INPUT = Path ("../performance_log/performance_metrics.csv" )
10
- DEFAULT_OUTPUT = Path ("total_overhead_summary.csv" )
9
+ # Default paths (override on the command line with -i / -o)
10
+ DEFAULT_INPUT = Path ("../performance_log/performance_metrics_v5.csv" )
11
+ DEFAULT_OUTPUT = Path ("overhead_summary.csv" )
11
12
12
13
13
14
def parse_and_average (cell : str | float | int | pd .NA ) -> float | np .float64 :
14
15
"""
15
16
Convert a semicolon-separated list of numbers (seconds) to the mean
16
- in **milliseconds**.
17
+ in **milliseconds**. If the cell is empty or NaN → np.nan.
17
18
"""
18
19
if pd .isna (cell ):
19
20
return np .nan
@@ -25,9 +26,9 @@ def parse_and_average(cell: str | float | int | pd.NA) -> float | np.float64:
25
26
26
27
27
28
def build_parser () -> argparse .ArgumentParser :
28
- """ Builds the command-line argument parser for the script."""
29
+ """Builds the command-line argument parser for the script."""
29
30
parser = argparse .ArgumentParser (
30
- description = "Analyse PerformanceMonitor CSV and create a summary." )
31
+ description = "Analyse PerformanceMonitor CSV and create an overhead summary." )
31
32
parser .add_argument (
32
33
"-i" , "--input" , type = Path , default = DEFAULT_INPUT ,
33
34
help = f"Path to the raw PerformanceMonitor CSV (default: { DEFAULT_INPUT } )" )
@@ -40,23 +41,56 @@ def build_parser() -> argparse.ArgumentParser:
40
41
def _pct5(values: pd.Series) -> float:
    """Return the 5th percentile of *values* (NaN when no non-NaN value exists)."""
    # Series.quantile skips NaN itself and yields NaN when nothing remains,
    # so a group whose cells were all empty produces NaN instead of depending
    # on how np.percentile treats an empty array.
    return values.quantile(0.05)


def _pct95(values: pd.Series) -> float:
    """Return the 95th percentile of *values* (NaN when no non-NaN value exists)."""
    return values.quantile(0.95)


def main() -> None:
    """Read the raw metrics CSV and write per-group overhead statistics.

    The input CSV (``args.input``) must contain the columns
    ``Client Protocol``, ``Simulation Type``, ``Input Overhead``,
    ``Output Overheads`` and ``Total Overheads``.  Each overhead cell is
    reduced to a single per-row value with ``parse_and_average``; rows are
    then grouped by (``Client Protocol``, ``Simulation Type``) and, for each
    of the three overheads, the median, standard deviation, 5th and 95th
    percentile are written to ``args.output`` as
    ``<Input|Output|Total>_<Median|StdDev|Pct5|Pct95>`` columns.

    Raises:
        KeyError: if any required column is missing from the input CSV.
    """
    args = build_parser().parse_args()
    df = pd.read_csv(args.input)
    df.columns = df.columns.str.strip()  # tolerate padded header names

    # Output-name prefix -> raw CSV column holding the per-row samples.
    overhead_columns = {
        "Input": "Input Overhead",
        "Output": "Output Overheads",
        "Total": "Total Overheads",
    }

    # Fail early, with a deterministic (sorted) message, if the CSV layout
    # is not the one this script expects.
    required_cols = {
        "Client Protocol",
        "Simulation Type",
        *overhead_columns.values(),
    }
    missing = required_cols - set(df.columns)
    if missing:
        raise KeyError(f"Missing columns in CSV: {', '.join(sorted(missing))}")

    # Collapse every overhead cell to its per-row average.
    for prefix, raw_column in overhead_columns.items():
        df[f"Avg {prefix} Overhead"] = df[raw_column].apply(parse_and_average)

    # Statistic suffix -> aggregation.  The two percentile helpers have
    # distinct function names, which pandas needs when several callables
    # are applied to the same column.
    statistics = {
        "Median": "median",
        "StdDev": "std",
        "Pct5": _pct5,
        "Pct95": _pct95,
    }

    # Build the named aggregations in a fixed order:
    # Input_* first, then Output_*, then Total_*.
    named_aggs = {}
    for prefix in overhead_columns:
        for suffix, aggfunc in statistics.items():
            named_aggs[f"{prefix}_{suffix}"] = pd.NamedAgg(
                column=f"Avg {prefix} Overhead", aggfunc=aggfunc
            )

    groups = df.groupby(["Client Protocol", "Simulation Type"])
    summary = groups.agg(**named_aggs).reset_index()

    summary.to_csv(args.output, index=False)
0 commit comments