Skip to content

Commit 0fb99fc

Browse files
committed
performance analysis improvements
1 parent 6fc8418 commit 0fb99fc

File tree

1 file changed

+50
-16
lines changed

1 file changed

+50
-16
lines changed

performance/performance_analysis.py

Lines changed: 50 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,15 @@
66
import pandas as pd
77

88

9-
DEFAULT_INPUT = Path("../performance_log/performance_metrics.csv")
10-
DEFAULT_OUTPUT = Path("total_overhead_summary.csv")
9+
# Default paths (modify as needed from CLI with -i / -o)
10+
DEFAULT_INPUT = Path("../performance_log/performance_metrics_v5.csv")
11+
DEFAULT_OUTPUT = Path("overhead_summary.csv")
1112

1213

1314
def parse_and_average(cell: str | float | int | pd.NA) -> float | np.float64:
1415
"""
1516
Convert a semicolon-separated list of numbers (seconds) to the mean
16-
in **milliseconds**.
17+
in **milliseconds**. If the cell is empty or NaN → np.nan.
1718
"""
1819
if pd.isna(cell):
1920
return np.nan
@@ -25,9 +26,9 @@ def parse_and_average(cell: str | float | int | pd.NA) -> float | np.float64:
2526

2627

2728
def build_parser() -> argparse.ArgumentParser:
28-
""" Builds the command-line argument parser for the script."""
29+
"""Builds the command-line argument parser for the script."""
2930
parser = argparse.ArgumentParser(
30-
description="Analyse PerformanceMonitor CSV and create a summary.")
31+
description="Analyse PerformanceMonitor CSV and create an overhead summary.")
3132
parser.add_argument(
3233
"-i", "--input", type=Path, default=DEFAULT_INPUT,
3334
help=f"Path to the raw PerformanceMonitor CSV (default: {DEFAULT_INPUT})")
@@ -40,23 +41,56 @@ def build_parser() -> argparse.ArgumentParser:
4041
def main() -> None:
4142
args = build_parser().parse_args()
4243
df = pd.read_csv(args.input)
43-
df.columns = df.columns.str.strip() # remove any whitespace in headers
44+
df.columns = df.columns.str.strip() # Remove spaces from column names
4445

45-
required_cols = {"Client Protocol", "Simulation Type", "Total Overheads"}
46+
# Check that required columns exist
47+
required_cols = {
48+
"Client Protocol",
49+
"Simulation Type",
50+
"Input Overhead",
51+
"Output Overheads",
52+
"Total Overheads",
53+
}
4654
missing = required_cols - set(df.columns)
4755
if missing:
48-
raise KeyError(f"Missing columns in CSV: {', '.join(missing)}")
56+
raise KeyError(f"Missing columns in CSV: {', '.join(sorted(missing))}")
4957

50-
# Compute mean Total Overhead per operation
51-
df["Avg Total Overhead"] = df["Total Overheads"].apply(parse_and_average)
58+
# Convert the three overhead columns to milliseconds (average per row)
59+
df["Avg Input Overhead"] = df["Input Overhead"].apply(parse_and_average)
60+
df["Avg Output Overhead"] = df["Output Overheads"].apply(parse_and_average)
61+
df["Avg Total Overhead"] = df["Total Overheads"].apply(parse_and_average)
5262

53-
# Group and aggregate statistics
63+
# Group by Client Protocol + Simulation Type and calculate statistics
5464
groups = df.groupby(["Client Protocol", "Simulation Type"])
55-
summary = groups["Avg Total Overhead"].agg(
56-
Median="median",
57-
StdDev="std",
58-
Pct5=lambda x: np.percentile(x.dropna(), 5),
59-
Pct95=lambda x: np.percentile(x.dropna(), 95),
65+
66+
summary = groups.agg(
67+
# Input Overhead
68+
Input_Median = pd.NamedAgg(
69+
column="Avg Input Overhead", aggfunc="median"),
70+
Input_StdDev = pd.NamedAgg(
71+
column="Avg Input Overhead", aggfunc="std"),
72+
Input_Pct5 = pd.NamedAgg(
73+
column="Avg Input Overhead", aggfunc=lambda x: np.percentile(x.dropna(), 5)),
74+
Input_Pct95 = pd.NamedAgg(
75+
column="Avg Input Overhead", aggfunc=lambda x: np.percentile(x.dropna(), 95)),
76+
# Output Overhead
77+
Output_Median = pd.NamedAgg(
78+
column="Avg Output Overhead", aggfunc="median"),
79+
Output_StdDev = pd.NamedAgg(
80+
column="Avg Output Overhead", aggfunc="std"),
81+
Output_Pct5 = pd.NamedAgg(
82+
column="Avg Output Overhead", aggfunc=lambda x: np.percentile(x.dropna(), 5)),
83+
Output_Pct95 = pd.NamedAgg(
84+
column="Avg Output Overhead", aggfunc=lambda x: np.percentile(x.dropna(), 95)),
85+
# Total Overhead
86+
Total_Median = pd.NamedAgg(
87+
column="Avg Total Overhead", aggfunc="median"),
88+
Total_StdDev = pd.NamedAgg(
89+
column="Avg Total Overhead", aggfunc="std"),
90+
Total_Pct5 = pd.NamedAgg(
91+
column="Avg Total Overhead", aggfunc=lambda x: np.percentile(x.dropna(), 5)),
92+
Total_Pct95 = pd.NamedAgg(
93+
column="Avg Total Overhead", aggfunc=lambda x: np.percentile(x.dropna(), 95)),
6094
).reset_index()
6195

6296
summary.to_csv(args.output, index=False)

0 commit comments

Comments
 (0)