Commit bc84c20

refactor(metadata_tracking_dashboard.py): update metadata file list and file open usage
Refactor metadata file handling by replacing sorted() with an explicit, ordered list that now includes version v8, and by simplifying the file-open syntax.
1 parent 8963928 commit bc84c20
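
Note: the commit does not say why sorted() was dropped, but a plausible reason (an assumption, not stated above) is that sorted() orders these paths lexicographically, which puts "v10" before "v2"; an explicitly ordered list keeps the versions in numeric order and makes the new v8 entry easy to slot in. The "simplifying file opening syntax" part drops the explicit "r" argument, which is already open()'s default mode. A minimal sketch of the ordering pitfall (illustrative paths only):

    # Lexicographic sorting of version-named paths vs. intended numeric order.
    paths = [
        "data/v2/v2_metadata.json",
        "data/v8/v8_metadata.json",
        "data/v10/v10_metadata.json",
    ]
    print(sorted(paths))
    # ['data/v10/v10_metadata.json', 'data/v2/v2_metadata.json', 'data/v8/v8_metadata.json']
    # "v10" sorts before "v2" because strings compare character by character.

    # open(p) is equivalent to open(p, "r"): text read mode is the default.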

File tree

1 file changed

dependencies/metadata/metadata_tracking_dashboard.py

Lines changed: 38 additions & 34 deletions
@@ -1,37 +1,39 @@
 import json
 import os
-import plotly.express as px
-import plotly.graph_objects as go
+
 import pandas as pd
-from dash import Dash, dcc, html, dash_table
+import plotly.express as px
+from dash import Dash, dash_table, dcc, html
 
 # Modify these paths as needed or dynamically discover them.
 project_root = os.getenv("PROJECT_ROOT")
 if not project_root:
     raise ValueError
 
-METADATA_FILES = sorted([
-    os.path.join(project_root,"data/v0/v0_metadata.json"),
-    os.path.join(project_root,"data/v1/v1_metadata.json"),
-    os.path.join(project_root,"data/v2/v2_metadata.json"),
-    os.path.join(project_root,"data/v3/v3_metadata.json"),
-    os.path.join(project_root,"data/v4/v4_metadata.json"),
-    os.path.join(project_root,"data/v5/v5_metadata.json"),
-    os.path.join(project_root,"data/v5_1/v5_1_metadata.json"),
-    os.path.join(project_root,"data/v5_2/v5_2_metadata.json"),
-    os.path.join(project_root,"data/v6/v6_metadata.json"),
-    os.path.join(project_root,"data/v7/v7_metadata.json"),
-    os.path.join(project_root,"data/v9/v9_metadata.json"),
-    os.path.join(project_root,"data/v10/v10_metadata.json"),
-    os.path.join(project_root,"data/v11/v11_metadata.json"),
-    os.path.join(project_root,"data/v12/v12_metadata.json"),
-    os.path.join(project_root,"data/v13/v13_metadata.json"),
-])
+METADATA_FILES = [
+    os.path.join(project_root, "data/v0/v0_metadata.json"),
+    os.path.join(project_root, "data/v1/v1_metadata.json"),
+    os.path.join(project_root, "data/v2/v2_metadata.json"),
+    os.path.join(project_root, "data/v3/v3_metadata.json"),
+    os.path.join(project_root, "data/v4/v4_metadata.json"),
+    os.path.join(project_root, "data/v5/v5_metadata.json"),
+    os.path.join(project_root, "data/v5_1/v5_1_metadata.json"),
+    os.path.join(project_root, "data/v5_2/v5_2_metadata.json"),
+    os.path.join(project_root, "data/v6/v6_metadata.json"),
+    os.path.join(project_root, "data/v7/v7_metadata.json"),
+    os.path.join(project_root, "data/v8/v8_metadata.json"),
+    os.path.join(project_root, "data/v9/v9_metadata.json"),
+    os.path.join(project_root, "data/v10/v10_metadata.json"),
+    os.path.join(project_root, "data/v11/v11_metadata.json"),
+    os.path.join(project_root, "data/v12/v12_metadata.json"),
+    os.path.join(project_root, "data/v13/v13_metadata.json"),
+]
+
 
 def load_metadata(file_paths):
     records = []
     for p in file_paths:
-        with open(p, "r") as f:
+        with open(p) as f:
             meta = json.load(f)
             version = os.path.basename(p).split("_metadata.json")[0] # e.g. "v10"
             record = {
@@ -47,7 +49,8 @@ def load_metadata(file_paths):
             records.append(record)
     return pd.DataFrame(records)
 
-df_main = load_metadata(METADATA_FILES).sort_values("version")
+
+df_main = load_metadata(METADATA_FILES)
 
 # Create figures
 fig_file_size = px.bar(
@@ -90,31 +93,32 @@ def load_metadata(file_paths):
 )
 
 # Build detail about columns from each version
-# We'll flatten out each version's column metadata into a long DataFrame
 details_records = []
 for p in METADATA_FILES:
-    with open(p, "r") as f:
+    with open(p) as f:
         meta = json.load(f)
         version = os.path.basename(p).split("_metadata.json")[0]
         columns_meta = meta.get("columns", {})
         for col_name, col_info in columns_meta.items():
-            details_records.append({
-                "version": version,
-                "column_name": col_name,
-                "data_type": col_info.get("data_type"),
-                "num_missing": col_info.get("num_missing"),
-                "unique_values": col_info.get("unique_values"),
-                "memory_usage_bytes": col_info.get("memory_usage_bytes"),
-            })
+            details_records.append(
+                {
+                    "version": version,
+                    "column_name": col_name,
+                    "data_type": col_info.get("data_type"),
+                    "num_missing": col_info.get("num_missing"),
+                    "unique_values": col_info.get("unique_values"),
+                    "memory_usage_bytes": col_info.get("memory_usage_bytes"),
+                }
+            )
 
-df_details = pd.DataFrame(details_records).sort_values(["version", "column_name"])
+df_details = pd.DataFrame(details_records)
 
 fig_missing = px.box(
     df_details,
     x="version",
     y="num_missing",
     points="all",
-    title="Distribution of 'num_missing' per Column by Version"
+    title="Distribution of 'num_missing' per Column by Version",
 )
 fig_missing.update_layout(xaxis_title="Version", yaxis_title="num_missing")
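
Note on the removed sort_values() calls (an assumption about intent, not stated in the commit): with METADATA_FILES now explicitly ordered, pd.DataFrame(records) preserves that insertion order, so sorting on the string column "version" is unnecessary and would actually push "v10" ahead of "v2". A small sketch:

    import pandas as pd

    # Rows keep the order of the input list of dicts.
    records = [{"version": "v2"}, {"version": "v8"}, {"version": "v10"}]
    df = pd.DataFrame(records)
    print(df["version"].tolist())                         # ['v2', 'v8', 'v10']

    # Sorting the string column would break numeric version order.
    print(df.sort_values("version")["version"].tolist())  # ['v10', 'v2', 'v8']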
