
Commit c8ff363

Merge branch 'upgrade-trust-integration' into main-controller-in-docker
2 parents: 1bdb54c + edab875

File tree

25 files changed: +7630 −415 lines changed


nebula/addons/trustworthiness/benchmarks/CPU_benchmarks_v4.csv

Lines changed: 3836 additions & 0 deletions
Large diffs are not rendered by default.

nebula/addons/trustworthiness/benchmarks/GPU_benchmarks_v7.csv

Lines changed: 2318 additions & 0 deletions
Large diffs are not rendered by default.

nebula/addons/trustworthiness/calculation.py

Lines changed: 22 additions & 34 deletions
@@ -17,6 +17,8 @@
 from scipy.stats import variation
 from torch import nn, optim
 
+from nebula.addons.trustworthiness.utils import read_csv
+
 dirname = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
 
@@ -243,19 +245,17 @@ def get_global_privacy_risk(dp, epsilon, n):
         return 1
 
 
-def get_elapsed_time(scenario):
+def get_elapsed_time(start_time, end_time):
     """
     Calculates the elapsed time during the execution of the scenario.
 
     Args:
-        scenario (object): Scenario required.
+        start_time (datetime): Start datetime.
+        end_time (datetime): End datetime.
 
     Returns:
         float: The elapsed time.
     """
-    start_time = scenario[1]
-    end_time = scenario[2]
-
     start_date = datetime.strptime(start_time, "%d/%m/%Y %H:%M:%S")
     end_date = datetime.strptime(end_time, "%d/%m/%Y %H:%M:%S")
 
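For reference, a minimal sketch of a call site under the new signature, assuming the caller already holds the two timestamp strings in the "%d/%m/%Y %H:%M:%S" format parsed above (the values below are placeholders, not taken from the commit):

# Hypothetical call site: pass raw timestamp strings instead of a scenario tuple.
from nebula.addons.trustworthiness.calculation import get_elapsed_time

elapsed = get_elapsed_time("01/01/2024 10:00:00", "01/01/2024 10:42:30")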

@@ -287,7 +287,7 @@ def get_bytes_models(models_files):
     return avg_model_size
 
 
-def get_bytes_sent_recv(bytes_sent_files, bytes_recv_files):
+def get_bytes_sent_recv(scenario_name):
     """
     Calculates the mean bytes sent and received of the nodes.
 
@@ -300,29 +300,23 @@ def get_bytes_sent_recv(bytes_sent_files, bytes_recv_files):
     """
     total_upload_bytes = 0
     total_download_bytes = 0
-    number_files = len(bytes_sent_files)
 
-    for file_bytes_sent, file_bytes_recv in zip(bytes_sent_files, bytes_recv_files, strict=False):
-        with open(file_bytes_sent) as f:
-            bytes_sent = f.read()
+    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
 
-        with open(file_bytes_recv) as f:
-            bytes_recv = f.read()
+    data = read_csv(data_file)
 
-        total_upload_bytes += int(bytes_sent)
-        total_download_bytes += int(bytes_recv)
+    number_files = len(data)
 
+    total_upload_bytes = int(data["bytes_sent"].sum())
+    total_download_bytes = int(data["bytes_recv"].sum())
+
     avg_upload_bytes = total_upload_bytes / number_files
     avg_download_bytes = total_download_bytes / number_files
-    return (
-        total_upload_bytes,
-        total_download_bytes,
-        avg_upload_bytes,
-        avg_download_bytes,
-    )
 
+    return total_upload_bytes, total_download_bytes, avg_upload_bytes, avg_download_bytes
 
-def get_avg_loss_accuracy(loss_files, accuracy_files):
+
+def get_avg_loss_accuracy(scenario_name):
     """
     Calculates the mean accuracy and loss models of the nodes.
 
@@ -335,28 +329,22 @@ def get_avg_loss_accuracy(loss_files, accuracy_files):
     """
     total_accuracy = 0
     total_loss = 0
-    number_files = len(loss_files)
-    accuracies = []
 
-    for file_loss, file_accuracy in zip(loss_files, accuracy_files, strict=False):
-        with open(file_loss) as f:
-            loss = f.read()
+    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
 
-        with open(file_accuracy) as f:
-            accuracy = f.read()
+    data = read_csv(data_file)
 
-        total_loss += float(loss)
-        total_accuracy += float(accuracy)
-        accuracies.append(float(accuracy))
+    number_files = len(data)
 
+    total_loss = data["loss"].sum()
+    total_accuracy = data["accuracy"].sum()
+
     avg_loss = total_loss / number_files
     avg_accuracy = total_accuracy / number_files
-
-    std_accuracy = statistics.stdev(accuracies)
+    std_accuracy = statistics.stdev(data["accuracy"])
 
     return avg_loss, avg_accuracy, std_accuracy
 
-
 def get_feature_importance_cv(model, test_sample):
     """
     Calculates the coefficient of variation of the feature importance.
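Both refactored helpers now aggregate per-node values from a single data_results.csv instead of reading one file per node. Below is a minimal standalone sketch of that aggregation, assuming the CSV loads into a pandas DataFrame with bytes_sent, bytes_recv, loss, and accuracy columns and one row per node; the inline sample data and the exact column layout are assumptions drawn from the diff, not verified against the repository:

import statistics

import pandas as pd

# Assumed shape of <NEBULA_LOGS_DIR>/<scenario>/trustworthiness/data_results.csv: one row per node.
data = pd.DataFrame({
    "bytes_sent": [1_000_000, 1_200_000],
    "bytes_recv": [900_000, 1_100_000],
    "loss": [0.35, 0.41],
    "accuracy": [0.88, 0.84],
})

number_files = len(data)  # one row per node, so this is the node count

# Same aggregation as the new get_bytes_sent_recv()
total_upload_bytes = int(data["bytes_sent"].sum())
total_download_bytes = int(data["bytes_recv"].sum())
avg_upload_bytes = total_upload_bytes / number_files
avg_download_bytes = total_download_bytes / number_files

# Same aggregation as the new get_avg_loss_accuracy()
avg_loss = data["loss"].sum() / number_files
avg_accuracy = data["accuracy"].sum() / number_files
std_accuracy = statistics.stdev(data["accuracy"])  # needs at least two rows

print(avg_upload_bytes, avg_download_bytes, avg_loss, avg_accuracy, std_accuracy)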

nebula/addons/trustworthiness/factsheet.py

Lines changed: 68 additions & 136 deletions
Large diffs are not rendered by default.
nebula/addons/trustworthiness/graphics.py

Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
from abc import ABC
import logging
import torch
import os
import pickle
import lightning as pl
from torchmetrics.classification import MulticlassAccuracy, MulticlassRecall, MulticlassPrecision, MulticlassF1Score, MulticlassConfusionMatrix
from torchmetrics import MetricCollection
import seaborn as sns
import matplotlib.pyplot as plt
import json
import pandas as pd

from nebula.core.utils.nebulalogger_tensorboard import NebulaTensorBoardLogger

logging.basicConfig(level=logging.INFO)

class Graphics():
    def __init__(
        self,
        scenario_start_time,
        scenario_name
    ):
        self.scenario_start_time = scenario_start_time
        self.scenario_name = scenario_name
        log_dir = os.path.join(os.environ["NEBULA_LOGS_DIR"], scenario_name)
        self.nebulalogger = NebulaTensorBoardLogger(scenario_start_time, f"{log_dir}", name="metrics", version=f"trust", log_graph=True)

    def __log_figure(self, df, pillar, color, notion_y_pos = -0.4, figsize=(10,6)):
        filtered_df = df[df['Pillar'] == pillar].copy()

        filtered_df.loc[:, 'Metric'] = filtered_df['Metric'].astype(str).str.replace('_', ' ')
        filtered_df.loc[:, 'Metric'] = filtered_df['Metric'].apply(lambda x: str(x).title())

        filtered_df.loc[:, 'Notion'] = filtered_df['Notion'].astype(str).str.replace('_', ' ')
        filtered_df.loc[:, 'Notion'] = filtered_df['Notion'].apply(lambda x: str(x).title())

        unique_notion_count = filtered_df['Notion'].nunique()
        palette = [color] * unique_notion_count

        plt.figure(figsize=figsize)
        ax = sns.barplot(data=filtered_df, x='Metric', y='Metric Score', hue='Notion', palette=palette, dodge=False)

        x_positions = range(len(filtered_df))

        notion_scores = {}

        for i in range(len(filtered_df)):
            row = filtered_df.iloc[i]
            notion = row['Notion']
            notion_score = row['Notion Score']
            metric_score = row['Metric Score']

            if notion not in notion_scores:
                metrics_for_notion = filtered_df[filtered_df['Notion'] == notion]['Metric']
                start_pos = x_positions[i]
                end_pos = x_positions[i + len(metrics_for_notion) - 1]

                notion_x_pos = (start_pos + end_pos) / 2
                ax.axhline(notion_score, ls='--', color='black', lw=0.5, xmin=start_pos/len(x_positions), xmax=(end_pos+1)/len(x_positions))
                ax.text(notion_x_pos, notion_score + 0.01, f"{notion_score:.2f}", ha='center', va='bottom', fontsize=10, color='black')  # Black text
                notion_scores[notion] = notion_score

        ax.set_xticks(x_positions)
        ax.set_xticklabels(filtered_df['Metric'], rotation=45, ha='right', fontsize=10)

        seen_notions = set()
        for i, (metric, notion) in enumerate(zip(filtered_df['Metric'], filtered_df['Notion'])):
            if notion not in seen_notions:
                metrics_for_notion = filtered_df[filtered_df['Notion'] == notion]['Metric']
                start_pos = x_positions[i]
                end_pos = x_positions[i + len(metrics_for_notion) - 1]

                notion_x_pos = (start_pos + end_pos) / 2

                ax.text(notion_x_pos, notion_y_pos, notion, ha='center', va='center', fontsize=10, color='black')

                seen_notions.add(notion)

        for i, v in enumerate(filtered_df['Metric Score']):
            ax.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=10, color='black')

        plt.xlabel('Metrics and notions', labelpad=35)
        plt.ylabel('Score')
        plt.title(f'Metrics and notion scores for the {pillar} pillar')

        ax.legend_.remove()

        plt.tight_layout()

        self.nebulalogger.log_figure(ax.get_figure(), 0, f"Trust/Pillar/{pillar}")
        plt.close()

    def graphics(self):
        results_file = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), self.scenario_name, "trustworthiness", "nebula_trust_results.json")
        with open(results_file, 'r') as f:
            results = json.load(f)

        pillars_list = []
        notion_names = []
        notion_scores = []
        metric_names = []
        metric_scores = []

        for pillar in results["pillars"]:
            for key, value in pillar.items():
                pillar_name = key
                if "notions" in value:
                    for notion in value["notions"]:
                        for notion_key, notion_value in notion.items():
                            notion_name = notion_key
                            notion_score = notion_value["score"]
                            for metric in notion_value["metrics"]:
                                for metric_key, metric_value in metric.items():
                                    metric_name = metric_key
                                    metric_score = metric_value["score"]

                                    pillars_list.append(pillar_name)
                                    notion_names.append(notion_name)
                                    notion_scores.append(notion_score)
                                    metric_names.append(metric_name)
                                    metric_scores.append(metric_score)

        df = pd.DataFrame({
            "Pillar": pillars_list,
            "Notion": notion_names,
            "Notion Score": notion_scores,
            "Metric": metric_names,
            "Metric Score": metric_scores
        })

        self.__log_figure(df, 'robustness', "#F8D3DF")
        self.__log_figure(df, "privacy", "#DA8D8B", -0.2)
        self.__log_figure(df, "fairness", "#DDDDDD")
        self.__log_figure(df, "explainability", "#FCEFC3")
        self.__log_figure(df, "accountability", "#8FAADC", -0.3)
        self.__log_figure(df, "architectural_soundness", "#DBB9FA", -0.3)
        self.__log_figure(df, "sustainability", "#BBFDAF", -0.5, figsize=(12,8))

        categories = [
            "robustness",
            "privacy",
            "fairness",
            "explainability",
            "accountability",
            "architectural_soundness",
            "sustainability"
        ]

        scores = [results["pillars"][i][category]["score"] for i, category in enumerate(categories)]

        trust_score = results["trust_score"]
        categories.append("trust_score")
        scores.append(trust_score)

        palette = ["#F8D3DF", "#DA8D8B", "#DDDDDD", "#FCEFC3", "#8FAADC", "#DBB9FA", "#BBFDAF", "#BF9000"]

        plt.figure(figsize=(10, 8))
        ax = sns.barplot(x=categories, y=scores, palette=palette, hue=categories, legend=False)
        ax.set_xlabel("Pillar")
        ax.set_ylabel("Score")
        ax.set_title("Pillars and trust scores")

        for i, v in enumerate(scores):
            ax.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=10)

        name_labels = [
            "Robustness",
            "Privacy",
            "Fairness",
            "Explainability",
            "Accountability",
            "Architectural Soundness",
            "Sustainability",
            "Trust Score"
        ]

        ax.set_xticks(range(len(categories)))
        ax.set_xticklabels(name_labels, rotation=45)

        self.nebulalogger.log_figure(ax.get_figure(), 0, f"Trust/AllPillars")
        plt.close()
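A minimal sketch of how this class appears to be driven (mirroring the metric.py change below), assuming NEBULA_LOGS_DIR is set and <scenario>/trustworthiness/nebula_trust_results.json already exists; the scenario values here are placeholders, not taken from the commit:

import os

from nebula.addons.trustworthiness.graphics import Graphics

# Placeholder values; in the commit these come from TrustMetricManager.
os.environ.setdefault("NEBULA_LOGS_DIR", "/tmp/nebula/logs")
scenario_start_time = "01/01/2024 10:00:00"
scenario_name = "my_scenario"

graphics = Graphics(scenario_start_time, scenario_name)
graphics.graphics()  # logs one bar chart per pillar plus the overall trust-score chart to TensorBoard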

nebula/addons/trustworthiness/metric.py

Lines changed: 13 additions & 8 deletions
@@ -2,6 +2,7 @@
 import logging
 import os
 
+from nebula.addons.trustworthiness.graphics import Graphics
 from nebula.addons.trustworthiness.pillar import TrustPillar
 from nebula.addons.trustworthiness.utils import write_results_json
 
@@ -15,12 +16,13 @@ class TrustMetricManager:
     Manager class to help store the output directory and handle calls from the FL framework.
     """
 
-    def __init__(self):
+    def __init__(self, scenario_start_time):
         self.factsheet_file_nm = "factsheet.json"
         self.eval_metrics_file_nm = "eval_metrics.json"
         self.nebula_trust_results_nm = "nebula_trust_results.json"
+        self.scenario_start_time = scenario_start_time
 
-    def evaluate(self, scenario, weights, use_weights=False):
+    def evaluate(self, experiment_name, weights, use_weights=False):
         """
         Evaluates the trustworthiness score.
 
@@ -30,10 +32,10 @@ def evaluate(self, scenario, weights, use_weights=False):
             use_weights (bool): True to turn on the weights in the metric config file, default to False.
         """
         # Get scenario name
-        scenario_name = scenario[0]
-        factsheet_file = os.path.join(dirname, f"files/{scenario_name}/{self.factsheet_file_nm}")
-        metrics_cfg_file = os.path.join(dirname, f"configs/{self.eval_metrics_file_nm}")
-        results_file = os.path.join(dirname, f"files/{scenario_name}/{self.nebula_trust_results_nm}")
+        scenario_name = experiment_name
+        factsheet_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", self.factsheet_file_nm)
+        metrics_cfg_file = os.path.join(dirname, "configs", self.eval_metrics_file_nm)
+        results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", self.nebula_trust_results_nm)
 
         if not os.path.exists(factsheet_file):
             logger.error(f"{factsheet_file} is missing! Please check documentation.")
@@ -43,7 +45,7 @@ def evaluate(self, scenario, weights, use_weights=False):
             logger.error(f"{metrics_cfg_file} is missing! Please check documentation.")
             return
 
-        with open(factsheet_file) as f, open(metrics_cfg_file) as m:
+        with open(factsheet_file, "r") as f, open(metrics_cfg_file, "r") as m:
             factsheet = json.load(f)
             metrics_cfg = json.load(m)
             metrics = metrics_cfg.items()
@@ -55,10 +57,13 @@ def evaluate(self, scenario, weights, use_weights=False):
             for key, value in metrics:
                 pillar = TrustPillar(key, value, input_docs, use_weights)
                 score, result = pillar.evaluate()
-                weight = weights.get(key)
+                weight = weights.get(key) / 100
                 final_score += weight * score
                 result_print.append([key, score])
                 result_json["pillars"].append(result)
             final_score = round(final_score, 2)
             result_json["trust_score"] = final_score
             write_results_json(results_file, result_json)
+
+            graphics = Graphics(self.scenario_start_time, scenario_name)
+            graphics.graphics()
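Taken together, a hedged sketch of the new call path, assuming weights is a dict of per-pillar percentages summing to 100 (the key names match the pillar categories above, but the exact values and the scenario name below are illustrative, not taken from the commit):

from nebula.addons.trustworthiness.metric import TrustMetricManager

# Illustrative per-pillar weights in percent; evaluate() now divides each by 100.
weights = {
    "robustness": 10,
    "privacy": 10,
    "fairness": 10,
    "explainability": 10,
    "accountability": 10,
    "architectural_soundness": 10,
    "sustainability": 40,
}

manager = TrustMetricManager(scenario_start_time="01/01/2024 10:00:00")
manager.evaluate("my_scenario", weights, use_weights=True)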
