
Commit c8ff363

Merge branch 'upgrade-trust-integration' into main-controller-in-docker
2 parents: 1bdb54c + edab875

File tree

25 files changed: +7630 −415 lines changed


nebula/addons/trustworthiness/benchmarks/CPU_benchmarks_v4.csv

Lines changed: 3836 additions & 0 deletions
Large diffs are not rendered by default.

nebula/addons/trustworthiness/benchmarks/GPU_benchmarks_v7.csv

Lines changed: 2318 additions & 0 deletions
Large diffs are not rendered by default.

nebula/addons/trustworthiness/calculation.py

Lines changed: 22 additions & 34 deletions
@@ -17,6 +17,8 @@
 from scipy.stats import variation
 from torch import nn, optim
 
+from nebula.addons.trustworthiness.utils import read_csv
+
 dirname = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
 
@@ -243,19 +245,17 @@ def get_global_privacy_risk(dp, epsilon, n):
         return 1
 
 
-def get_elapsed_time(scenario):
+def get_elapsed_time(start_time, end_time):
     """
     Calculates the elapsed time during the execution of the scenario.
 
     Args:
-        scenario (object): Scenario required.
+        start_time (datetime): Start datetime.
+        end_time (datetime): End datetime.
 
     Returns:
         float: The elapsed time.
     """
-    start_time = scenario[1]
-    end_time = scenario[2]
-
     start_date = datetime.strptime(start_time, "%d/%m/%Y %H:%M:%S")
     end_date = datetime.strptime(end_time, "%d/%m/%Y %H:%M:%S")
 
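For reference, a minimal sketch of a call site under the new signature, assuming the caller already holds the two timestamp strings in the "%d/%m/%Y %H:%M:%S" format parsed above (the values below are placeholders, not taken from the commit):

# Hypothetical call site: pass raw timestamp strings instead of a scenario tuple.
from nebula.addons.trustworthiness.calculation import get_elapsed_time

elapsed = get_elapsed_time("01/01/2024 10:00:00", "01/01/2024 10:42:30")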

@@ -287,7 +287,7 @@ def get_bytes_models(models_files):
     return avg_model_size
 
 
-def get_bytes_sent_recv(bytes_sent_files, bytes_recv_files):
+def get_bytes_sent_recv(scenario_name):
     """
     Calculates the mean bytes sent and received of the nodes.
 
@@ -300,29 +300,23 @@ def get_bytes_sent_recv(bytes_sent_files, bytes_recv_files):
     """
     total_upload_bytes = 0
     total_download_bytes = 0
-    number_files = len(bytes_sent_files)
 
-    for file_bytes_sent, file_bytes_recv in zip(bytes_sent_files, bytes_recv_files, strict=False):
-        with open(file_bytes_sent) as f:
-            bytes_sent = f.read()
+    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
 
-        with open(file_bytes_recv) as f:
-            bytes_recv = f.read()
+    data = read_csv(data_file)
 
-        total_upload_bytes += int(bytes_sent)
-        total_download_bytes += int(bytes_recv)
+    number_files = len(data)
 
+    total_upload_bytes = int(data["bytes_sent"].sum())
+    total_download_bytes = int(data["bytes_recv"].sum())
+
     avg_upload_bytes = total_upload_bytes / number_files
     avg_download_bytes = total_download_bytes / number_files
-    return (
-        total_upload_bytes,
-        total_download_bytes,
-        avg_upload_bytes,
-        avg_download_bytes,
-    )
 
+    return total_upload_bytes, total_download_bytes, avg_upload_bytes, avg_download_bytes
 
-def get_avg_loss_accuracy(loss_files, accuracy_files):
+
+def get_avg_loss_accuracy(scenario_name):
     """
     Calculates the mean accuracy and loss models of the nodes.
 
@@ -335,28 +329,22 @@ def get_avg_loss_accuracy(loss_files, accuracy_files):
     """
     total_accuracy = 0
     total_loss = 0
-    number_files = len(loss_files)
-    accuracies = []
 
-    for file_loss, file_accuracy in zip(loss_files, accuracy_files, strict=False):
-        with open(file_loss) as f:
-            loss = f.read()
+    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
 
-        with open(file_accuracy) as f:
-            accuracy = f.read()
+    data = read_csv(data_file)
 
-        total_loss += float(loss)
-        total_accuracy += float(accuracy)
-        accuracies.append(float(accuracy))
+    number_files = len(data)
 
+    total_loss = data["loss"].sum()
+    total_accuracy = data["accuracy"].sum()
+
     avg_loss = total_loss / number_files
     avg_accuracy = total_accuracy / number_files
-
-    std_accuracy = statistics.stdev(accuracies)
+    std_accuracy = statistics.stdev(data["accuracy"])
 
     return avg_loss, avg_accuracy, std_accuracy
 
-
 def get_feature_importance_cv(model, test_sample):
     """
     Calculates the coefficient of variation of the feature importance.
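Both refactored helpers now aggregate per-node values from a single data_results.csv instead of reading one file per node. Below is a minimal standalone sketch of that aggregation, assuming the CSV loads into a pandas DataFrame with bytes_sent, bytes_recv, loss, and accuracy columns and one row per node; the inline sample data and the exact column layout are assumptions drawn from the diff, not verified against the repository:

import statistics

import pandas as pd

# Assumed shape of <NEBULA_LOGS_DIR>/<scenario>/trustworthiness/data_results.csv: one row per node.
data = pd.DataFrame({
    "bytes_sent": [1_000_000, 1_200_000],
    "bytes_recv": [900_000, 1_100_000],
    "loss": [0.35, 0.41],
    "accuracy": [0.88, 0.84],
})

number_files = len(data)  # one row per node, so this is the node count

# Same aggregation as the new get_bytes_sent_recv()
total_upload_bytes = int(data["bytes_sent"].sum())
total_download_bytes = int(data["bytes_recv"].sum())
avg_upload_bytes = total_upload_bytes / number_files
avg_download_bytes = total_download_bytes / number_files

# Same aggregation as the new get_avg_loss_accuracy()
avg_loss = data["loss"].sum() / number_files
avg_accuracy = data["accuracy"].sum() / number_files
std_accuracy = statistics.stdev(data["accuracy"])  # needs at least two rows

print(avg_upload_bytes, avg_download_bytes, avg_loss, avg_accuracy, std_accuracy)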

nebula/addons/trustworthiness/factsheet.py

Lines changed: 68 additions & 136 deletions
Large diffs are not rendered by default.
nebula/addons/trustworthiness/graphics.py

Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
from abc import ABC
import logging
import torch
import os
import pickle
import lightning as pl
from torchmetrics.classification import MulticlassAccuracy, MulticlassRecall, MulticlassPrecision, MulticlassF1Score, MulticlassConfusionMatrix
from torchmetrics import MetricCollection
import seaborn as sns
import matplotlib.pyplot as plt
import json
import pandas as pd

from nebula.core.utils.nebulalogger_tensorboard import NebulaTensorBoardLogger

logging.basicConfig(level=logging.INFO)

class Graphics():
    def __init__(
        self,
        scenario_start_time,
        scenario_name
    ):
        self.scenario_start_time = scenario_start_time
        self.scenario_name = scenario_name
        log_dir = os.path.join(os.environ["NEBULA_LOGS_DIR"], scenario_name)
        self.nebulalogger = NebulaTensorBoardLogger(scenario_start_time, f"{log_dir}", name="metrics", version=f"trust", log_graph=True)

    def __log_figure(self, df, pillar, color, notion_y_pos = -0.4, figsize=(10,6)):
        filtered_df = df[df['Pillar'] == pillar].copy()

        filtered_df.loc[:, 'Metric'] = filtered_df['Metric'].astype(str).str.replace('_', ' ')
        filtered_df.loc[:, 'Metric'] = filtered_df['Metric'].apply(lambda x: str(x).title())

        filtered_df.loc[:, 'Notion'] = filtered_df['Notion'].astype(str).str.replace('_', ' ')
        filtered_df.loc[:, 'Notion'] = filtered_df['Notion'].apply(lambda x: str(x).title())

        unique_notion_count = filtered_df['Notion'].nunique()
        palette = [color] * unique_notion_count

        plt.figure(figsize=figsize)
        ax = sns.barplot(data=filtered_df, x='Metric', y='Metric Score', hue='Notion', palette=palette, dodge=False)

        x_positions = range(len(filtered_df))

        notion_scores = {}

        for i in range(len(filtered_df)):
            row = filtered_df.iloc[i]
            notion = row['Notion']
            notion_score = row['Notion Score']
            metric_score = row['Metric Score']

            if notion not in notion_scores:
                metrics_for_notion = filtered_df[filtered_df['Notion'] == notion]['Metric']
                start_pos = x_positions[i]
                end_pos = x_positions[i + len(metrics_for_notion) - 1]

                notion_x_pos = (start_pos + end_pos) / 2
                ax.axhline(notion_score, ls='--', color='black', lw=0.5, xmin=start_pos/len(x_positions), xmax=(end_pos+1)/len(x_positions))
                ax.text(notion_x_pos, notion_score + 0.01, f"{notion_score:.2f}", ha='center', va='bottom', fontsize=10, color='black')  # Black text
                notion_scores[notion] = notion_score

        ax.set_xticks(x_positions)
        ax.set_xticklabels(filtered_df['Metric'], rotation=45, ha='right', fontsize=10)

        seen_notions = set()
        for i, (metric, notion) in enumerate(zip(filtered_df['Metric'], filtered_df['Notion'])):
            if notion not in seen_notions:
                metrics_for_notion = filtered_df[filtered_df['Notion'] == notion]['Metric']
                start_pos = x_positions[i]
                end_pos = x_positions[i + len(metrics_for_notion) - 1]

                notion_x_pos = (start_pos + end_pos) / 2

                ax.text(notion_x_pos, notion_y_pos, notion, ha='center', va='center', fontsize=10, color='black')

                seen_notions.add(notion)

        for i, v in enumerate(filtered_df['Metric Score']):
            ax.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=10, color='black')

        plt.xlabel('Metrics and notions', labelpad=35)
        plt.ylabel('Score')
        plt.title(f'Metrics and notion scores for the {pillar} pillar')

        ax.legend_.remove()

        plt.tight_layout()

        self.nebulalogger.log_figure(ax.get_figure(), 0, f"Trust/Pillar/{pillar}")
        plt.close()

    def graphics(self):
        results_file = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), self.scenario_name, "trustworthiness", "nebula_trust_results.json")
        with open(results_file, 'r') as f:
            results = json.load(f)

        pillars_list = []
        notion_names = []
        notion_scores = []
        metric_names = []
        metric_scores = []

        for pillar in results["pillars"]:
            for key, value in pillar.items():
                pillar_name = key
                if "notions" in value:
                    for notion in value["notions"]:
                        for notion_key, notion_value in notion.items():
                            notion_name = notion_key
                            notion_score = notion_value["score"]
                            for metric in notion_value["metrics"]:
                                for metric_key, metric_value in metric.items():
                                    metric_name = metric_key
                                    metric_score = metric_value["score"]

                                    pillars_list.append(pillar_name)
                                    notion_names.append(notion_name)
                                    notion_scores.append(notion_score)
                                    metric_names.append(metric_name)
                                    metric_scores.append(metric_score)

        df = pd.DataFrame({
            "Pillar": pillars_list,
            "Notion": notion_names,
            "Notion Score": notion_scores,
            "Metric": metric_names,
            "Metric Score": metric_scores
        })

        self.__log_figure(df, 'robustness', "#F8D3DF")
        self.__log_figure(df, "privacy", "#DA8D8B", -0.2)
        self.__log_figure(df, "fairness", "#DDDDDD")
        self.__log_figure(df, "explainability", "#FCEFC3")
        self.__log_figure(df, "accountability", "#8FAADC", -0.3)
        self.__log_figure(df, "architectural_soundness", "#DBB9FA", -0.3)
        self.__log_figure(df, "sustainability", "#BBFDAF", -0.5, figsize=(12,8))

        categories = [
            "robustness",
            "privacy",
            "fairness",
            "explainability",
            "accountability",
            "architectural_soundness",
            "sustainability"
        ]

        scores = [results["pillars"][i][category]["score"] for i, category in enumerate(categories)]

        trust_score = results["trust_score"]
        categories.append("trust_score")
        scores.append(trust_score)

        palette = ["#F8D3DF", "#DA8D8B", "#DDDDDD", "#FCEFC3", "#8FAADC", "#DBB9FA", "#BBFDAF", "#BF9000"]

        plt.figure(figsize=(10, 8))
        ax = sns.barplot(x=categories, y=scores, palette=palette, hue=categories, legend=False)
        ax.set_xlabel("Pillar")
        ax.set_ylabel("Score")
        ax.set_title("Pillars and trust scores")

        for i, v in enumerate(scores):
            ax.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=10)

        name_labels = [
            "Robustness",
            "Privacy",
            "Fairness",
            "Explainability",
            "Accountability",
            "Architectural Soundness",
            "Sustainability",
            "Trust Score"
        ]

        ax.set_xticks(range(len(categories)))
        ax.set_xticklabels(name_labels, rotation=45)

        self.nebulalogger.log_figure(ax.get_figure(), 0, f"Trust/AllPillars")
        plt.close()
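A minimal sketch of how this class appears to be driven (mirroring the metric.py change below), assuming NEBULA_LOGS_DIR is set and <scenario>/trustworthiness/nebula_trust_results.json already exists; the scenario values here are placeholders, not taken from the commit:

import os

from nebula.addons.trustworthiness.graphics import Graphics

# Placeholder values; in the commit these come from TrustMetricManager.
os.environ.setdefault("NEBULA_LOGS_DIR", "/tmp/nebula/logs")
scenario_start_time = "01/01/2024 10:00:00"
scenario_name = "my_scenario"

graphics = Graphics(scenario_start_time, scenario_name)
graphics.graphics()  # logs one bar chart per pillar plus the overall trust-score chart to TensorBoard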

nebula/addons/trustworthiness/metric.py

Lines changed: 13 additions & 8 deletions
@@ -2,6 +2,7 @@
 import logging
 import os
 
+from nebula.addons.trustworthiness.graphics import Graphics
 from nebula.addons.trustworthiness.pillar import TrustPillar
 from nebula.addons.trustworthiness.utils import write_results_json
 
@@ -15,12 +16,13 @@ class TrustMetricManager:
     Manager class to help store the output directory and handle calls from the FL framework.
     """
 
-    def __init__(self):
+    def __init__(self, scenario_start_time):
         self.factsheet_file_nm = "factsheet.json"
         self.eval_metrics_file_nm = "eval_metrics.json"
         self.nebula_trust_results_nm = "nebula_trust_results.json"
+        self.scenario_start_time = scenario_start_time
 
-    def evaluate(self, scenario, weights, use_weights=False):
+    def evaluate(self, experiment_name, weights, use_weights=False):
         """
         Evaluates the trustworthiness score.
 
@@ -30,10 +32,10 @@ def evaluate(self, scenario, weights, use_weights=False):
             use_weights (bool): True to turn on the weights in the metric config file, default to False.
         """
         # Get scenario name
-        scenario_name = scenario[0]
-        factsheet_file = os.path.join(dirname, f"files/{scenario_name}/{self.factsheet_file_nm}")
-        metrics_cfg_file = os.path.join(dirname, f"configs/{self.eval_metrics_file_nm}")
-        results_file = os.path.join(dirname, f"files/{scenario_name}/{self.nebula_trust_results_nm}")
+        scenario_name = experiment_name
+        factsheet_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", self.factsheet_file_nm)
+        metrics_cfg_file = os.path.join(dirname, "configs", self.eval_metrics_file_nm)
+        results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", self.nebula_trust_results_nm)
 
         if not os.path.exists(factsheet_file):
             logger.error(f"{factsheet_file} is missing! Please check documentation.")
@@ -43,7 +45,7 @@ def evaluate(self, scenario, weights, use_weights=False):
             logger.error(f"{metrics_cfg_file} is missing! Please check documentation.")
             return
 
-        with open(factsheet_file) as f, open(metrics_cfg_file) as m:
+        with open(factsheet_file, "r") as f, open(metrics_cfg_file, "r") as m:
             factsheet = json.load(f)
             metrics_cfg = json.load(m)
             metrics = metrics_cfg.items()
@@ -55,10 +57,13 @@ def evaluate(self, scenario, weights, use_weights=False):
             for key, value in metrics:
                 pillar = TrustPillar(key, value, input_docs, use_weights)
                 score, result = pillar.evaluate()
-                weight = weights.get(key)
+                weight = weights.get(key) / 100
                 final_score += weight * score
                 result_print.append([key, score])
                 result_json["pillars"].append(result)
             final_score = round(final_score, 2)
             result_json["trust_score"] = final_score
             write_results_json(results_file, result_json)
+
+            graphics = Graphics(self.scenario_start_time, scenario_name)
+            graphics.graphics()
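Taken together, a hedged sketch of the new call path, assuming weights is a dict of per-pillar percentages summing to 100 (the key names match the pillar categories above, but the exact values and the scenario name below are illustrative, not taken from the commit):

from nebula.addons.trustworthiness.metric import TrustMetricManager

# Illustrative per-pillar weights in percent; evaluate() now divides each by 100.
weights = {
    "robustness": 10,
    "privacy": 10,
    "fairness": 10,
    "explainability": 10,
    "accountability": 10,
    "architectural_soundness": 10,
    "sustainability": 40,
}

manager = TrustMetricManager(scenario_start_time="01/01/2024 10:00:00")
manager.evaluate("my_scenario", weights, use_weights=True)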
