Skip to content

Commit d5bd4dd

Browse files
fix trust
1 parent c8cf61c commit d5bd4dd

File tree

3 files changed

+89
-13
lines changed

3 files changed

+89
-13
lines changed

nebula/addons/trustworthiness/factsheet.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from nebula.core.models.mnist.mlp import MNISTModelMLP
1313
from nebula.core.models.mnist.cnn import MNISTModelCNN
1414
from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv
15-
from nebula.addons.trustworthiness.utils import count_class_samples, read_csv, check_field_filled, get_entropy
15+
from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_entropy, get_all_data_entropy
1616
# from nebula.core.models.syscall.mlp import SyscallModelMLP
1717

1818
dirname = os.path.dirname(__file__)
@@ -160,16 +160,20 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, cla
160160
train_model_file = f"{files_dir}/participant_1_train_model.pk"
161161
emissions_file = os.path.join(files_dir, "emissions.csv")
162162

163-
# Entropy
164-
i = 0
165-
for file in dataloaders_files:
166-
with open(file, "rb") as file:
167-
dataloader = pickle.load(file)
168-
get_entropy(i, scenario_name, dataloader)
169-
i += 1
163+
# # Entropy
164+
# i = 0
165+
# for file in dataloaders_files:
166+
# with open(file, "rb") as file:
167+
# dataloader = pickle.load(file)
168+
# get_entropy(i, scenario_name, dataloader)
169+
# i += 1
170+
171+
get_all_data_entropy(scenario_name)
170172

171173
with open(f"{files_dir}/entropy.json", "r") as file:
172174
entropy_distribution = json.load(file)
175+
176+
logging.info(f"[ALEX] entropy_distribution: {entropy_distribution}")
173177

174178
values = np.array(list(entropy_distribution.values()))
175179

@@ -197,12 +201,14 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, cla
197201

198202
factsheet["fairness"]["selection_cv"] = 1
199203

200-
count_class_samples(scenario_name, dataloaders_files, class_counter)
204+
count_all_class_samples(scenario_name)
201205

202-
# FER
206+
# # FER
203207

204208
with open(f"{files_dir}/count_class.json", "r") as file:
205209
class_distribution = json.load(file)
210+
211+
logging.info(f"[ALEX] class_distribution: {class_distribution}")
206212

207213
class_samples_sizes = [x for x in class_distribution.values()]
208214
class_imbalance = get_cv(list=class_samples_sizes)
@@ -220,6 +226,8 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, cla
220226
# else:
221227
# model = CIFAR10ModelCNN()
222228

229+
logging.info(f"[ALEX] parte de training hecha")
230+
223231
model.load_state_dict(lightning_model.state_dict())
224232

225233
with open(test_dataloader_file, "rb") as file:

nebula/addons/trustworthiness/trustworthiness.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from nebula.addons.trustworthiness.utils import save_results_csv
1212
from codecarbon import EmissionsTracker
1313
import asyncio
14+
from collections import Counter
1415

1516
""" ##############################
1617
# TRUST WORKLOADS #
@@ -115,6 +116,8 @@ async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
115116
# Save model in trustworthy dir
116117
with open(model_file, 'wb') as f:
117118
pickle.dump(self._engine.trainer.model, f)
119+
120+
118121

119122
class TrustWorkloadServer(TrustWorkload):
120123

@@ -199,8 +202,9 @@ async def _generate_factsheet(self, trust_config, experiment_name):
199202
}
200203

201204
trust_metric_manager = TrustMetricManager(self._start_time)
202-
# trust_metric_manager.evaluate(experiment_name, weights, use_weights=True)
203-
logging.info("[FER] evaluation done")
205+
trust_metric_manager.evaluate(experiment_name, weights, use_weights=True)
206+
#logging.info("[FER] evaluation done")
207+
logging.info("Trust work DONE")
204208

205209
async def _process_test_metrics_event(self, tme: TestMetricsEvent):
206210
cur_loss, cur_acc = await tme.get_event_data()
@@ -262,6 +266,10 @@ async def _create_trustworthiness_directory(self):
262266
logging.info("log2")
263267

264268
async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
269+
from nebula.addons.trustworthiness.utils import save_class_count_per_participant
270+
class_counter = self._engine.trainer.datamodule.get_samples_per_label()
271+
save_class_count_per_participant(self._experiment_name, class_counter, self._idx)
272+
265273
await self.tw.finish_experiment_role_pre_actions()
266274

267275
last_loss, last_accuracy = self.tw.get_metrics()
@@ -283,7 +291,7 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
283291
save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_loss, last_accuracy)
284292
stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, self._emissions_file, self._role.value, workload, sample_size)
285293
await self.tw.finish_experiment_role_post_actions(self._trust_config, self._experiment_name)
286-
294+
287295
def _factory_trust_workload(self, role: Role, engine: Engine, idx, trust_files_route) -> TrustWorkload:
288296
trust_workloads = {
289297
Role.TRAINER: TrustWorkloadTrainer,

nebula/addons/trustworthiness/utils.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,36 @@
1717
dirname = os.path.dirname(__file__)
1818

1919

20+
def save_class_count_per_participant(experiment_name, class_counter: Counter, idx):
    """Persist one participant's per-class sample counts as a JSON file.

    Writes ``<idx>_class_count.json`` under the experiment's
    ``trustworthiness`` directory (rooted at ``NEBULA_LOGS_DIR``).
    Class ids are obfuscated with the module-level ``hashids`` encoder
    before being used as JSON keys.

    Args:
        experiment_name: Name of the experiment (directory under NEBULA_LOGS_DIR).
        class_counter: Counter mapping class id -> number of samples.
        idx: Participant index used to name the output file.
    """
    out_path = os.path.join(
        os.environ.get('NEBULA_LOGS_DIR'),
        experiment_name,
        "trustworthiness",
        f"{str(idx)}_class_count.json",
    )
    # Encode each class id so raw labels are not stored on disk.
    encoded_counts = {}
    for class_id, count in class_counter.items():
        encoded_counts[hashids.encode(int(class_id))] = count
    with open(out_path, "w") as f:
        json.dump(encoded_counts, f)
25+
26+
def count_all_class_samples(experiment_name):
    """Aggregate every participant's class counts into one global file.

    Scans the experiment's ``trustworthiness`` directory for
    ``<i>_class_count.json`` files with consecutive participant ids
    starting at 0, stopping at the first missing id, and sums the counts
    per class hash. The total is written to ``count_class.json``.

    Args:
        experiment_name: Name of the experiment (directory under NEBULA_LOGS_DIR).
    """
    base_dir = os.path.join(
        os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness"
    )

    totals = {}
    participant_id = 0
    while True:
        per_participant_file = os.path.join(
            base_dir, f"{str(participant_id)}_class_count.json"
        )
        # Participant files are numbered consecutively; the first gap ends the scan.
        if not os.path.exists(per_participant_file):
            break

        with open(per_participant_file, "r") as f:
            counts = json.load(f)

        for class_hash, count in counts.items():
            totals[class_hash] = totals.get(class_hash, 0) + count

        participant_id += 1

    # Save the aggregated total in count_class.json.
    output_file = os.path.join(base_dir, "count_class.json")

    with open(output_file, "w") as f:
        json.dump(totals, f, indent=2)
49+
2050
def count_class_samples(scenario_name, dataloaders_files, class_counter: Counter = None):
2151
"""
2252
Counts the number of samples by class.
@@ -56,6 +86,35 @@ def count_class_samples(scenario_name, dataloaders_files, class_counter: Counter
5686
json.dump(result, f)
5787

5888

89+
def get_all_data_entropy(experiment_name):
    """Compute each participant's class-distribution entropy and save it.

    Reads ``<i>_class_count.json`` files (consecutive participant ids
    starting at 0, stopping at the first missing id) from the experiment's
    ``trustworthiness`` directory, computes the Shannon entropy (base 2)
    of each participant's class-count distribution, and writes the results
    to ``entropy.json`` keyed by participant id (as a string).

    Args:
        experiment_name: Name of the experiment (directory under NEBULA_LOGS_DIR).
    """
    trust_dir = os.path.join(
        os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness"
    )
    entropy_per_participant = {}
    participant_id = 0

    # NOTE: the redundant pre-loop path computation was removed; the path is
    # built fresh for each participant inside the loop.
    while True:
        data_class_count_file = os.path.join(
            trust_dir, f"{str(participant_id)}_class_count.json"
        )

        if not os.path.exists(data_class_count_file):
            break

        with open(data_class_count_file, "r") as f:
            class_count = json.load(f)

        total = sum(class_count.values())
        if total == 0:
            # No samples at all: define the entropy as 0 instead of dividing by zero.
            entropy_value = 0.0
        else:
            probabilities = [count / total for count in class_count.values()]
            entropy_value = entropy(probabilities, base=2)

        entropy_per_participant[str(participant_id)] = round(entropy_value, 6)
        participant_id += 1

    name_file = os.path.join(trust_dir, "entropy.json")

    with open(name_file, "w") as f:
        json.dump(entropy_per_participant, f, indent=2)
117+
59118
def get_entropy(client_id, scenario_name, dataloader):
60119
"""
61120
Get the entropy of each client in the scenario.
@@ -72,6 +131,7 @@ def get_entropy(client_id, scenario_name, dataloader):
72131
name_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "entropy.json")
73132

74133
if os.path.exists(name_file):
134+
logging.info(f"entropy fiel already exists.. loading.")
75135
with open(name_file, "r") as f:
76136
client_entropy = json.load(f)
77137

0 commit comments

Comments
 (0)