Commit 1264a49

[Feature] Add node deployment using processes (Multiplatform) (#11)
* Include deployment using processes, improve communications, new ready flag, and async boost * improve reporter readability, and model compression * update libs, reconnection functionality, and documentation * Update network metrics * Training in a isolated process, improve async, improve network metrics * Include removing child processes * Update requirements * torch compatibility and exceptions management * file permissions during execution * Include windows support * Include windows support (ps1) * Include windows support (ps1) * Include windows support (ps1) * force utf-8 and windows deployment * Fix event handler * Improve exception management * Increase timeout on handler * Add lock and conditions in event manager * Remove main process when leaving * Improve exception visualization * killing processes fixed * fix traceback * Include logging in child processes * Include logging in child processes * Include output file * Remove redundant log * Include traceback logs * Training logs moved to independent log file * Fix logging import and commands file on windows * docer checked fixed * Include custom progress bar * Fix controller endpoint * fix docker report finished * Remove file descriptors, include independent logger for training, update loggins in training-related classes * fix process scenario finished frontend * Include log for training/testing finished per round * Reduce console logs * Improve reproducibility * Update installation guide and deployment page * Persistent avoiding model from initialization round * include await in reset * Improve logging in data processing, include a condition in the selection of classes during dirichlet * Improve logging * Improve logging, dirichlet, frontend popup * release locked locks * Minor fix during logging --------- Co-authored-by: FerTV <fernando.torres.vega@gmail.com>
1 parent 6e2632b commit 1264a49

30 files changed (+1241 −436 lines)

.gitignore

Lines changed: 1 addition & 0 deletions

@@ -137,6 +137,7 @@ app/
 data/
 *.db*
 *.out
+*.pid

 .requirements.txt
 data-analysis/

docs/installation.rst

Lines changed: 24 additions & 8 deletions

@@ -95,25 +95,32 @@ by listing the version of the NEBULA with the following command line::
     python app/main.py --version


-Building the nebula participant
+Building the nebula node
 ====================================
+There are two ways to deploy the node in the federation: using Docker containers or isolated processes.
+You can choose the one that best fits your needs in the frontend.

-Docker image
--------------------------
-You can build the docker image using the following command line in the root directory::
+1. Using Docker containers
+--------------------------------
+You need to build the docker image using the following command line in the root directory::

     docker build -t nebula-core .

 In case of using GPU in the docker, you have to follow the instructions in the following link to install nvidia-container-toolkit::

     https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html

-Checking the docker images
-==========================
 You can check the docker images using the following command line::

     docker images

+2. Using isolated processes
+------------------------------------
+You need to install the requirements of the node (core) using the following command line in the root directory::
+
+    pip3 install -r nebula/requirements.txt
+
+
 Running NEBULA
 ==================
 To run NEBULA, you can use the following command line::

@@ -152,7 +159,7 @@ To stop NEBULA, you can use the following command line::

     python app/main.py --stop

-Be careful, this command will stop all the containers related to NEBULA: frontend, controller, and participants.
+Be careful, this command will stop all the containers related to NEBULA: frontend, controller, and nodes.


 Possible issues during the installation or execution

@@ -205,4 +212,13 @@ If frontend is not working, restart docker daemon

 ===================================

-If the frontend is not working, check the logs in app/logs/server.log
+Error: Too many open files
+
+Solution: Increase the number of open files
+
+    ulimit -n 65536
+
+Also, you can add the following lines to the file /etc/security/limits.conf
+
+    * soft nofile 65536
+    * hard nofile 65536
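The "Too many open files" fix above raises the per-process file-descriptor limit. As a quick diagnostic you can query (and, up to the hard limit, raise) the same limit from Python via the standard `resource` module; this is a Unix-only sketch for checking your environment, not part of NEBULA itself:

```python
import resource

# Query the current soft and hard limits on open file descriptors.
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
print(f"soft={soft}, hard={hard}")

# The soft limit can be raised up to the hard limit without root;
# this mirrors what `ulimit -n 65536` does for a shell session.
resource.setrlimit(resource.RLIMIT_NOFILE, (min(65536, hard), hard))
```

If the soft limit still caps out below what the federation needs, the `/etc/security/limits.conf` entries shown above make the change persistent across sessions.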

docs/nebula.core.utils.rst

Lines changed: 8 additions & 0 deletions

@@ -52,6 +52,14 @@ nebula.core.utils.nebulalogger\_tensorboard module
    :undoc-members:
    :show-inheritance:

+nebula.core.utils.tasks module
+------------------------------
+
+.. automodule:: nebula.core.utils.tasks
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 Module contents
 ---------------

docs/requirements.txt

Lines changed: 1 addition & 1 deletion

@@ -1,3 +1,3 @@
 Sphinx==8.0.2
-sphinx-autoapi==3.2.1
+sphinx-autoapi==3.3.1
 sphinx-book-theme==1.1.3

nebula/addons/functions.py

Lines changed: 7 additions & 2 deletions

@@ -1,8 +1,13 @@
 import logging


-def print_msg_box(msg, indent=1, width=None, title=None):
+def print_msg_box(msg, indent=1, width=None, title=None, logger_name=None):
     """Print message-box with optional title."""
+    if logger_name:
+        logger = logging.getLogger(logger_name)
+    else:
+        logger = logging.getLogger()
+
     if not isinstance(msg, str):
         raise TypeError("msg parameter must be a string")

@@ -18,4 +23,4 @@ def print_msg_box(msg, indent=1, width=None, title=None):
     box += f'║{space}{"-" * len(title):<{width}}{space}\n'  # underscore
     box += "".join([f"║{space}{line:<{width}}{space}\n" for line in lines])
     box += f'╚{"═" * (width + indent * 2)}╝'  # lower_border
-    logging.info(box)
+    logger.info(box)
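The change above routes the message box to a named logger instead of the root logger, which is what lets child-process output (e.g. training) land in its own log file. A minimal sketch of the pattern (the `emit` helper and the capture setup are illustrative, not part of NEBULA):

```python
import logging

def emit(msg, logger_name=None):
    # Pick a named logger when one is given, otherwise fall back to the
    # root logger -- the same selection print_msg_box now performs.
    logger = logging.getLogger(logger_name) if logger_name else logging.getLogger()
    logger.info(msg)

# A named logger can carry its own handlers and skip propagation to root,
# so its records end up only in a dedicated destination.
training_logger = logging.getLogger("nebula.training")
training_logger.setLevel(logging.INFO)
training_logger.propagate = False
```

Because `logging.getLogger(name)` always returns the same logger object for a given name, callers of `print_msg_box(..., logger_name="nebula.training")` transparently pick up whatever handlers were configured for that name elsewhere.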

nebula/addons/reporter.py

Lines changed: 42 additions & 6 deletions

@@ -23,6 +23,17 @@ def __init__(self, config, trainer, cm: "CommunicationsManager"):
         self.data_queue = asyncio.Queue()
         self.url = f'http://{self.config.participant["scenario_args"]["controller"]}/nebula/dashboard/{self.config.participant["scenario_args"]["name"]}/node/update'
         self.counter = 0
+
+        self.first_net_metrics = True
+        self.prev_bytes_sent = 0
+        self.prev_bytes_recv = 0
+        self.prev_packets_sent = 0
+        self.prev_packets_recv = 0
+
+        self.acc_bytes_sent = 0
+        self.acc_bytes_recv = 0
+        self.acc_packets_sent = 0
+        self.acc_packets_recv = 0

     async def enqueue_data(self, name, value):
         await self.data_queue.put((name, value))

@@ -33,7 +44,7 @@ async def start(self):

     async def run_reporter(self):
         while True:
-            if self.config.participant["scenario_args"]["controller"] == "nebula-frontend":
+            if self.config.participant["scenario_args"]["controller"] != "nebula-test":
                 await self.__report_status_to_controller()
                 await self.__report_data_queue()
                 await self.__report_resources()

@@ -45,7 +56,7 @@ async def run_reporter(self):

     async def report_scenario_finished(self):
         url = f'http://{self.config.participant["scenario_args"]["controller"]}/nebula/dashboard/{self.config.participant["scenario_args"]["name"]}/node/done'
-        data = json.dumps({"ip": self.config.participant["network_args"]["ip"], "port": self.config.participant["network_args"]["port"]})
+        data = json.dumps({"idx": self.config.participant["device_args"]["idx"]})
         headers = {
             "Content-Type": "application/json",
             "User-Agent": f'NEBULA Participant {self.config.participant["device_args"]["idx"]}',

@@ -87,6 +98,9 @@ async def __report_status_to_controller(self):
             logging.debug(text)
         except aiohttp.ClientError as e:
             logging.error(f"Error connecting to the controller at {self.url}: {e}")
+        except Exception as e:
+            logging.error(f"Error sending status to controller, will try again in a few seconds: {e}")
+            await asyncio.sleep(5)

     async def __report_resources(self):
         cpu_percent = psutil.cpu_percent()

@@ -115,6 +129,28 @@ async def __report_resources(self):
         bytes_recv = net_io_counters.bytes_recv
         packets_sent = net_io_counters.packets_sent
         packets_recv = net_io_counters.packets_recv
+
+        if self.first_net_metrics:
+            bytes_sent_diff = 0
+            bytes_recv_diff = 0
+            packets_sent_diff = 0
+            packets_recv_diff = 0
+            self.first_net_metrics = False
+        else:
+            bytes_sent_diff = bytes_sent - self.prev_bytes_sent
+            bytes_recv_diff = bytes_recv - self.prev_bytes_recv
+            packets_sent_diff = packets_sent - self.prev_packets_sent
+            packets_recv_diff = packets_recv - self.prev_packets_recv
+
+        self.prev_bytes_sent = bytes_sent
+        self.prev_bytes_recv = bytes_recv
+        self.prev_packets_sent = packets_sent
+        self.prev_packets_recv = packets_recv
+
+        self.acc_bytes_sent += bytes_sent_diff
+        self.acc_bytes_recv += bytes_recv_diff
+        self.acc_packets_sent += packets_sent_diff
+        self.acc_packets_recv += packets_recv_diff

         current_connections = await self.cm.get_addrs_current_connections(only_direct=True)

@@ -127,10 +163,10 @@ async def __report_resources(self):
             "RAM/RAM process (%)": memory_percent_process,
             "RAM/RAM process (MB)": memory_process,
             "Disk/Disk (%)": disk_percent,
-            "Network/Network (bytes sent)": bytes_sent,
-            "Network/Network (bytes received)": bytes_recv,
-            "Network/Network (packets sent)": packets_sent,
-            "Network/Network (packets received)": packets_recv,
+            "Network/Network (bytes sent)": round(self.acc_bytes_sent / (1024 ** 2), 3),
+            "Network/Network (bytes received)": round(self.acc_bytes_recv / (1024 ** 2), 3),
+            "Network/Network (packets sent)": self.acc_packets_sent,
+            "Network/Network (packets received)": self.acc_packets_recv,
             "Network/Connections": len(current_connections),
         }
         self.trainer.logger.log_data(resources)
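The metrics change above switches from reporting the OS's raw cumulative counters (which include traffic from before the scenario started) to accumulating per-interval deltas, with the first sample used only as a baseline. A minimal sketch of that delta-accumulation logic, isolated from the reporter (the `NetStats` class is illustrative and tracks only bytes sent):

```python
class NetStats:
    """Accumulate deltas of a monotonically increasing counter, skipping
    the first sample so pre-existing traffic is not counted."""

    def __init__(self):
        self.first = True
        self.prev_sent = 0
        self.acc_sent = 0

    def update(self, bytes_sent):
        # The first sample only establishes the baseline: its delta is zero.
        diff = 0 if self.first else bytes_sent - self.prev_sent
        self.first = False
        self.prev_sent = bytes_sent
        self.acc_sent += diff
        # Report in MB, rounded to 3 decimals as the reporter does.
        return round(self.acc_sent / (1024 ** 2), 3)
```

In the reporter this runs once per reporting cycle against `psutil.net_io_counters()`, so the dashboard shows traffic relative to scenario start rather than since boot.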

nebula/config/config.py

Lines changed: 41 additions & 6 deletions

@@ -3,6 +3,11 @@
 import os
 from logging import Formatter, FileHandler

+CYAN = "\x1b[0;36m"
+RESET = "\x1b[0m"
+
+TRAINING_LOGGER = "nebula.training"
+

 class Config:
     topology = {}

@@ -23,6 +28,7 @@ def __init__(self, entity, topology_config_file=None, participant_config_file=No
         if self.participant != {}:
             self.__default_config()
             self.__set_default_logging()
+            self.__set_training_logging()

     def __getstate__(self):
         # Return the attributes of the class that should be serialized

@@ -39,6 +45,10 @@ def get_topology_config(self):
     def get_participant_config(self):
         return json.dumps(self.participant, indent=2)

+    def get_train_logging_config(self):
+        # TBD
+        pass
+
     def __default_config(self):
         self.participant["device_args"]["name"] = f"participant_{self.participant['device_args']['idx']}_{self.participant['network_args']['ip']}_{self.participant['network_args']['port']}"
         self.participant["network_args"]["addr"] = f"{self.participant['network_args']['ip']}:{self.participant['network_args']['port']}"

@@ -56,31 +66,56 @@ def __set_default_logging(self):
         logging.basicConfig(level=level, handlers=[console_handler, file_handler, file_handler_only_debug, exp_errors_file_handler])

     def __setup_logging(self, log_filename):
-        CYAN = "\x1b[0;36m"
-        RESET = "\x1b[0m"
         info_file_format = f"%(asctime)s - {self.participant['device_args']['name']} - [%(filename)s:%(lineno)d] %(message)s"
         debug_file_format = f"%(asctime)s - {self.participant['device_args']['name']} - [%(filename)s:%(lineno)d] %(message)s\n[in %(pathname)s:%(lineno)d]"
         log_console_format = f"{CYAN}%(asctime)s - {self.participant['device_args']['name']} - [%(filename)s:%(lineno)d]{RESET}\n%(message)s"

         console_handler = logging.StreamHandler()
-        console_handler.setLevel(logging.INFO if self.participant["device_args"]["logging"] else logging.CRITICAL)
+        console_handler.setLevel(logging.CRITICAL)
         console_handler.setFormatter(Formatter(log_console_format))

-        file_handler = FileHandler("{}.log".format(log_filename), mode="w")
+        file_handler = FileHandler("{}.log".format(log_filename), mode="w", encoding="utf-8")
         file_handler.setLevel(logging.INFO if self.participant["device_args"]["logging"] else logging.CRITICAL)
         file_handler.setFormatter(Formatter(info_file_format))

-        file_handler_only_debug = FileHandler("{}_debug.log".format(log_filename), mode="w")
+        file_handler_only_debug = FileHandler("{}_debug.log".format(log_filename), mode="w", encoding="utf-8")
         file_handler_only_debug.setLevel(logging.DEBUG if self.participant["device_args"]["logging"] else logging.CRITICAL)
         file_handler_only_debug.addFilter(lambda record: record.levelno == logging.DEBUG)
         file_handler_only_debug.setFormatter(Formatter(debug_file_format))

-        exp_errors_file_handler = FileHandler("{}_error.log".format(log_filename), mode="w")
+        exp_errors_file_handler = FileHandler("{}_error.log".format(log_filename), mode="w", encoding="utf-8")
         exp_errors_file_handler.setLevel(logging.WARNING if self.participant["device_args"]["logging"] else logging.CRITICAL)
         exp_errors_file_handler.setFormatter(Formatter(debug_file_format))

         return console_handler, file_handler, file_handler_only_debug, exp_errors_file_handler

+    def __set_training_logging(self):
+        training_log_filename = f"{self.log_filename}_training"
+        info_file_format = f"%(asctime)s - {self.participant['device_args']['name']} - [%(filename)s:%(lineno)d] %(message)s"
+        log_console_format = f"{CYAN}%(asctime)s - {self.participant['device_args']['name']} - [%(filename)s:%(lineno)d]{RESET}\n%(message)s"
+        level = logging.DEBUG if self.participant["device_args"]["logging"] else logging.CRITICAL
+
+        console_handler = logging.StreamHandler()
+        console_handler.setLevel(logging.CRITICAL)
+        console_handler.setFormatter(Formatter(log_console_format))
+
+        file_handler = FileHandler("{}.log".format(training_log_filename), mode="w", encoding="utf-8")
+        file_handler.setLevel(level)
+        file_handler.setFormatter(Formatter(info_file_format))
+
+        logger = logging.getLogger(TRAINING_LOGGER)
+        logger.setLevel(level)
+        logger.addHandler(console_handler)
+        logger.addHandler(file_handler)
+        logger.propagate = False
+
+        pl_logger = logging.getLogger("lightning.pytorch")
+        pl_logger.setLevel(logging.INFO)
+        pl_logger.handlers = []
+        pl_logger.propagate = False
+        pl_logger.addHandler(console_handler)
+        pl_logger.addHandler(file_handler)
+
     def to_json(self):
         # Return participant configuration as a json string
         return json.dumps(self.participant, sort_keys=False, indent=2)
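The `__set_training_logging` addition above gives training its own named logger with a dedicated UTF-8 file handler and `propagate = False`, so training records never reach the root handlers and the main log stays readable. A simplified, self-contained sketch of the same setup (the `setup_training_logger` function and format string are illustrative; `TRAINING_LOGGER` matches the constant introduced in the diff):

```python
import logging

TRAINING_LOGGER = "nebula.training"

def setup_training_logger(log_path):
    # UTF-8 encoding so box-drawing characters in log output survive on Windows,
    # matching the encoding="utf-8" added to the FileHandlers above.
    handler = logging.FileHandler(log_path, mode="w", encoding="utf-8")
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter("%(asctime)s - [%(filename)s:%(lineno)d] %(message)s"))

    logger = logging.getLogger(TRAINING_LOGGER)
    logger.setLevel(logging.DEBUG)
    logger.addHandler(handler)
    # propagate=False keeps training records out of the root logger's handlers,
    # isolating the training log from the participant's main log files.
    logger.propagate = False
    return logger
```

Any code that does `logging.getLogger("nebula.training")` afterwards (such as `print_msg_box` with `logger_name`) writes to this file without further configuration.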
