Skip to content

Commit 35aa95a

Browse files
committed
chore(reporter, lightning): improved names of the metrics
1 parent 0b7816a commit 35aa95a

File tree

2 files changed

+22
-22
lines changed

2 files changed

+22
-22
lines changed

nebula/addons/reporter.py

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -326,19 +326,19 @@ async def __report_resources(self):
326326
current_connections = await self.cm.get_addrs_current_connections(only_direct=True)
327327

328328
resources = {
329-
"WCPU/CPU global (%)": cpu_percent,
330-
"WCPU/CPU process (%)": cpu_percent_process,
331-
"WCPU/CPU temperature (°)": cpu_temp,
332-
"ZRAM/RAM global (%)": memory_percent,
333-
"ZRAM/RAM global (MB)": memory_used,
334-
"ZRAM/RAM process (%)": memory_percent_process,
335-
"ZRAM/RAM process (MB)": memory_process,
336-
"YDisk/Disk (%)": disk_percent,
337-
"XNetwork/Network (bytes sent)": round(self.acc_bytes_sent / (1024**2), 3),
338-
"XNetwork/Network (bytes received)": round(self.acc_bytes_recv / (1024**2), 3),
339-
"XNetwork/Network (packets sent)": self.acc_packets_sent,
340-
"XNetwork/Network (packets received)": self.acc_packets_recv,
341-
"XNetwork/Connections": len(current_connections),
329+
"W-CPU/CPU global (%)": cpu_percent,
330+
"W-CPU/CPU process (%)": cpu_percent_process,
331+
"W-CPU/CPU temperature (°)": cpu_temp,
332+
"Z-RAM/RAM global (%)": memory_percent,
333+
"Z-RAM/RAM global (MB)": memory_used,
334+
"Z-RAM/RAM process (%)": memory_percent_process,
335+
"Z-RAM/RAM process (MB)": memory_process,
336+
"Y-Disk/Disk (%)": disk_percent,
337+
"X-Network/Network (bytes sent)": round(self.acc_bytes_sent / (1024**2), 3),
338+
"X-Network/Network (bytes received)": round(self.acc_bytes_recv / (1024**2), 3),
339+
"X-Network/Network (packets sent)": self.acc_packets_sent,
340+
"X-Network/Network (packets received)": self.acc_packets_recv,
341+
"X-Network/Connections": len(current_connections),
342342
}
343343
self.trainer.logger.log_data(resources)
344344

@@ -365,13 +365,13 @@ async def __report_resources(self):
365365
)
366366
gpu_fan_speed = await asyncio.to_thread(pynvml.nvmlDeviceGetFanSpeed, handle)
367367
gpu_info = {
368-
f"WGPU/GPU{i} (%)": gpu_percent,
369-
f"WGPU/GPU{i} temperature (°)": gpu_temp,
370-
f"WGPU/GPU{i} memory (%)": gpu_mem_percent,
371-
f"WGPU/GPU{i} power": gpu_power,
372-
f"WGPU/GPU{i} clocks": gpu_clocks,
373-
f"WGPU/GPU{i} memory clocks": gpu_memory_clocks,
374-
f"WGPU/GPU{i} fan speed": gpu_fan_speed,
368+
f"W-GPU/GPU{i} (%)": gpu_percent,
369+
f"W-GPU/GPU{i} temperature (°)": gpu_temp,
370+
f"W-GPU/GPU{i} memory (%)": gpu_mem_percent,
371+
f"W-GPU/GPU{i} power": gpu_power,
372+
f"W-GPU/GPU{i} clocks": gpu_clocks,
373+
f"W-GPU/GPU{i} memory clocks": gpu_memory_clocks,
374+
f"W-GPU/GPU{i} fan speed": gpu_fan_speed,
375375
}
376376
self.trainer.logger.log_data(gpu_info)
377377
except Exception: # noqa: S110

nebula/core/training/lightning.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -353,7 +353,7 @@ def get_model_weight(self):
353353

354354
def on_round_start(self):
355355
self.data.setup()
356-
self._logger.log_data({"1Round": self.round})
356+
self._logger.log_data({"A-Round": self.round})
357357
# self.reporter.enqueue_data("Round", self.round)
358358

359359
def on_round_end(self):
@@ -365,5 +365,5 @@ def on_round_end(self):
365365
self.cleanup()
366366

367367
def on_learning_cycle_end(self):
368-
self._logger.log_data({"1Round": self.round})
368+
self._logger.log_data({"A-Round": self.round})
369369
# self.reporter.enqueue_data("Round", self.round)

0 commit comments

Comments (0)