Skip to content

Add cnn model #813

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ data/examples/output/
!/graphnet/src/graphnet/models/pretrained/**/**/**/**/**.pth
# Exception to geometry tables
!/data/geometry_tables/**/**.parquet
!/data/image_mapping_tables/**/**.parquet
!/data/tests/sqlite/upgrade_genie_step4_140028_000998_first_5_frames/upgrade_genie_step4_140028_000998_first_5_frames.db
!/data/tests/parquet/oscNext_genie_level7_v02/merged/**
!data/tests/parquet/oscNext_genie_level7_v02/oscNext_genie_level7_v02_first_5_frames.parquet
Expand Down
Binary file not shown.
Binary file not shown.
Binary file added data/tests/images/IC86lower_deepcore_test.npy
Binary file not shown.
Binary file added data/tests/images/IC86main_array_test.npy
Binary file not shown.
Binary file added data/tests/images/IC86upper_deepcore_test.npy
Binary file not shown.
343 changes: 343 additions & 0 deletions examples/04_training/09_train_cnn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,343 @@
"""Example of training a CNN Model."""

import os
from typing import Any, Dict, List, Optional

from pytorch_lightning.loggers import WandbLogger
import torch
from torch.optim.adam import Adam

from graphnet.constants import EXAMPLE_DATA_DIR, EXAMPLE_OUTPUT_DIR
from graphnet.data.constants import TRUTH
from graphnet.models import StandardModel
from graphnet.models.cnn import LCSC
from graphnet.models.data_representation import PercentileClusters
from graphnet.models.task.reconstruction import EnergyReconstruction
from graphnet.training.callbacks import PiecewiseLinearLR
from graphnet.training.loss_functions import LogCoshLoss
from graphnet.utilities.argparse import ArgumentParser
from graphnet.utilities.logging import Logger
from graphnet.data.dataset import SQLiteDataset
from graphnet.data.dataset import ParquetDataset
from graphnet.models.detector import ORCA150
from torch_geometric.data import Batch
from graphnet.models.data_representation.images import ExamplePrometheusImage

# Constants
# Names of the input features; passed to the pixel definition, the image
# definition and the dataset in `main` below.
features = ["sensor_id", "sensor_string_id", "t"]
# Truth variables handed to the dataset via its `truth` argument.
truth = TRUTH.PROMETHEUS


def main(
    path: str,
    pulsemap: str,
    target: str,
    truth_table: str,
    gpus: Optional[List[int]],
    max_epochs: int,
    early_stopping_patience: int,
    batch_size: int,
    num_workers: int,
    wandb: bool = False,
) -> None:
    """Train an LCSC CNN on image representations of events.

    The function builds an image definition, loads the data with
    `SQLiteDataset`, trains a `StandardModel` with an `LCSC` backbone and an
    `EnergyReconstruction` task, predicts on the validation dataloader and
    writes the predictions, the model state dict and the model config to
    `EXAMPLE_OUTPUT_DIR/train_cnn_model/<db name>/<run name>`.

    Args:
        path: Path to the dataset file.
        pulsemap: Name of the pulsemap to read from the dataset.
        target: Name of the truth label used as regression target.
        truth_table: Name of the truth table in the dataset.
        gpus: GPUs forwarded to `model.fit` and
            `model.predict_as_dataframe`.
        max_epochs: Maximum number of training epochs.
        early_stopping_patience: Patience forwarded to `model.fit`.
        batch_size: Batch size of both dataloaders.
        num_workers: Number of workers of both dataloaders.
        wandb: If True, a Weights & Biases logger is created and used.
    """
    # Construct Logger
    logger = Logger()

    # Initialise Weights & Biases (W&B) run.
    # `wandb_logger` stays None when W&B is disabled, so it can be passed to
    # `model.fit` unconditionally.
    wandb_logger: Optional[WandbLogger] = None
    if wandb:
        # Make sure W&B output directory exists
        wandb_dir = "./wandb/"
        os.makedirs(wandb_dir, exist_ok=True)
        wandb_logger = WandbLogger(
            project="example-script",
            entity="graphnet-team",
            save_dir=wandb_dir,
            log_model=True,
        )

    logger.info(f"features: {features}")
    logger.info(f"truth: {truth}")

    # Configuration
    config: Dict[str, Any] = {
        "path": path,
        "pulsemap": pulsemap,
        "batch_size": batch_size,
        "num_workers": num_workers,
        "target": target,
        "early_stopping_patience": early_stopping_patience,
        "fit": {
            "gpus": gpus,
            "max_epochs": max_epochs,
        },
        "dataset_reference": (
            SQLiteDataset if path.endswith(".db") else ParquetDataset
        ),
    }

    archive = os.path.join(EXAMPLE_OUTPUT_DIR, "train_cnn_model")
    run_name = f"lcsc_{config['target']}_example"
    if wandb_logger is not None:
        # Log configuration to W&B
        wandb_logger.experiment.config.update(config)

    # First we need to define how the image is constructed.
    # This is done using an ImageDefinition.

    # An ImageDefinition combines two components:

    # 1. A pixel definition, which defines how the pixel data is
    #    represented. Since an image always has fixed dimensions, the
    #    pixel definition is also responsible for representing the data
    #    in a way such that these fixed dimensions can be achieved.
    #    Normally, this could mean that light pulses that arrive at
    #    the same optical module must be aggregated to a
    #    fixed-dimensional vector.
    #    A pixel definition works exactly the same as a node
    #    definition in the graph scenario.

    # 2. A pixel mapping, which defines where each pixel is located
    #    in the final image. This is highly detector specific, as it
    #    depends on the geometry of the detector.

    # An ImageDefinition can be used to create multiple images of
    # a single event. In the example of IceCube, you can e.g.
    # create three images, one for the so called main array,
    # one for the upper deep core and one for the lower deep
    # core. Essentially, these are just different areas in
    # the detector.

    # Here we use the PercentileClusters pixel definition, which
    # aggregates the light pulses that arrive at the same optical
    # module with percentiles.
    pixel_definition = PercentileClusters(
        cluster_on=["sensor_id", "sensor_string_id"],
        percentiles=[10, 50, 90],
        add_counts=True,
        input_feature_names=features,
    )

    # The final image definition used here is the ExamplePrometheusImage,
    # which is a detector specific pixel mapping.
    # It maps optical modules into the image
    # using the sensor_string_id and sensor_id
    # (number of the optical module).
    # The detector class standardizes the input features,
    # so that the features are in a ML friendly range.
    # For the mapping of the optical modules to the image it is
    # essential to not change the value of the sensor_id and
    # sensor_string_id. Therefore we need to make sure that
    # these features are not standardized, which is done by the
    # `replace_with_identity` argument of the detector.
    image_definition = ExamplePrometheusImage(
        detector=ORCA150(
            replace_with_identity=[
                "sensor_id",
                "sensor_string_id",
            ],
        ),
        node_definition=pixel_definition,
        input_feature_names=features,
        string_label="sensor_string_id",
        dom_number_label="sensor_id",
    )

    # Use SQLiteDataset to load in data
    # The input here depends on the dataset being used,
    # in this case the Prometheus dataset.
    # NOTE(review): `config["dataset_reference"]` is computed above but is
    # not used here; the dataset is always a SQLiteDataset.
    dataset = SQLiteDataset(
        path=config["path"],
        pulsemaps=config["pulsemap"],
        truth_table=truth_table,
        features=features,
        truth=truth,
        data_representation=image_definition,
    )

    # Create the training and validation dataloaders.
    # Both dataloaders read from the same dataset in this example.
    training_dataloader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=config["batch_size"],
        num_workers=config["num_workers"],
        collate_fn=Batch.from_data_list,
    )

    validation_dataloader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=config["batch_size"],
        num_workers=config["num_workers"],
        collate_fn=Batch.from_data_list,
    )

    # Building model

    # Define architecture of the backbone, in this example
    # the LCSC architecture from Alexander Harnisch is used.
    backbone = LCSC(
        num_input_features=image_definition.nb_outputs,
        out_put_dim=2,
        input_norm=True,
        num_conv_layers=5,
        conv_filters=[5, 10, 20, 40, 60],
        kernel_size=3,
        # NOTE(review): a reviewer suggested adding a property to the
        # ImageDefinition that contains the resulting image dimension
        # (e.g. `ImageDefinition.shape`), so it need not be hard-coded.
        image_size=(8, 9, 22),  # dimensions of the example image
        pooling_type=[
            "Avg",
            None,
            "Avg",
            None,
            "Avg",
        ],
        pooling_kernel_size=[
            [1, 1, 2],
            None,
            [2, 2, 2],
            None,
            [2, 2, 2],
        ],
        pooling_stride=[
            [1, 1, 2],
            None,
            [2, 2, 2],
            None,
            [2, 2, 2],
        ],
        num_fc_neurons=50,
        norm_list=True,
        norm_type="Batch",
    )
    # Define the task.
    # Here an energy reconstruction, with a LogCoshLoss function.
    # The target and prediction are transformed using the log10 function.
    # When inferring, the prediction is transformed back to the
    # original scale using 10^x.
    task = EnergyReconstruction(
        hidden_size=backbone.nb_outputs,
        target_labels=config["target"],
        loss_function=LogCoshLoss(),
        transform_prediction_and_target=lambda x: torch.log10(x),
        transform_inference=lambda x: torch.pow(10, x),
    )
    # Define the full model, which includes the backbone, task(s),
    # along with typical machine learning options such as
    # learning rate optimizers and schedulers.
    model = StandardModel(
        data_representation=image_definition,
        backbone=backbone,
        tasks=[task],
        optimizer_class=Adam,
        optimizer_kwargs={"lr": 1e-03, "eps": 1e-03},
        scheduler_class=PiecewiseLinearLR,
        scheduler_kwargs={
            # Milestones are given in steps (see `scheduler_config`).
            "milestones": [
                0,
                len(training_dataloader) / 2,
                len(training_dataloader) * config["fit"]["max_epochs"],
            ],
            "factors": [1e-2, 1, 1e-02],
        },
        scheduler_config={
            "interval": "step",
        },
    )

    # Training model
    model.fit(
        training_dataloader,
        validation_dataloader,
        early_stopping_patience=config["early_stopping_patience"],
        logger=wandb_logger,
        **config["fit"],
    )

    # Get predictions
    additional_attributes = model.target_labels
    assert isinstance(additional_attributes, list)  # mypy

    results = model.predict_as_dataframe(
        validation_dataloader,
        additional_attributes=additional_attributes + ["event_no"],
        gpus=config["fit"]["gpus"],
    )

    # Save predictions and model to file.
    # The output directory is named after the dataset file (without its
    # extension) and the run name.
    db_name = os.path.basename(path).split(".")[0]
    output_dir = os.path.join(archive, db_name, run_name)
    logger.info(f"Writing results to {output_dir}")
    os.makedirs(output_dir, exist_ok=True)

    # Save results as .csv
    results.to_csv(os.path.join(output_dir, "cnn_results.csv"))

    # Save model config and state dict - Version safe save method.
    # This method of saving models is the safest way.
    model.save_state_dict(os.path.join(output_dir, "cnn_state_dict.pth"))
    model.save_config(os.path.join(output_dir, "cnn_model_config.yml"))

if __name__ == "__main__":

    # Parse command-line arguments
    parser = ArgumentParser(
        description="""
Train CNN model without the use of config files.
"""
    )

    parser.add_argument(
        "--path",
        help="Path to dataset file (default: %(default)s)",
        default=f"{EXAMPLE_DATA_DIR}/sqlite/prometheus/prometheus-events.db",
    )

    parser.add_argument(
        "--pulsemap",
        help="Name of pulsemap to use (default: %(default)s)",
        default="total",
    )

    parser.add_argument(
        "--target",
        help=(
            "Name of feature to use as regression target (default: "
            "%(default)s)"
        ),
        default="total_energy",
    )

    parser.add_argument(
        "--truth-table",
        help="Name of truth table to be used (default: %(default)s)",
        default="mc_truth",
    )

    # Shared training arguments; tuples override the standard default value.
    parser.with_standard_arguments(
        "gpus",
        ("max-epochs", 1),
        "early-stopping-patience",
        ("batch-size", 16),
        ("num-workers", 2),
    )

    parser.add_argument(
        "--wandb",
        action="store_true",
        help="If True, Weights & Biases are used to track the experiment.",
    )

    # Unrecognised arguments are tolerated and deliberately ignored.
    args, _unknown = parser.parse_known_args()

    # Keyword arguments keep the mapping to `main`'s parameters explicit.
    main(
        path=args.path,
        pulsemap=args.pulsemap,
        target=args.target,
        truth_table=args.truth_table,
        gpus=args.gpus,
        max_epochs=args.max_epochs,
        early_stopping_patience=args.early_stopping_patience,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        wandb=args.wandb,
    )
17 changes: 17 additions & 0 deletions src/graphnet/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@
TEST_PARQUET_DATA = os.path.join(
TEST_DATA_DIR, "parquet", _test_dataset_name, "merged"
)
# Paths to the `.npy` image files stored in the `images` folder of the test
# data directory; presumably reference images for the IC86 image tests —
# TODO confirm against the tests that load them.
TEST_IMAGE_DIR = os.path.join(TEST_DATA_DIR, "images")
TEST_IC86MAIN_IMAGE = os.path.join(TEST_IMAGE_DIR, "IC86main_array_test.npy")
TEST_IC86LOWERDC_IMAGE = os.path.join(
TEST_IMAGE_DIR, "IC86lower_deepcore_test.npy"
)
TEST_IC86UPPERDC_IMAGE = os.path.join(
TEST_IMAGE_DIR, "IC86upper_deepcore_test.npy"
)

# Example data
EXAMPLE_DATA_DIR = os.path.join(DATA_DIR, "examples")
Expand All @@ -41,3 +49,12 @@
ICECUBE_GEOMETRY_TABLE_DIR = os.path.join(GEOMETRY_TABLE_DIR, "icecube")
PROMETHEUS_GEOMETRY_TABLE_DIR = os.path.join(GEOMETRY_TABLE_DIR, "prometheus")
LIQUIDO_GEOMETRY_TABLE_DIR = os.path.join(GEOMETRY_TABLE_DIR, "liquid-o")

# Image Mapping Tables
# Paths to the parquet tables in the `image_mapping_tables` folder of the
# data directory; presumably these map sensors to pixel positions for the
# CNN image definitions — TODO confirm against the image-definition code.
IMAGE_MAPPING_TABLE_DIR = os.path.join(DATA_DIR, "image_mapping_tables")
IC86_CNN_MAPPING = os.path.join(
IMAGE_MAPPING_TABLE_DIR, "IC86_CNN_mapping.parquet"
)
PROMETHEUS_CNN_MAPPING = os.path.join(
IMAGE_MAPPING_TABLE_DIR, "prometheus_CNN_mapping.parquet"
)
4 changes: 2 additions & 2 deletions src/graphnet/data/extractors/icecube/i3filtermapextractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
class I3FilterMapExtractor(I3Extractor):
"""Class for extracting I3FilterMap properties.

This class extracts the boolean condition of the I3FilterMask from the
I3FilterMap in the frame.
This class extracts the boolean condition of the I3FilterMask from
the I3FilterMap in the frame.
"""

def __init__(
Expand Down
5 changes: 5 additions & 0 deletions src/graphnet/models/cnn/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""CNN-specific modules, for performing the main learnable operations."""

from .cnn import CNN
from .theos_muonE_upgoing import TheosMuonEUpgoing
Copy link
Collaborator

@RasmusOrsoe RasmusOrsoe Aug 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.theos_muonE_upgoing breaks with snake-case convention. Do we need "theos" in there? It's very jargony. Credit can be given in the associated docstring instead of the module name

from .lcsc import LCSC
Loading