
Addressing issue #753 by adding feature mapping for the Fourier Encoder #759


Open · wants to merge 13 commits into main
2 changes: 1 addition & 1 deletion examples/04_training/06_train_icemix_model.py
@@ -150,7 +150,7 @@ def main(
"add_norm_layer": True,
"skip_readout": True,
},
- n_features=len(features),
+ fourier_mapping=[0, 1, 2, 3, None, None],
)
task = DirectionReconstructionWithKappa(
hidden_size=backbone.nb_outputs,
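A note on reading the new argument: `fourier_mapping[i]` holds the input-column index of the i-th canonical feature in `[x, y, z, time, charge, auxiliary]`, with `None` marking features absent from the data. A short sketch (the first column order below is hypothetical, not taken from this script):

```python
# Hypothetical column order ["time", "x", "y", "z", "charge"]:
# fourier_mapping[i] is the column index of the i-th canonical
# feature [x, y, z, time, charge, auxiliary].
fourier_mapping = [1, 2, 3, 0, 4, None]  # no auxiliary column

# As in this example script: the first four columns are already
# x, y, z, time, and neither charge nor auxiliary is present.
fourier_mapping = [0, 1, 2, 3, None, None]
```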
66 changes: 47 additions & 19 deletions src/graphnet/models/components/embedding.py
@@ -54,9 +54,9 @@ class FourierEncoder(LightningModule):

This module incorporates sinusoidal positional embeddings and auxiliary
embeddings to process input sequences and produce meaningful
- representations. The module assumes that the input data is in the format of
- (x, y, z, time, charge, auxiliary), being the first four features
- mandatory.
+ representations. The features x, y, z and time are mandatory, while charge
+ and auxiliary are optional. Use the mapping to tell the encoder which
+ input column holds each feature.
"""

def __init__(
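A minimal usage sketch of the new interface, assuming the defaults from the signature below (`seq_length=128`, `output_dim=384`); shapes are illustrative:

```python
import torch

from graphnet.models.components.embedding import FourierEncoder

# Data with columns [x, y, z, time]; charge and auxiliary are absent.
encoder = FourierEncoder(
    seq_length=128,
    output_dim=384,
    mapping=[0, 1, 2, 3, None, None],
)

x = torch.rand(8, 100, 4)        # (batch, pulses, features)
lengths = torch.full((8,), 100)  # pulses per event
out = encoder(x, lengths)        # -> (8, 100, 384)

# Omitting a mandatory feature raises at construction time:
# FourierEncoder(mapping=[0, 1, 2, None, None, None])  # ValueError: time
```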
@@ -65,7 +65,7 @@ def __init__(
mlp_dim: Optional[int] = None,
output_dim: int = 384,
scaled: bool = False,
- n_features: int = 6,
+ mapping: list = [0, 1, 2, 3, 4, 5],
):
"""Construct `FourierEncoder`.

@@ -78,23 +78,37 @@ def __init__(
depending on `n_features`.
output_dim: Dimension of the output (I.e. number of columns).
scaled: Whether or not to scale the embeddings.
- n_features: The number of features in the input data.
+ mapping: Mapping of the input columns to [x, y, z, time, charge,
+     auxiliary]. Use None for missing features.
"""
super().__init__()

+ self.mapping_str = ["x", "y", "z", "time", "charge", "auxiliary"]
+ self.mapping = mapping
+ self.n_features = len([i for i in mapping if i is not None])
self.sin_emb = SinusoidalPosEmb(dim=seq_length, scaled=scaled)
self.sin_emb2 = SinusoidalPosEmb(dim=seq_length // 2, scaled=scaled)

- if n_features < 4:
+ assert len(mapping) == 6, (
+     "Fourier mapping must have 6 elements. "
+     "Use None for missing features."
+ )
+ assert all(
+     isinstance(i, int) or i is None for i in mapping
+ ), "Use int or None in the Fourier mapping."
+
+ if any(i is None for i in mapping[:4]):
+     missing = [
+         self.mapping_str[i] for i in range(4) if mapping[i] is None
+     ]
raise ValueError(
f"At least x, y, z and time of the DOM are required. Got only "
f"{n_features} features."
f"x, y, z and time of the DOM are required."
f"{missing} missing in mapping."
)
- elif n_features >= 6:
+ elif self.n_features == 6:
self.aux_emb = nn.Embedding(2, seq_length // 2)
hidden_dim = 6 * seq_length
else:
- hidden_dim = int((n_features + 0.5) * seq_length)
+ hidden_dim = int((self.n_features + 0.5) * seq_length)

if mlp_dim is None:
mlp_dim = hidden_dim
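For reference, the MLP input width is just the sum of the concatenated embedding sizes produced in `forward`; a quick sanity check of the two branches above, assuming `seq_length = 128`:

```python
seq_length = 128

# All six features: x, y, z, time and charge each contribute a full
# embedding of size seq_length; auxiliary and the length embedding
# contribute seq_length // 2 each.
assert 5 * seq_length + 2 * (seq_length // 2) == 6 * seq_length

# Four or five features (no auxiliary): n_features full embeddings
# plus the length embedding of seq_length // 2.
for n_features in (4, 5):
    total = n_features * seq_length + seq_length // 2
    assert total == int((n_features + 0.5) * seq_length)
```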
@@ -106,28 +120,42 @@ def __init__(
nn.Linear(mlp_dim, output_dim),
)

- self.n_features = n_features

def forward(
self,
x: Tensor,
seq_length: Tensor,
) -> Tensor:
"""Forward pass."""
+ mapping_max = max(i for i in self.mapping if i is not None) + 1
+ if mapping_max > x.shape[2]:
+     raise IndexError(
+         "Fourier mapping does not fit the given data. "
+         f"The feature space of the data is too small (size {x.shape[2]}); "
+         f"the given Fourier mapping requires at least {mapping_max}."
+     )

length = torch.log10(seq_length.to(dtype=x.dtype))
- embeddings = [self.sin_emb(4096 * x[:, :, :3]).flatten(-2)]  # Position

+ # Position
+ embeddings = [
+     self.sin_emb(4096 * x[:, :, self.mapping[:3]]).flatten(-2)
+ ]

+ # Charge
if self.n_features >= 5:
- embeddings.append(self.sin_emb(1024 * x[:, :, 4]))  # Charge
+ embeddings.append(self.sin_emb(1024 * x[:, :, self.mapping[4]]))

- embeddings.append(self.sin_emb(4096 * x[:, :, 3]))  # Time
+ # Time
+ embeddings.append(self.sin_emb(4096 * x[:, :, self.mapping[3]]))

- if self.n_features >= 6:
-     embeddings.append(self.aux_emb(x[:, :, 5].long()))  # Auxiliary
+ # Auxiliary
+ if self.n_features == 6:
+     embeddings.append(self.aux_emb(x[:, :, self.mapping[5]].long()))

+ # Length
embeddings.append(
self.sin_emb2(length).unsqueeze(1).expand(-1, max(seq_length), -1)
- )  # Length
+ )

x = torch.cat(embeddings, -1)
x = self.mlp(x)
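The new guard at the top of `forward` fails fast when the mapping points past the feature axis of the data. A sketch of the failure mode (shapes illustrative):

```python
import torch

from graphnet.models.components.embedding import FourierEncoder

encoder = FourierEncoder(mapping=[0, 1, 2, 3, 4, 5])  # needs >= 6 columns
x = torch.rand(8, 100, 4)  # but only 4 columns are provided
lengths = torch.full((8,), 100)

try:
    encoder(x, lengths)
except IndexError as err:
    print(err)  # "Fourier mapping does not fit the given data. ..."
```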
8 changes: 5 additions & 3 deletions src/graphnet/models/gnn/icemix.py
@@ -42,7 +42,7 @@ def __init__(
scaled_emb: bool = False,
include_dynedge: bool = False,
dynedge_args: Optional[Dict[str, Any]] = None,
- n_features: int = 6,
+ fourier_mapping: list = [0, 1, 2, 3, 4, 5],
):
"""Construct `DeepIce`.

@@ -61,7 +61,9 @@ def __init__(
provided, DynEdge will be initialized with the original Kaggle
Competition settings. If `include_dynedge` is False, this
argument have no impact.
- n_features: The number of features in the input data.
+ fourier_mapping: Mapping of the input columns to [x, y, z, time,
+     charge, auxiliary] for the FourierEncoder. Use None for
+     missing features.
"""
super().__init__(seq_length, hidden_dim)
fourier_out_dim = hidden_dim // 2 if include_dynedge else hidden_dim
@@ -70,7 +72,7 @@ def __init__(
mlp_dim=None,
output_dim=fourier_out_dim,
scaled=scaled_emb,
- n_features=n_features,
+ mapping=fourier_mapping,
)
self.rel_pos = SpacetimeEncoder(head_size)
self.sandwich = nn.ModuleList(
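At the model level the new argument is forwarded untouched to `FourierEncoder`. A construction sketch, assuming the defaults implied above (`hidden_dim=384`, `seq_length=128` are illustrative); the same keyword applies to `ISeeCube` below:

```python
from graphnet.models.gnn.icemix import DeepIce

# Columns [x, y, z, time] only; no charge or auxiliary available.
backbone = DeepIce(
    hidden_dim=384,
    seq_length=128,
    fourier_mapping=[0, 1, 2, 3, None, None],
)
```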
4 changes: 2 additions & 2 deletions src/graphnet/models/graphs/nodes/nodes.py
@@ -358,8 +358,8 @@ def __init__(
f"input_feature_names {input_feature_names}"
)
self.all_features = input_feature_names + [
"scatt_lenght",
"abs_lenght",
"scatt_length",
"abs_length",
]
self.f_scattering, self.f_absoprtion = ice_transparency(**ice_args)
else:
8 changes: 5 additions & 3 deletions src/graphnet/models/transformer/iseecube.py
@@ -31,7 +31,7 @@ def __init__(
max_rel_pos: int = 256,
num_register_tokens: int = 3,
scaled_emb: bool = False,
- n_features: int = 6,
+ fourier_mapping: list = [0, 1, 2, 3, 4, 5],
):
"""Construct `ISeeCube`.

@@ -46,15 +46,17 @@ def __init__(
max_rel_pos: Maximum relative position for relative position bias.
num_register_tokens: The number of register tokens.
scaled_emb: Whether to scale the sinusoidal positional embeddings.
- n_features: The number of features in the input data.
+ fourier_mapping: Mapping of the input columns to [x, y, z, time,
+     charge, auxiliary] for the FourierEncoder. Use None for
+     missing features.
"""
super().__init__(seq_length, hidden_dim)
self.fourier_ext = FourierEncoder(
seq_length=seq_length,
mlp_dim=mlp_dim,
output_dim=hidden_dim,
scaled=scaled_emb,
- n_features=n_features,
+ mapping=fourier_mapping,
)
self.pos_embedding = nn.Parameter(
torch.empty(1, seq_length, hidden_dim).normal_(std=0.02),