
Addressing issue #753 by adding feature mapping for the Fourier Encoder #759


Open · wants to merge 13 commits into main
2 changes: 1 addition & 1 deletion examples/04_training/06_train_icemix_model.py
@@ -150,7 +150,7 @@ def main(
"add_norm_layer": True,
"skip_readout": True,
},
- n_features=len(features),
+ fourier_mapping=[0, 1, 2, 3, None, None],
)
task = DirectionReconstructionWithKappa(
hidden_size=backbone.nb_outputs,
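A note on reading the new argument: `fourier_mapping[i]` holds the input-column index of the i-th canonical feature in `[x, y, z, time, charge, auxiliary]`, with `None` marking features absent from the data. A short sketch (the first column order below is hypothetical, not taken from this script):

```python
# Hypothetical column order ["time", "x", "y", "z", "charge"]:
# fourier_mapping[i] is the column index of the i-th canonical
# feature [x, y, z, time, charge, auxiliary].
fourier_mapping = [1, 2, 3, 0, 4, None]  # no auxiliary column

# As in this example script: the first four columns are already
# x, y, z, time, and neither charge nor auxiliary is present.
fourier_mapping = [0, 1, 2, 3, None, None]
```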
66 changes: 47 additions & 19 deletions src/graphnet/models/components/embedding.py
@@ -54,9 +54,9 @@ class FourierEncoder(LightningModule):

This module incorporates sinusoidal positional embeddings and auxiliary
embeddings to process input sequences and produce meaningful
- representations. The module assumes that the input data is in the format of
- (x, y, z, time, charge, auxiliary), being the first four features
- mandatory.
+ representations. The features x, y, z and time are mandatory, while charge
+ and auxiliary are optional. Use the mapping to tell the encoder which
+ input column holds each feature.
"""

def __init__(
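A minimal usage sketch of the new interface, assuming the defaults from the signature below (`seq_length=128`, `output_dim=384`); shapes are illustrative:

```python
import torch

from graphnet.models.components.embedding import FourierEncoder

# Data with columns [x, y, z, time]; charge and auxiliary are absent.
encoder = FourierEncoder(
    seq_length=128,
    output_dim=384,
    mapping=[0, 1, 2, 3, None, None],
)

x = torch.rand(8, 100, 4)        # (batch, pulses, features)
lengths = torch.full((8,), 100)  # pulses per event
out = encoder(x, lengths)        # -> (8, 100, 384)

# Omitting a mandatory feature raises at construction time:
# FourierEncoder(mapping=[0, 1, 2, None, None, None])  # ValueError: time
```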
@@ -65,7 +65,7 @@ def __init__(
mlp_dim: Optional[int] = None,
output_dim: int = 384,
scaled: bool = False,
- n_features: int = 6,
+ mapping: list = [0, 1, 2, 3, 4, 5],
):
"""Construct `FourierEncoder`.

@@ -78,23 +78,37 @@ def __init__(
depending on `n_features`.
output_dim: Dimension of the output (I.e. number of columns).
scaled: Whether or not to scale the embeddings.
- n_features: The number of features in the input data.
+ mapping: Mapping of the input columns to [x, y, z, time, charge,
+     auxiliary]. Use None for missing features.
"""
super().__init__()

+ self.mapping_str = ["x", "y", "z", "time", "charge", "auxiliary"]
+ self.mapping = mapping
+ self.n_features = len([i for i in mapping if i is not None])
self.sin_emb = SinusoidalPosEmb(dim=seq_length, scaled=scaled)
self.sin_emb2 = SinusoidalPosEmb(dim=seq_length // 2, scaled=scaled)

- if n_features < 4:
+ assert len(mapping) == 6, (
+     "Fourier mapping must have 6 elements. "
+     "Use None for missing features."
+ )
+ assert all(
+     isinstance(i, int) or i is None for i in mapping
+ ), "Use int or None in the Fourier mapping."
+
+ if any(i is None for i in mapping[:4]):
+     missing = [
+         self.mapping_str[i] for i in range(4) if mapping[i] is None
+     ]
raise ValueError(
f"At least x, y, z and time of the DOM are required. Got only "
f"{n_features} features."
f"x, y, z and time of the DOM are required."
f"{missing} missing in mapping."
)
- elif n_features >= 6:
+ elif self.n_features == 6:
self.aux_emb = nn.Embedding(2, seq_length // 2)
hidden_dim = 6 * seq_length
else:
- hidden_dim = int((n_features + 0.5) * seq_length)
+ hidden_dim = int((self.n_features + 0.5) * seq_length)

if mlp_dim is None:
mlp_dim = hidden_dim
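For reference, the MLP input width is just the sum of the concatenated embedding sizes produced in `forward`; a quick sanity check of the two branches above, assuming `seq_length = 128`:

```python
seq_length = 128

# All six features: x, y, z, time and charge each contribute a full
# embedding of size seq_length; auxiliary and the length embedding
# contribute seq_length // 2 each.
assert 5 * seq_length + 2 * (seq_length // 2) == 6 * seq_length

# Four or five features (no auxiliary): n_features full embeddings
# plus the length embedding of seq_length // 2.
for n_features in (4, 5):
    total = n_features * seq_length + seq_length // 2
    assert total == int((n_features + 0.5) * seq_length)
```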
@@ -106,28 +120,42 @@ def __init__(
nn.Linear(mlp_dim, output_dim),
)

- self.n_features = n_features

def forward(
self,
x: Tensor,
seq_length: Tensor,
) -> Tensor:
"""Forward pass."""
+ mapping_max = max(i for i in self.mapping if i is not None) + 1
+ if mapping_max > x.shape[2]:
+     raise IndexError(
+         "Fourier mapping does not fit the given data. "
+         f"The feature space of the data is too small (size {x.shape[2]}); "
+         f"the given Fourier mapping requires at least {mapping_max}."
+     )

length = torch.log10(seq_length.to(dtype=x.dtype))
- embeddings = [self.sin_emb(4096 * x[:, :, :3]).flatten(-2)]  # Position

+ # Position
+ embeddings = [
+     self.sin_emb(4096 * x[:, :, self.mapping[:3]]).flatten(-2)
+ ]

+ # Charge
if self.n_features >= 5:
- embeddings.append(self.sin_emb(1024 * x[:, :, 4]))  # Charge
+ embeddings.append(self.sin_emb(1024 * x[:, :, self.mapping[4]]))

- embeddings.append(self.sin_emb(4096 * x[:, :, 3]))  # Time
+ # Time
+ embeddings.append(self.sin_emb(4096 * x[:, :, self.mapping[3]]))

- if self.n_features >= 6:
-     embeddings.append(self.aux_emb(x[:, :, 5].long()))  # Auxiliary
+ # Auxiliary
+ if self.n_features == 6:
+     embeddings.append(self.aux_emb(x[:, :, self.mapping[5]].long()))

+ # Length
embeddings.append(
self.sin_emb2(length).unsqueeze(1).expand(-1, max(seq_length), -1)
- )  # Length
+ )

x = torch.cat(embeddings, -1)
x = self.mlp(x)
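The new guard at the top of `forward` fails fast when the mapping points past the feature axis of the data. A sketch of the failure mode (shapes illustrative):

```python
import torch

from graphnet.models.components.embedding import FourierEncoder

encoder = FourierEncoder(mapping=[0, 1, 2, 3, 4, 5])  # needs >= 6 columns
x = torch.rand(8, 100, 4)  # but only 4 columns are provided
lengths = torch.full((8,), 100)

try:
    encoder(x, lengths)
except IndexError as err:
    print(err)  # "Fourier mapping does not fit the given data. ..."
```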
8 changes: 5 additions & 3 deletions src/graphnet/models/gnn/icemix.py
@@ -42,7 +42,7 @@ def __init__(
scaled_emb: bool = False,
include_dynedge: bool = False,
dynedge_args: Optional[Dict[str, Any]] = None,
- n_features: int = 6,
+ fourier_mapping: list = [0, 1, 2, 3, 4, 5],
):
"""Construct `DeepIce`.

@@ -61,7 +61,9 @@ def __init__(
provided, DynEdge will be initialized with the original Kaggle
Competition settings. If `include_dynedge` is False, this
argument have no impact.
- n_features: The number of features in the input data.
+ fourier_mapping: Mapping of the input columns to [x, y, z, time,
+     charge, auxiliary] for the FourierEncoder. Use None for
+     missing features.
"""
super().__init__(seq_length, hidden_dim)
fourier_out_dim = hidden_dim // 2 if include_dynedge else hidden_dim
@@ -70,7 +72,7 @@ def __init__(
mlp_dim=None,
output_dim=fourier_out_dim,
scaled=scaled_emb,
- n_features=n_features,
+ mapping=fourier_mapping,
)
self.rel_pos = SpacetimeEncoder(head_size)
self.sandwich = nn.ModuleList(
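At the model level the new argument is forwarded untouched to `FourierEncoder`. A construction sketch, assuming the defaults implied above (`hidden_dim=384`, `seq_length=128` are illustrative); the same keyword applies to `ISeeCube` below:

```python
from graphnet.models.gnn.icemix import DeepIce

# Columns [x, y, z, time] only; no charge or auxiliary available.
backbone = DeepIce(
    hidden_dim=384,
    seq_length=128,
    fourier_mapping=[0, 1, 2, 3, None, None],
)
```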
4 changes: 2 additions & 2 deletions src/graphnet/models/graphs/nodes/nodes.py
@@ -358,8 +358,8 @@ def __init__(
f"input_feature_names {input_feature_names}"
)
self.all_features = input_feature_names + [
"scatt_lenght",
"abs_lenght",
"scatt_length",
"abs_length",
]
self.f_scattering, self.f_absoprtion = ice_transparency(**ice_args)
else:
8 changes: 5 additions & 3 deletions src/graphnet/models/transformer/iseecube.py
@@ -31,7 +31,7 @@ def __init__(
max_rel_pos: int = 256,
num_register_tokens: int = 3,
scaled_emb: bool = False,
- n_features: int = 6,
+ fourier_mapping: list = [0, 1, 2, 3, 4, 5],
):
"""Construct `ISeeCube`.

@@ -46,15 +46,17 @@ def __init__(
max_rel_pos: Maximum relative position for relative position bias.
num_register_tokens: The number of register tokens.
scaled_emb: Whether to scale the sinusoidal positional embeddings.
- n_features: The number of features in the input data.
+ fourier_mapping: Mapping of the input columns to [x, y, z, time,
+     charge, auxiliary] for the FourierEncoder. Use None for
+     missing features.
"""
super().__init__(seq_length, hidden_dim)
self.fourier_ext = FourierEncoder(
seq_length=seq_length,
mlp_dim=mlp_dim,
output_dim=hidden_dim,
scaled=scaled_emb,
- n_features=n_features,
+ mapping=fourier_mapping,
)
self.pos_embedding = nn.Parameter(
torch.empty(1, seq_length, hidden_dim).normal_(std=0.02),