Updating hyrax_cnn so that the first linear layer is dynamic based on input image size. Removing superfluous testing notebook. (#386)

drewoldag · web-flow · commit 56ae5fc3bbbb · 2025-08-18T12:08:46.000-07:00
diff --git a/src/hyrax/data_sets/random_dataset_testing.ipynb b/src/hyrax/data_sets/random_dataset_testing.ipynb
diff --git a/src/hyrax/hyrax_default_config.toml b/src/hyrax/hyrax_default_config.toml
@@ -95,6 +95,12 @@ latent_dim = 64
 final_layer = "tanh"
 
 
+[model.hyrax_cnn]
+# The number of classes to predict as the output of the model, e.g. 2 would be a
+# binary classifier, 10 would predict the 10 classes in the CIFAR-10 dataset.
+output_classes = 10
+
+
 [criterion]
 # The name of the built-in criterion to use or the import path to an external criterion
 name = "torch.nn.CrossEntropyLoss"
diff --git a/src/hyrax/models/hyrax_cnn.py b/src/hyrax/models/hyrax_cnn.py
@@ -19,16 +19,46 @@ class HyraxCNN(nn.Module):
     This CNN is designed to work with datasets that are prepared with Hyrax's HSC Data Set class.
     """
 
-    def __init__(self, config, shape):
+    def __init__(self, config, shape=(3, 32, 32)):
         super().__init__()
-        self.conv1 = nn.Conv2d(3, 6, 5)
+        self.config = config
+
+        self.num_input_channels, self.image_width, self.image_height = shape
+        hidden_channels_1 = 6
+        hidden_channels_2 = 16
+
+        # Calculate the width and height that remain after each convolution and
+        # pooling step, so the first linear layer can be sized to match.
+        #
+        # If the number of layers is changed, this will need to be rewritten.
+        conv1_end_w = self.conv2d_output_size(self.image_width, kernel_size=5)
+        conv1_end_h = self.conv2d_output_size(self.image_height, kernel_size=5)
+
+        pool1_end_w = self.pool2d_output_size(conv1_end_w, kernel_size=2, stride=2)
+        pool1_end_h = self.pool2d_output_size(conv1_end_h, kernel_size=2, stride=2)
+
+        conv2_end_w = self.conv2d_output_size(pool1_end_w, kernel_size=5)
+        conv2_end_h = self.conv2d_output_size(pool1_end_h, kernel_size=5)
+
+        pool2_end_w = self.pool2d_output_size(conv2_end_w, kernel_size=2, stride=2)
+        pool2_end_h = self.pool2d_output_size(conv2_end_h, kernel_size=2, stride=2)
+
+        self.conv1 = nn.Conv2d(self.num_input_channels, hidden_channels_1, 5)
         self.pool = nn.MaxPool2d(2, 2)
-        self.conv2 = nn.Conv2d(6, 16, 5)
-        self.fc1 = nn.Linear(16 * 5 * 5, 120)
+        self.conv2 = nn.Conv2d(hidden_channels_1, hidden_channels_2, 5)
+        self.fc1 = nn.Linear(hidden_channels_2 * pool2_end_h * pool2_end_w, 120)
         self.fc2 = nn.Linear(120, 84)
-        self.fc3 = nn.Linear(84, 10)
+        self.fc3 = nn.Linear(84, self.config["model"]["hyrax_cnn"]["output_classes"])
 
-        self.config = config
+    def conv2d_output_size(self, input_size, kernel_size, padding=0, stride=1, dilation=1) -> int:
+        # From https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
+        numerator = input_size + 2 * padding - dilation * (kernel_size - 1) - 1
+        return int((numerator / stride) + 1)
+
+    def pool2d_output_size(self, input_size, kernel_size, stride, padding=0, dilation=1) -> int:
+        # From https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html
+        numerator = input_size + 2 * padding - dilation * (kernel_size - 1) - 1
+        return int((numerator / stride) + 1)
 
     def forward(self, x):
         # This check is inefficient - we assume that the example CNN will be primarily