
Commit e3b8993

format

1 parent 8d9beb9 · commit e3b8993

File tree

3 files changed: +101 −54 lines changed

    dictionary_learning/dictionary.py
    tests/test_cache.py
    tests/test_running_stat_welford.py

dictionary_learning/dictionary.py

Lines changed: 27 additions & 8 deletions

@@ -348,7 +348,13 @@ def from_pretrained(
 
 
 class BatchTopKSAE(Dictionary, nn.Module):
-    def __init__(self, activation_dim: int, dict_size: int, k: int, activation_normalizer: ActivationNormalizer | None = None):
+    def __init__(
+        self,
+        activation_dim: int,
+        dict_size: int,
+        k: int,
+        activation_normalizer: ActivationNormalizer | None = None,
+    ):
         super().__init__()
         self.activation_dim = activation_dim
         self.dict_size = dict_size
@@ -371,7 +377,11 @@ def __init__(self, activation_dim: int, dict_size: int, k: int, activation_norma
         self.b_dec = nn.Parameter(th.zeros(activation_dim))
 
     def encode(
-        self, x: th.Tensor, return_active: bool = False, use_threshold: bool = True, normalize_activations: bool = True
+        self,
+        x: th.Tensor,
+        return_active: bool = False,
+        use_threshold: bool = True,
+        normalize_activations: bool = True,
     ):
         if normalize_activations:
             x = self.normalize_activations(x)
@@ -405,7 +415,12 @@ def normalize_activations(self, x: th.Tensor) -> th.Tensor:
             return self.activation_normalizer(x)
         return x
 
-    def forward(self, x: th.Tensor, output_features: bool = False, normalize_activations: bool = True):
+    def forward(
+        self,
+        x: th.Tensor,
+        output_features: bool = False,
+        normalize_activations: bool = True,
+    ):
         encoded_acts_BF = self.encode(x, normalize_activations=normalize_activations)
         x_hat_BD = self.decode(encoded_acts_BF)
 
@@ -983,7 +998,6 @@ def from_pretrained(
         )
         num_layers, activation_dim, dict_size = state_dict["encoder.weight"].shape
 
-
        crosscoder = cls(
            activation_dim,
            dict_size,
@@ -1093,7 +1107,7 @@ def encode(
 
         Returns:
             If return_active is False: encoded features tensor
-            If return_active is True: tuple of (features, scaled_features, active_mask,
+            If return_active is True: tuple of (features, scaled_features, active_mask,
                 post_relu_features, post_relu_scaled_features)
         """
         if normalize_activations:
@@ -1151,7 +1165,7 @@ def encode_decoupled(
 
         Returns:
             If return_active is False: encoded features tensor of shape (batch_size, num_layers, dict_size)
-            If return_active is True: tuple of (features, scaled_features, active_mask,
+            If return_active is True: tuple of (features, scaled_features, active_mask,
                 post_relu_features, post_relu_scaled_features)
 
         Raises:
@@ -1228,7 +1242,12 @@ def encode_decoupled(
             return f
 
     def get_activations(
-        self, x: th.Tensor, use_threshold: bool = True, select_features=None, normalize_activations: bool = True, **kwargs
+        self,
+        x: th.Tensor,
+        use_threshold: bool = True,
+        select_features=None,
+        normalize_activations: bool = True,
+        **kwargs,
     ):
         """
         Get scaled feature activations for the input.
@@ -1314,7 +1333,7 @@ def from_pretrained(
            ), f"k in kwargs ({kwargs['k']}) does not match k in state_dict ({state_dict['k']})"
            kwargs.pop("k")
        kwargs.update()
-
+
        crosscoder = cls(
            activation_dim,
            dict_size,
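
Note: the signatures reformatted above belong to a batch top-k sparse autoencoder, where k is fixed at construction and the selection budget is shared across the whole batch rather than enforced per sample. The sketch below illustrates that selection rule only; batch_topk_mask and the example sizes are hypothetical, and this is not the repository's BatchTopKSAE implementation.

import torch as th


def batch_topk_mask(acts: th.Tensor, k: int) -> th.Tensor:
    """Keep the k * batch_size largest activations across the whole batch, zero the rest."""
    batch, dict_size = acts.shape
    flat = acts.flatten()
    top = flat.topk(k * batch, sorted=False)
    out = th.zeros_like(flat)
    out.scatter_(0, top.indices, top.values)
    return out.reshape(batch, dict_size)


# k active features per sample on average, with the budget shared across the batch.
post_relu = th.relu(th.randn(4, 16))
sparse = batch_topk_mask(post_relu, k=3)
assert (sparse != 0).sum() <= 3 * 4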

tests/test_cache.py

Lines changed: 66 additions & 40 deletions

@@ -8,6 +8,7 @@
 from dictionary_learning.cache import ActivationCache
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
+
 @pytest.fixture
 def temp_dir():
     """Create a temporary directory for test files."""
@@ -149,38 +150,40 @@ def test_activation_cache_with_normalizer(temp_dir):
     """Test ActivationCache collection and normalizer against direct model activations."""
     # Set flag to handle meta tensors properly
     th.fx.experimental._config.meta_nonzero_assume_all_nonzero = True
-
+
     # Skip test if CUDA not available to avoid device mapping issues
     if not th.cuda.is_available():
         pytest.skip("CUDA not available, skipping test to avoid device mapping issues")
-
+
     # Test strings
     test_strings = [
         "The quick brown fox jumps over the lazy dog.",
         "Machine learning is a subset of artificial intelligence.",
         "Python is a popular programming language for data science.",
         "Neural networks are inspired by biological brain structures.",
-        "Deep learning has revolutionized computer vision and natural language processing."
+        "Deep learning has revolutionized computer vision and natural language processing.",
     ]
-
+
     # Use the list directly - it already implements __len__ and __getitem__
     dataset = test_strings
-
+
     # Load GPT-2 model - use auto device mapping but force concrete tensors
     tokenizer = AutoTokenizer.from_pretrained("gpt2")
-    model = AutoModelForCausalLM.from_pretrained("gpt2", device_map="auto", torch_dtype=th.float32)
+    model = AutoModelForCausalLM.from_pretrained(
+        "gpt2", device_map="auto", torch_dtype=th.float32
+    )
     model = LanguageModel(model, torch_dtype=th.float32, tokenizer=tokenizer)
     model.tokenizer.pad_token = model.tokenizer.eos_token
 
     # Get a transformer block to extract activations from
     target_layer = model.transformer.h[6]  # Middle layer of GPT-2
     submodule_name = "transformer_h_6"
-
+
     # Parameters for activation collection
     batch_size = 2
     context_len = 64
     d_model = 768  # GPT-2 hidden size
-
+
     # Collect activations using ActivationCache
     ActivationCache.collect(
         data=dataset,
@@ -197,17 +200,16 @@ def test_activation_cache_with_normalizer(temp_dir):
         store_tokens=True,
     )
 
-
     # Load the cached activations
     cache = ActivationCache(temp_dir, submodule_name + "_out")
-
+
     # Collect activations directly from model for comparison
     direct_activations = []
     direct_tokens = []
-
+
     for i in range(0, len(test_strings), batch_size):
-        batch_texts = test_strings[i:i+batch_size]
-
+        batch_texts = test_strings[i : i + batch_size]
+
         # Tokenize
         tokens = model.tokenizer(
             batch_texts,
@@ -217,60 +219,84 @@ def test_activation_cache_with_normalizer(temp_dir):
             padding=True,
             add_special_tokens=True,
         )
-
+
         # Get activations directly
         with model.trace(tokens):
             layer_output = target_layer.output[0].save()
-
+
         # Extract valid tokens (non-padding)
         attention_mask = tokens["attention_mask"]
-        valid_activations = layer_output.reshape(-1, d_model)[attention_mask.reshape(-1).bool()]
-        valid_tokens = tokens["input_ids"].reshape(-1)[attention_mask.reshape(-1).bool()]
-
+        valid_activations = layer_output.reshape(-1, d_model)[
+            attention_mask.reshape(-1).bool()
+        ]
+        valid_tokens = tokens["input_ids"].reshape(-1)[
+            attention_mask.reshape(-1).bool()
+        ]
+
         direct_activations.append(valid_activations.cpu())
         direct_tokens.append(valid_tokens.cpu())
-
+
     # Concatenate direct activations
     direct_activations = th.cat(direct_activations, dim=0)
     direct_tokens = th.cat(direct_tokens, dim=0)
-
+
     # Test that we have the same number of activations
-    assert len(cache) == direct_activations.shape[0], f"Cache length {len(cache)} != direct activations length {direct_activations.shape[0]}"
-
+    assert (
+        len(cache) == direct_activations.shape[0]
+    ), f"Cache length {len(cache)} != direct activations length {direct_activations.shape[0]}"
+
     # Test that tokens match
-    assert th.equal(cache.tokens, direct_tokens), "Cached tokens don't match direct tokens"
-
+    assert th.equal(
+        cache.tokens, direct_tokens
+    ), "Cached tokens don't match direct tokens"
+
     # Test that activations match (within tolerance for numerical precision)
     cached_activations = th.stack([cache[i] for i in range(len(cache))], dim=0)
-    assert th.allclose(cached_activations, direct_activations, atol=1e-5, rtol=1e-5), "Cached activations don't match direct activations"
-
+    assert th.allclose(
+        cached_activations, direct_activations, atol=1e-5, rtol=1e-5
+    ), "Cached activations don't match direct activations"
+
     # Test mean and std computation
     computed_mean = direct_activations.mean(dim=0)
     computed_std = direct_activations.std(dim=0, unbiased=True)
-
-    assert th.allclose(cache.mean, computed_mean, atol=1e-5, rtol=1e-5), "Cached mean doesn't match computed mean"
-    assert th.allclose(cache.std, computed_std, atol=1e-5, rtol=1e-5), "Cached std doesn't match computed std"
-
+
+    assert th.allclose(
+        cache.mean, computed_mean, atol=1e-5, rtol=1e-5
+    ), "Cached mean doesn't match computed mean"
+    assert th.allclose(
+        cache.std, computed_std, atol=1e-5, rtol=1e-5
+    ), "Cached std doesn't match computed std"
+
     # Test normalizer functionality
     normalizer = cache.normalizer
-
+
     # Test normalization of a sample activation
     sample_activation = cached_activations[0]
     normalized = normalizer(sample_activation)
-
+
     # Verify normalization: (x - mean) / std (with small epsilon for numerical stability)
     expected_normalized = (sample_activation - cache.mean) / (cache.std + 1e-8)
-    assert th.allclose(normalized, expected_normalized, atol=1e-6), "Normalizer doesn't work correctly"
-
+    assert th.allclose(
+        normalized, expected_normalized, atol=1e-6
+    ), "Normalizer doesn't work correctly"
+
     # Test batch normalization
     batch_normalized = normalizer(cached_activations[:5])
-    expected_batch_normalized = (cached_activations[:5] - cache.mean) / (cache.std + 1e-8)
-    assert th.allclose(batch_normalized, expected_batch_normalized, atol=1e-6), "Batch normalization doesn't work correctly"
-
+    expected_batch_normalized = (cached_activations[:5] - cache.mean) / (
+        cache.std + 1e-8
+    )
+    assert th.allclose(
+        batch_normalized, expected_batch_normalized, atol=1e-6
+    ), "Batch normalization doesn't work correctly"
+
     # Test that normalization preserves shape
-    assert normalized.shape == sample_activation.shape, "Normalization changed tensor shape"
-    assert batch_normalized.shape == cached_activations[:5].shape, "Batch normalization changed tensor shape"
-
+    assert (
+        normalized.shape == sample_activation.shape
+    ), "Normalization changed tensor shape"
+    assert (
+        batch_normalized.shape == cached_activations[:5].shape
+    ), "Batch normalization changed tensor shape"
+
     print(f"✓ Successfully tested ActivationCache with {len(cache)} activations")
     print(f"✓ Mean shape: {cache.mean.shape}, Std shape: {cache.std.shape}")
     print(f"✓ Normalizer tests passed")

tests/test_running_stat_welford.py

Lines changed: 8 additions & 6 deletions

@@ -7,7 +7,7 @@
 
 
 @pytest.mark.parametrize("dtype", [torch.float32, torch.float16])
-@pytest.mark.parametrize("D", [5, 127]) # feature dimensionalities
+@pytest.mark.parametrize("D", [5, 127])  # feature dimensionalities
 def test_streaming_matches_reference(dtype, D):
     """
     Stream random data through RunningStatWelford in random-sized batches
@@ -30,11 +30,11 @@ def test_streaming_matches_reference(dtype, D):
 
     # Ground-truth (double precision to remove numeric noise)
     ref_mean = full.double().mean(dim=0)
-    ref_std = full.double().std(dim=0, unbiased=True)
+    ref_std = full.double().std(dim=0, unbiased=True)
 
     # Compare
     torch.testing.assert_close(acc.mean, ref_mean, rtol=1e-6, atol=1e-7)
-    torch.testing.assert_close(acc.std(), ref_std, rtol=1e-6, atol=1e-7)
+    torch.testing.assert_close(acc.std(), ref_std, rtol=1e-6, atol=1e-7)
     assert acc.n == N_total
 
 
@@ -60,10 +60,10 @@ def test_merge_two_accumulators():
 
     # Reference
     ref_mean = data.double().mean(dim=0)
-    ref_std = data.double().std(dim=0, unbiased=True)
+    ref_std = data.double().std(dim=0, unbiased=True)
 
     torch.testing.assert_close(acc1.mean, ref_mean, rtol=1e-6, atol=1e-7)
-    torch.testing.assert_close(acc1.std(), ref_std, rtol=1e-6, atol=1e-7)
+    torch.testing.assert_close(acc1.std(), ref_std, rtol=1e-6, atol=1e-7)
     assert acc1.n == N_total
 
 
@@ -87,4 +87,6 @@ def test_edge_cases():
     acc.update(torch.tensor([[2.0, 4.0, 6.0]], dtype=dtype))
     assert acc.n == 2
     torch.testing.assert_close(acc.mean, torch.tensor([1.5, 3.0, 4.5], dtype=dtype))
-    torch.testing.assert_close(acc.std(), torch.tensor([0.70710678, 1.41421356, 2.12132034], dtype=dtype))
+    torch.testing.assert_close(
+        acc.std(), torch.tensor([0.70710678, 1.41421356, 2.12132034], dtype=dtype)
+    )
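
Note: the invariant these tests check is that a Welford-style streaming accumulator reproduces the one-shot double-precision mean and unbiased std no matter how the data is batched or merged. The class below is an illustrative sketch of such an accumulator under that assumption, not the repository's RunningStatWelford:

import torch


class WelfordSketch:
    """Streaming per-dimension mean/std via Welford/Chan batch updates (illustrative only)."""

    def __init__(self, dim: int):
        self.n = 0
        self.mean = torch.zeros(dim, dtype=torch.float64)
        self.M2 = torch.zeros(dim, dtype=torch.float64)  # running sum of squared deviations

    def update(self, batch: torch.Tensor) -> None:
        batch = batch.double()
        b_n = batch.shape[0]
        b_mean = batch.mean(dim=0)
        b_M2 = ((batch - b_mean) ** 2).sum(dim=0)
        delta = b_mean - self.mean
        total = self.n + b_n
        # Chan et al. pairwise combination of (n, mean, M2) statistics.
        self.mean = self.mean + delta * (b_n / total)
        self.M2 = self.M2 + b_M2 + delta.pow(2) * (self.n * b_n / total)
        self.n = total

    def std(self) -> torch.Tensor:
        return (self.M2 / (self.n - 1)).sqrt()  # unbiased, matches std(dim=0, unbiased=True)


# Streaming in two chunks matches the one-shot double-precision reference.
data = torch.randn(64, 5)
acc = WelfordSketch(5)
acc.update(data[:20])
acc.update(data[20:])
torch.testing.assert_close(acc.mean, data.double().mean(dim=0), rtol=1e-6, atol=1e-7)
torch.testing.assert_close(acc.std(), data.double().std(dim=0, unbiased=True), rtol=1e-6, atol=1e-7)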
