Commit f8343df

Enhance ActivationCache to support sequence range tracking based on presence of BOS token
This commit introduces the following changes:

- Added a `_sequence_ranges` attribute to the `ActivationCache` class to store sequence start indices.
- Implemented a `sequence_ranges` property that loads sequence ranges from disk when they exist and are configured to be stored.
- Updated the `collect` method to track and store sequence ranges when the model's tokenizer has no beginning-of-sequence (BOS) token.
- Added assertions to ensure the integrity of sequence ranges during activation collection.
- Introduced tests verifying sequence-range storage both when the model has a BOS token and when it does not.

These changes let `ActivationCache` recover sequence boundaries from its flattened token stream.
1 parent 4541d10 commit f8343df
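
For orientation: the new `sequence_ranges.pt` tensor stores the start offset of every sequence in the flattened token/activation stream, plus a final entry equal to the total token count. A minimal consumer-side sketch of how those boundaries could be used (the helper `split_by_sequence` below is illustrative, not part of this commit):

import torch as th

def split_by_sequence(flat: th.Tensor, sequence_ranges: th.Tensor) -> list:
    """Recover per-sequence slices from a flattened (num_tokens, ...) tensor."""
    starts = sequence_ranges[:-1].tolist()
    ends = sequence_ranges[1:].tolist()
    return [flat[s:e] for s, e in zip(starts, ends)]

When the tokenizer has a BOS token, sequence boundaries can be recovered from the stored tokens themselves, which is why the cache only tracks ranges when no BOS token is present (and only with store_tokens=True and shuffle_shards=False).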

File tree

3 files changed: +227, -44 lines changed

dictionary_learning/cache.py

Lines changed: 83 additions & 8 deletions
@@ -269,6 +269,7 @@ def __init__(self, store_dir: str, submodule_name: str = None):
             os.path.join(store_dir, "tokens.pt"), weights_only=True
         ).cpu()
 
+        self._sequence_ranges = None
         self._mean = None
         self._std = None
 
@@ -322,6 +323,23 @@ def __getitem__(self, index: int):
     def tokens(self):
         return self._tokens
 
+    @property
+    def sequence_ranges(self):
+        if hasattr(self, '_sequence_ranges') and self._sequence_ranges is not None:
+            return self._sequence_ranges
+
+        if ("store_sequence_ranges" in self.config and
+                self.config["store_sequence_ranges"] and
+                os.path.exists(os.path.join(self._cache_store_dir, "..", "sequence_ranges.pt"))):
+            self._sequence_ranges = th.load(
+                os.path.join(self._cache_store_dir, "..", "sequence_ranges.pt"),
+                weights_only=True
+            ).cpu()
+            return self._sequence_ranges
+        else:
+            # Return None if sequence ranges not available
+            return None
+
     @staticmethod
     def get_activations(submodule: nn.Module, io: str):
         if io == "in":
@@ -434,17 +452,25 @@ def exists(
         cached data is present and num_tokens is the total number of tokens in the cache
         """
         num_tokens = 0
+        config = None
         for submodule_name in submodule_names:
-            if not os.path.exists(
-                os.path.join(store_dir, f"{submodule_name}_{io}", "config.json")
-            ):
+            config_path = os.path.join(store_dir, f"{submodule_name}_{io}", "config.json")
+            if not os.path.exists(config_path):
                 return False, 0
-            with open(
-                os.path.join(store_dir, f"{submodule_name}_{io}", "config.json"), "r"
-            ) as f:
-                num_tokens = json.load(f)["total_size"]
+            with open(config_path, "r") as f:
+                config = json.load(f)
+                num_tokens = config["total_size"]
+
         if store_tokens and not os.path.exists(os.path.join(store_dir, "tokens.pt")):
             return False, 0
+
+        # Check for sequence ranges if they should exist
+        if (config and
+                "store_sequence_ranges" in config and
+                config["store_sequence_ranges"] and
+                not os.path.exists(os.path.join(store_dir, "sequence_ranges.pt"))):
+            return False, 0
+
         return True, num_tokens
 
     @th.no_grad()
@@ -475,10 +501,24 @@ def collect(
         assert (
             not shuffle_shards or not store_tokens
         ), "Shuffling shards and storing tokens is not supported yet"
+
+        # Check if we need to store sequence ranges
+        has_bos_token = model.tokenizer.bos_token_id is not None
+        store_sequence_ranges = (
+            store_tokens and
+            not shuffle_shards and
+            not has_bos_token
+        )
+        if store_sequence_ranges:
+            print("No BOS token found. Will store sequence ranges.")
+
         dataloader = DataLoader(data, batch_size=batch_size, num_workers=num_workers)
 
         activation_cache = [[] for _ in submodules]
         tokens_cache = []
+        sequence_ranges_cache = []
+        current_token_position = 0  # Track position in flattened token stream
+
         store_sub_dirs = [
             os.path.join(store_dir, f"{submodule_names[i]}_{io}")
             for i in range(len(submodules))
@@ -530,6 +570,14 @@ def collect(
             store_mask = attention_mask.clone()
             if ignore_first_n_tokens_per_sample > 0:
                 store_mask[:, :ignore_first_n_tokens_per_sample] = 0
+
+            # Track sequence ranges if needed
+            if store_sequence_ranges:
+                batch_lengths = store_mask.sum(dim=1).tolist()
+                batch_sequence_ranges = np.cumsum([0] + batch_lengths[:-1]) + current_token_position
+                sequence_ranges_cache.extend(batch_sequence_ranges.tolist())
+                current_token_position += sum(batch_lengths)
+
             if store_tokens:
                 tokens_cache.append(
                     tokens["input_ids"].reshape(-1)[store_mask.reshape(-1).bool()]
@@ -572,7 +620,8 @@ def collect(
                 if dtype is not None:
                     activation_cache[i][-1] = activation_cache[i][-1].to(dtype)
 
-            assert len(tokens_cache[-1]) == activation_cache[0][-1].shape[0]
+            if store_tokens:
+                assert len(tokens_cache[-1]) == activation_cache[0][-1].shape[0]
             assert activation_cache[0][-1].shape[0] == store_mask.sum().item()
             current_size += activation_cache[0][-1].shape[0]
         else:
@@ -639,6 +688,7 @@ def collect(
                         "total_size": total_size,
                         "shard_count": shard_count,
                         "store_tokens": store_tokens,
+                        "store_sequence_ranges": store_sequence_ranges,
                     },
                     f,
                 )
@@ -652,6 +702,16 @@ def collect(
             ), f"{tokens_cache.shape[0]} != {total_size}"
             th.save(tokens_cache, os.path.join(store_dir, "tokens.pt"))
 
+        # store sequence ranges
+        if store_sequence_ranges:
+            print("Storing sequence ranges...")
+            # add the last sequence range to the end of the cache
+            sequence_ranges_cache.append(current_token_position)
+            assert sequence_ranges_cache[-1] == total_size
+            sequence_ranges_tensor = th.tensor(sequence_ranges_cache, dtype=th.long)
+            th.save(sequence_ranges_tensor, os.path.join(store_dir, "sequence_ranges.pt"))
+            print(f"Stored {len(sequence_ranges_cache)} sequence ranges")
+
         # store running stats
         for i in range(len(submodules)):
             th.save(
@@ -685,6 +745,14 @@ def tokens(self):
             (self.activation_cache_1.tokens, self.activation_cache_2.tokens), dim=0
         )
 
+    @property
+    def sequence_ranges(self):
+        seq_starts_1 = self.activation_cache_1.sequence_ranges
+        seq_starts_2 = self.activation_cache_2.sequence_ranges
+        if seq_starts_1 is not None and seq_starts_2 is not None:
+            return th.stack((seq_starts_1, seq_starts_2), dim=0)
+        return None
+
     @property
     def mean(self):
         return th.stack(
@@ -718,6 +786,13 @@ def __getitem__(self, index: int):
     def tokens(self):
         return th.stack([cache.tokens for cache in self.activation_caches], dim=0)
 
+    @property
+    def sequence_ranges(self):
+        seq_starts_list = [cache.sequence_ranges for cache in self.activation_caches]
+        if all(seq_starts is not None for seq_starts in seq_starts_list):
+            return th.stack(seq_starts_list, dim=0)
+        return None
+
     @property
     def mean(self):
         return th.stack([cache.mean for cache in self.activation_caches], dim=0)
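
The bookkeeping added to `collect` above is a running cumulative sum over per-sequence token counts. A small worked example with made-up batch lengths (not from the diff) shows how per-batch starts become global offsets and how the final sentinel equals the total size:

import numpy as np

current_token_position = 0
sequence_ranges_cache = []
for batch_lengths in ([3, 5], [2, 4]):  # tokens kept per sequence, per batch
    starts = np.cumsum([0] + batch_lengths[:-1]) + current_token_position
    sequence_ranges_cache.extend(starts.tolist())
    current_token_position += sum(batch_lengths)
sequence_ranges_cache.append(current_token_position)  # sentinel == total_size
print(sequence_ranges_cache)  # [0, 3, 8, 10, 14]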

dictionary_learning/dictionary.py

Lines changed: 4 additions & 8 deletions
@@ -465,11 +465,9 @@ def __init__(
             activation_mean: Optional mean tensor for input activation normalization. If None, no normalization is applied.
             activation_std: Optional std tensor for input activation normalization. If None, no normalization is applied.
         """
-        # First initialize the base classes that don't take normalization parameters
-        super().__init__()
+
+        super().__init__(activation_mean=activation_mean, activation_std=activation_std, activation_shape=(activation_dim,))
 
-        # Then explicitly initialize the NormalizableMixin
-        NormalizableMixin.__init__(self, activation_mean=activation_mean, activation_std=activation_std, activation_shape=(activation_dim,))
 
         self.activation_dim = activation_dim
         self.dict_size = dict_size
@@ -1036,9 +1034,7 @@ def __init__(
         """
         # First initialize the base classes that don't take normalization parameters
         super().__init__(activation_mean=activation_mean, activation_std=activation_std, activation_shape=(num_layers, activation_dim))
-
-        # Then explicitly initialize the NormalizableMixin
-        # NormalizableMixin.__init__(
+
 
         if num_decoder_layers is None:
             num_decoder_layers = num_layers
@@ -1266,7 +1262,7 @@ def from_pretrained(
         """
         if isinstance(code_normalization, str):
            code_normalization = CodeNormalization.from_string(code_normalization)
-        if from_hub:
+        if from_hub or path.endswith(".safetensors"):
            return super().from_pretrained(path, device=device, dtype=dtype, **kwargs)
 
        state_dict = th.load(path, map_location="cpu", weights_only=True)
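
One practical effect of the `from_pretrained` change above: a local `.safetensors` checkpoint now takes the same loading path as `from_hub=True`. A hedged usage sketch; the `CrossCoder` class name and the checkpoint path are assumptions for illustration, not taken from this diff:

# Assumed usage; both calls below would dispatch to the hub-style loader after this change.
from dictionary_learning import CrossCoder  # assumed import path

sae = CrossCoder.from_pretrained("checkpoints/ae.safetensors", device="cuda")
sae = CrossCoder.from_pretrained("org/repo-id", from_hub=True, device="cuda")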

tests/test_cache.py

Lines changed: 140 additions & 28 deletions
@@ -7,7 +7,7 @@
 from nnsight import LanguageModel
 from dictionary_learning.cache import ActivationCache
 from transformers import AutoModelForCausalLM, AutoTokenizer
-
+import numpy as np
 
 @pytest.fixture
 def temp_dir():
@@ -267,36 +267,148 @@ def test_activation_cache_with_normalizer(temp_dir):
         cache.std, computed_std, atol=1e-5, rtol=1e-5
     ), "Cached std doesn't match computed std"
 
-    # Test normalizer functionality
-    normalizer = cache.normalizer
+    print(f"✓ Successfully tested ActivationCache with {len(cache)} activations")
+    print(f"✓ Mean shape: {cache.mean.shape}, Std shape: {cache.std.shape}")
+
 
-    # Test normalization of a sample activation
-    sample_activation = cached_activations[0]
-    normalized = normalizer(sample_activation)
+def test_sequence_ranges_no_bos_token(temp_dir):
+    """Test that sequence ranges are stored when model has no BOS token."""
+    # Set flag to handle meta tensors properly
+    if hasattr(th.fx, 'experimental'):
+        th.fx.experimental._config.meta_nonzero_assume_all_nonzero = True
 
-    # Verify normalization: (x - mean) / std (with small epsilon for numerical stability)
-    expected_normalized = (sample_activation - cache.mean) / (cache.std + 1e-8)
-    assert th.allclose(
-        normalized, expected_normalized, atol=1e-6
-    ), "Normalizer doesn't work correctly"
+    # Skip test if CUDA not available
+    if not th.cuda.is_available():
+        pytest.skip("CUDA not available, skipping test")
+
+    # Test strings of different lengths
+    test_strings = [
+        "Hello world",
+        "This is a longer sentence with more tokens",
+        "Short",
+        "Medium length text here",
+    ]
 
-    # Test batch normalization
-    batch_normalized = normalizer(cached_activations[:5])
-    expected_batch_normalized = (cached_activations[:5] - cache.mean) / (
-        cache.std + 1e-8
+    # Load GPT-2 model and modify tokenizer to simulate no BOS token
+    tokenizer = AutoTokenizer.from_pretrained("gpt2")
+    model = AutoModelForCausalLM.from_pretrained(
+        "gpt2", device_map="auto", torch_dtype=th.float32
     )
-    assert th.allclose(
-        batch_normalized, expected_batch_normalized, atol=1e-6
-    ), "Batch normalization doesn't work correctly"
+    model = LanguageModel(model, torch_dtype=th.float32, tokenizer=tokenizer)
+    model.tokenizer.pad_token = model.tokenizer.eos_token
+
+    # Simulate model without BOS token
+    original_bos_token_id = model.tokenizer.bos_token_id
+    model.tokenizer.bos_token_id = None
+
+    tokens = model.tokenizer(test_strings, add_special_tokens=True, return_tensors="pt", padding=True, truncation=True)
+    lengths = tokens["attention_mask"].sum(dim=1).tolist()
+    ranges = np.cumsum([0] + lengths)
+    try:
+        # Get a transformer block
+        target_layer = model.transformer.h[6]
+        submodule_name = "transformer_h_6"
+
+        # Parameters for activation collection
+        batch_size = 2
+        context_len = 32
+        d_model = 768
+
+        # Collect activations with sequence start tracking
+        ActivationCache.collect(
+            data=test_strings,
+            submodules=(target_layer,),
+            submodule_names=(submodule_name,),
+            model=model,
+            store_dir=temp_dir,
+            batch_size=batch_size,
+            context_len=context_len,
+            shard_size=1000,
+            d_model=d_model,
+            io="out",
+            store_tokens=True,
+            shuffle_shards=False,  # Required for sequence ranges
+        )
 
-    # Test that normalization preserves shape
-    assert (
-        normalized.shape == sample_activation.shape
-    ), "Normalization changed tensor shape"
-    assert (
-        batch_normalized.shape == cached_activations[:5].shape
-    ), "Batch normalization changed tensor shape"
+        # Load the cached activations
+        cache = ActivationCache(temp_dir, submodule_name + "_out")
+
+        # Verify sequence ranges were stored
+        sequence_ranges = cache.sequence_ranges
+        assert sequence_ranges is not None, "sequence ranges should be stored for model without BOS token"
+
+        # Should have one sequence start per input string plus one for the last sequence
+        assert len(sequence_ranges) == len(test_strings) + 1, f"Expected {len(test_strings)} sequence ranges, got {len(sequence_ranges)}"
+
+        # First sequence should start at position 0
+        assert sequence_ranges[0].item() == 0, "First sequence should start at position 0"
+
+        # sequence ranges should be the same as the ranges computed from the tokens
+        assert np.allclose(sequence_ranges, ranges), "sequence ranges should be the same as the ranges computed from the tokens"
+
+        # sequence ranges should be in ascending order
+        for i in range(1, len(sequence_ranges)):
+            assert sequence_ranges[i] > sequence_ranges[i-1], f"sequence ranges should be ascending: {sequence_ranges}"
+
+        # Verify sequence ranges align with token boundaries
+        tokens = cache.tokens
+        total_tokens = len(tokens)
+
+        # All sequence ranges should be valid indices
+        for start_idx in sequence_ranges:
+            assert 0 <= start_idx <= total_tokens, f"Invalid sequence start index: {start_idx}"
+
+    finally:
+        # Restore original BOS token
+        model.tokenizer.bos_token_id = original_bos_token_id
+
+
+def test_sequence_ranges_with_bos_token(temp_dir):
+    """Test that sequence ranges are NOT stored when model has BOS token."""
+    # Set flag to handle meta tensors properly
+    if hasattr(th.fx, 'experimental'):
+        th.fx.experimental._config.meta_nonzero_assume_all_nonzero = True
 
-    print(f"✓ Successfully tested ActivationCache with {len(cache)} activations")
-    print(f"✓ Mean shape: {cache.mean.shape}, Std shape: {cache.std.shape}")
-    print(f"✓ Normalizer tests passed")
+    # Skip test if CUDA not available
+    if not th.cuda.is_available():
+        pytest.skip("CUDA not available, skipping test")
+
+    test_strings = ["Hello world", "Another test sentence"]
+
+    # Load GPT-2 model with BOS token
+    tokenizer = AutoTokenizer.from_pretrained("gpt2")
+    model = AutoModelForCausalLM.from_pretrained(
+        "gpt2", device_map="auto", torch_dtype=th.float32
+    )
+    model = LanguageModel(model, torch_dtype=th.float32, tokenizer=tokenizer)
+    model.tokenizer.pad_token = model.tokenizer.eos_token
+
+    # Ensure model has BOS token (set it explicitly)
+    model.tokenizer.bos_token_id = model.tokenizer.eos_token_id
+
+    # Get a transformer block
+    target_layer = model.transformer.h[6]
+    submodule_name = "transformer_h_6"
+
+    # Collect activations
+    ActivationCache.collect(
+        data=test_strings,
+        submodules=(target_layer,),
+        submodule_names=(submodule_name,),
+        model=model,
+        store_dir=temp_dir,
+        batch_size=2,
+        context_len=32,
+        shard_size=1000,
+        d_model=768,
+        io="out",
+        store_tokens=True,
+        shuffle_shards=False,
+    )
+
+    # Load the cached activations
+    cache = ActivationCache(temp_dir, submodule_name + "_out")
+
+    # Verify sequence ranges were NOT stored
+    sequence_ranges = cache.sequence_ranges
+    assert sequence_ranges is None, "sequence ranges should not be stored for model with BOS token"
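
The no-BOS test derives its expected boundaries directly from the tokenizer's attention mask; the same recipe can be used outside the test suite to sanity-check a cache. A sketch assuming the cache was built with store_tokens=True and shuffle_shards=False, as in the test:

import numpy as np
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

texts = ["Hello world", "This is a longer sentence with more tokens"]
enc = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
lengths = enc["attention_mask"].sum(dim=1).tolist()

# Start offset of each sequence in the flattened stream, plus a final sentinel.
expected_ranges = np.cumsum([0] + lengths)
print(expected_ranges)  # exact values depend on the tokenizer

These values should match `cache.sequence_ranges` element-for-element, which is what the `np.allclose` assertion in the first test checks.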
