chore(test): update tests for version bump to 0.1.0

LVivona · LVivona · commit aa774ad9ffb5 · 2025-04-13T17:05:47.000-04:00
diff --git a/binding/python/tests/test_buffer.py b/binding/python/tests/test_buffer.py
@@ -39,30 +39,23 @@ def encode_unsigned_variant_encoding(number: int) -> bytes:
         return number.to_bytes(1, "little")
 
 
-def encode_tensor_info(dtype: str, shape: Tuple[int, ...], offset: Tuple[int, int]) -> List[bytes]:
-    """Encodes the struct TensorInfo into byte buffer"""
+def encode_header(id: str, dtype: str, shape: Tuple[int, ...], offset: Tuple[int, int]) -> bytes:
+    """Encode a tensor's string id followed by its TensorInfo fields into one byte string."""
     if dtype not in _DTYPE:
         raise ValueError(f"Unsupported dtype: {dtype}")
 
-    # flatten out the tensor info
-    layout = chain([_DTYPE[dtype], len(shape)], shape, offset)
-    return b"".join(list(map(encode_unsigned_variant_encoding, layout)))
+    id_bytes = id.encode("utf-8")
 
-
-def encode_hash_map(index_map: Dict[str, int]) -> List[bytes]:
-    """Encodes a dictionary of string keys and integer values."""
-    length = encode_unsigned_variant_encoding(len(index_map))
-
-    hash_map_layout = chain.from_iterable(
-        (
-            encode_unsigned_variant_encoding(len(k)),
-            k.encode("utf-8"),
-            encode_unsigned_variant_encoding(v),
-        )
-        for k, v in index_map.items()
+    # The length prefix must count encoded bytes, not characters (they differ for non-ASCII ids).
+    encoded_id = encode_unsigned_variant_encoding(len(id_bytes)) + id_bytes
+    # Numeric fields in order: _DTYPE[dtype], number of dimensions, each dimension, both offsets.
+    numeric_layout = chain(
+        [_DTYPE[dtype], len(shape)], shape, offset
     )
 
-    return b"".join(chain([length], hash_map_layout))
+    encoded_tensor_info = b"".join(encode_unsigned_variant_encoding(x) for x in numeric_layout)
+
+    return encoded_id + encoded_tensor_info
 
 
 def test_empty_file():
@@ -74,7 +67,7 @@ def test_empty_file():
     # header size + metadata + empty tensors
     MAX_FILE_SIZE = 8 + header_size
     assert header_size == 8, "expected packed buffer shoudl be unsinged interger 8."
-    assert buffer[8:] == b"\x00\x00\x00     ", "expected empty metadata fields."
+    assert buffer[8:] == b"\x00\x00      ", "expected empty metadata fields."
     assert MAX_FILE_SIZE == len(buffer), "These should  be equal"
 
 
@@ -87,35 +80,27 @@ def test_man_cmp():
 
     # Create tensor info buffer
     tensor_info_buffer = b"".join(
-        encode_tensor_info(
+        encode_header(
+            f"weight_{i}",
             "F32",
             shape,
             (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
         )
         for i in range(size)
     )
-    layout_tensor_info = length + tensor_info_buffer
-
-    expected = []
-    for (start, end, step) in [(0, size, 1), (size - 1, -1, -1)]:
-        # Create hash map layout
-        hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(start, end, step)})
-
-        # Construct full layout
-        layout = b"\0" + layout_tensor_info + hash_map_layout
-        layout += b" " * (((8 - len(layout)) % 8) % 8)
-        n = len(layout)
-        n_header = n.to_bytes(8, "little")
-
-        # layout together
-        buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)
-        expected.append(buffer)
+    layout = length + tensor_info_buffer
+    layout = b"\0" + layout
+    layout += b" " * (((8 - len(layout)) % 8) % 8)
+    n = len(layout)
+    n_header = n.to_bytes(8, "little")
+    
+    expected = n_header + layout + (b"\0" * tensor_chunk_length * size)
 
     tensor_dict = {"weight_0": torch.zeros(shape), "weight_1": torch.zeros(shape)}
 
     buffer = save(tensor_dict)
     # we need to check both since there is no order in the hashmap
-    assert buffer in expected, f"got {buffer}, and expected {expected}"
+    assert buffer == expected, f"got {buffer}, and expected {expected}"
 
 
 def test_missmatch_length_of_metadata_large():
@@ -127,28 +112,22 @@ def test_missmatch_length_of_metadata_large():
 
     # Create tensor info buffer
     tensor_info_buffer = b"".join(
-        encode_tensor_info(
+        encode_header(
+            f"weight_{i}",
             "F32",
             shape,
             (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
         )
         for i in range(size)
     )
-    layout_tensor_info = length + tensor_info_buffer
-
-    expected = [0] * 2
-
-    # Create hash map layout
-    hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(0, 2, 1)})
-
-    # Construct full layout
-    layout = b"\0" + layout_tensor_info + hash_map_layout
+    layout = length + tensor_info_buffer
+    layout = b"\0" + layout
     layout += b" " * (((8 - len(layout)) % 8) % 8)
     n = len(layout)
     n_header = n.to_bytes(8, "little")
-
+    
     # layout together
-    buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)
+    buffer = n_header + layout + b"\0" * (tensor_chunk_length * size)
 
     with pytest.raises(BintensorError):
         # this is not a valid since the metadata
@@ -165,70 +144,25 @@ def test_missmatch_length_of_metadata_small():
 
     # Create tensor info buffer
     tensor_info_buffer = b"".join(
-        encode_tensor_info(
+        encode_header(
+            f"weight_{i}",
             "F32",
             shape,
             (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
         )
         for i in range(size)
     )
-    layout_tensor_info = length + tensor_info_buffer
-
-    # Create hash map layout
-    hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(0, 2, 1)})
-
-    # Construct full layout
-    layout = b"\0" + layout_tensor_info + hash_map_layout
+    layout = length + tensor_info_buffer
+    layout = b"\0" + layout
     layout += b" " * (((8 - len(layout)) % 8) % 8)
     n = len(layout)
     n_header = n.to_bytes(8, "little")
 
     # layout together
-    buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)
+    buffer = n_header + layout + b"\0" * (tensor_chunk_length * size)
 
     with pytest.raises(BintensorError):
         # this is not a valid since the metadata
         # size doe not match as it too big
         _ = load(buffer)
 
-
-def test_missmatch_length_of_metadata():
-    size = 2
-    shape = (2, 2)
-    tensor_chunk_length = shape[0] * shape[1] * 4  # Size of a tensor buffer
-
-    # convert usize or unsigned long long into variant encoding
-    length = encode_unsigned_variant_encoding(size * 1000)
-
-    # Create tensor info byte buffer
-    tensor_info_buffer = b"".join(
-        encode_tensor_info(
-            "F32",
-            shape,
-            (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
-        )
-        for i in range(size)
-    )
-    layout_tensor_info = length + tensor_info_buffer
-
-    # Create hash map layout
-    hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(0, 2, 1)})
-
-    # Construct full layout
-    # metadata empty + tensor_info + hash_map_index_map
-    layout = b"\0" + layout_tensor_info + hash_map_layout
-
-    # empty padding
-    layout += b" " * (((8 - len(layout)) % 8) % 8)
-    n = len(layout)
-
-    # size of full header (metadata + tensors info + index map)
-    n_header = n.to_bytes(8, "little")
-
-    # layout together into buffer
-    buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)
-
-    with pytest.raises(BintensorError):
-        # this is not a valid since the metadata
-        # size doe not match as it too big
-        _ = load(buffer)
diff --git a/binding/python/tests/test_np.py b/binding/python/tests/test_np.py
@@ -1,11 +1,10 @@
 import pytest
 
-import os
 import tempfile
 import numpy as np
 
 from typing import Dict, Tuple
-from bintensors.numpy import load, load_file, save, save_file, safe_open
+from bintensors.numpy import load, load_file, save, save_file, safe_open, save_with_checksum
 
 
 def _compare_np_array(lhs: np.ndarray, rhs: np.ndarray) -> bool:
@@ -139,3 +138,41 @@ def test_safe_open_access_with_metadata():
             assert model.get_tensor("h.0.ln_1.weight") is not None
             assert model.get_tensor("h.0.ln_1.bias") is not None
             assert model.metadata()["hello"] == "world"
+
+
+def test_checksum_two_diffrent_models():
+    """Two models filled with independent random values must not share a checksum."""
+    model_1 = {"ln.weight": np.random.random((10, 10)), "ln.bias": np.random.random((10))}
+    model_2 = {"ln.weight": np.random.random((10, 10)), "ln.bias": np.random.random((10))}
+
+    checksum1, _ = save_with_checksum(model_1)
+    checksum2, _ = save_with_checksum(model_2)
+
+    assert checksum1 != checksum2, f"different models produced the same checksum: {checksum1!r}"
+
+
+def test_checksum_two_same_models():
+    """Two all-zero models with identical keys and shapes must always share a checksum."""
+    model_1 = {"ln.weight": np.zeros((2, 2)), "ln.bias": np.zeros((10))}
+    model_2 = {"ln.weight": np.zeros((2, 2)), "ln.bias": np.zeros((10))}
+
+    # NOTE(review): presumably repeated to surface nondeterministic output; confirm 1000 is needed.
+    for _ in range(1000):
+        checksum1, _ = save_with_checksum(model_1)
+        checksum2, _ = save_with_checksum(model_2)
+        assert checksum1 == checksum2, f"identical models differ: {checksum1!r} != {checksum2!r}"
+
+
+def test_checksum_two_same_models_with_diffrent_framework():
+    """The numpy and torch savers must give the same checksum for equal float32 zero tensors."""
+    # Skip instead of erroring when torch is not installed alongside numpy.
+    torch = pytest.importorskip("torch")
+    from bintensors.torch import save_with_checksum as save_with_checksum_pt
+
+    model_1 = {"ln.weight": np.zeros((2, 2), dtype=np.float32), "ln.bias": np.zeros((10), dtype=np.float32)}
+    model_2 = {"ln.weight": torch.zeros((2, 2), dtype=torch.float32), "ln.bias": torch.zeros((10), dtype=torch.float32)}
+
+    for _ in range(1000):
+        checksum1, _ = save_with_checksum(model_1)
+        checksum2, _ = save_with_checksum_pt(model_2)
+        assert checksum1 == checksum2, f"numpy/torch checksums differ: {checksum1!r} != {checksum2!r}"
diff --git a/binding/python/tests/test_pt.py b/binding/python/tests/test_pt.py
@@ -5,7 +5,7 @@
 import torch
 
 from typing import Dict, Tuple
-from bintensors.torch import load, save, save_file, load_file, safe_open
+from bintensors.torch import load, save, save_file, load_file, safe_open, save_with_checksum
 
 
 def _compare_torch_tensors(lhs: torch.Tensor, rhs: torch.Tensor) -> bool:
@@ -124,3 +124,26 @@ def test_pt_safe_open_access_and_metadata():
             assert model.get_tensor("h.0.ln_1.weight") is not None
             assert model.get_tensor("h.0.ln_1.bias") is not None
             assert model.metadata() is None
+
+
+def test_checksum_two_diffrent_models():
+    """Two models filled with independent random values must not share a checksum."""
+    model_1 = {"ln.weight": torch.rand((10, 10)), "ln.bias": torch.rand((10))}
+    model_2 = {"ln.weight": torch.rand((10, 10)), "ln.bias": torch.rand((10))}
+
+    checksum1, _ = save_with_checksum(model_1)
+    checksum2, _ = save_with_checksum(model_2)
+
+    assert checksum1 != checksum2, f"different models produced the same checksum: {checksum1!r}"
+
+
+def test_checksum_two_same_models():
+    """Two all-zero models with identical keys and shapes must always share a checksum."""
+    model_1 = {"ln.weight": torch.zeros((2, 2)), "ln.bias": torch.zeros((10))}
+    model_2 = {"ln.weight": torch.zeros((2, 2)), "ln.bias": torch.zeros((10))}
+
+    # NOTE(review): presumably repeated to surface nondeterministic output; confirm 1000 is needed.
+    for _ in range(1000):
+        checksum1, _ = save_with_checksum(model_1)
+        checksum2, _ = save_with_checksum(model_2)
+        assert checksum1 == checksum2, f"identical models differ: {checksum1!r} != {checksum2!r}"