import pytest
import struct

import torch
from typing import Dict, Tuple
from itertools import chain

from bintensors import BintensorError
from bintensors.torch import save, load

# dtype name -> discriminant used in the serialized tensor info
_DTYPE = {
    "BOOL": 0,
    "U8": 1,
    "I8": 2,
    "F8_E5M2": 3,
    "F8_E4M3": 4,
    "I16": 5,
    "U16": 6,
    "F16": 7,
    "BF16": 8,
    "I32": 9,
    "U32": 10,
    "F32": 11,
    "F64": 12,
    "I64": 13,
    "U64": 14,
}


def encode_unsigned_variant_encoding(number: int) -> bytes:
    """Encode an unsigned integer in the variable-length format used by the
    header: a single byte below 0xFB, otherwise a marker byte (0xFB/0xFC/0xFD)
    followed by a little-endian u16/u32/u64."""
    if number > 0xFFFFFFFF:
        return b"\xfd" + number.to_bytes(8, "little")
    elif number > 0xFFFF:
        return b"\xfc" + number.to_bytes(4, "little")
    elif number > 0xFA:
        return b"\xfb" + number.to_bytes(2, "little")
    else:
        return number.to_bytes(1, "little")
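

def test_varint_encoding_examples():
    # Minimal sanity sketch of the encoder's branches; the expected byte
    # strings below are worked out from the thresholds above, not taken
    # from an external spec.
    assert encode_unsigned_variant_encoding(250) == b"\xfa"
    assert encode_unsigned_variant_encoding(251) == b"\xfb\xfb\x00"
    assert encode_unsigned_variant_encoding(0x10000) == b"\xfc\x00\x00\x01\x00"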


def encode_tensor_info(dtype: str, shape: Tuple[int, ...], offset: Tuple[int, int]) -> bytes:
    """Encode a TensorInfo struct into a byte buffer."""
    if dtype not in _DTYPE:
        raise ValueError(f"Unsupported dtype: {dtype}")

    # flatten the tensor info: dtype discriminant, rank, shape, then offsets
    layout = chain([_DTYPE[dtype], len(shape)], shape, offset)
    return b"".join(map(encode_unsigned_variant_encoding, layout))
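

def test_tensor_info_encoding_example():
    # Worked example of the layout above for one F32 tensor of shape (2, 2)
    # occupying bytes 0..16: discriminant 11, rank 2, the two dims, then the
    # (start, end) offsets, each varint-encoded.
    assert encode_tensor_info("F32", (2, 2), (0, 16)) == b"\x0b\x02\x02\x02\x00\x10"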


def encode_hash_map(index_map: Dict[str, int]) -> bytes:
    """Encode a dictionary of string keys and integer values."""
    length = encode_unsigned_variant_encoding(len(index_map))

    hash_map_layout = chain.from_iterable(
        (
            encode_unsigned_variant_encoding(len(k)),
            k.encode("utf-8"),
            encode_unsigned_variant_encoding(v),
        )
        for k, v in index_map.items()
    )

    return b"".join(chain([length], hash_map_layout))
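

def test_hash_map_encoding_example():
    # Worked example of the map encoding above: entry count first, then
    # (key length, utf-8 key bytes, value) per entry.
    assert encode_hash_map({}) == b"\x00"
    assert encode_hash_map({"weight_0": 0}) == b"\x01\x08weight_0\x00"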


def test_empty_file():
    """bintensors allows an empty tensor dictionary."""
    tensor_dict = {}
    buffer = save(tensor_dict)
    # unpack the first 8 bytes of the buffer as a little-endian unsigned 64-bit integer
    header_size = struct.unpack("<Q", buffer[0:8])[0]
    # header-size field + header itself; an empty file carries no tensor data
    MAX_FILE_SIZE = 8 + header_size
    assert header_size == 8, "expected an 8-byte header for an empty file."
    # metadata option (None), zero tensors, zero index entries, space-padded to 8 bytes
    assert buffer[8:] == b"\x00\x00\x00     ", "expected empty metadata fields."
    assert MAX_FILE_SIZE == len(buffer), "these should be equal"
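

def test_empty_file_layout_example():
    # Rebuilds the expected empty file from the helpers above, mirroring the
    # manual construction used in test_man_cmp below: metadata tag, varint
    # tensor count, index map, space padding, all prefixed by the 8-byte
    # header size.
    layout = b"\0" + encode_unsigned_variant_encoding(0) + encode_hash_map({})
    layout += b" " * ((8 - len(layout)) % 8)
    expected = len(layout).to_bytes(8, "little") + layout
    assert save({}) == expected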


def test_man_cmp():
    """Manually construct the serialized layout and compare it against save()."""
    size = 2
    shape = (2, 2)
    tensor_chunk_length = shape[0] * shape[1] * 4  # byte size of one F32 tensor buffer

    length = encode_unsigned_variant_encoding(size)

    # Create tensor info buffer
    tensor_info_buffer = b"".join(
        encode_tensor_info(
            "F32",
            shape,
            (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
        )
        for i in range(size)
    )
    layout_tensor_info = length + tensor_info_buffer

    expected = []
    for (start, end, step) in [(0, size, 1), (size - 1, -1, -1)]:
        # Create hash map layout
        hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(start, end, step)})

        # Construct full layout: empty metadata + tensor infos + index map
        layout = b"\0" + layout_tensor_info + hash_map_layout
        layout += b" " * ((8 - len(layout)) % 8)  # space padding to an 8-byte boundary
        n = len(layout)
        n_header = n.to_bytes(8, "little")

        # assemble header-size field, header, and zeroed tensor data
        buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)
        expected.append(buffer)

    tensor_dict = {"weight_0": torch.zeros(shape), "weight_1": torch.zeros(shape)}

    buffer = save(tensor_dict)
    # check both orderings, since the index map's iteration order is not guaranteed
    assert buffer in expected, f"got {buffer}, and expected {expected}"
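

def test_save_load_roundtrip():
    # Minimal round-trip sketch, assuming `load` is the inverse of `save`
    # (both imported from bintensors.torch at the top of this file) and
    # returns a dict of tensors keyed by name.
    tensors = {"weight_0": torch.zeros((2, 2)), "weight_1": torch.ones((2, 2))}
    reloaded = load(save(tensors))
    assert all(torch.equal(reloaded[k], tensors[k]) for k in tensors)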


def test_mismatch_length_of_metadata_large():
    size = 2
    shape = (2, 2)
    tensor_chunk_length = shape[0] * shape[1] * 4  # byte size of one F32 tensor buffer

    # declare far more tensors in the header than are actually encoded
    length = encode_unsigned_variant_encoding(size * 1000)

    # Create tensor info buffer
    tensor_info_buffer = b"".join(
        encode_tensor_info(
            "F32",
            shape,
            (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
        )
        for i in range(size)
    )
    layout_tensor_info = length + tensor_info_buffer

    # Create hash map layout
    hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(size)})

    # Construct full layout
    layout = b"\0" + layout_tensor_info + hash_map_layout
    layout += b" " * ((8 - len(layout)) % 8)  # space padding to an 8-byte boundary
    n = len(layout)
    n_header = n.to_bytes(8, "little")

    # assemble header-size field, header, and zeroed tensor data
    buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)

    with pytest.raises(BintensorError):
        # invalid: the declared tensor count is far larger than the
        # number of tensor-info entries that follow
        _ = load(buffer)


def test_mismatch_length_of_metadata_small():
    size = 2
    shape = (2, 2)
    tensor_chunk_length = shape[0] * shape[1] * 4  # byte size of one F32 tensor buffer

    # declare fewer tensors in the header than are actually encoded
    length = encode_unsigned_variant_encoding(size - 1)

    # Create tensor info buffer
    tensor_info_buffer = b"".join(
        encode_tensor_info(
            "F32",
            shape,
            (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
        )
        for i in range(size)
    )
    layout_tensor_info = length + tensor_info_buffer

    # Create hash map layout
    hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(size)})

    # Construct full layout
    layout = b"\0" + layout_tensor_info + hash_map_layout
    layout += b" " * ((8 - len(layout)) % 8)  # space padding to an 8-byte boundary
    n = len(layout)
    n_header = n.to_bytes(8, "little")

    # assemble header-size field, header, and zeroed tensor data
    buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)

    with pytest.raises(BintensorError):
        # invalid: the declared tensor count is smaller than the
        # number of tensor-info entries that follow
        _ = load(buffer)


def test_mismatch_length_of_metadata():
    size = 2
    shape = (2, 2)
    tensor_chunk_length = shape[0] * shape[1] * 4  # byte size of one F32 tensor buffer

    # encode the (deliberately wrong) tensor count as a varint
    length = encode_unsigned_variant_encoding(size * 1000)

    # Create tensor info byte buffer
    tensor_info_buffer = b"".join(
        encode_tensor_info(
            "F32",
            shape,
            (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
        )
        for i in range(size)
    )
    layout_tensor_info = length + tensor_info_buffer

    # Create hash map layout
    hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(size)})

    # Construct full layout:
    # empty metadata + tensor infos + index map
    layout = b"\0" + layout_tensor_info + hash_map_layout

    # space padding to an 8-byte boundary
    layout += b" " * ((8 - len(layout)) % 8)
    n = len(layout)

    # size of the full header (metadata + tensor infos + index map)
    n_header = n.to_bytes(8, "little")

    # assemble everything into one buffer
    buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)

    with pytest.raises(BintensorError):
        # invalid: the declared tensor count does not match
        # the number of tensor-info entries that follow
        _ = load(buffer)