feat(test): add Python test for binding to improve CI #7

Merged
merged 10 commits on Apr 7, 2025
10 changes: 5 additions & 5 deletions .github/workflows/python.yml
@@ -87,8 +87,8 @@ jobs:
black --check --line-length 119 --target-version py35 py/bintensors

# TODO: uncomment this after adding formal pytest
# - name: Run tests
# run: |
# cargo test
# pip install .[testing]
# pytest -sv tests/
- name: Run tests
run: |
cargo test
pip install .[testing]
pytest -sv tests/
56 changes: 56 additions & 0 deletions binding/python/makefile
@@ -0,0 +1,56 @@
.PHONY: help clean test lint check format format-dir install-dev build

# Define default Python and pip executables
PYTHON ?= python
PIP ?= pip
PYTEST ?= pytest
BLACK ?= black
BLACK_OPTS ?= --line-length 119 --target-version py35

# Source directories
SRC_DIRS ?= py/bintensors tests

help:
@echo "Available make targets:"
@echo " help - Show this help message"
@echo " clean - Remove build artifacts and cache files"
@echo " test - Run all tests"
	@echo " lint - Format all source files with Black"
@echo " check - Run Black lint check without modifying files"
@echo " format - Format all source files with Black"
@echo " format-dir DIR=path/to/dir - Format files in specific directory"

clean:
rm -rf build/ dist/ *.egg-info/ .pytest_cache/ .coverage htmlcov/ .eggs/
find . -type d -name __pycache__ -exec rm -rf {} +
find . -type f -name "*.pyc" -delete

test:
$(PYTEST) -sv tests/

lint:
$(BLACK) $(BLACK_OPTS) $(SRC_DIRS)

check:
$(BLACK) $(BLACK_OPTS) --check $(SRC_DIRS)

format: lint

format-dir:
@if [ -z "$(DIR)" ]; then \
echo "Error: DIR parameter is required. Usage: make format-dir DIR=path/to/dir"; \
exit 1; \
fi
@if [ ! -d "$(DIR)" ]; then \
echo "Error: Directory '$(DIR)' does not exist"; \
exit 1; \
fi
$(BLACK) $(BLACK_OPTS) "$(DIR)"

# Install development dependencies
install-dev:
$(PIP) install -e ".[dev]"

# Build the package
build:
$(PIP) install .
7 changes: 1 addition & 6 deletions binding/python/py/bintensors/numpy.py
@@ -178,9 +178,6 @@ def load_file(filename: Union[str, os.PathLike]) -> Dict[str, np.ndarray]:


# np.float8 formats require 2.1; we do not support these dtypes on earlier versions
_float8_e4m3fn = getattr(np, "float8_e4m3fn", None)
_float8_e5m2 = getattr(np, "float8_e5m2", None)

_TYPES = {
"F64": np.float64,
"F32": np.float32,
@@ -193,9 +190,7 @@ def load_file(filename: Union[str, os.PathLike]) -> Dict[str, np.ndarray]:
"U16": np.uint16,
"I8": np.int8,
"U8": np.uint8,
"BOOL": np.bool,
"F8_E4M3": _float8_e4m3fn,
"F8_E5M2": _float8_e5m2,
"BOOL": bool,
}


3 changes: 1 addition & 2 deletions binding/python/pyproject.toml
@@ -41,11 +41,10 @@ quality = [
]
testing = [
"bintensors[numpy]",
"h5py>=3.7.0",
"bintensors[torch]",
"setuptools_rust>=1.5.2",
"pytest>=7.2.0",
"pytest-benchmark>=4.0.0",
# "python-afl>=0.7.3",
"hypothesis>=6.70.2",
]
all = [
234 changes: 234 additions & 0 deletions binding/python/tests/test_buffer.py
@@ -0,0 +1,234 @@
import pytest
import struct

import torch
from typing import List, Dict, Tuple
from itertools import chain

from bintensors import BintensorError
from bintensors.torch import save, load

_DTYPE = {
"BOL": 0,
"U8": 1,
"I8": 2,
"F8_E5M2": 3,
"F8_E4M3": 4,
"I16": 5,
"U16": 6,
"F16": 7,
"BF16": 8,
"I32": 9,
"U32": 10,
"F32": 11,
"F64": 12,
"I64": 13,
"F64": 14,
}


def encode_unsigned_variant_encoding(number: int) -> bytes:
"""Encodes an unsigned integer into a variable-length format."""
if number > 0xFFFFFFFF:
return b"\xfd" + number.to_bytes(8, "little")
elif number > 0xFFFF:
return b"\xfc" + number.to_bytes(4, "little")
elif number > 0xFA:
return b"\xfb" + number.to_bytes(2, "little")
else:
return number.to_bytes(1, "little")
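
# A few worked outputs of the helper above, for reference (they follow directly from
# the branches: values up to 0xFA stay single-byte, larger values get a marker byte
# plus a little-endian payload):
#   encode_unsigned_variant_encoding(3)        == b"\x03"
#   encode_unsigned_variant_encoding(0xFB)     == b"\xfb\xfb\x00"
#   encode_unsigned_variant_encoding(0x10000)  == b"\xfc\x00\x00\x01\x00"
#   encode_unsigned_variant_encoding(2**32)    == b"\xfd" + (2**32).to_bytes(8, "little")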


def encode_tensor_info(dtype: str, shape: Tuple[int, ...], offset: Tuple[int, int]) -> bytes:
    """Encodes a TensorInfo struct (dtype, shape, byte offsets) into a byte buffer."""
if dtype not in _DTYPE:
raise ValueError(f"Unsupported dtype: {dtype}")

# flatten out the tensor info
layout = chain([_DTYPE[dtype], len(shape)], shape, offset)
return b"".join(list(map(encode_unsigned_variant_encoding, layout)))


def encode_hash_map(index_map: Dict[str, int]) -> bytes:
    """Encodes a dictionary of string keys and integer values."""
length = encode_unsigned_variant_encoding(len(index_map))

hash_map_layout = chain.from_iterable(
(
encode_unsigned_variant_encoding(len(k)),
k.encode("utf-8"),
encode_unsigned_variant_encoding(v),
)
for k, v in index_map.items()
)

return b"".join(chain([length], hash_map_layout))


def test_empty_file():
"bintensors allows empty dictonary"
tensor_dict = {}
buffer = save(tensor_dict)
    # decode the first 8 bytes of the buffer as an unsigned long long (the header size)
    header_size = struct.unpack("<Q", buffer[0:8])[0]
    # header size field + metadata + empty tensors
    MAX_FILE_SIZE = 8 + header_size
    assert header_size == 8, "expected the encoded header size to be 8"
    assert buffer[8:] == b"\x00\x00\x00" + b" " * 5, "expected empty metadata fields"
    assert MAX_FILE_SIZE == len(buffer), "expected the buffer to contain only the header-size field and the header"


def test_man_cmp():
size = 2
shape = (2, 2)
tensor_chunk_length = shape[0] * shape[1] * 4 # Size of a tensor buffer

length = encode_unsigned_variant_encoding(size)

# Create tensor info buffer
tensor_info_buffer = b"".join(
encode_tensor_info(
"F32",
shape,
(i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
)
for i in range(size)
)
layout_tensor_info = length + tensor_info_buffer

expected = []
for (start, end, step) in [(0, size, 1), (size - 1, -1, -1)]:
# Create hash map layout
hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(start, end, step)})

# Construct full layout
layout = b"\0" + layout_tensor_info + hash_map_layout
layout += b" " * (((8 - len(layout)) % 8) % 8)
n = len(layout)
n_header = n.to_bytes(8, "little")

# layout together
buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)
expected.append(buffer)

tensor_dict = {"weight_0": torch.zeros(shape), "weight_1": torch.zeros(shape)}

buffer = save(tensor_dict)
    # check both orderings since the hash map does not guarantee insertion order
assert buffer in expected, f"got {buffer}, and expected {expected}"


def test_missmatch_length_of_metadata_large():
size = 2
shape = (2, 2)
tensor_chunk_length = shape[0] * shape[1] * 4 # Size of a tensor buffer

length = encode_unsigned_variant_encoding(size * 1000)

# Create tensor info buffer
tensor_info_buffer = b"".join(
encode_tensor_info(
"F32",
shape,
(i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
)
for i in range(size)
)
layout_tensor_info = length + tensor_info_buffer

expected = [0] * 2

# Create hash map layout
hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(0, 2, 1)})

# Construct full layout
layout = b"\0" + layout_tensor_info + hash_map_layout
layout += b" " * (((8 - len(layout)) % 8) % 8)
n = len(layout)
n_header = n.to_bytes(8, "little")

# layout together
buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)

with pytest.raises(BintensorError):
        # this is not valid since the declared metadata
        # size does not match (it is too large)
_ = load(buffer)


def test_missmatch_length_of_metadata_small():
size = 2
shape = (2, 2)
tensor_chunk_length = shape[0] * shape[1] * 4 # Size of a tensor buffer

length = encode_unsigned_variant_encoding(size - 1)

# Create tensor info buffer
tensor_info_buffer = b"".join(
encode_tensor_info(
"F32",
shape,
(i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
)
for i in range(size)
)
layout_tensor_info = length + tensor_info_buffer

# Create hash map layout
hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(0, 2, 1)})

# Construct full layout
layout = b"\0" + layout_tensor_info + hash_map_layout
layout += b" " * (((8 - len(layout)) % 8) % 8)
n = len(layout)
n_header = n.to_bytes(8, "little")

# layout together
buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)

with pytest.raises(BintensorError):
        # this is not valid since the declared metadata
        # size does not match (it is too small)
_ = load(buffer)


def test_missmatch_length_of_metadata():
size = 2
shape = (2, 2)
tensor_chunk_length = shape[0] * shape[1] * 4 # Size of a tensor buffer

# convert usize or unsigned long long into variant encoding
length = encode_unsigned_variant_encoding(size * 1000)

# Create tensor info byte buffer
tensor_info_buffer = b"".join(
encode_tensor_info(
"F32",
shape,
(i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
)
for i in range(size)
)
layout_tensor_info = length + tensor_info_buffer

# Create hash map layout
hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(0, 2, 1)})

# Construct full layout
# metadata empty + tensor_info + hash_map_index_map
layout = b"\0" + layout_tensor_info + hash_map_layout

# empty padding
layout += b" " * (((8 - len(layout)) % 8) % 8)
n = len(layout)

# size of full header (metadata + tensors info + index map)
n_header = n.to_bytes(8, "little")

# layout together into buffer
buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)

with pytest.raises(BintensorError):
        # this is not valid since the declared metadata
        # size does not match (it is too large)
_ = load(buffer)
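
For context, a minimal round-trip sketch of the save/load API these tests exercise (illustrative only; it assumes load returns a dict of torch tensors keyed by name, which the tests above do not assert):

import torch
from bintensors.torch import save, load

tensors = {"weight_0": torch.zeros((2, 2)), "weight_1": torch.zeros((2, 2))}
buffer = save(tensors)    # serialize to an in-memory bytes buffer, as in the tests above
restored = load(buffer)   # parse the header and reconstruct the tensors
assert set(restored) == set(tensors)
assert torch.equal(restored["weight_0"], tensors["weight_0"])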