
Commit 48055f4

feat(test): add Python test for binding to improve CI (#7)
2 parents 26e39dd + 3f979b2 commit 48055f4

7 files changed: +564 -13 lines changed

.github/workflows/python.yml

Lines changed: 5 additions & 5 deletions

@@ -87,8 +87,8 @@ jobs:
       black --check --line-length 119 --target-version py35 py/bintensors

       # TODO: uncomment this after adding formal pytest
-      # - name: Run tests
-      #   run: |
-      #     cargo test
-      #     pip install .[testing]
-      #     pytest -sv tests/
+      - name: Run tests
+        run: |
+          cargo test
+          pip install .[testing]
+          pytest -sv tests/
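This enables the previously commented-out step: CI now runs cargo test and the Python suite (pip install .[testing] followed by pytest -sv tests/) on each workflow run.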

binding/python/makefile

Lines changed: 56 additions & 0 deletions

@@ -0,0 +1,56 @@
.PHONY: help clean test lint check format format-dir install-dev build

# Define default Python and pip executables
PYTHON ?= python
PIP ?= pip
PYTEST ?= pytest
BLACK ?= black
BLACK_OPTS ?= --line-length 119 --target-version py35

# Source directories
SRC_DIRS ?= py/bintensors tests

help:
	@echo "Available make targets:"
	@echo "  help       - Show this help message"
	@echo "  clean      - Remove build artifacts and cache files"
	@echo "  test       - Run all tests"
	@echo "  lint       - Run Black on all source files"
	@echo "  check      - Run Black lint check without modifying files"
	@echo "  format     - Format all source files with Black"
	@echo "  format-dir DIR=path/to/dir - Format files in a specific directory"

clean:
	rm -rf build/ dist/ *.egg-info/ .pytest_cache/ .coverage htmlcov/ .eggs/
	find . -type d -name __pycache__ -exec rm -rf {} +
	find . -type f -name "*.pyc" -delete

test:
	$(PYTEST) -sv tests/

lint:
	$(BLACK) $(BLACK_OPTS) $(SRC_DIRS)

check:
	$(BLACK) $(BLACK_OPTS) --check $(SRC_DIRS)

format: lint

format-dir:
	@if [ -z "$(DIR)" ]; then \
		echo "Error: DIR parameter is required. Usage: make format-dir DIR=path/to/dir"; \
		exit 1; \
	fi
	@if [ ! -d "$(DIR)" ]; then \
		echo "Error: Directory '$(DIR)' does not exist"; \
		exit 1; \
	fi
	$(BLACK) $(BLACK_OPTS) "$(DIR)"

# Install development dependencies
install-dev:
	$(PIP) install -e ".[dev]"

# Build the package
build:
	$(PIP) install .
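Typical usage from binding/python: make test to run the suite, make check to verify formatting without modifying files, and make format-dir DIR=py/bintensors to format a single directory.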

binding/python/py/bintensors/numpy.py

Lines changed: 1 addition & 6 deletions

@@ -178,9 +178,6 @@ def load_file(filename: Union[str, os.PathLike]) -> Dict[str, np.ndarray]:


 # np.float8 formats require 2.1; we do not support these dtypes on earlier versions
-_float8_e4m3fn = getattr(np, "float8_e4m3fn", None)
-_float8_e5m2 = getattr(np, "float8_e5m2", None)
-
 _TYPES = {
     "F64": np.float64,
     "F32": np.float32,
@@ -193,9 +190,7 @@ def load_file(filename: Union[str, os.PathLike]) -> Dict[str, np.ndarray]:
     "U16": np.uint16,
     "I8": np.int8,
     "U8": np.uint8,
-    "BOOL": np.bool,
-    "F8_E4M3": _float8_e4m3fn,
-    "F8_E5M2": _float8_e5m2,
+    "BOOL": bool,
 }
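A note on the BOOL entry: np.bool was a deprecated alias for the builtin bool and was removed in NumPy 1.24 (only reintroduced later in 2.0), so the old mapping raises AttributeError on affected versions; the builtin bool works as a dtype everywhere. The float8 entries are dropped along with their getattr guards. A minimal sketch of why the change is safe:

import numpy as np

# The builtin bool is a valid dtype specifier and resolves to np.bool_,
# so the "BOOL" entry keeps working where the removed np.bool alias fails.
arr = np.zeros((2, 2), dtype=bool)
assert arr.dtype == np.bool_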

binding/python/pyproject.toml

Lines changed: 1 addition & 2 deletions

@@ -41,11 +41,10 @@ quality = [
 ]
 testing = [
     "bintensors[numpy]",
-    "h5py>=3.7.0",
+    "bintensors[torch]",
     "setuptools_rust>=1.5.2",
     "pytest>=7.2.0",
     "pytest-benchmark>=4.0.0",
-    # "python-afl>=0.7.3",
     "hypothesis>=6.70.2",
 ]
 all = [
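With h5py and the commented-out python-afl entry gone, the testing extra now pulls in bintensors[torch], which the new tests/test_buffer.py needs for bintensors.torch.save and load.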

binding/python/tests/test_buffer.py

Lines changed: 234 additions & 0 deletions

@@ -0,0 +1,234 @@
import struct

import pytest
import torch

from typing import Dict, Tuple
from itertools import chain

from bintensors import BintensorError
from bintensors.torch import save, load

# Dtype names mapped to their variant index in the serialized header.
_DTYPE = {
    "BOOL": 0,
    "U8": 1,
    "I8": 2,
    "F8_E5M2": 3,
    "F8_E4M3": 4,
    "I16": 5,
    "U16": 6,
    "F16": 7,
    "BF16": 8,
    "I32": 9,
    "U32": 10,
    "F32": 11,
    "F64": 12,
    "I64": 13,
    "U64": 14,
}


def encode_unsigned_variant_encoding(number: int) -> bytes:
    """Encode an unsigned integer into a variable-length format.

    Values below 0xFB fit in a single byte; larger values get a one-byte
    marker (0xFB, 0xFC, or 0xFD) followed by a 2-, 4-, or 8-byte
    little-endian payload.
    """
    if number > 0xFFFFFFFF:
        return b"\xfd" + number.to_bytes(8, "little")
    elif number > 0xFFFF:
        return b"\xfc" + number.to_bytes(4, "little")
    elif number > 0xFA:
        return b"\xfb" + number.to_bytes(2, "little")
    else:
        return number.to_bytes(1, "little")


def encode_tensor_info(dtype: str, shape: Tuple[int, ...], offset: Tuple[int, int]) -> bytes:
    """Encode the TensorInfo struct (dtype, rank, shape, offsets) into a byte buffer."""
    if dtype not in _DTYPE:
        raise ValueError(f"Unsupported dtype: {dtype}")

    # Flatten out the tensor info.
    layout = chain([_DTYPE[dtype], len(shape)], shape, offset)
    return b"".join(map(encode_unsigned_variant_encoding, layout))


def encode_hash_map(index_map: Dict[str, int]) -> bytes:
    """Encode a dictionary of string keys and integer values."""
    length = encode_unsigned_variant_encoding(len(index_map))

    hash_map_layout = chain.from_iterable(
        (
            encode_unsigned_variant_encoding(len(k)),
            k.encode("utf-8"),
            encode_unsigned_variant_encoding(v),
        )
        for k, v in index_map.items()
    )

    return b"".join(chain([length], hash_map_layout))


def test_empty_file():
    """bintensors allows an empty tensor dictionary."""
    tensor_dict = {}
    buffer = save(tensor_dict)
    # Decode the first 8 bytes of the buffer: the header size as a
    # little-endian unsigned 64-bit integer.
    header_size = struct.unpack("<Q", buffer[0:8])[0]
    # header size prefix + metadata + empty tensors
    MAX_FILE_SIZE = 8 + header_size
    assert header_size == 8, "expected packed header size to be 8"
    # Three zero bytes (empty metadata, tensor list, and index map),
    # space-padded to an 8-byte boundary.
    assert buffer[8:] == b"\x00\x00\x00     ", "expected empty metadata fields"
    assert MAX_FILE_SIZE == len(buffer), "these should be equal"


def test_man_cmp():
    """Manually serialize a two-tensor layout and compare it against save()."""
    size = 2
    shape = (2, 2)
    tensor_chunk_length = shape[0] * shape[1] * 4  # size of one F32 tensor buffer

    length = encode_unsigned_variant_encoding(size)

    # Create the tensor info buffer.
    tensor_info_buffer = b"".join(
        encode_tensor_info(
            "F32",
            shape,
            (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
        )
        for i in range(size)
    )
    layout_tensor_info = length + tensor_info_buffer

    expected = []
    for (start, end, step) in [(0, size, 1), (size - 1, -1, -1)]:
        # Create the hash map layout.
        hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(start, end, step)})

        # Construct the full layout: empty metadata + tensor info + index map.
        layout = b"\0" + layout_tensor_info + hash_map_layout
        layout += b" " * ((8 - len(layout)) % 8)  # pad to an 8-byte boundary
        n = len(layout)
        n_header = n.to_bytes(8, "little")

        # Lay the header and tensor data out together.
        buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)
        expected.append(buffer)

    tensor_dict = {"weight_0": torch.zeros(shape), "weight_1": torch.zeros(shape)}

    buffer = save(tensor_dict)
    # We need to check both orderings since the hash map is unordered.
    assert buffer in expected, f"got {buffer}, and expected {expected}"


def test_mismatch_length_of_metadata_large():
    size = 2
    shape = (2, 2)
    tensor_chunk_length = shape[0] * shape[1] * 4  # size of one F32 tensor buffer

    # Declare far more tensors than the header actually contains.
    length = encode_unsigned_variant_encoding(size * 1000)

    # Create the tensor info buffer.
    tensor_info_buffer = b"".join(
        encode_tensor_info(
            "F32",
            shape,
            (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
        )
        for i in range(size)
    )
    layout_tensor_info = length + tensor_info_buffer

    # Create the hash map layout.
    hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(size)})

    # Construct the full layout.
    layout = b"\0" + layout_tensor_info + hash_map_layout
    layout += b" " * ((8 - len(layout)) % 8)
    n = len(layout)
    n_header = n.to_bytes(8, "little")

    # Lay the header and tensor data out together.
    buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)

    with pytest.raises(BintensorError):
        # Not valid: the declared tensor count is larger than
        # what the header actually holds.
        _ = load(buffer)


def test_mismatch_length_of_metadata_small():
    size = 2
    shape = (2, 2)
    tensor_chunk_length = shape[0] * shape[1] * 4  # size of one F32 tensor buffer

    # Declare fewer tensors than the header actually contains.
    length = encode_unsigned_variant_encoding(size - 1)

    # Create the tensor info buffer.
    tensor_info_buffer = b"".join(
        encode_tensor_info(
            "F32",
            shape,
            (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
        )
        for i in range(size)
    )
    layout_tensor_info = length + tensor_info_buffer

    # Create the hash map layout.
    hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(size)})

    # Construct the full layout.
    layout = b"\0" + layout_tensor_info + hash_map_layout
    layout += b" " * ((8 - len(layout)) % 8)
    n = len(layout)
    n_header = n.to_bytes(8, "little")

    # Lay the header and tensor data out together.
    buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)

    with pytest.raises(BintensorError):
        # Not valid: the declared tensor count is smaller than
        # what the header actually holds.
        _ = load(buffer)


def test_mismatch_length_of_metadata():
    size = 2
    shape = (2, 2)
    tensor_chunk_length = shape[0] * shape[1] * 4  # size of one F32 tensor buffer

    # Convert a usize (unsigned long long) into the variant encoding.
    length = encode_unsigned_variant_encoding(size * 1000)

    # Create the tensor info byte buffer.
    tensor_info_buffer = b"".join(
        encode_tensor_info(
            "F32",
            shape,
            (i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
        )
        for i in range(size)
    )
    layout_tensor_info = length + tensor_info_buffer

    # Create the hash map layout.
    hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(size)})

    # Construct the full layout:
    # empty metadata + tensor_info + hash map index map.
    layout = b"\0" + layout_tensor_info + hash_map_layout

    # Space padding to an 8-byte boundary.
    layout += b" " * ((8 - len(layout)) % 8)
    n = len(layout)

    # Size of the full header (metadata + tensor info + index map).
    n_header = n.to_bytes(8, "little")

    # Lay the header and tensor data out together into one buffer.
    buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)

    with pytest.raises(BintensorError):
        # Not valid: the declared tensor count does not
        # match the header contents.
        _ = load(buffer)
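As a quick sanity check on the helpers above (worked examples of the encoding the tests implement; not part of the committed file):

# Values below 0xFB occupy one byte; larger ones get a marker byte
# plus a little-endian payload.
assert encode_unsigned_variant_encoding(5) == b"\x05"
assert encode_unsigned_variant_encoding(300) == b"\xfb" + (300).to_bytes(2, "little")
assert encode_unsigned_variant_encoding(70_000) == b"\xfc" + (70_000).to_bytes(4, "little")

# One F32 tensor of shape (2, 2) spanning bytes [0, 16) encodes as six
# one-byte varints: dtype index 11, rank 2, dims 2 and 2, offsets 0 and 16.
assert encode_tensor_info("F32", (2, 2), (0, 16)) == bytes([11, 2, 2, 2, 0, 16])

# {"weight_0": 0} encodes as: map length, then key length, key bytes, value.
assert encode_hash_map({"weight_0": 0}) == b"\x01\x08" + b"weight_0" + b"\x00"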
