Skip to content

Commit aa774ad

Browse files
committed
chore(test): update tests for version bump to 0.1.0
1 parent 0002147 commit aa774ad

File tree

3 files changed

+88
-102
lines changed

3 files changed

+88
-102
lines changed

binding/python/tests/test_buffer.py

Lines changed: 33 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -39,30 +39,23 @@ def encode_unsigned_variant_encoding(number: int) -> bytes:
3939
return number.to_bytes(1, "little")
4040

4141

42-
def encode_tensor_info(dtype: str, shape: Tuple[int, ...], offset: Tuple[int, int]) -> List[bytes]:
43-
"""Encodes the struct TensorInfo into byte buffer"""
42+
def encode_header(id: str, dtype: str, shape: Tuple[int, ...], offset: Tuple[int, int]) -> bytes:
43+
"""Encode a tensor entry: the length-prefixed UTF-8 ID followed by dtype code, rank, shape and offsets."""
4444
if dtype not in _DTYPE:
4545
raise ValueError(f"Unsupported dtype: {dtype}")
4646

47-
# flatten out the tensor info
48-
layout = chain([_DTYPE[dtype], len(shape)], shape, offset)
49-
return b"".join(list(map(encode_unsigned_variant_encoding, layout)))
47+
encoded_id = encode_unsigned_variant_encoding(len(id)) + id.encode("utf-8")
5048

51-
52-
def encode_hash_map(index_map: Dict[str, int]) -> List[bytes]:
53-
"""Encodes a dictionary of string keys and integer values."""
54-
length = encode_unsigned_variant_encoding(len(index_map))
55-
56-
hash_map_layout = chain.from_iterable(
57-
(
58-
encode_unsigned_variant_encoding(len(k)),
59-
k.encode("utf-8"),
60-
encode_unsigned_variant_encoding(v),
61-
)
62-
for k, v in index_map.items()
49+
# Compose numeric fields
50+
numeric_layout = chain(
51+
[_DTYPE[dtype], len(shape)],
52+
shape,
53+
offset
6354
)
6455

65-
return b"".join(chain([length], hash_map_layout))
56+
encoded_tensor_info = b"".join(encode_unsigned_variant_encoding(x) for x in numeric_layout)
57+
58+
return encoded_id + encoded_tensor_info
6659

6760

6861
def test_empty_file():
@@ -74,7 +67,7 @@ def test_empty_file():
7467
# header size + metadata + empty tensors
7568
MAX_FILE_SIZE = 8 + header_size
7669
assert header_size == 8, "expected packed buffer shoudl be unsinged interger 8."
77-
assert buffer[8:] == b"\x00\x00\x00 ", "expected empty metadata fields."
70+
assert buffer[8:] == b"\x00\x00 ", "expected empty metadata fields."
7871
assert MAX_FILE_SIZE == len(buffer), "These should be equal"
7972

8073

@@ -87,35 +80,27 @@ def test_man_cmp():
8780

8881
# Create tensor info buffer
8982
tensor_info_buffer = b"".join(
90-
encode_tensor_info(
83+
encode_header(
84+
f"weight_{i}",
9185
"F32",
9286
shape,
9387
(i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
9488
)
9589
for i in range(size)
9690
)
97-
layout_tensor_info = length + tensor_info_buffer
98-
99-
expected = []
100-
for (start, end, step) in [(0, size, 1), (size - 1, -1, -1)]:
101-
# Create hash map layout
102-
hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(start, end, step)})
103-
104-
# Construct full layout
105-
layout = b"\0" + layout_tensor_info + hash_map_layout
106-
layout += b" " * (((8 - len(layout)) % 8) % 8)
107-
n = len(layout)
108-
n_header = n.to_bytes(8, "little")
109-
110-
# layout together
111-
buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)
112-
expected.append(buffer)
91+
layout = length + tensor_info_buffer
92+
layout = b"\0" + layout
93+
layout += b" " * (((8 - len(layout)) % 8) % 8)
94+
n = len(layout)
95+
n_header = n.to_bytes(8, "little")
96+
97+
expected = n_header + layout + (b"\0" * tensor_chunk_length * size)
11398

11499
tensor_dict = {"weight_0": torch.zeros(shape), "weight_1": torch.zeros(shape)}
115100

116101
buffer = save(tensor_dict)
117102
# tensor entries are encoded in weight_0, weight_1 order, so there is exactly one expected buffer
118-
assert buffer in expected, f"got {buffer}, and expected {expected}"
103+
assert buffer == expected, f"got {buffer}, and expected {expected}"
119104

120105

121106
def test_missmatch_length_of_metadata_large():
@@ -127,28 +112,22 @@ def test_missmatch_length_of_metadata_large():
127112

128113
# Create tensor info buffer
129114
tensor_info_buffer = b"".join(
130-
encode_tensor_info(
115+
encode_header(
116+
f"weight_{i}",
131117
"F32",
132118
shape,
133119
(i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
134120
)
135121
for i in range(size)
136122
)
137-
layout_tensor_info = length + tensor_info_buffer
138-
139-
expected = [0] * 2
140-
141-
# Create hash map layout
142-
hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(0, 2, 1)})
143-
144-
# Construct full layout
145-
layout = b"\0" + layout_tensor_info + hash_map_layout
123+
layout = length + tensor_info_buffer
124+
layout = b"\0" + layout
146125
layout += b" " * (((8 - len(layout)) % 8) % 8)
147126
n = len(layout)
148127
n_header = n.to_bytes(8, "little")
149-
128+
150129
# layout together
151-
buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)
130+
buffer = n_header + layout + b"\0" * (tensor_chunk_length * size)
152131

153132
with pytest.raises(BintensorError):
154133
# this is not valid since the metadata
@@ -165,70 +144,25 @@ def test_missmatch_length_of_metadata_small():
165144

166145
# Create tensor info buffer
167146
tensor_info_buffer = b"".join(
168-
encode_tensor_info(
147+
encode_header(
148+
f"weight_{i}",
169149
"F32",
170150
shape,
171151
(i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
172152
)
173153
for i in range(size)
174154
)
175-
layout_tensor_info = length + tensor_info_buffer
176-
177-
# Create hash map layout
178-
hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(0, 2, 1)})
179-
180-
# Construct full layout
181-
layout = b"\0" + layout_tensor_info + hash_map_layout
155+
layout = length + tensor_info_buffer
156+
layout = b"\0" + layout
182157
layout += b" " * (((8 - len(layout)) % 8) % 8)
183158
n = len(layout)
184159
n_header = n.to_bytes(8, "little")
185160

186161
# layout together
187-
buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)
162+
buffer = n_header + layout + b"\0" * (tensor_chunk_length * size)
188163

189164
with pytest.raises(BintensorError):
190165
# this is not valid since the metadata
191166
# size does not match as it is too big
192167
_ = load(buffer)
193168

194-
195-
def test_missmatch_length_of_metadata():
196-
size = 2
197-
shape = (2, 2)
198-
tensor_chunk_length = shape[0] * shape[1] * 4 # Size of a tensor buffer
199-
200-
# convert usize or unsigned long long into variant encoding
201-
length = encode_unsigned_variant_encoding(size * 1000)
202-
203-
# Create tensor info byte buffer
204-
tensor_info_buffer = b"".join(
205-
encode_tensor_info(
206-
"F32",
207-
shape,
208-
(i * tensor_chunk_length, i * tensor_chunk_length + tensor_chunk_length),
209-
)
210-
for i in range(size)
211-
)
212-
layout_tensor_info = length + tensor_info_buffer
213-
214-
# Create hash map layout
215-
hash_map_layout = encode_hash_map({f"weight_{i}": i for i in range(0, 2, 1)})
216-
217-
# Construct full layout
218-
# metadata empty + tensor_info + hash_map_index_map
219-
layout = b"\0" + layout_tensor_info + hash_map_layout
220-
221-
# empty padding
222-
layout += b" " * (((8 - len(layout)) % 8) % 8)
223-
n = len(layout)
224-
225-
# size of full header (metadata + tensors info + index map)
226-
n_header = n.to_bytes(8, "little")
227-
228-
# layout together into buffer
229-
buffer = n_header + layout + b"\0" * (tensor_chunk_length * 2)
230-
231-
with pytest.raises(BintensorError):
232-
# this is not a valid since the metadata
233-
# size doe not match as it too big
234-
_ = load(buffer)

binding/python/tests/test_np.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
import pytest
22

3-
import os
43
import tempfile
54
import numpy as np
65

76
from typing import Dict, Tuple
8-
from bintensors.numpy import load, load_file, save, save_file, safe_open
7+
from bintensors.numpy import load, load_file, save, save_file, safe_open, save_with_checksum
98

109

1110
def _compare_np_array(lhs: np.ndarray, rhs: np.ndarray) -> bool:
@@ -139,3 +138,36 @@ def test_safe_open_access_with_metadata():
139138
assert model.get_tensor("h.0.ln_1.weight") is not None
140139
assert model.get_tensor("h.0.ln_1.bias") is not None
141140
assert model.metadata()["hello"] == "world"
141+
142+
143+
def test_checksum_two_diffrent_models():
144+
model_1 = { "ln.weight" : np.random.random((10,10)), "ln.bias" : np.random.random((10)) }
145+
model_2 = { "ln.weight" : np.random.random((10,10)), "ln.bias" : np.random.random((10)) }
146+
147+
checksum1, _ = save_with_checksum(model_1)
148+
checksum2, _ = save_with_checksum(model_2)
149+
150+
assert checksum1 != checksum2, "These checksum are not equivilent"
151+
152+
153+
def test_checksum_two_same_models():
154+
model_1 = { "ln.weight" : np.zeros((2,2)), "ln.bias" : np.zeros((10)) }
155+
model_2 = { "ln.weight" : np.zeros((2,2)), "ln.bias" : np.zeros((10)) }
156+
157+
for _ in range(1000):
158+
checksum1, _ = save_with_checksum(model_1)
159+
checksum2, _ = save_with_checksum(model_2)
160+
assert checksum1 == checksum2, "These checksum are equivilent"
161+
162+
163+
def test_checksum_two_same_models_with_diffrent_framework():
164+
import torch
165+
from bintensors.torch import save_with_checksum as save_with_checksum_pt
166+
model_1 = { "ln.weight" : np.zeros((2,2), dtype=np.float32), "ln.bias" : np.zeros((10), dtype=np.float32) }
167+
model_2 = { "ln.weight" : torch.zeros((2,2), dtype=torch.float32), "ln.bias" : torch.zeros((10), dtype=torch.float32) }
168+
169+
for _ in range(1000):
170+
checksum1, _ = save_with_checksum(model_1)
171+
checksum2, _ = save_with_checksum_pt(model_2)
172+
assert checksum1 == checksum2, "These checksum are equivilent"
173+

binding/python/tests/test_pt.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import torch
66

77
from typing import Dict, Tuple
8-
from bintensors.torch import load, save, save_file, load_file, safe_open
8+
from bintensors.torch import load, save, save_file, load_file, safe_open, save_with_checksum
99

1010

1111
def _compare_torch_tensors(lhs: torch.Tensor, rhs: torch.Tensor) -> bool:
@@ -124,3 +124,23 @@ def test_pt_safe_open_access_and_metadata():
124124
assert model.get_tensor("h.0.ln_1.weight") is not None
125125
assert model.get_tensor("h.0.ln_1.bias") is not None
126126
assert model.metadata() is None
127+
128+
129+
def test_checksum_two_diffrent_models():
130+
model_1 = { "ln.weight" : torch.rand((10,10)), "ln.bias" : torch.rand((10)) }
131+
model_2 = { "ln.weight" : torch.rand((10,10)), "ln.bias" : torch.rand((10)) }
132+
133+
checksum1, _ = save_with_checksum(model_1)
134+
checksum2, _ = save_with_checksum(model_2)
135+
136+
assert checksum1 != checksum2, "These checksum are not equivilent"
137+
138+
139+
def test_checksum_two_same_models():
140+
model_1 = { "ln.weight" : torch.zeros((2,2)), "ln.bias" : torch.zeros((10)) }
141+
model_2 = { "ln.weight" : torch.zeros((2,2)), "ln.bias" : torch.zeros((10)) }
142+
143+
for _ in range(1000):
144+
checksum1, _ = save_with_checksum(model_1)
145+
checksum2, _ = save_with_checksum(model_2)
146+
assert checksum1 == checksum2, "These checksum are equivilent"

0 commit comments

Comments
 (0)