Skip to content

Commit 50d9b26

Browse files
committed
Merge branch 'master' of github.com:mongodb/mongo-python-driver
2 parents 56f7268 + fafa00e commit 50d9b26

File tree

4 files changed

+55
-20
lines changed

4 files changed

+55
-20
lines changed

bson/binary.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,10 @@ def from_vector(
462462
raise ValueError(f"{padding=}. It must be in [0,1, ..7].")
463463
if padding and not vector:
464464
raise ValueError("Empty vector with non-zero padding.")
465+
if padding and not (vector[-1] & ((1 << padding) - 1)) == 0: # type: ignore
466+
raise ValueError(
467+
"If padding p is provided, all bits in the final byte lower than p must be 0."
468+
)
465469
elif dtype == BinaryVectorDtype.FLOAT32: # pack floats as float32
466470
format_str = "f"
467471
if padding:
@@ -490,6 +494,11 @@ def as_vector(self) -> BinaryVector:
490494
dtype = BinaryVectorDtype(dtype)
491495
n_values = len(self) - position
492496

497+
if padding and dtype != BinaryVectorDtype.PACKED_BIT:
498+
raise ValueError(
499+
f"Corrupt data. Padding ({padding}) must be 0 for all but PACKED_BIT dtypes. ({dtype=})"
500+
)
501+
493502
if dtype == BinaryVectorDtype.INT8:
494503
dtype_format = "b"
495504
format_string = f"<{n_values}{dtype_format}"
@@ -513,6 +522,12 @@ def as_vector(self) -> BinaryVector:
513522
dtype_format = "B"
514523
format_string = f"<{n_values}{dtype_format}"
515524
unpacked_uint8s = list(struct.unpack_from(format_string, self, position))
525+
if padding and not n_values:
526+
raise ValueError("Corrupt data. Vector has a padding P, but no data.")
527+
if padding and n_values and not (unpacked_uint8s[-1] & ((1 << padding) - 1)) == 0:
528+
raise ValueError(
529+
"Corrupt data. Vector has a padding P, but bits in the final byte lower than P are non-zero."
530+
)
516531
return BinaryVector(unpacked_uint8s, dtype, padding)
517532

518533
else:

test/bson_binary_vector/packed_bit.json

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,32 @@
2121
"canonical_bson": "1600000005766563746F7200040000000910007F0700"
2222
},
2323
{
24-
"description": "Empty Vector PACKED_BIT",
24+
"description": "PACKED_BIT with padding",
2525
"valid": true,
26-
"vector": [],
26+
"vector": [127, 8],
2727
"dtype_hex": "0x10",
2828
"dtype_alias": "PACKED_BIT",
29-
"padding": 0,
30-
"canonical_bson": "1400000005766563746F72000200000009100000"
29+
"padding": 3,
30+
"canonical_bson": "1600000005766563746F7200040000000910037F0800"
3131
},
3232
{
33-
"description": "PACKED_BIT with padding",
34-
"valid": true,
33+
"description": "PACKED_BIT with inconsistent padding",
34+
"valid": false,
3535
"vector": [127, 7],
3636
"dtype_hex": "0x10",
3737
"dtype_alias": "PACKED_BIT",
3838
"padding": 3,
3939
"canonical_bson": "1600000005766563746F7200040000000910037F0700"
4040
},
41+
{
42+
"description": "Empty Vector PACKED_BIT",
43+
"valid": true,
44+
"vector": [],
45+
"dtype_hex": "0x10",
46+
"dtype_alias": "PACKED_BIT",
47+
"padding": 0,
48+
"canonical_bson": "1400000005766563746F72000200000009100000"
49+
},
4150
{
4251
"description": "Overflow Vector PACKED_BIT",
4352
"valid": false,

test/test_bson.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -739,7 +739,7 @@ def test_vector(self):
739739
"""Tests of subtype 9"""
740740
# We start with valid cases, across the 3 dtypes implemented.
741741
# Work with a simple vector that can be interpreted as int8, float32, or ubyte
742-
list_vector = [127, 7]
742+
list_vector = [127, 8]
743743
# As INT8, vector has length 2
744744
binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8)
745745
vector = binary_vector.as_vector()
@@ -764,18 +764,18 @@ def test_vector(self):
764764
uncompressed = ""
765765
for val in list_vector:
766766
uncompressed += format(val, "08b")
767-
assert uncompressed[:-padding] == "0111111100000"
767+
assert uncompressed[:-padding] == "0111111100001"
768768

769769
# It is worthwhile explicitly showing the values encoded to BSON
770770
padded_doc = {"padded_vec": padded_vec}
771771
assert (
772772
encode(padded_doc)
773-
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00"
773+
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x08\x00"
774774
)
775775
# and dumped to json
776776
assert (
777777
json_util.dumps(padded_doc)
778-
== '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}'
778+
== '{"padded_vec": {"$binary": {"base64": "EAN/CA==", "subType": "09"}}}'
779779
)
780780

781781
# FLOAT32 is also implemented

test/test_bson_binary_vector.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ def create_test(case_spec):
4848
def run_test(self):
4949
for test_case in case_spec.get("tests", []):
5050
description = test_case["description"]
51-
vector_exp = test_case.get("vector", [])
51+
vector_exp = test_case.get("vector", None)
5252
dtype_hex_exp = test_case["dtype_hex"]
5353
dtype_alias_exp = test_case.get("dtype_alias")
5454
padding_exp = test_case.get("padding", 0)
55-
canonical_bson_exp = test_case.get("canonical_bson")
55+
canonical_bson_exp = test_case.get("canonical_bson", None)
5656
# Convert dtype hex string into bytes
5757
dtype_exp = BinaryVectorDtype(int(dtype_hex_exp, 16).to_bytes(1, byteorder="little"))
5858

@@ -85,14 +85,25 @@ def run_test(self):
8585
self.assertEqual(cB_obs, canonical_bson_exp, description)
8686

8787
else:
88-
with self.assertRaises((struct.error, ValueError), msg=description):
89-
# Tests Binary.from_vector
90-
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
91-
# Tests Binary.as_vector
92-
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
93-
decoded_doc = decode(cB_exp)
94-
binary_obs = decoded_doc[test_key]
95-
binary_obs.as_vector()
88+
"""
89+
#### To prove correct in an invalid case (`valid:false`), one MUST
90+
- if the vector field is present, raise an exception when attempting to encode a document from the numeric values,
91+
dtype, and padding.
92+
- if the canonical_bson field is present, raise an exception when attempting to deserialize it into the corresponding
93+
numeric values, as the field contains corrupted data.
94+
"""
95+
# Tests Binary.from_vector()
96+
if vector_exp is not None:
97+
with self.assertRaises((struct.error, ValueError), msg=description):
98+
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
99+
100+
# Tests Binary.as_vector()
101+
if canonical_bson_exp is not None:
102+
with self.assertRaises((struct.error, ValueError), msg=description):
103+
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
104+
decoded_doc = decode(cB_exp)
105+
binary_obs = decoded_doc[test_key]
106+
binary_obs.as_vector()
96107

97108
return run_test
98109

0 commit comments

Comments
 (0)