From b4f5a2ac8e9a2aad74ff59f522078a351d9a822e Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Fri, 31 Jan 2025 20:51:20 +0800 Subject: [PATCH] feat(python): unify type system between python and xlang serialization in pyfury (#2034) ## What does this PR do? This pr unifies type system between python and xlang serialization in pyfury, so that we can remove duplicate code in pyfury, and lay the foundation for following features: - [ ] implement protocol such as chunk based map serialization for pythobn and xlang serialization - [ ] use python exsiting fastpath optimization in xlang serialization - [ ] extend `DataClassSerializer` to support codegen based serialization for xlang ## Related issues #1690 ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark --- python/pyfury/_fury.py | 45 ++--- python/pyfury/_registry.py | 116 ++++-------- python/pyfury/_serialization.pyx | 234 +++++++++---------------- python/pyfury/_serializer.py | 55 ++---- python/pyfury/resolver.py | 4 +- python/pyfury/serializer.py | 25 +-- python/pyfury/tests/test_serializer.py | 2 +- 7 files changed, 168 insertions(+), 313 deletions(-) diff --git a/python/pyfury/_fury.py b/python/pyfury/_fury.py index d97c6230c2..266f2324c2 100644 --- a/python/pyfury/_fury.py +++ b/python/pyfury/_fury.py @@ -30,6 +30,7 @@ NOT_NULL_VALUE_FLAG, ) from pyfury.util import is_little_endian, set_bit, get_bit, clear_bit +from pyfury.type import TypeId try: import numpy as np @@ -44,24 +45,21 @@ MAGIC_NUMBER = 0x62D4 -DEFAULT_DYNAMIC_WRITE_STRING_ID = -1 +DEFAULT_DYNAMIC_WRITE_META_STR_ID = -1 DYNAMIC_TYPE_ID = -1 USE_CLASSNAME = 0 USE_CLASS_ID = 1 # preserve 0 as flag for class id not set in ClassInfo` NO_CLASS_ID = 0 -PYINT_CLASS_ID = 1 -PYFLOAT_CLASS_ID = 2 -PYBOOL_CLASS_ID = 3 -STRING_CLASS_ID = 4 -PICKLE_CLASS_ID = 5 -PICKLE_STRONG_CACHE_CLASS_ID = 6 -PICKLE_CACHE_CLASS_ID = 7 +INT64_CLASS_ID = TypeId.INT64 +FLOAT64_CLASS_ID = TypeId.FLOAT64 +BOOL_CLASS_ID = TypeId.BOOL +STRING_CLASS_ID = TypeId.STRING # `NOT_NULL_VALUE_FLAG` + `CLASS_ID << 1` in little-endian order -NOT_NULL_PYINT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYINT_CLASS_ID << 9) -NOT_NULL_PYFLOAT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYFLOAT_CLASS_ID << 9) -NOT_NULL_PYBOOL_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYBOOL_CLASS_ID << 9) -NOT_NULL_STRING_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (STRING_CLASS_ID << 9) +NOT_NULL_INT64_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (INT64_CLASS_ID << 8) +NOT_NULL_FLOAT64_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (FLOAT64_CLASS_ID << 8) +NOT_NULL_BOOL_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (BOOL_CLASS_ID << 8) +NOT_NULL_STRING_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (STRING_CLASS_ID << 8) SMALL_STRING_THRESHOLD = 16 @@ -156,7 +154,7 @@ def __init__( stacklevel=2, ) self.pickler = Pickler(self.buffer) - self.unpickler = Unpickler(self.buffer) + self.unpickler = None else: self.pickler = _PicklerStub() self.unpickler = _UnpicklerStub() @@ -263,32 +261,32 @@ def serialize_ref(self, buffer, obj, classinfo=None): buffer.write_string(obj) return elif cls is int: - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(obj) return elif cls is bool: - buffer.write_int16(NOT_NULL_PYBOOL_FLAG) + buffer.write_int16(NOT_NULL_BOOL_FLAG) buffer.write_bool(obj) return if self.ref_resolver.write_ref_or_null(buffer, obj): return if classinfo is None: classinfo = self.class_resolver.get_classinfo(cls) - self.class_resolver.write_classinfo(buffer, classinfo) + self.class_resolver.write_typeinfo(buffer, classinfo) classinfo.serializer.write(buffer, obj) def serialize_nonref(self, buffer, obj): cls = type(obj) if cls is str: - buffer.write_varuint32(STRING_CLASS_ID << 1) + buffer.write_varuint32(STRING_CLASS_ID) buffer.write_string(obj) return elif cls is int: - buffer.write_varuint32(PYINT_CLASS_ID << 1) + buffer.write_varuint32(INT64_CLASS_ID) buffer.write_varint64(obj) return elif cls is bool: - buffer.write_varuint32(PYBOOL_CLASS_ID << 1) + buffer.write_varuint32(BOOL_CLASS_ID) buffer.write_bool(obj) return else: @@ -380,7 +378,7 @@ def deserialize_ref(self, buffer): ref_id = ref_resolver.try_preserve_ref_id(buffer) # indicates that the object is first read. if ref_id >= NOT_NULL_VALUE_FLAG: - classinfo = self.class_resolver.read_classinfo(buffer) + classinfo = self.class_resolver.read_typeinfo(buffer) o = classinfo.serializer.read(buffer) ref_resolver.set_read_object(ref_id, o) return o @@ -389,7 +387,7 @@ def deserialize_ref(self, buffer): def deserialize_nonref(self, buffer): """Deserialize not-null and non-reference object from buffer.""" - classinfo = self.class_resolver.read_classinfo(buffer) + classinfo = self.class_resolver.read_typeinfo(buffer) return classinfo.serializer.read(buffer) def xdeserialize_ref(self, buffer, serializer=None): @@ -448,7 +446,10 @@ def handle_unsupported_write(self, buffer, obj): def handle_unsupported_read(self, buffer): in_band = buffer.read_bool() if in_band: - return self.unpickler.load() + unpickler = self.unpickler + if unpickler is None: + self.unpickler = unpickler = Unpickler(buffer) + return unpickler.load() else: assert self._unsupported_objects is not None return next(self._unsupported_objects) diff --git a/python/pyfury/_registry.py b/python/pyfury/_registry.py index aa313b09d4..40a2572fa4 100644 --- a/python/pyfury/_registry.py +++ b/python/pyfury/_registry.py @@ -43,10 +43,8 @@ Int16Serializer, Int32Serializer, Int64Serializer, - DynamicIntSerializer, - FloatSerializer, - DoubleSerializer, - DynamicFloatSerializer, + Float32Serializer, + Float64Serializer, StringSerializer, DateSerializer, TimestampSerializer, @@ -60,9 +58,9 @@ PickleCacheSerializer, PickleStrongCacheSerializer, PickleSerializer, + DataClassSerializer, ) from pyfury._struct import ComplexObjectSerializer -from pyfury.buffer import Buffer from pyfury.meta.metastring import MetaStringEncoder, MetaStringDecoder from pyfury.type import ( TypeId, @@ -78,13 +76,6 @@ DYNAMIC_TYPE_ID, # preserve 0 as flag for class id not set in ClassInfo` NO_CLASS_ID, - PYINT_CLASS_ID, - PYFLOAT_CLASS_ID, - PYBOOL_CLASS_ID, - STRING_CLASS_ID, - PICKLE_CLASS_ID, - PICKLE_STRONG_CACHE_CLASS_ID, - PICKLE_CACHE_CLASS_ID, ) try: @@ -168,7 +159,7 @@ def __init__(self, fury): self._hash_to_classinfo = dict() self._dynamic_written_metastr = [] self._type_id_to_classinfo = dict() - self._type_id_counter = PICKLE_CACHE_CLASS_ID + 1 + self._type_id_counter = 64 self._dynamic_write_string_id = 0 # hold objects to avoid gc, since `flat_hash_map/vector` doesn't # hold python reference. @@ -181,60 +172,30 @@ def __init__(self, fury): self.typename_decoder = MetaStringDecoder("$", "_") def initialize(self): + self._initialize_xlang() if self.fury.language == Language.PYTHON: self._initialize_py() - else: - self._initialize_xlang() def _initialize_py(self): register = functools.partial(self._register_type, internal=True) - register(int, type_id=PYINT_CLASS_ID, serializer=Int64Serializer) - register(float, type_id=PYFLOAT_CLASS_ID, serializer=DoubleSerializer) - register(bool, type_id=PYBOOL_CLASS_ID, serializer=BooleanSerializer) - register(str, type_id=STRING_CLASS_ID, serializer=StringSerializer) - register(_PickleStub, type_id=PICKLE_CLASS_ID, serializer=PickleSerializer) + register( + _PickleStub, + type_id=PickleSerializer.PICKLE_CLASS_ID, + serializer=PickleSerializer, + ) register( PickleStrongCacheStub, - type_id=PICKLE_STRONG_CACHE_CLASS_ID, + type_id=97, serializer=PickleStrongCacheSerializer(self.fury), ) register( PickleCacheStub, - type_id=PICKLE_CACHE_CLASS_ID, + type_id=98, serializer=PickleCacheSerializer(self.fury), ) register(type(None), serializer=NoneSerializer) - register(Int8Type, serializer=ByteSerializer) - register(Int16Type, serializer=Int16Serializer) - register(Int32Type, serializer=Int32Serializer) - register(Int64Type, serializer=Int64Serializer) - register(Float32Type, serializer=FloatSerializer) - register(Float64Type, serializer=DoubleSerializer) - register(datetime.date, serializer=DateSerializer) - register(datetime.datetime, serializer=TimestampSerializer) - register(bytes, serializer=BytesSerializer) - register(list, serializer=ListSerializer) register(tuple, serializer=TupleSerializer) - register(dict, serializer=MapSerializer) - register(set, serializer=SetSerializer) - register(enum.Enum, serializer=EnumSerializer) register(slice, serializer=SliceSerializer) - try: - import pyarrow as pa - from pyfury.format.serializer import ( - ArrowRecordBatchSerializer, - ArrowTableSerializer, - ) - - register(pa.RecordBatch, serializer=ArrowRecordBatchSerializer) - register(pa.Table, serializer=ArrowTableSerializer) - except Exception: - pass - for size, ftype, type_id in PyArraySerializer.typecode_dict.values(): - register(ftype, serializer=PyArraySerializer(self.fury, ftype, type_id)) - register(array.array, serializer=DynamicPyArraySerializer) - if np: - register(np.ndarray, serializer=NDArraySerializer) def _initialize_xlang(self): register = functools.partial(self._register_type, internal=True) @@ -243,18 +204,18 @@ def _initialize_xlang(self): register(Int16Type, type_id=TypeId.INT16, serializer=Int16Serializer) register(Int32Type, type_id=TypeId.INT32, serializer=Int32Serializer) register(Int64Type, type_id=TypeId.INT64, serializer=Int64Serializer) - register(int, type_id=DYNAMIC_TYPE_ID, serializer=DynamicIntSerializer) + register(int, type_id=TypeId.INT64, serializer=Int64Serializer) register( Float32Type, type_id=TypeId.FLOAT32, - serializer=FloatSerializer, + serializer=Float32Serializer, ) register( Float64Type, type_id=TypeId.FLOAT64, - serializer=DoubleSerializer, + serializer=Float64Serializer, ) - register(float, type_id=DYNAMIC_TYPE_ID, serializer=DynamicFloatSerializer) + register(float, type_id=TypeId.FLOAT64, serializer=Float64Serializer) register(str, type_id=TypeId.STRING, serializer=StringSerializer) # TODO(chaokunyang) DURATION DECIMAL register( @@ -512,9 +473,19 @@ def get_classinfo(self, cls, create=True): raise TypeUnregisteredError(f"{cls} not registered") logger.info("Class %s not registered", cls) serializer = self._create_serializer(cls) - type_id = ( - NO_CLASS_ID if type(serializer) is not PickleSerializer else PICKLE_CLASS_ID - ) + type_id = None + if self.language == Language.PYTHON: + if isinstance(serializer, EnumSerializer): + type_id = TypeId.NAMED_ENUM + elif type(serializer) is PickleSerializer: + type_id = PickleSerializer.PICKLE_CLASS_ID + if not self.require_registration: + if isinstance(serializer, DataClassSerializer): + type_id = TypeId.NAMED_STRUCT + if type_id is None: + raise TypeUnregisteredError( + f"{cls} must be registered using `fury.register_type` API" + ) return self.__register_type( cls, type_id=type_id, @@ -544,33 +515,6 @@ def _create_serializer(self, cls): serializer = PickleSerializer(self.fury, cls) return serializer - def write_classinfo(self, buffer: Buffer, classinfo): - if classinfo.dynamic_type: - return - type_id = classinfo.type_id - if type_id != NO_CLASS_ID: - buffer.write_varuint32(type_id << 1) - return - buffer.write_varuint32(1) - self.metastring_resolver.write_meta_string_bytes( - buffer, classinfo.namespace_bytes - ) - self.metastring_resolver.write_meta_string_bytes( - buffer, classinfo.typename_bytes - ) - - def read_classinfo(self, buffer): - header = buffer.read_varuint32() - if header & 0b1 == 0: - type_id = header >> 1 - classinfo = self._type_id_to_classinfo[type_id] - if classinfo.serializer is None: - classinfo.serializer = self._create_serializer(classinfo.cls) - return classinfo - ns_metabytes = self.metastring_resolver.read_meta_string_bytes(buffer) - type_metabytes = self.metastring_resolver.read_meta_string_bytes(buffer) - return self._load_metabytes_to_classinfo(ns_metabytes, type_metabytes) - def _load_metabytes_to_classinfo(self, ns_metabytes, type_metabytes): typeinfo = self._ns_type_to_classinfo.get((ns_metabytes, type_metabytes)) if typeinfo is not None: @@ -588,6 +532,8 @@ def _load_metabytes_to_classinfo(self, ns_metabytes, type_metabytes): return classinfo def write_typeinfo(self, buffer, classinfo): + if classinfo.dynamic_type: + return type_id = classinfo.type_id internal_type_id = type_id & 0xFF buffer.write_varuint32(type_id) diff --git a/python/pyfury/_serialization.pyx b/python/pyfury/_serialization.pyx index 0da4ee9ae2..e4cb2c7783 100644 --- a/python/pyfury/_serialization.pyx +++ b/python/pyfury/_serialization.pyx @@ -26,6 +26,7 @@ import warnings from typing import TypeVar, Union, Iterable from pyfury._util import get_bit, set_bit, clear_bit +from pyfury import _fury as fmod from pyfury._fury import Language from pyfury._fury import _PicklerStub, _UnpicklerStub, Pickler, Unpickler from pyfury._fury import _ENABLE_CLASS_REGISTRATION_FORCIBLY @@ -213,29 +214,23 @@ cdef class MapRefResolver: self.read_ref_ids.clear() self.read_object = None + cdef int8_t USE_CLASSNAME = 0 cdef int8_t USE_CLASS_ID = 1 # preserve 0 as flag for class id not set in ClassInfo` cdef int8_t NO_CLASS_ID = 0 -cdef int8_t DEFAULT_DYNAMIC_WRITE_STRING_ID = -1 -cdef int8_t PYINT_CLASS_ID = 1 -cdef int8_t PYFLOAT_CLASS_ID = 2 -cdef int8_t PYBOOL_CLASS_ID = 3 -cdef int8_t STRING_CLASS_ID = 4 -cdef int8_t PICKLE_CLASS_ID = 5 -cdef int8_t PICKLE_STRONG_CACHE_CLASS_ID = 6 -cdef int8_t PICKLE_CACHE_CLASS_ID = 7 -cdef int16_t MAGIC_NUMBER = 0x62D4 -# `NOT_NULL_VALUE_FLAG` + `CLASS_ID<<1` in little-endian order -cdef int32_t NOT_NULL_PYINT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \ - (PYINT_CLASS_ID << 9) -cdef int32_t NOT_NULL_PYFLOAT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \ - (PYFLOAT_CLASS_ID << 9) -cdef int32_t NOT_NULL_PYBOOL_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \ - (PYBOOL_CLASS_ID << 9) -cdef int32_t NOT_NULL_STRING_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \ - (STRING_CLASS_ID << 9) -cdef int32_t SMALL_STRING_THRESHOLD = 16 +cdef int8_t DEFAULT_DYNAMIC_WRITE_META_STR_ID = fmod.DEFAULT_DYNAMIC_WRITE_META_STR_ID +cdef int8_t INT64_CLASS_ID = fmod.INT64_CLASS_ID +cdef int8_t FLOAT64_CLASS_ID = fmod.FLOAT64_CLASS_ID +cdef int8_t BOOL_CLASS_ID = fmod.BOOL_CLASS_ID +cdef int8_t STRING_CLASS_ID = fmod.STRING_CLASS_ID + +cdef int16_t MAGIC_NUMBER = fmod.MAGIC_NUMBER +cdef int32_t NOT_NULL_INT64_FLAG = fmod.NOT_NULL_INT64_FLAG +cdef int32_t NOT_NULL_FLOAT64_FLAG = fmod.NOT_NULL_FLOAT64_FLAG +cdef int32_t NOT_NULL_BOOL_FLAG = fmod.NOT_NULL_BOOL_FLAG +cdef int32_t NOT_NULL_STRING_FLAG = fmod.NOT_NULL_STRING_FLAG +cdef int32_t SMALL_STRING_THRESHOLD = fmod.SMALL_STRING_THRESHOLD @cython.final @@ -251,7 +246,7 @@ cdef class MetaStringBytes: self.length = len(data) self.hashcode = hashcode self.encoding = hashcode & 0xff - self.dynamic_write_string_id = DEFAULT_DYNAMIC_WRITE_STRING_ID + self.dynamic_write_string_id = DEFAULT_DYNAMIC_WRITE_META_STR_ID def __eq__(self, other): return type(other) is MetaStringBytes and other.hashcode == self.hashcode @@ -286,7 +281,7 @@ cdef class MetaStringResolver: self, Buffer buffer, MetaStringBytes metastr_bytes): cdef int16_t dynamic_type_id = metastr_bytes.dynamic_write_string_id cdef int32_t length = metastr_bytes.length - if dynamic_type_id == DEFAULT_DYNAMIC_WRITE_STRING_ID: + if dynamic_type_id == DEFAULT_DYNAMIC_WRITE_META_STR_ID: dynamic_type_id = self.dynamic_write_string_id metastr_bytes.dynamic_write_string_id = dynamic_type_id self.dynamic_write_string_id += 1 @@ -369,7 +364,7 @@ cdef class MetaStringResolver: self.dynamic_write_string_id = 0 for ptr in self._c_dynamic_written_enum_string: ( ptr).dynamic_write_string_id = \ - DEFAULT_DYNAMIC_WRITE_STRING_ID + DEFAULT_DYNAMIC_WRITE_META_STR_ID self._c_dynamic_written_enum_string.clear() @@ -378,7 +373,7 @@ cdef class ClassInfo: """ If dynamic_type is true, the serializer will be a dynamic typed serializer and it will write type info when writing the data. - In such cases, the `write_classinfo` should not write typeinfo. + In such cases, the `write_typeinfo` should not write typeinfo. In general, if we have 4 type for one class, we will have 5 serializers. For example, we have int8/16/32/64/128 for python `int` type, then we have 6 serializers for python `int`: `Int8/1632/64/128Serializer` for `int8/16/32/64/128` each, and another @@ -503,41 +498,6 @@ cdef class ClassResolver: self._populate_typeinfo(class_info) return class_info - cpdef inline write_classinfo(self, Buffer buffer, ClassInfo classinfo): - if classinfo.dynamic_type: - return - cdef int32_t type_id = classinfo.type_id - if type_id != NO_CLASS_ID: - buffer.write_varuint32((type_id << 1)) - return - buffer.write_varuint32(1) - self.metastring_resolver.write_meta_string_bytes( - buffer, classinfo.namespace_bytes - ) - self.metastring_resolver.write_meta_string_bytes( - buffer, classinfo.typename_bytes - ) - - cpdef inline ClassInfo read_classinfo(self, Buffer buffer): - cdef int32_t h1 = buffer.read_varuint32() - cdef int32_t type_id = h1 >> 1 - cdef ClassInfo classinfo - cdef PyObject * classinfo_ptr - # registered class id are greater than `NO_CLASS_ID`. - if h1 & 0b1 == 0: - if type_id < 0 or type_id >= self._c_registered_id_to_class_info.size(): - raise ValueError(f"Unexpected type_id {type_id}") - classinfo_ptr = self._c_registered_id_to_class_info[type_id] - if classinfo_ptr == NULL: - raise ValueError(f"Unexpected type_id {type_id}") - classinfo = classinfo_ptr - if classinfo.serializer is None: - classinfo.serializer = self._resolver._create_serializer(classinfo.cls) - return classinfo - cdef MetaStringBytes ns_metabytes = self.metastring_resolver.read_meta_string_bytes(buffer) - cdef MetaStringBytes type_metabytes = self.metastring_resolver.read_meta_string_bytes(buffer) - return self._load_bytes_to_classinfo(type_id, ns_metabytes, type_metabytes) - cdef inline ClassInfo _load_bytes_to_classinfo( self, int32_t type_id, MetaStringBytes ns_metabytes, MetaStringBytes type_metabytes): cdef PyObject * classinfo_ptr = self._c_meta_hash_to_classinfo[ @@ -737,44 +697,44 @@ cdef class Fury: buffer.write_string(obj) return elif cls is int: - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(obj) return elif cls is bool: - buffer.write_int16(NOT_NULL_PYBOOL_FLAG) + buffer.write_int16(NOT_NULL_BOOL_FLAG) buffer.write_bool(obj) return elif cls is float: - buffer.write_int16(NOT_NULL_PYFLOAT_FLAG) + buffer.write_int16(NOT_NULL_FLOAT64_FLAG) buffer.write_double(obj) return if self.ref_resolver.write_ref_or_null(buffer, obj): return if classinfo is None: classinfo = self.class_resolver.get_classinfo(cls) - self.class_resolver.write_classinfo(buffer, classinfo) + self.class_resolver.write_typeinfo(buffer, classinfo) classinfo.serializer.write(buffer, obj) cpdef inline serialize_nonref(self, Buffer buffer, obj): cls = type(obj) if cls is str: - buffer.write_varuint32(STRING_CLASS_ID << 1) + buffer.write_varuint32(STRING_CLASS_ID) buffer.write_string(obj) return elif cls is int: - buffer.write_varuint32(PYINT_CLASS_ID << 1) + buffer.write_varuint32(INT64_CLASS_ID) buffer.write_varint64(obj) return elif cls is bool: - buffer.write_varuint32(PYBOOL_CLASS_ID << 1) + buffer.write_varuint32(BOOL_CLASS_ID) buffer.write_bool(obj) return elif cls is float: - buffer.write_varuint32(PYFLOAT_CLASS_ID << 1) + buffer.write_varuint32(FLOAT64_CLASS_ID) buffer.write_double(obj) return cdef ClassInfo classinfo = self.class_resolver.get_classinfo(cls) - self.class_resolver.write_classinfo(buffer, classinfo) + self.class_resolver.write_typeinfo(buffer, classinfo) classinfo.serializer.write(buffer, obj) cpdef inline xserialize_ref( @@ -864,7 +824,7 @@ cdef class Fury: if ref_id < NOT_NULL_VALUE_FLAG: return ref_resolver.get_read_object() # indicates that the object is first read. - cdef ClassInfo classinfo = self.class_resolver.read_classinfo(buffer) + cdef ClassInfo classinfo = self.class_resolver.read_typeinfo(buffer) cls = classinfo.cls if cls is str: return buffer.read_string() @@ -880,7 +840,7 @@ cdef class Fury: cpdef inline deserialize_nonref(self, Buffer buffer): """Deserialize not-null and non-reference object from buffer.""" - cdef ClassInfo classinfo = self.class_resolver.read_classinfo(buffer) + cdef ClassInfo classinfo = self.class_resolver.read_typeinfo(buffer) cls = classinfo.cls if cls is str: return buffer.read_string() @@ -967,7 +927,7 @@ cdef class Fury: return if classinfo is None: classinfo = self.class_resolver.get_classinfo(type(value)) - self.class_resolver.write_classinfo(buffer, classinfo) + self.class_resolver.write_typeinfo(buffer, classinfo) classinfo.serializer.write(buffer, value) cpdef inline read_ref_pyobject(self, Buffer buffer): @@ -976,7 +936,7 @@ cdef class Fury: if ref_id < NOT_NULL_VALUE_FLAG: return ref_resolver.get_read_object() # indicates that the object is first read. - cdef ClassInfo classinfo = self.class_resolver.read_classinfo(buffer) + cdef ClassInfo classinfo = self.class_resolver.read_typeinfo(buffer) o = classinfo.serializer.read(buffer) ref_resolver.set_read_object(ref_id, o) return o @@ -1089,16 +1049,16 @@ cdef class Serializer: self.need_to_write_ref = not is_primitive_type(type_) cpdef write(self, Buffer buffer, value): - raise NotImplementedError + raise NotImplementedError(f"write method not implemented in {type(self)}") cpdef read(self, Buffer buffer): - raise NotImplementedError + raise NotImplementedError(f"read method not implemented in {type(self)}") cpdef xwrite(self, Buffer buffer, value): - raise NotImplemented + raise NotImplementedError(f"xwrite method not implemented in {type(self)}") cpdef xread(self, Buffer buffer): - raise NotImplemented + raise NotImplementedError(f"xread method not implemented in {type(self)}") @classmethod def support_subclass(cls) -> bool: @@ -1173,20 +1133,7 @@ cdef float FLOAT32_MAX_VALUE = 3.40282e+38 @cython.final -cdef class DynamicIntSerializer(CrossLanguageCompatibleSerializer): - cpdef inline xwrite(self, Buffer buffer, value): - # TODO(chaokunyang) check value range and write type and value - buffer.write_varuint32( TypeId.INT64) - buffer.write_varint64(value) - - cpdef inline xread(self, Buffer buffer): - type_id = buffer.read_varuint32() - assert type_id == TypeId.INT64, type_id - return buffer.read_varint64() - - -@cython.final -cdef class FloatSerializer(CrossLanguageCompatibleSerializer): +cdef class Float32Serializer(CrossLanguageCompatibleSerializer): cpdef inline write(self, Buffer buffer, value): buffer.write_float(value) @@ -1195,7 +1142,7 @@ cdef class FloatSerializer(CrossLanguageCompatibleSerializer): @cython.final -cdef class DoubleSerializer(CrossLanguageCompatibleSerializer): +cdef class Float64Serializer(CrossLanguageCompatibleSerializer): cpdef inline write(self, Buffer buffer, value): buffer.write_double(value) @@ -1203,19 +1150,6 @@ cdef class DoubleSerializer(CrossLanguageCompatibleSerializer): return buffer.read_double() -@cython.final -cdef class DynamicFloatSerializer(CrossLanguageCompatibleSerializer): - cpdef inline xwrite(self, Buffer buffer, value): - # TODO(chaokunyang) check value range and write type and value - buffer.write_varuint32( TypeId.FLOAT64) - buffer.write_double(value) - - cpdef inline xread(self, Buffer buffer): - cdef int32_t type_id = buffer.read_varuint32() - assert type_id == TypeId.FLOAT64, type_id - return buffer.read_double() - - @cython.final cdef class StringSerializer(CrossLanguageCompatibleSerializer): cpdef inline write(self, Buffer buffer, value): @@ -1293,12 +1227,12 @@ cdef class CollectionSerializer(Serializer): break if self.fury.ref_tracking: collect_flag |= COLLECTION_TRACKING_REF - buffer.write_varint64((len(value) << 4) | collect_flag) + buffer.write_varuint64((len(value) << 4) | collect_flag) return pair[int8_t, int64_t](collect_flag, obj2int(elem_type)) cpdef write(self, Buffer buffer, value): if len(value) == 0: - buffer.write_varint64(0) + buffer.write_varuint64(0) return cdef pair[int8_t, int64_t] header_pair = self.write_header(buffer, value) cdef int8_t collect_flag = header_pair.first @@ -1327,18 +1261,18 @@ cdef class CollectionSerializer(Serializer): buffer.write_int16(NOT_NULL_STRING_FLAG) buffer.write_string(s) elif cls is int: - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(s) elif cls is bool: - buffer.write_int16(NOT_NULL_PYBOOL_FLAG) + buffer.write_int16(NOT_NULL_BOOL_FLAG) buffer.write_bool(s) elif cls is float: - buffer.write_int16(NOT_NULL_PYFLOAT_FLAG) + buffer.write_int16(NOT_NULL_FLOAT64_FLAG) buffer.write_double(s) else: if not ref_resolver.write_ref_or_null(buffer, s): classinfo = class_resolver.get_classinfo(cls) - class_resolver.write_classinfo(buffer, classinfo) + class_resolver.write_typeinfo(buffer, classinfo) classinfo.serializer.write(buffer, s) cdef inline _write_string(self, Buffer buffer, value): @@ -1352,32 +1286,32 @@ cdef class CollectionSerializer(Serializer): self._add_element(collection_, i, buffer.read_string()) cdef inline _write_int(self, Buffer buffer, value): - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) for s in value: buffer.write_varint64(s) cdef inline _read_int(self, Buffer buffer, int64_t len_, object collection_): - assert buffer.read_int16() == NOT_NULL_PYINT_FLAG + assert buffer.read_int16() == NOT_NULL_INT64_FLAG for i in range(len_): self._add_element(collection_, i, buffer.read_varint64()) cdef inline _write_bool(self, Buffer buffer, value): - buffer.write_int16(NOT_NULL_PYBOOL_FLAG) + buffer.write_int16(NOT_NULL_BOOL_FLAG) for s in value: buffer.write_bool(s) cdef inline _read_bool(self, Buffer buffer, int64_t len_, object collection_): - assert buffer.read_int16() == NOT_NULL_PYBOOL_FLAG + assert buffer.read_int16() == NOT_NULL_BOOL_FLAG for i in range(len_): self._add_element(collection_, i, buffer.read_bool()) cdef inline _write_float(self, Buffer buffer, value): - buffer.write_int16(NOT_NULL_PYFLOAT_FLAG) + buffer.write_int16(NOT_NULL_FLOAT64_FLAG) for s in value: buffer.write_double(s) cdef inline _read_float(self, Buffer buffer, int64_t len_, object collection_): - assert buffer.read_int16() == NOT_NULL_PYFLOAT_FLAG + assert buffer.read_int16() == NOT_NULL_FLOAT64_FLAG for i in range(len_): self._add_element(collection_, i, buffer.read_double()) @@ -1385,14 +1319,14 @@ cdef class CollectionSerializer(Serializer): cdef MapRefResolver ref_resolver = self.ref_resolver cdef ClassResolver class_resolver = self.class_resolver classinfo = class_resolver.get_classinfo(elem_type) - class_resolver.write_classinfo(buffer, classinfo) + class_resolver.write_typeinfo(buffer, classinfo) for s in value: classinfo.serializer.write(buffer, s) cpdef _read_same_type_no_ref(self, Buffer buffer, int64_t len_, object collection_): cdef MapRefResolver ref_resolver = self.ref_resolver cdef ClassResolver class_resolver = self.class_resolver - classinfo = class_resolver.read_classinfo(buffer) + classinfo = class_resolver.read_typeinfo(buffer) for i in range(len_): obj = classinfo.serializer.read(buffer) self._add_element(collection_, i, obj) @@ -1401,7 +1335,7 @@ cdef class CollectionSerializer(Serializer): cdef MapRefResolver ref_resolver = self.ref_resolver cdef ClassResolver class_resolver = self.class_resolver classinfo = class_resolver.get_classinfo(elem_type) - class_resolver.write_classinfo(buffer, classinfo) + class_resolver.write_typeinfo(buffer, classinfo) for s in value: if not ref_resolver.write_ref_or_null(buffer, s): classinfo.serializer.write(buffer, s) @@ -1409,7 +1343,7 @@ cdef class CollectionSerializer(Serializer): cpdef _read_same_type_ref(self, Buffer buffer, int64_t len_, object collection_): cdef MapRefResolver ref_resolver = self.ref_resolver cdef ClassResolver class_resolver = self.class_resolver - classinfo = class_resolver.read_classinfo(buffer) + classinfo = class_resolver.read_typeinfo(buffer) for i in range(len_): ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: @@ -1440,7 +1374,7 @@ cdef class ListSerializer(CollectionSerializer): cpdef read(self, Buffer buffer): cdef MapRefResolver ref_resolver = self.fury.ref_resolver cdef ClassResolver class_resolver = self.fury.class_resolver - cdef int64_t len_and_flag = buffer.read_varint64() + cdef int64_t len_and_flag = buffer.read_varuint64() cdef int64_t len_ = len_and_flag >> 4 cdef int8_t collect_flag = (len_and_flag & 0xF) cdef list list_ = PyList_New(len_) @@ -1451,11 +1385,11 @@ cdef class ListSerializer(CollectionSerializer): type_flag = buffer.get_int16(buffer.reader_index) if type_flag == NOT_NULL_STRING_FLAG: self._read_string(buffer, len_, list_) - elif type_flag == NOT_NULL_PYINT_FLAG: + elif type_flag == NOT_NULL_INT64_FLAG: self._read_int(buffer, len_, list_) - elif type_flag == NOT_NULL_PYBOOL_FLAG: + elif type_flag == NOT_NULL_BOOL_FLAG: self._read_bool(buffer, len_, list_) - elif type_flag == NOT_NULL_PYFLOAT_FLAG: + elif type_flag == NOT_NULL_FLOAT64_FLAG: self._read_float(buffer, len_, list_) else: if (collect_flag & COLLECTION_TRACKING_REF) == 0: @@ -1495,7 +1429,7 @@ cdef inline get_next_elenment( if ref_id < NOT_NULL_VALUE_FLAG: return ref_resolver.get_read_object() # indicates that the object is first read. - classinfo = class_resolver.read_classinfo(buffer) + classinfo = class_resolver.read_typeinfo(buffer) cls = classinfo.cls # Note that all read operations in fast paths of list/tuple/set/dict/sub_dict # ust match corresponding writing operations. Otherwise, ref tracking will @@ -1519,7 +1453,7 @@ cdef class TupleSerializer(CollectionSerializer): cpdef inline read(self, Buffer buffer): cdef MapRefResolver ref_resolver = self.fury.ref_resolver cdef ClassResolver class_resolver = self.fury.class_resolver - cdef int64_t len_and_flag = buffer.read_varint64() + cdef int64_t len_and_flag = buffer.read_varuint64() cdef int64_t len_ = len_and_flag >> 4 cdef int8_t collect_flag = (len_and_flag & 0xF) cdef tuple tuple_ = PyTuple_New(len_) @@ -1529,11 +1463,11 @@ cdef class TupleSerializer(CollectionSerializer): type_flag = buffer.get_int16(buffer.reader_index) if type_flag == NOT_NULL_STRING_FLAG: self._read_string(buffer, len_, tuple_) - elif type_flag == NOT_NULL_PYINT_FLAG: + elif type_flag == NOT_NULL_INT64_FLAG: self._read_int(buffer, len_, tuple_) - elif type_flag == NOT_NULL_PYBOOL_FLAG: + elif type_flag == NOT_NULL_BOOL_FLAG: self._read_bool(buffer, len_, tuple_) - elif type_flag == NOT_NULL_PYFLOAT_FLAG: + elif type_flag == NOT_NULL_FLOAT64_FLAG: self._read_float(buffer, len_, tuple_) else: if (collect_flag & COLLECTION_TRACKING_REF) == 0: @@ -1576,7 +1510,7 @@ cdef class SetSerializer(CollectionSerializer): cdef ClassResolver class_resolver = self.fury.class_resolver cdef set instance = set() ref_resolver.reference(instance) - cdef int64_t len_and_flag = buffer.read_varint64() + cdef int64_t len_and_flag = buffer.read_varuint64() cdef int64_t len_ = len_and_flag >> 4 cdef int8_t collect_flag = (len_and_flag & 0xF) cdef int32_t ref_id @@ -1587,11 +1521,11 @@ cdef class SetSerializer(CollectionSerializer): type_flag = buffer.get_int16(buffer.reader_index) if type_flag == NOT_NULL_STRING_FLAG: self._read_string(buffer, len_, instance) - elif type_flag == NOT_NULL_PYINT_FLAG: + elif type_flag == NOT_NULL_INT64_FLAG: self._read_int(buffer, len_, instance) - elif type_flag == NOT_NULL_PYBOOL_FLAG: + elif type_flag == NOT_NULL_BOOL_FLAG: self._read_bool(buffer, len_, instance) - elif type_flag == NOT_NULL_PYFLOAT_FLAG: + elif type_flag == NOT_NULL_FLOAT64_FLAG: self._read_float(buffer, len_, instance) else: if (collect_flag & COLLECTION_TRACKING_REF) == 0: @@ -1605,7 +1539,7 @@ cdef class SetSerializer(CollectionSerializer): instance.add(ref_resolver.get_read_object()) continue # indicates that the object is first read. - classinfo = class_resolver.read_classinfo(buffer) + classinfo = class_resolver.read_typeinfo(buffer) cls = classinfo.cls if cls is str: instance.add(buffer.read_string()) @@ -1668,26 +1602,26 @@ cdef class MapSerializer(Serializer): else: if not self.ref_resolver.write_ref_or_null(buffer, k): key_classinfo = self.class_resolver.get_classinfo(key_cls) - self.class_resolver.write_classinfo(buffer, key_classinfo) + self.class_resolver.write_typeinfo(buffer, key_classinfo) key_classinfo.serializer.write(buffer, k) value_cls = type(v) if value_cls is str: buffer.write_int16(NOT_NULL_STRING_FLAG) buffer.write_string(v) elif value_cls is int: - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(v) elif value_cls is bool: - buffer.write_int16(NOT_NULL_PYBOOL_FLAG) + buffer.write_int16(NOT_NULL_BOOL_FLAG) buffer.write_bool(v) elif value_cls is float: - buffer.write_int16(NOT_NULL_PYFLOAT_FLAG) + buffer.write_int16(NOT_NULL_FLOAT64_FLAG) buffer.write_double(v) else: if not self.ref_resolver.write_ref_or_null(buffer, v): value_classinfo = self.class_resolver. \ get_classinfo(value_cls) - self.class_resolver.write_classinfo(buffer, value_classinfo) + self.class_resolver.write_typeinfo(buffer, value_classinfo) value_classinfo.serializer.write(buffer, v) cpdef inline read(self, Buffer buffer): @@ -1704,7 +1638,7 @@ cdef class MapSerializer(Serializer): if ref_id < NOT_NULL_VALUE_FLAG: key = ref_resolver.get_read_object() else: - key_classinfo = class_resolver.read_classinfo(buffer) + key_classinfo = class_resolver.read_typeinfo(buffer) if key_classinfo.cls is str: key = buffer.read_string() else: @@ -1714,7 +1648,7 @@ cdef class MapSerializer(Serializer): if ref_id < NOT_NULL_VALUE_FLAG: value = ref_resolver.get_read_object() else: - value_classinfo = class_resolver.read_classinfo(buffer) + value_classinfo = class_resolver.read_typeinfo(buffer) cls = value_classinfo.cls if cls is str: value = buffer.read_string() @@ -1788,26 +1722,26 @@ cdef class SubMapSerializer(Serializer): else: if not self.ref_resolver.write_ref_or_null(buffer, k): key_classinfo = self.class_resolver.get_classinfo(key_cls) - self.class_resolver.write_classinfo(buffer, key_classinfo) + self.class_resolver.write_typeinfo(buffer, key_classinfo) key_classinfo.serializer.write(buffer, k) value_cls = type(v) if value_cls is str: buffer.write_int16(NOT_NULL_STRING_FLAG) buffer.write_string(v) elif value_cls is int: - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(v) elif value_cls is bool: - buffer.write_int16(NOT_NULL_PYBOOL_FLAG) + buffer.write_int16(NOT_NULL_BOOL_FLAG) buffer.write_bool(v) elif value_cls is float: - buffer.write_int16(NOT_NULL_PYFLOAT_FLAG) + buffer.write_int16(NOT_NULL_FLOAT64_FLAG) buffer.write_double(v) else: if not self.ref_resolver.write_ref_or_null(buffer, v): value_classinfo = self.class_resolver. \ get_classinfo(value_cls) - self.class_resolver.write_classinfo(buffer, value_classinfo) + self.class_resolver.write_typeinfo(buffer, value_classinfo) value_classinfo.serializer.write(buffer, v) cpdef inline read(self, Buffer buffer): @@ -1824,7 +1758,7 @@ cdef class SubMapSerializer(Serializer): if ref_id < NOT_NULL_VALUE_FLAG: key = ref_resolver.get_read_object() else: - key_classinfo = class_resolver.read_classinfo(buffer) + key_classinfo = class_resolver.read_typeinfo(buffer) if key_classinfo.cls is str: key = buffer.read_string() else: @@ -1834,7 +1768,7 @@ cdef class SubMapSerializer(Serializer): if ref_id < NOT_NULL_VALUE_FLAG: value = ref_resolver.get_read_object() else: - value_classinfo = class_resolver.read_classinfo(buffer) + value_classinfo = class_resolver.read_typeinfo(buffer) cls = value_classinfo.cls if cls is str: value = buffer.read_string() @@ -1878,7 +1812,7 @@ cdef class SliceSerializer(Serializer): start, stop, step = value.start, value.stop, value.step if type(start) is int: # TODO support varint128 - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(start) else: if start is None: @@ -1888,7 +1822,7 @@ cdef class SliceSerializer(Serializer): self.fury.serialize_nonref(buffer, start) if type(stop) is int: # TODO support varint128 - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(stop) else: if stop is None: @@ -1898,7 +1832,7 @@ cdef class SliceSerializer(Serializer): self.fury.serialize_nonref(buffer, stop) if type(step) is int: # TODO support varint128 - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(step) else: if step is None: diff --git a/python/pyfury/_serializer.py b/python/pyfury/_serializer.py index ea31e1291b..e200f65643 100644 --- a/python/pyfury/_serializer.py +++ b/python/pyfury/_serializer.py @@ -22,14 +22,11 @@ from pyfury._fury import ( NOT_NULL_STRING_FLAG, - NOT_NULL_PYINT_FLAG, - NOT_NULL_PYBOOL_FLAG, + NOT_NULL_INT64_FLAG, + NOT_NULL_BOOL_FLAG, ) from pyfury.resolver import NOT_NULL_VALUE_FLAG, NULL_FLAG -from pyfury.type import ( - TypeId, - is_primitive_type, -) +from pyfury.type import is_primitive_type try: import numpy as np @@ -123,19 +120,7 @@ def read(self, buffer): return buffer.read_varint64() -class DynamicIntSerializer(CrossLanguageCompatibleSerializer): - def xwrite(self, buffer, value): - # TODO(chaokunyang) check value range and write type and value - buffer.write_varuint32(TypeId.INT64) - buffer.write_varint64(value) - - def xread(self, buffer): - type_id = buffer.read_varuint32() - assert type_id == TypeId.INT64, type_id - return buffer.read_varint64() - - -class FloatSerializer(CrossLanguageCompatibleSerializer): +class Float32Serializer(CrossLanguageCompatibleSerializer): def write(self, buffer, value): buffer.write_float(value) @@ -143,7 +128,7 @@ def read(self, buffer): return buffer.read_float() -class DoubleSerializer(CrossLanguageCompatibleSerializer): +class Float64Serializer(CrossLanguageCompatibleSerializer): def write(self, buffer, value): buffer.write_double(value) @@ -151,18 +136,6 @@ def read(self, buffer): return buffer.read_double() -class DynamicFloatSerializer(CrossLanguageCompatibleSerializer): - def xwrite(self, buffer, value): - # TODO(chaokunyang) check value range and write type and value - buffer.write_varuint32(TypeId.FLOAT64) - buffer.write_double(value) - - def xread(self, buffer): - type_id = buffer.read_varuint32() - assert type_id == TypeId.FLOAT64, type_id - return buffer.read_double() - - class StringSerializer(CrossLanguageCompatibleSerializer): def write(self, buffer, value: str): buffer.write_string(value) @@ -223,15 +196,15 @@ def write(self, buffer, value: Iterable[Any]): buffer.write_int16() buffer.write_string(s) elif cls is int: - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(s) elif cls is bool: - buffer.write_int16(NOT_NULL_PYBOOL_FLAG) + buffer.write_int16(NOT_NULL_BOOL_FLAG) buffer.write_bool(s) else: if not self.ref_resolver.write_ref_or_null(buffer, s): classinfo = self.class_resolver.get_classinfo(cls) - self.class_resolver.write_classinfo(buffer, classinfo) + self.class_resolver.write_typeinfo(buffer, classinfo) classinfo.serializer.write(buffer, s) def read(self, buffer): @@ -331,19 +304,19 @@ def write(self, buffer, value: Dict): else: if not self.ref_resolver.write_ref_or_null(buffer, k): classinfo = self.class_resolver.get_classinfo(key_cls) - self.class_resolver.write_classinfo(buffer, classinfo) + self.class_resolver.write_typeinfo(buffer, classinfo) classinfo.serializer.write(buffer, k) value_cls = type(v) if value_cls is str: buffer.write_int16(NOT_NULL_STRING_FLAG) buffer.write_string(v) elif value_cls is int: - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(v) else: if not self.ref_resolver.write_ref_or_null(buffer, v): classinfo = self.class_resolver.get_classinfo(value_cls) - self.class_resolver.write_classinfo(buffer, classinfo) + self.class_resolver.write_typeinfo(buffer, classinfo) classinfo.serializer.write(buffer, v) def read(self, buffer): @@ -400,7 +373,7 @@ def write(self, buffer, value: slice): start, stop, step = value.start, value.stop, value.step if type(start) is int: # TODO support varint128 - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(start) else: if start is None: @@ -410,7 +383,7 @@ def write(self, buffer, value: slice): self.fury.serialize_nonref(buffer, start) if type(stop) is int: # TODO support varint128 - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(stop) else: if stop is None: @@ -420,7 +393,7 @@ def write(self, buffer, value: slice): self.fury.serialize_nonref(buffer, stop) if type(step) is int: # TODO support varint128 - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(step) else: if step is None: diff --git a/python/pyfury/resolver.py b/python/pyfury/resolver.py index 4ba360bcc7..1b0cbc9db5 100644 --- a/python/pyfury/resolver.py +++ b/python/pyfury/resolver.py @@ -161,7 +161,7 @@ def read_ref_or_null(self, buffer): head_flag = buffer.read_int8() if head_flag == REF_FLAG: # read reference id and get object from reference resolver - ref_id = buffer.read_varint32() + ref_id = buffer.read_varuint32() self.read_object = self.get_read_object(ref_id) return REF_FLAG else: @@ -178,7 +178,7 @@ def try_preserve_ref_id(self, buffer) -> int: head_flag = buffer.read_int8() if head_flag == REF_FLAG: # read reference id and get object from reference resolver - ref_id = buffer.read_varint32() + ref_id = buffer.read_varuint32() self.read_object = self.get_read_object(id_=ref_id) else: self.read_object = None diff --git a/python/pyfury/serializer.py b/python/pyfury/serializer.py index b420423991..6619ee27de 100644 --- a/python/pyfury/serializer.py +++ b/python/pyfury/serializer.py @@ -39,7 +39,7 @@ np = None from pyfury._fury import ( - NOT_NULL_PYINT_FLAG, + NOT_NULL_INT64_FLAG, BufferObject, ) @@ -56,10 +56,8 @@ Int16Serializer, Int32Serializer, Int64Serializer, - DynamicIntSerializer, - FloatSerializer, - DoubleSerializer, - DynamicFloatSerializer, + Float32Serializer, + Float64Serializer, StringSerializer, DateSerializer, TimestampSerializer, @@ -82,8 +80,8 @@ Int16Serializer, Int32Serializer, Int64Serializer, - FloatSerializer, - DoubleSerializer, + Float32Serializer, + Float64Serializer, StringSerializer, DateSerializer, TimestampSerializer, @@ -96,8 +94,6 @@ SubMapSerializer, EnumSerializer, SliceSerializer, - DynamicIntSerializer, - DynamicFloatSerializer, ) from pyfury.type import ( @@ -222,7 +218,7 @@ def write(self, buffer, value): stop = value.stop step = value.step if type(start) is int: - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(start) else: if start is None: @@ -231,7 +227,7 @@ def write(self, buffer, value): buffer.write_int8(NOT_NULL_VALUE_FLAG) fury.serialize_nonref(buffer, start) if type(stop) is int: - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(stop) else: if stop is None: @@ -240,7 +236,7 @@ def write(self, buffer, value): buffer.write_int8(NOT_NULL_VALUE_FLAG) fury.serialize_nonref(buffer, stop) if type(step) is int: - buffer.write_int16(NOT_NULL_PYINT_FLAG) + buffer.write_int16(NOT_NULL_INT64_FLAG) buffer.write_varint64(step) else: if step is None: @@ -536,6 +532,7 @@ def xread(self, buffer): return arr def write(self, buffer, value): + buffer.write_varuint32(PickleSerializer.PICKLE_CLASS_ID) self.fury.handle_unsupported_write(buffer, value) def read(self, buffer): @@ -597,6 +594,7 @@ def xread(self, buffer): return np.frombuffer(data, dtype=self.dtype) def write(self, buffer, value): + buffer.write_int8(PickleSerializer.PICKLE_CLASS_ID) self.fury.handle_unsupported_write(buffer, value) def read(self, buffer): @@ -619,6 +617,7 @@ def xread(self, buffer): raise NotImplementedError("Multi-dimensional array not supported currently") def write(self, buffer, value): + buffer.write_int8(PickleSerializer.PICKLE_CLASS_ID) self.fury.handle_unsupported_write(buffer, value) def read(self, buffer): @@ -651,6 +650,8 @@ def to_buffer(self) -> "Buffer": class PickleSerializer(Serializer): + PICKLE_CLASS_ID = 96 + def xwrite(self, buffer, value): raise NotImplementedError diff --git a/python/pyfury/tests/test_serializer.py b/python/pyfury/tests/test_serializer.py index 9230580952..4478e81f16 100644 --- a/python/pyfury/tests/test_serializer.py +++ b/python/pyfury/tests/test_serializer.py @@ -53,7 +53,7 @@ def test_float(): assert ser_de(fury, -1.0) == -1.0 assert ser_de(fury, 1 / 3) == 1 / 3 serializer = fury.class_resolver.get_serializer(float) - assert type(serializer) is pyfury.DoubleSerializer + assert type(serializer) is pyfury.Float64Serializer def test_tuple():