From 25fd39d8a74d604f4aa0fc88af947d4018784aa3 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Wed, 20 May 2015 22:46:53 -0400 Subject: [PATCH 1/4] Check that a builder roundtrips through pickle and will give the same results afterwards. --- patsy/test_highlevel.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index 9b7be37..fb2f988 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -5,6 +5,7 @@ # Exhaustive end-to-end tests of the top-level API. import sys +from six.moves import cPickle as pickle import __future__ import six import numpy as np @@ -758,3 +759,17 @@ def test_C_and_pandas_categorical(): [[1, 0], [1, 1], [1, 0]]) + +def test_pickle_builder_roundtrips(): + design_matrix = dmatrix("x + a", {"x": [1, 2, 3], + "a": ["a1", "a2", "a3"]}) + builder = design_matrix.design_info.builder + + new_data = {"x": [10, 20, 30], + "a": ["a3", "a1", "a2"]} + m1 = dmatrix(builder, new_data) + + builder2 = pickle.loads(pickle.dumps(design_matrix.design_info.builder)) + m2 = dmatrix(builder2, new_data) + + assert np.allclose(m1, m2) From 5aea78174c0a23a99bd7e3b2f1daf8b2142a05ad Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Tue, 18 Aug 2015 21:04:05 -0400 Subject: [PATCH 2/4] Beginning of work on pickling. --- patsy/eval.py | 17 ++++++++++++++--- patsy/test_highlevel.py | 6 ++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/patsy/eval.py b/patsy/eval.py index d4ed83f..f333b84 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -565,7 +565,20 @@ def eval(self, memorize_state, data): memorize_state, data) - __getstate__ = no_pickling + def __getstate__(self): + return (0, self.name, self.origin) + + def __setstate__(self, state): + # TODO What do we do about self.code? + (version, code, origin) = state + assert version == 0 + # TODO Give better error message when version is too recent, etc. + # Should use a single function from somewhere + # + self.code = code + +def test_EvalFactor_pickle_saves_origin(): + assert False def test_EvalFactor_basics(): e = EvalFactor("a+b") @@ -577,8 +590,6 @@ def test_EvalFactor_basics(): assert e.origin is None assert e2.origin == "asdf" - assert_no_pickling(e) - def test_EvalFactor_memorize_passes_needed(): from patsy.state import stateful_transform foo = stateful_transform(lambda: "FOO-OBJ") diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index fb2f988..2dc45fd 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -761,9 +761,13 @@ def test_C_and_pandas_categorical(): [1, 0]]) def test_pickle_builder_roundtrips(): + import numpy as np + # TODO Add center(x) and categorical interaction, and call to np.log to patsy formula. design_matrix = dmatrix("x + a", {"x": [1, 2, 3], "a": ["a1", "a2", "a3"]}) + # TODO Remove builder, pass design_info to dmatrix() instead. builder = design_matrix.design_info.builder + del np new_data = {"x": [10, 20, 30], "a": ["a3", "a1", "a2"]} @@ -773,3 +777,5 @@ def test_pickle_builder_roundtrips(): m2 = dmatrix(builder2, new_data) assert np.allclose(m1, m2) + + From f028c032718d6e44bed57e8510a050657df41e1e Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Tue, 18 Aug 2015 21:04:43 -0400 Subject: [PATCH 3/4] Beginning of igh-level tests for pickling. --- patsy/test_pickling.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 patsy/test_pickling.py diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py new file mode 100644 index 0000000..8f2cc9a --- /dev/null +++ b/patsy/test_pickling.py @@ -0,0 +1,22 @@ +from six.moves import cPickle as pickle + +from patsy import EvalFactor + +stuff = [ + EvalFactor("a+b"), + ] + +def test_pickling_roundtrips(): + for obj in stuff: + assert obj == pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) + +def test_unpickling_future_gives_sensible_error_msg(): + pass + +# Entrypoint: python -m patsy.test_pickling ... + +if __name__ == "__main__": + # TODO Save pickle. Make sure it's running from the right directory, so + # the pickles are saved in the right place. + + From 6136e9f308b1e736e103eb77f57412dd60f92de5 Mon Sep 17 00:00:00 2001 From: Louis Potok Date: Sun, 1 May 2016 09:48:09 -0700 Subject: [PATCH 4/4] Add pickling for EvalFactor This is mainly to show the approach. I think the verbose error message might be overkill but it just shows the flexibility of this pattern. --- patsy/eval.py | 44 +++++++++++++++++++++++++++++++----------- patsy/test_pickling.py | 27 ++++++++++++-------------- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/patsy/eval.py b/patsy/eval.py index f333b84..5a0f07f 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -24,6 +24,7 @@ from patsy.tokens import (pretty_untokenize, normalize_token_spacing, python_tokenize) from patsy.compat import call_and_wrap_exc +from patsy.version import __version__ def _all_future_flags(): flags = 0 @@ -566,19 +567,40 @@ def eval(self, memorize_state, data): data) def __getstate__(self): - return (0, self.name, self.origin) + return { + 'version': __version__, + 'code': self.code, + 'origin': self.origin + } def __setstate__(self, state): - # TODO What do we do about self.code? - (version, code, origin) = state - assert version == 0 - # TODO Give better error message when version is too recent, etc. - # Should use a single function from somewhere - # - self.code = code - -def test_EvalFactor_pickle_saves_origin(): - assert False + expected_fields = { + 'code': 'REQUIRED', + 'origin': 'OPTIONAL' + } + + for field in expected_fields: + if field in state: + self.__setattr__(field, state[field]) + continue + else: + pickling_version = state['version'] + unpickling_newer_version = pickling_version.split('+')[0] > __version__.split('+')[0] + if expected_fields[field] == 'REQUIRED' and unpickling_newer_version: + msg = "This EvalFactor was pickled with patsy version %s," \ + "and cannot be unpickled with version %s" % \ + (pickling_version, __version__) + raise KeyError, msg + elif expected_fields[field] == 'OPTIONAL' and unpickling_newer_version: + msg = "This EvalFactor was pickled with patsy version %s," \ + "and cannot be unpickled with full fidelity by version %s." \ + "In particular, you have access to `code` but not to `origin`" % \ + (pickling_version, __version__) + raise FutureWarning, msg + else: + msg = "Unable to unpickle EvalFactor field %s." % field + raise KeyError, msg + def test_EvalFactor_basics(): e = EvalFactor("a+b") diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index 8f2cc9a..5a3943c 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -1,22 +1,19 @@ from six.moves import cPickle as pickle -from patsy import EvalFactor +from patsy.eval import EvalFactor +from patsy.version import __version__ -stuff = [ - EvalFactor("a+b"), + +objects_to_test = [ + ("EvalFactor('a+b', 'mars')", { + "0.4.1+dev": "ccopy_reg\n_reconstructor\np1\n(cpatsy.eval\nEvalFactor\np2\nc__builtin__\nobject\np3\nNtRp4\n(dp5\nS\'code\'\np6\nS\'a + b\'\np7\nsS\'origin\'\np8\nS\'mars\'\np9\nsS\'version\'\np10\nS\'0.4.1+dev\'\np11\nsb." + }) ] def test_pickling_roundtrips(): - for obj in stuff: + for obj_code, pickled_history in objects_to_test: + obj = eval(obj_code) + print pickle.dumps(obj).encode('string-escape') assert obj == pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) - -def test_unpickling_future_gives_sensible_error_msg(): - pass - -# Entrypoint: python -m patsy.test_pickling ... - -if __name__ == "__main__": - # TODO Save pickle. Make sure it's running from the right directory, so - # the pickles are saved in the right place. - - + for version, pickled in pickled_history.items(): + assert pickle.dumps(obj) == pickled