From e55548796ec2a9f1bf600c7583b8655611b9a496 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Wed, 20 May 2015 22:46:53 -0400 Subject: [PATCH 01/17] Check that a builder roundtrips through pickle and will give the same results afterwards. --- patsy/test_highlevel.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index 0ef0438..2243c21 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -5,6 +5,7 @@ # Exhaustive end-to-end tests of the top-level API. import sys +from six.moves import cPickle as pickle import __future__ import six import numpy as np @@ -758,3 +759,17 @@ def test_C_and_pandas_categorical(): [[1, 0], [1, 1], [1, 0]]) + +def test_pickle_builder_roundtrips(): + design_matrix = dmatrix("x + a", {"x": [1, 2, 3], + "a": ["a1", "a2", "a3"]}) + builder = design_matrix.design_info.builder + + new_data = {"x": [10, 20, 30], + "a": ["a3", "a1", "a2"]} + m1 = dmatrix(builder, new_data) + + builder2 = pickle.loads(pickle.dumps(design_matrix.design_info.builder)) + m2 = dmatrix(builder2, new_data) + + assert np.allclose(m1, m2) From 4eab160a4675ef56aa732fc8d19b62e37ce144a9 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Tue, 18 Aug 2015 21:04:05 -0400 Subject: [PATCH 02/17] Beginning of work on pickling. --- patsy/eval.py | 17 ++++++++++++++--- patsy/test_highlevel.py | 6 ++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/patsy/eval.py b/patsy/eval.py index d4ed83f..f333b84 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -565,7 +565,20 @@ def eval(self, memorize_state, data): memorize_state, data) - __getstate__ = no_pickling + def __getstate__(self): + return (0, self.name, self.origin) + + def __setstate__(self, state): + # TODO What do we do about self.code? + (version, code, origin) = state + assert version == 0 + # TODO Give better error message when version is too recent, etc. 
+ # Should use a single function from somewhere + # + self.code = code + +def test_EvalFactor_pickle_saves_origin(): + assert False def test_EvalFactor_basics(): e = EvalFactor("a+b") @@ -577,8 +590,6 @@ def test_EvalFactor_basics(): assert e.origin is None assert e2.origin == "asdf" - assert_no_pickling(e) - def test_EvalFactor_memorize_passes_needed(): from patsy.state import stateful_transform foo = stateful_transform(lambda: "FOO-OBJ") diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index 2243c21..eb8ed66 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -761,9 +761,13 @@ def test_C_and_pandas_categorical(): [1, 0]]) def test_pickle_builder_roundtrips(): + import numpy as np + # TODO Add center(x) and categorical interaction, and call to np.log to patsy formula. design_matrix = dmatrix("x + a", {"x": [1, 2, 3], "a": ["a1", "a2", "a3"]}) + # TODO Remove builder, pass design_info to dmatrix() instead. builder = design_matrix.design_info.builder + del np new_data = {"x": [10, 20, 30], "a": ["a3", "a1", "a2"]} @@ -773,3 +777,5 @@ def test_pickle_builder_roundtrips(): m2 = dmatrix(builder2, new_data) assert np.allclose(m1, m2) + + From 2413dfe447f98dc87cc75ea129289fc2d1f0114f Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Tue, 18 Aug 2015 21:04:43 -0400 Subject: [PATCH 03/17] Beginning of high-level tests for pickling. 
--- patsy/test_pickling.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 patsy/test_pickling.py diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py new file mode 100644 index 0000000..8f2cc9a --- /dev/null +++ b/patsy/test_pickling.py @@ -0,0 +1,22 @@ +from six.moves import cPickle as pickle + +from patsy import EvalFactor + +stuff = [ + EvalFactor("a+b"), + ] + +def test_pickling_roundtrips(): + for obj in stuff: + assert obj == pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) + +def test_unpickling_future_gives_sensible_error_msg(): + pass + +# Entrypoint: python -m patsy.test_pickling ... + +if __name__ == "__main__": + # TODO Save pickle. Make sure it's running from the right directory, so + # the pickles are saved in the right place. + + From 9b11406ba29c004146ef01b426ace50ed1fcb789 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Fri, 3 Jun 2016 16:24:14 -0700 Subject: [PATCH 04/17] Remove stray whitespace and indents. --- patsy/test_highlevel.py | 2 -- patsy/test_pickling.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index eb8ed66..03c6f97 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -777,5 +777,3 @@ def test_pickle_builder_roundtrips(): m2 = dmatrix(builder2, new_data) assert np.allclose(m1, m2) - - diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index 8f2cc9a..292f3c3 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -18,5 +18,3 @@ def test_unpickling_future_gives_sensible_error_msg(): if __name__ == "__main__": # TODO Save pickle. Make sure it's running from the right directory, so # the pickles are saved in the right place. - - From 0a5138938513b95fa1dbeb3df43ec5c99aa4cd79 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sat, 4 Jun 2016 16:11:25 -0700 Subject: [PATCH 05/17] In-progress work for pickling tests. 
--- patsy/test_pickling.py | 51 +++++++++++++++--- pickle_testcases/0.5/evalfactor_simple.pickle | Bin 0 -> 33 bytes release-checklist.txt | 1 + 3 files changed, 44 insertions(+), 8 deletions(-) create mode 100644 pickle_testcases/0.5/evalfactor_simple.pickle diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index 292f3c3..6e87e07 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -1,20 +1,55 @@ +import six from six.moves import cPickle as pickle +import os + from patsy import EvalFactor -stuff = [ - EvalFactor("a+b"), - ] +PICKE_TESTCASES_ROOTDIR = os.path.join(os.path.dirname(__file__), '..', 'pickle_testcases') + +pickling_testcases = { + "evalfactor_simple": EvalFactor("a+b"), + } + -def test_pickling_roundtrips(): - for obj in stuff: +def test_pickling_same_version_roundtrips(): + for obj in six.itervalues(pickling_testcases): + yield (check_pickling_same_version_roundtrips, obj) + +def check_pickling_same_version_roundtrips(obj): assert obj == pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) +def test_pickling_old_versions_still_work(): + for (dirpath, dirnames, filenames) in os.walk(PICKE_TESTCASES_ROOTDIR): + for fname in filenames: + if os.path.splitext(fname)[1] == '.pickle': + yield check_pickling_old_versions_still_work, os.path.join(dirpath, fname) + +def check_pickling_old_versions_still_work(pickle_filename): + with open(pickle_filename, 'rb') as f: + testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0] + assert pickling_testcases[testcase_name] == pickle.load(f) + def test_unpickling_future_gives_sensible_error_msg(): + # TODO How do we do this? And how do we test it then? pass -# Entrypoint: python -m patsy.test_pickling ... +def save_pickle_testcases(version): + pickle_testcases_dir = os.path.join(PICKE_TESTCASES_ROOTDIR, version) + # Fails if the directory already exists, which is what we want here + # since we don't want to overwrite testcases accidentally. 
+ os.mkdir(pickle_testcases_dir) + + for name, obj in six.iteritems(pickling_testcases): + with open(os.path.join(pickle_testcases_dir, '{}.pickle'.format(name)), 'wb') as f: + pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) if __name__ == "__main__": - # TODO Save pickle. Make sure it's running from the right directory, so - # the pickles are saved in the right place. + import argparse + + # Should we use a "create-pickles" sub-command to make things clearer? + arg_parser = argparse.ArgumentParser() + arg_parser.add_argument("version", help="The version of patsy for which to save a new set of pickle testcases.") + args = arg_parser.parse_args() + + save_pickle_testcases(args.version) diff --git a/pickle_testcases/0.5/evalfactor_simple.pickle b/pickle_testcases/0.5/evalfactor_simple.pickle new file mode 100644 index 0000000000000000000000000000000000000000..fb505999bf489d7e6698856844edec0f24ec2532 GIT binary patch literal 33 mcmZo*N-jt&DX!E@ElbSdas?7@iOD7TMO=l9nvI1_-V6Z77z)Aw literal 0 HcmV?d00001 diff --git a/release-checklist.txt b/release-checklist.txt index 165310c..bb006fb 100644 --- a/release-checklist.txt +++ b/release-checklist.txt @@ -5,6 +5,7 @@ * verify that the ">97% coverage" claim in overview.rst is still true. * cd docs; make clean html -- check that there are no warnings * check MANIFEST.in +* run python -m patsy.test_pickling NEW_VERSION_NUMBER * update version in doc/changes.rst, patsy/version.py * make sure there are no uncommitted changes * clone a clean source directory (so as to get a clean checkout From e1b0da348dc81484f52e890ef50f93260f30fc29 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sat, 4 Jun 2016 16:17:10 -0700 Subject: [PATCH 06/17] Add TODO comment. 
--- patsy/test_pickling.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index 6e87e07..2d1741e 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -28,6 +28,11 @@ def test_pickling_old_versions_still_work(): def check_pickling_old_versions_still_work(pickle_filename): with open(pickle_filename, 'rb') as f: testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0] + # When adding features to a class, it will happen that there is no + # way to make an instance of that version version of that class + # equal to any instance of a previous version. How do we handle + # that? + # Maybe adding a minimum version requirement to each test? assert pickling_testcases[testcase_name] == pickle.load(f) def test_unpickling_future_gives_sensible_error_msg(): From 5ccaeb4f5b5f21a66130c64da9c4d5ca90ccd4e5 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 15:15:39 -0700 Subject: [PATCH 07/17] Pickling of EvalFactor, with testcase for making sure origin is pickled. --- patsy/eval.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/patsy/eval.py b/patsy/eval.py index f333b84..438f022 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -19,6 +19,7 @@ import ast import numbers import six +from six.moves import cPickle as pickle from patsy import PatsyError from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling from patsy.tokens import (pretty_untokenize, normalize_token_spacing, @@ -566,19 +567,28 @@ def eval(self, memorize_state, data): data) def __getstate__(self): - return (0, self.name, self.origin) + return (0, self.code, self.origin) def __setstate__(self, state): - # TODO What do we do about self.code? (version, code, origin) = state assert version == 0 # TODO Give better error message when version is too recent, etc. 
# Should use a single function from somewhere # self.code = code + self.origin = origin def test_EvalFactor_pickle_saves_origin(): - assert False + # The pickling tests use object equality before and after pickling + # to test that pickling worked correctly. But EvalFactor's origin field + # is not used in equality comparisons, so we need a separate test to + # test that it is being pickled. + ORIGIN = 123456 + f = EvalFactor('a', ORIGIN) + new_f = pickle.loads(pickle.dumps(f)) + + assert f.origin is not None + assert f.origin == new_f.origin def test_EvalFactor_basics(): e = EvalFactor("a+b") From c158087feed5c9cef73ae84fda082de29877c830 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 15:18:16 -0700 Subject: [PATCH 08/17] More fully fleshed-out version of pickling test harness. --- patsy/test_pickling.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index 2d1741e..c23e3bc 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -1,7 +1,10 @@ +from __future__ import print_function + import six from six.moves import cPickle as pickle import os +import shutil from patsy import EvalFactor @@ -11,7 +14,6 @@ "evalfactor_simple": EvalFactor("a+b"), } - def test_pickling_same_version_roundtrips(): for obj in six.itervalues(pickling_testcases): yield (check_pickling_same_version_roundtrips, obj) @@ -39,22 +41,30 @@ def test_unpickling_future_gives_sensible_error_msg(): # TODO How do we do this? And how do we test it then? pass -def save_pickle_testcases(version): +def create_pickles(version): pickle_testcases_dir = os.path.join(PICKE_TESTCASES_ROOTDIR, version) - # Fails if the directory already exists, which is what we want here - # since we don't want to overwrite testcases accidentally. - os.mkdir(pickle_testcases_dir) + if os.path.exists(pickle_testcases_dir): + raise OSError("{} already exists. 
Aborting.".format(pickle_testcases_dir)) + pickle_testcases_tempdir = pickle_testcases_dir + "_inprogress" + os.mkdir(pickle_testcases_tempdir) - for name, obj in six.iteritems(pickling_testcases): - with open(os.path.join(pickle_testcases_dir, '{}.pickle'.format(name)), 'wb') as f: - pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) + try: + for name, obj in six.iteritems(pickling_testcases): + with open(os.path.join(pickle_testcases_tempdir, "{}.pickle".format(name)), "wb") as f: + pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) + except Exception: + print("Exception during creation of pickles for {}. Removing directory.".format(version)) + shutil.rmtree(pickle_testcases_tempdir) + raise + finally: + os.rename(pickle_testcases_tempdir, pickle_testcases_dir) + print("Successfully created pickle testcases for new version {}.".format(version)) if __name__ == "__main__": import argparse - # Should we use a "create-pickles" sub-command to make things clearer? - arg_parser = argparse.ArgumentParser() + arg_parser = argparse.ArgumentParser(description="Create and save pickle testcases for a new version of patsy.") arg_parser.add_argument("version", help="The version of patsy for which to save a new set of pickle testcases.") args = arg_parser.parse_args() - save_pickle_testcases(args.version) + create_pickles(args.version) From 24ce54c181c050c1c91596ba76ce8509902e6c1c Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 16:52:12 -0700 Subject: [PATCH 09/17] Better error message when unpickling a version that is not supported. 
--- patsy/eval.py | 7 ++----- patsy/util.py | 22 +++++++++++++++++++++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/patsy/eval.py b/patsy/eval.py index 438f022..897d6c1 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -21,7 +21,7 @@ import six from six.moves import cPickle as pickle from patsy import PatsyError -from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling +from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling, check_pickle_version from patsy.tokens import (pretty_untokenize, normalize_token_spacing, python_tokenize) from patsy.compat import call_and_wrap_exc @@ -571,10 +571,7 @@ def __getstate__(self): def __setstate__(self, state): (version, code, origin) = state - assert version == 0 - # TODO Give better error message when version is too recent, etc. - # Should use a single function from somewhere - # + check_pickle_version(version, 0, self.__class__.__name__) self.code = code self.origin = origin diff --git a/patsy/util.py b/patsy/util.py index 18184ea..b3a4dad 100644 --- a/patsy/util.py +++ b/patsy/util.py @@ -24,6 +24,7 @@ ] import sys +from nose.tools import assert_raises import numpy as np import six from six.moves import cStringIO as StringIO @@ -727,7 +728,6 @@ def no_pickling(*args, **kwargs): def assert_no_pickling(obj): import pickle - from nose.tools import assert_raises assert_raises(NotImplementedError, pickle.dumps, obj) # Use like: @@ -749,3 +749,23 @@ def test_safe_string_eq(): assert safe_string_eq(unicode("foo"), "foo") assert not safe_string_eq(np.empty((2, 2)), "foo") + +def check_pickle_version(version, required_version, name=""): + if version > required_version: + error_msg = "This version of patsy is too old to load this pickle" + elif version < required_version: + error_msg = "This pickle is too old and not supported by this version of patsy anymore" + else: + return + + if name: + error_msg += " (for object {})".format(name) + error_msg += "." 
+ + # TODO Use a better exception than ValueError. + raise ValueError(error_msg) + +def test_check_pickle_version(): + assert_raises(ValueError, check_pickle_version, 0, 1) + assert_raises(ValueError, check_pickle_version, 1, 0) + check_pickle_version(0, 0) From 3afb8787700303447243fa94aba93ca6838c97b9 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 16:54:05 -0700 Subject: [PATCH 10/17] More fleshed-out version of high-level tests for pickling design_info objects. --- patsy/test_highlevel.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index 03c6f97..1b95757 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -761,19 +761,24 @@ def test_C_and_pandas_categorical(): [1, 0]]) def test_pickle_builder_roundtrips(): - import numpy as np - # TODO Add center(x) and categorical interaction, and call to np.log to patsy formula. - design_matrix = dmatrix("x + a", {"x": [1, 2, 3], - "a": ["a1", "a2", "a3"]}) - # TODO Remove builder, pass design_info to dmatrix() instead. - builder = design_matrix.design_info.builder - del np - - new_data = {"x": [10, 20, 30], - "a": ["a3", "a1", "a2"]} - m1 = dmatrix(builder, new_data) - - builder2 = pickle.loads(pickle.dumps(design_matrix.design_info.builder)) - m2 = dmatrix(builder2, new_data) + formulas = ["a + b", + "center(i) + a * b + np.log(x)"] + dataset = {"i": range(3), + "x": [1.1, 2.2, 3.3], + "a": list("abc"), + "b": list("xyx")} + + for formula in formulas: + yield check_pickle_builder_roundtrips, formula, dataset + +def check_pickle_builder_roundtrips(formula, dataset): + design_matrix = dmatrix(formula, dataset) + # TODO Make new_dataset have different values from dataset? 
+ new_dataset = dataset + + m1 = dmatrix(design_matrix.design_info, new_dataset) + + unpickled_design_info = pickle.loads(pickle.dumps(design_matrix.design_info)) + m2 = dmatrix(unpickled_design_info, new_dataset) assert np.allclose(m1, m2) From 8dd9023e8e5243c470f28a61ffe443814b37ab4d Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 16:54:58 -0700 Subject: [PATCH 11/17] Small code tweaks. --- patsy/test_pickling.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index c23e3bc..737446c 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -28,8 +28,8 @@ def test_pickling_old_versions_still_work(): yield check_pickling_old_versions_still_work, os.path.join(dirpath, fname) def check_pickling_old_versions_still_work(pickle_filename): + testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0] with open(pickle_filename, 'rb') as f: - testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0] # When adding features to a class, it will happen that there is no # way to make an instance of that version version of that class # equal to any instance of a previous version. How do we handle @@ -38,7 +38,7 @@ def check_pickling_old_versions_still_work(pickle_filename): assert pickling_testcases[testcase_name] == pickle.load(f) def test_unpickling_future_gives_sensible_error_msg(): - # TODO How do we do this? And how do we test it then? + # TODO How would we go about testing this? pass def create_pickles(version): @@ -53,7 +53,8 @@ def create_pickles(version): with open(os.path.join(pickle_testcases_tempdir, "{}.pickle".format(name)), "wb") as f: pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) except Exception: - print("Exception during creation of pickles for {}. Removing directory.".format(version)) + print("Exception during creation of pickles for {}. 
" \ + "Removing partially created directory.".format(version)) shutil.rmtree(pickle_testcases_tempdir) raise finally: From d41836878d5fd67b3b7b6527ff1391ab0e8fa8db Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 16:55:40 -0700 Subject: [PATCH 12/17] Sample test pickle. --- pickle_testcases/0.5/evalfactor_simple.pickle | Bin 33 -> 46 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/pickle_testcases/0.5/evalfactor_simple.pickle b/pickle_testcases/0.5/evalfactor_simple.pickle index fb505999bf489d7e6698856844edec0f24ec2532..1be6d51e6bb45f987d98eed29513af2cb1062a4b 100644 GIT binary patch delta 18 ZcmY$?o1n-W%9^O4t&mj6?AM;82LL9t1aANU delta 4 LcmdN>oS+B*0$>3F From f92d35bdcff46a3f3d1ef04cbe9975ea9de0d5f0 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 17:07:15 -0700 Subject: [PATCH 13/17] Add TODO note for force=True future option for creating test pickles. --- patsy/test_pickling.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index 737446c..a195c5f 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -42,6 +42,11 @@ def test_unpickling_future_gives_sensible_error_msg(): pass def create_pickles(version): + # TODO Add options to overwrite pickles directory, with force=True + # during development. + # TODO Add safety check that said force=True option will still give an + # error when trying to remove pickles for a released version, by + # comparing the version argument here with patsy.__version__. pickle_testcases_dir = os.path.join(PICKE_TESTCASES_ROOTDIR, version) if os.path.exists(pickle_testcases_dir): raise OSError("{} already exists. Aborting.".format(pickle_testcases_dir)) From 08cb4f1492dfbcb6448edfc1e53258f929e7e0c3 Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Wed, 5 Apr 2017 19:30:17 -0500 Subject: [PATCH 14/17] Adding __getstate__ and __setstate__ on relevant factors. Adding __eq__ as appropriate to make assert tests work. 
--- patsy/categorical.py | 38 +++++++++++++-- patsy/constraint.py | 7 ++- patsy/contrasts.py | 22 +++++++-- patsy/desc.py | 37 +++++++++++--- patsy/design_info.py | 63 +++++++++++++++++++----- patsy/eval.py | 96 ++++++++++++++++++++++++++++++++++--- patsy/infix_parser.py | 12 ++--- patsy/mgcv_cubic_splines.py | 16 +++++-- patsy/missing.py | 7 ++- patsy/origin.py | 6 ++- patsy/redundancy.py | 2 +- patsy/splines.py | 9 ++-- patsy/state.py | 30 +++++++++--- patsy/test_pickling.py | 57 +++++++++++++++++++++- 14 files changed, 342 insertions(+), 60 deletions(-) diff --git a/patsy/categorical.py b/patsy/categorical.py index 8e2a756..5812b39 100644 --- a/patsy/categorical.py +++ b/patsy/categorical.py @@ -46,20 +46,37 @@ pandas_Categorical_categories, pandas_Categorical_codes, safe_issubdtype, - no_pickling, assert_no_pickling) + no_pickling, assert_no_pickling, check_pickle_version) +from patsy.state import StatefulTransform if have_pandas: import pandas # Objects of this type will always be treated as categorical, with the # specified levels and contrast (if given). 
+ class _CategoricalBox(object): def __init__(self, data, contrast, levels): self.data = data self.contrast = contrast self.levels = levels - __getstate__ = no_pickling + def __getstate__(self): + data = getattr(self, 'data') + contrast = getattr(self, 'contrast') + levels = getattr(self, 'levels') + return (0, data, contrast, levels) + + def __setstate__(self, pickle): + version, data, contrast, levels = pickle + check_pickle_version(version, 0, name=self.__class__.__name__) + self.data = data + self.contrast = contrast + self.levels = levels + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + def C(data, contrast=None, levels=None): """ @@ -120,7 +137,20 @@ def test_C(): assert c4.contrast == "NEW CONTRAST" assert c4.levels == "LEVELS" - assert_no_pickling(c4) + # assert_no_pickling(c4) + + +def test_C_pickle(): + from six.moves import cPickle as pickle + c1 = C("asdf") + assert c1 == pickle.loads(pickle.dumps(c1)) + c2 = C("DATA", "CONTRAST", "LEVELS") + assert c2 == pickle.loads(pickle.dumps(c2)) + c3 = C(c2, levels="NEW LEVELS") + assert c3 == pickle.loads(pickle.dumps(c3)) + c4 = C(c2, "NEW CONTRAST") + assert c4 == pickle.loads(pickle.dumps(c4)) + def guess_categorical(data): if safe_is_pandas_categorical(data): @@ -217,7 +247,7 @@ def sniff(self, data): # would be too. Otherwise we need to keep looking. 
return self._level_set == set([True, False]) - __getstate__ = no_pickling + # __getstate__ = no_pickling def test_CategoricalSniffer(): from patsy.missing import NAAction diff --git a/patsy/constraint.py b/patsy/constraint.py index d710a94..31288e5 100644 --- a/patsy/constraint.py +++ b/patsy/constraint.py @@ -69,7 +69,10 @@ def _repr_pretty_(self, p, cycle): return repr_pretty_impl(p, self, [self.variable_names, self.coefs, self.constants]) - __getstate__ = no_pickling + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + # __getstate__ = no_pickling @classmethod def combine(cls, constraints): @@ -125,7 +128,7 @@ def test_LinearConstraint(): assert_raises(ValueError, LinearConstraint, ["a", "b"], np.zeros((0, 2))) - assert_no_pickling(lc) + # assert_no_pickling(lc) def test_LinearConstraint_combine(): comb = LinearConstraint.combine([LinearConstraint(["a", "b"], [1, 0]), diff --git a/patsy/contrasts.py b/patsy/contrasts.py index 3f1cf54..ea3d0bc 100644 --- a/patsy/contrasts.py +++ b/patsy/contrasts.py @@ -16,7 +16,7 @@ from patsy import PatsyError from patsy.util import (repr_pretty_delegate, repr_pretty_impl, safe_issubdtype, - no_pickling, assert_no_pickling) + no_pickling, assert_no_pickling, check_pickle_version) class ContrastMatrix(object): """A simple container for a matrix used for coding categorical factors. 
@@ -47,7 +47,23 @@ def __init__(self, matrix, column_suffixes): def _repr_pretty_(self, p, cycle): repr_pretty_impl(p, self, [self.matrix, self.column_suffixes]) - __getstate__ = no_pickling + + def __getstate__(self): + return (0, self.matrix, self.column_suffixes) + + def __setstate__(self, pickle): + version, matrix, column_suffixes = pickle + check_pickle_version(version, 0, name=self.__class__.__name__) + self.matrix = matrix + self.column_suffixes = column_suffixes + + def __eq__(self, other): + if self.column_suffixes != other.column_suffixes: + return False + if not np.array_equal(self.matrix, other.matrix): + return False + return True + def test_ContrastMatrix(): cm = ContrastMatrix([[1, 0], [0, 1]], ["a", "b"]) @@ -59,7 +75,7 @@ def test_ContrastMatrix(): from nose.tools import assert_raises assert_raises(PatsyError, ContrastMatrix, [[1], [0]], ["a", "b"]) - assert_no_pickling(cm) + # assert_no_pickling(cm) # This always produces an object of the type that Python calls 'str' (whether # that be a Python 2 string-of-bytes or a Python 3 string-of-unicode). 
It does diff --git a/patsy/desc.py b/patsy/desc.py index 8842b8b..de0263c 100644 --- a/patsy/desc.py +++ b/patsy/desc.py @@ -14,7 +14,7 @@ from patsy.eval import EvalEnvironment, EvalFactor from patsy.util import uniqueify_list from patsy.util import repr_pretty_delegate, repr_pretty_impl -from patsy.util import no_pickling, assert_no_pickling +from patsy.util import no_pickling, assert_no_pickling, check_pickle_version # These are made available in the patsy.* namespace __all__ = ["Term", "ModelDesc", "INTERCEPT"] @@ -65,10 +65,19 @@ def name(self): else: return "Intercept" - __getstate__ = no_pickling + def __getstate__(self): + return (0, self.factors) + + def __setstate__(self, pickle): + version, factors = pickle + check_pickle_version(version, 0, name=self.__class__.__name__) + self.factors = factors + + # __getstate__ = no_pickling INTERCEPT = Term([]) + class _MockFactor(object): def __init__(self, name): self._name = name @@ -76,6 +85,13 @@ def __init__(self, name): def name(self): return self._name + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + def __hash__(self): + return hash((_MockFactor, str(self._name))) + + def test_Term(): assert Term([1, 2, 1]).factors == (1, 2) assert Term([1, 2]) == Term([2, 1]) @@ -86,7 +102,11 @@ def test_Term(): assert Term([f2, f1]).name() == "b:a" assert Term([]).name() == "Intercept" - assert_no_pickling(Term([])) + # assert_no_pickling(Term([])) + + from six.moves import cPickle as pickle + t = Term([f1, f2]) + assert t == pickle.loads(pickle.dumps(t, pickle.HIGHEST_PROTOCOL)) class ModelDesc(object): """A simple container representing the termlists parsed from a formula. 
@@ -166,7 +186,10 @@ def from_formula(cls, tree_or_string): assert isinstance(value, cls) return value - __getstate__ = no_pickling + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + # __getstate__ = no_pickling def test_ModelDesc(): f1 = _MockFactor("a") @@ -177,7 +200,9 @@ def test_ModelDesc(): print(m.describe()) assert m.describe() == "1 + a ~ 0 + a + a:b" - assert_no_pickling(m) + # assert_no_pickling(m) + from six.moves import cPickle as pickle + assert m == pickle.loads(pickle.dumps(m, pickle.HIGHEST_PROTOCOL)) assert ModelDesc([], []).describe() == "~ 0" assert ModelDesc([INTERCEPT], []).describe() == "1 ~ 0" @@ -209,7 +234,7 @@ def _pretty_repr_(self, p, cycle): # pragma: no cover [self.intercept, self.intercept_origin, self.intercept_removed, self.terms]) - __getstate__ = no_pickling + # __getstate__ = no_pickling def _maybe_add_intercept(doit, terms): if doit: diff --git a/patsy/design_info.py b/patsy/design_info.py index 438a23c..5a755af 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -32,10 +32,11 @@ from patsy.compat import OrderedDict from patsy.util import (repr_pretty_delegate, repr_pretty_impl, safe_issubdtype, - no_pickling, assert_no_pickling) + no_pickling, assert_no_pickling, check_pickle_version) from patsy.constraint import linear_constraint from patsy.contrasts import ContrastMatrix from patsy.desc import ModelDesc, Term +from patsy import __version__ class FactorInfo(object): """A FactorInfo object is a simple class that provides some metadata about @@ -120,7 +121,17 @@ def __repr__(self): kwlist.append(("categories", self.categories)) repr_pretty_impl(p, self, [], kwlist) - __getstate__ = no_pickling + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + def __hash__(self): + if not self.categories: + categories = 'NoCategories' + else: + categories = frozenset(self.categories) + return hash((FactorInfo, str(self.factor), str(self.type), + str(self.state), str(self.num_columns), 
categories)) + def test_FactorInfo(): fi1 = FactorInfo("asdf", "numerical", {"a": 1}, num_columns=10) @@ -234,7 +245,10 @@ def _repr_pretty_(self, p, cycle): ("contrast_matrices", self.contrast_matrices), ("num_columns", self.num_columns)]) - __getstate__ = no_pickling + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + # __getstate__ = no_pickling def test_SubtermInfo(): cm = ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]) @@ -691,16 +705,40 @@ def from_array(cls, array_like, default_column_prefix="column"): for i in columns] return DesignInfo(column_names) - __getstate__ = no_pickling + def __getstate__(self): + return (0, self.column_name_indexes, self.factor_infos, + self.term_codings, self.term_slices, self.term_name_slices) + + def __setstate__(self, pickle): + (version, column_name_indexes, factor_infos, term_codings, + term_slices, term_name_slices) = pickle + check_pickle_version(version, 0, self.__class__.__name__) + self.column_name_indexes = column_name_indexes + self.factor_infos = factor_infos + self.term_codings = term_codings + self.term_slices = term_slices + self.term_name_slices = term_name_slices + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + +class _MockFactor(object): + def __init__(self, name): + self._name = name + + def name(self): + return self._name + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + def __hash__(self): + return hash((_MockFactor, str(self._name))) + def test_DesignInfo(): from nose.tools import assert_raises - class _MockFactor(object): - def __init__(self, name): - self._name = name - - def name(self): - return self._name f_x = _MockFactor("x") f_y = _MockFactor("y") t_x = Term([f_x]) @@ -734,8 +772,9 @@ def name(self): # smoke test repr(di) + from six.moves import cPickle as pickle - assert_no_pickling(di) + assert di == pickle.loads(pickle.dumps(di, pickle.HIGHEST_PROTOCOL)) # One without term objects di = DesignInfo(["a1", "a2", "a3", "b"]) @@ 
-756,6 +795,8 @@ def name(self): assert di.slice("a3") == slice(2, 3) assert di.slice("b") == slice(3, 4) + assert di == pickle.loads(pickle.dumps(di, pickle.HIGHEST_PROTOCOL)) + # Check intercept handling in describe() assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b" diff --git a/patsy/eval.py b/patsy/eval.py index 897d6c1..aa7a576 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -10,7 +10,7 @@ # for __future__ flags! # These are made available in the patsy.* namespace -__all__ = ["EvalEnvironment", "EvalFactor"] +__all__ = ["EvalEnvironment", "EvalFactor", "VarLookupDict"] import sys import __future__ @@ -62,6 +62,9 @@ def __contains__(self, key): else: return True + def __eq__(self, other): + return self.__dict__ == other.__dict__ + def get(self, key, default=None): try: return self[key] @@ -71,8 +74,13 @@ def get(self, key, default=None): def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._dicts) - __getstate__ = no_pickling + def __getstate__(self): + return (0, self._dicts) + def __setstate__(self, pickle): + version, dicts = pickle + check_pickle_version(version, 0, name=self.__class__.__name__) + self._dicts = dicts def test_VarLookupDict(): d1 = {"a": 1} @@ -90,7 +98,7 @@ def test_VarLookupDict(): assert ds.get("c") is None assert isinstance(repr(ds), six.string_types) - assert_no_pickling(ds) + # assert_no_pickling(ds) def ast_names(code): """Iterator that yields all the (ast) names in a Python expression. 
@@ -247,7 +255,8 @@ def _namespace_ids(self): def __eq__(self, other): return (isinstance(other, EvalEnvironment) and self.flags == other.flags - and self._namespace_ids() == other._namespace_ids()) + and self.namespace == other.namespace) + # and self._namespace_ids() == other._namespace_ids()) def __ne__(self, other): return not self == other @@ -257,7 +266,80 @@ def __hash__(self): self.flags, tuple(self._namespace_ids()))) - __getstate__ = no_pickling + def __getstate__(self): + namespaces = self._namespaces + namespaces = _replace_un_pickleable(namespaces) + return (0, namespaces, self.flags) + + def __setstate__(self, pickle): + version, namespaces, flags = pickle + check_pickle_version(version, 0, self.__class__.__name__) + self.flags = flags + self._namespaces = _return_un_pickleable(namespaces) + + +class ObjectHolder(object): + def __init__(self, kind, module, name): + self.kind = kind + self.module = module + self.name = name + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + +def test_objectholder(): + x = ObjectHolder('function', 'module.name', 'functionname') + assert x.kind == 'function' + assert x.module == 'module.name' + assert x.name == 'functionname' + y = ObjectHolder('function', 'module.name', 'functionname') + assert x == y + + +def _replace_un_pickleable(namespaces): + from types import ModuleType + namespaces = [i for i in namespaces] + for i, namespace in enumerate(namespaces): + namespace = {key: namespace[key] for key in namespace.keys()} + for key in namespace.keys(): + if isinstance(namespace[key], ModuleType): + namespace[key] = ObjectHolder('module', + namespace[key].__name__, + key) + namespaces[i] = namespace + return namespaces + + +def _return_un_pickleable(namespaces): + import importlib + namespaces = [i for i in namespaces] + for i, namespace in enumerate(namespaces): + namespace = {key: namespace[key] for key in namespace.keys()} + for key in namespace.keys(): + if isinstance(namespace[key], 
ObjectHolder): + if namespace[key].kind == 'module': + a = importlib.import_module(namespace[key].module) + namespace[key] = a + namespaces[i] = namespace + return namespaces + + +def test_replace_functions(): + import numpy as np + x = [{'np': np}, {}] + x2 = _replace_un_pickleable(x) + y = [{'np': ObjectHolder('module', np.__name__, 'np')}, {}] + assert x2 == y + + +def test_return_function(): + import numpy as np + x = [{'np': ObjectHolder('module', np.__name__, 'np')}, {}] + x2 = _return_un_pickleable(x) + y = [{'np': np}, {}] + assert x2 == y + def _a(): # pragma: no cover _a = 1 @@ -300,7 +382,7 @@ def test_EvalEnvironment_capture_namespace(): assert_raises(TypeError, EvalEnvironment.capture, 1.2) - assert_no_pickling(EvalEnvironment.capture()) + # assert_no_pickling(EvalEnvironment.capture()) def test_EvalEnvironment_capture_flags(): if sys.version_info >= (3,): @@ -413,7 +495,7 @@ def test_EvalEnvironment_eq(): capture_local_env = lambda: EvalEnvironment.capture(0) env3 = capture_local_env() env4 = capture_local_env() - assert env3 != env4 + assert env3 != env4 # This fails... _builtins_dict = {} six.exec_("from patsy.builtins import *", {}, _builtins_dict) diff --git a/patsy/infix_parser.py b/patsy/infix_parser.py index bdf395d..f6ac31e 100644 --- a/patsy/infix_parser.py +++ b/patsy/infix_parser.py @@ -44,7 +44,7 @@ def __init__(self, print_as): def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._print_as) - __getstate__ = no_pickling + # __getstate__ = no_pickling class Token(object): """A token with possible payload. 
@@ -70,7 +70,7 @@ def _repr_pretty_(self, p, cycle): kwargs = [("extra", self.extra)] return repr_pretty_impl(p, self, [self.type, self.origin], kwargs) - __getstate__ = no_pickling + # __getstate__ = no_pickling class ParseNode(object): def __init__(self, type, token, args, origin): @@ -83,7 +83,7 @@ def __init__(self, type, token, args, origin): def _repr_pretty_(self, p, cycle): return repr_pretty_impl(p, self, [self.type, self.token, self.args]) - __getstate__ = no_pickling + # __getstate__ = no_pickling class Operator(object): def __init__(self, token_type, arity, precedence): @@ -95,14 +95,14 @@ def __repr__(self): return "%s(%r, %r, %r)" % (self.__class__.__name__, self.token_type, self.arity, self.precedence) - __getstate__ = no_pickling + # __getstate__ = no_pickling class _StackOperator(object): def __init__(self, op, token): self.op = op self.token = token - __getstate__ = no_pickling + # __getstate__ = no_pickling _open_paren = Operator(Token.LPAREN, -1, -9999999) @@ -115,7 +115,7 @@ def __init__(self, unary_ops, binary_ops, atomic_types, trace): self.atomic_types = atomic_types self.trace = trace - __getstate__ = no_pickling + # __getstate__ = no_pickling def _read_noun_context(token, c): if token.type == Token.LPAREN: diff --git a/patsy/mgcv_cubic_splines.py b/patsy/mgcv_cubic_splines.py index fa6c68b..7770cf7 100644 --- a/patsy/mgcv_cubic_splines.py +++ b/patsy/mgcv_cubic_splines.py @@ -11,7 +11,7 @@ from patsy.util import (have_pandas, atleast_2d_column_default, no_pickling, assert_no_pickling, safe_string_eq) -from patsy.state import stateful_transform +from patsy.state import stateful_transform, StatefulTransform if have_pandas: import pandas @@ -541,7 +541,7 @@ def _get_centering_constraint_from_dmatrix(design_matrix): return design_matrix.mean(axis=0).reshape((1, design_matrix.shape[1])) -class CubicRegressionSpline(object): +class CubicRegressionSpline(StatefulTransform): """Base class for cubic regression spline stateful transforms This class 
contains all the functionality for the following stateful @@ -685,7 +685,7 @@ def transform(self, x, df=None, knots=None, dm.index = x_orig.index return dm - __getstate__ = no_pickling + # __getstate__ = no_pickling class CR(CubicRegressionSpline): @@ -717,6 +717,8 @@ def __init__(self): CubicRegressionSpline.__init__(self, name='cr', cyclic=False) cr = stateful_transform(CR) +cr.__qualname__ = 'cr' +cr.__name__ = 'cr' class CC(CubicRegressionSpline): @@ -747,6 +749,8 @@ def __init__(self): CubicRegressionSpline.__init__(self, name='cc', cyclic=True) cc = stateful_transform(CC) +cc.__qualname__ = 'cc' +cc.__name__ = 'cc' def test_crs_errors(): @@ -851,7 +855,7 @@ def test_crs_with_specific_constraint(): assert np.allclose(result1, result2, rtol=1e-12, atol=0.) -class TE(object): +class TE(StatefulTransform): """te(s1, .., sn, constraints=None) Generates smooth of several covariates as a tensor product of the bases @@ -940,9 +944,11 @@ def transform(self, *args, **kwargs): return _get_te_dmatrix(args_2d, self._constraints) - __getstate__ = no_pickling + # __getstate__ = no_pickling te = stateful_transform(TE) +te.__qualname__ = 'te' +te.__name__ = 'te' def test_te_errors(): diff --git a/patsy/missing.py b/patsy/missing.py index 4563d94..2bbd1c2 100644 --- a/patsy/missing.py +++ b/patsy/missing.py @@ -180,7 +180,10 @@ def _handle_NA_drop(self, values, is_NAs, origins): # "..." to handle 1- versus 2-dim indexing return [v[good_mask, ...] 
for v in values] - __getstate__ = no_pickling + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + # __getstate__ = no_pickling def test_NAAction_basic(): from nose.tools import assert_raises @@ -188,7 +191,7 @@ def test_NAAction_basic(): assert_raises(ValueError, NAAction, NA_types=("NaN", "asdf")) assert_raises(ValueError, NAAction, NA_types="NaN") - assert_no_pickling(NAAction()) + # assert_no_pickling(NAAction()) def test_NAAction_NA_types_numerical(): for NA_types in [[], ["NaN"], ["None"], ["NaN", "None"]]: diff --git a/patsy/origin.py b/patsy/origin.py index 68ed71a..2859313 100644 --- a/patsy/origin.py +++ b/patsy/origin.py @@ -112,9 +112,11 @@ def __repr__(self): self.code[self.end:], self.start, self.end) + """ # We reimplement patsy.util.no_pickling, to avoid circular import issues def __getstate__(self): raise NotImplementedError + """ def test_Origin(): o1 = Origin("012345", 2, 4) @@ -138,5 +140,5 @@ def __init__(self, origin=None): assert Origin.combine([ObjWithOrigin(), ObjWithOrigin()]) is None - from patsy.util import assert_no_pickling - assert_no_pickling(Origin("", 0, 0)) + # from patsy.util import assert_no_pickling + # assert_no_pickling(Origin("", 0, 0)) diff --git a/patsy/redundancy.py b/patsy/redundancy.py index 415fa96..9fa78a8 100644 --- a/patsy/redundancy.py +++ b/patsy/redundancy.py @@ -73,7 +73,7 @@ def __repr__(self): suffix = "-" return "%r%s" % (self.factor, suffix) - __getstate__ = no_pickling + # __getstate__ = no_pickling class _Subterm(object): "Also immutable." 
diff --git a/patsy/splines.py b/patsy/splines.py index 2a2faa2..8a8abee 100644 --- a/patsy/splines.py +++ b/patsy/splines.py @@ -10,7 +10,7 @@ import numpy as np from patsy.util import have_pandas, no_pickling, assert_no_pickling -from patsy.state import stateful_transform +from patsy.state import stateful_transform, StatefulTransform if have_pandas: import pandas @@ -74,7 +74,7 @@ def t(x, prob, expected): t([10, 20], [0.3, 0.7], [13, 17]) t(list(range(10)), [0.3, 0.7], [2.7, 6.3]) -class BS(object): +class BS(StatefulTransform): """bs(x, df=None, knots=None, degree=3, include_intercept=False, lower_bound=None, upper_bound=None) Generates a B-spline basis for ``x``, allowing non-linear fits. The usual @@ -245,9 +245,11 @@ def transform(self, x, df=None, knots=None, degree=3, basis.index = x.index return basis - __getstate__ = no_pickling + # __getstate__ = no_pickling bs = stateful_transform(BS) +bs.__qualname__ = 'bs' +bs.__name__ = 'bs' def test_bs_compat(): from patsy.test_state import check_stateful @@ -395,7 +397,6 @@ def test_bs_errors(): bs, x, knots=[-4, 0], lower_bound=-3) - # differences between bs and ns (since the R code is a pile of copy-paste): # - degree is always 3 # - different number of interior knots given df (b/c fewer dof used at edges I diff --git a/patsy/state.py b/patsy/state.py index 69f106d..7d759a6 100644 --- a/patsy/state.py +++ b/patsy/state.py @@ -29,10 +29,10 @@ from patsy.util import (atleast_2d_column_default, asarray_or_pandas, pandas_friendly_reshape, wide_dtype_for, safe_issubdtype, - no_pickling, assert_no_pickling) + no_pickling, assert_no_pickling, check_pickle_version) # These are made available in the patsy.* namespace -__all__ = ["stateful_transform", +__all__ = ["stateful_transform", "StatefulTransform", "center", "standardize", "scale", ] @@ -76,7 +76,18 @@ def stateful_transform_wrapper(*args, **kwargs): # class QuantileEstimatingTransform(NonIncrementalStatefulTransform): # def memorize_all(self, input_data, *args, 
**kwargs): -class Center(object): + +class StatefulTransform(object): + def __getstate__(self): + return (0, self.__dict__) + + def __setstate__(self, pickle): + version, dicts = pickle + check_pickle_version(version, 0, name=self.__class__.__name__) + self.__dict__ = dicts + + +class Center(StatefulTransform): """center(x) A stateful transform that centers input data, i.e., subtracts the mean. @@ -116,14 +127,16 @@ def transform(self, x): centered = atleast_2d_column_default(x, preserve_pandas=True) - mean_val return pandas_friendly_reshape(centered, x.shape) - __getstate__ = no_pickling + # __getstate__ = no_pickling center = stateful_transform(Center) +center.__qualname__ = 'center' +center.__name__ = 'center' # See: # http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm # or page 232 of Knuth vol. 3 (3rd ed.). -class Standardize(object): +class Standardize(StatefulTransform): """standardize(x, center=True, rescale=True, ddof=0) A stateful transform that standardizes input data, i.e. 
it subtracts the @@ -174,8 +187,13 @@ def transform(self, x, center=True, rescale=True, ddof=0): x_2d /= np.sqrt(self.current_M2 / (self.current_n - ddof)) return pandas_friendly_reshape(x_2d, x.shape) - __getstate__ = no_pickling + # __getstate__ = no_pickling + standardize = stateful_transform(Standardize) +standardize.__qualname__ = 'standardize' +standardize.__name__ = 'standardize' # R compatibility: scale = standardize +scale.__qualname__ = 'scale' +scale.__name__ = 'scale' diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index a195c5f..e00d081 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -6,27 +6,80 @@ import os import shutil -from patsy import EvalFactor +from patsy import EvalFactor, EvalEnvironment, VarLookupDict + +import numpy as np +from patsy.state import center, scale, standardize +from patsy.categorical import C +from patsy.splines import bs +from patsy.desc import Term, ModelDesc +from patsy.mgcv_cubic_splines import cc, te, cr +from patsy.contrasts import ContrastMatrix +from patsy.constraint import LinearConstraint +from patsy.missing import NAAction +from patsy.origin import Origin + PICKE_TESTCASES_ROOTDIR = os.path.join(os.path.dirname(__file__), '..', 'pickle_testcases') + +class _MockFactor(object): + def __init__(self, name): + self._name = name + + def name(self): + return self._name + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + def __hash__(self): + return hash((_MockFactor, str(self._name))) + +f1 = _MockFactor("a") +f2 = _MockFactor("b") + pickling_testcases = { "evalfactor_simple": EvalFactor("a+b"), + "varlookupdict_simple": VarLookupDict([{"a": 1}, {"a": 2, "b": 3}]), + "evalenv_simple": EvalEnvironment([{"a": 1}]), + "evalenv_transform_center": EvalEnvironment([{'center': center}]), + "evalenv_transform_scale": EvalEnvironment([{'scale': scale}]), + "evalenv_transform_standardize": EvalEnvironment([{ + 'standardize': standardize + }]), + "evalenv_transform_catgorical": 
EvalEnvironment([{'C': C}]), + "evalenv_transform_bs": EvalEnvironment([{'cs': bs}]), + "evalenv_transform_te": EvalEnvironment([{'te': te}]), + "evalenv_transform_cr": EvalEnvironment([{'cs': cr}]), + "evalenv_transform_cc": EvalEnvironment([{'cc': cc}]), + "evalenv_pickle": EvalEnvironment([{'np': np}]), + "term": Term([1, 2, 1]), + "contrast_matrix": ContrastMatrix([[1, 0], [0, 1]], ["a", "b"]), + "linear_constraint": LinearConstraint(["a"], [[0]]), + "model_desc": ModelDesc([Term([]), Term([f1])], + [Term([f1]), Term([f1, f2])]), + "na_action": NAAction(NA_types=["NaN", "None"]), + "origin": Origin("012345", 2, 5) } + def test_pickling_same_version_roundtrips(): for obj in six.itervalues(pickling_testcases): yield (check_pickling_same_version_roundtrips, obj) + def check_pickling_same_version_roundtrips(obj): assert obj == pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) + def test_pickling_old_versions_still_work(): for (dirpath, dirnames, filenames) in os.walk(PICKE_TESTCASES_ROOTDIR): for fname in filenames: if os.path.splitext(fname)[1] == '.pickle': yield check_pickling_old_versions_still_work, os.path.join(dirpath, fname) + def check_pickling_old_versions_still_work(pickle_filename): testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0] with open(pickle_filename, 'rb') as f: @@ -37,10 +90,12 @@ def check_pickling_old_versions_still_work(pickle_filename): # Maybe adding a minimum version requirement to each test? assert pickling_testcases[testcase_name] == pickle.load(f) + def test_unpickling_future_gives_sensible_error_msg(): # TODO How would we go about testing this? pass + def create_pickles(version): # TODO Add options to overwrite pickles directory, with force=True # during development. 
From 0301817c06f33b8f7123637a0594d88a22762249 Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Thu, 6 Apr 2017 08:14:38 -0500 Subject: [PATCH 15/17] Added __getstate__ more appropriately, tests for pickling objects, and special tests for transform equivalence, new assert statement that makes the pickle a dict (not the most robust... but it works), added PickleError, --- patsy/categorical.py | 29 ++-- patsy/constraint.py | 16 ++- patsy/contrasts.py | 1 - patsy/desc.py | 45 ++++--- patsy/design_info.py | 67 +++++---- patsy/eval.py | 24 ++-- patsy/infix_parser.py | 12 +- patsy/mgcv_cubic_splines.py | 43 +++++- patsy/missing.py | 16 ++- patsy/origin.py | 16 ++- patsy/redundancy.py | 2 +- patsy/splines.py | 18 ++- patsy/state.py | 39 +++--- patsy/test_pickling.py | 121 ++++++++++++++--- patsy/util.py | 127 +++++++++++++++++- pickle_testcases/0.5/contrast_matrix.pickle | Bin 0 -> 224 bytes pickle_testcases/0.5/evalenv_pickle.pickle | Bin 0 -> 128 bytes pickle_testcases/0.5/evalenv_simple.pickle | Bin 0 -> 67 bytes .../0.5/evalenv_transform_bs.pickle | Bin 0 -> 89 bytes .../0.5/evalenv_transform_categorical.pickle | Bin 0 -> 89 bytes .../0.5/evalenv_transform_cc.pickle | Bin 0 -> 97 bytes .../0.5/evalenv_transform_center.pickle | Bin 0 -> 88 bytes .../0.5/evalenv_transform_cr.pickle | Bin 0 -> 100 bytes .../0.5/evalenv_transform_scale.pickle | Bin 0 -> 87 bytes .../0.5/evalenv_transform_standardize.pickle | Bin 0 -> 99 bytes .../0.5/evalenv_transform_te.pickle | Bin 0 -> 97 bytes pickle_testcases/0.5/evalfactor_simple.pickle | Bin 46 -> 85 bytes pickle_testcases/0.5/linear_constraint.pickle | Bin 0 -> 303 bytes pickle_testcases/0.5/model_desc.pickle | Bin 0 -> 241 bytes pickle_testcases/0.5/na_action.pickle | Bin 0 -> 102 bytes pickle_testcases/0.5/origin.pickle | Bin 0 -> 92 bytes pickle_testcases/0.5/subterm_info.pickle | Bin 0 -> 361 bytes pickle_testcases/0.5/term.pickle | Bin 0 -> 70 bytes pickle_testcases/0.5/transform_bs_df3.pickle | Bin 0 -> 279 bytes 
.../0.5/transform_bs_knots_13_15_17.pickle | Bin 0 -> 303 bytes pickle_testcases/0.5/transform_cc_df3.pickle | Bin 0 -> 284 bytes .../0.5/transform_cc_knots_13_15_17.pickle | Bin 0 -> 292 bytes pickle_testcases/0.5/transform_center.pickle | Bin 0 -> 218 bytes pickle_testcases/0.5/transform_cr_df3.pickle | Bin 0 -> 276 bytes .../0.5/transform_cr_knots_13_15_17.pickle | Bin 0 -> 292 bytes .../transform_standardize_norescale.pickle | Bin 0 -> 287 bytes .../0.5/transform_standardize_rescale.pickle | Bin 0 -> 287 bytes pickle_testcases/0.5/transform_te_cr5.pickle | Bin 0 -> 81 bytes .../0.5/transform_te_cr5_center.pickle | Bin 0 -> 81 bytes .../0.5/varlookupdict_simple.pickle | Bin 0 -> 80 bytes 45 files changed, 420 insertions(+), 156 deletions(-) create mode 100644 pickle_testcases/0.5/contrast_matrix.pickle create mode 100644 pickle_testcases/0.5/evalenv_pickle.pickle create mode 100644 pickle_testcases/0.5/evalenv_simple.pickle create mode 100644 pickle_testcases/0.5/evalenv_transform_bs.pickle create mode 100644 pickle_testcases/0.5/evalenv_transform_categorical.pickle create mode 100644 pickle_testcases/0.5/evalenv_transform_cc.pickle create mode 100644 pickle_testcases/0.5/evalenv_transform_center.pickle create mode 100644 pickle_testcases/0.5/evalenv_transform_cr.pickle create mode 100644 pickle_testcases/0.5/evalenv_transform_scale.pickle create mode 100644 pickle_testcases/0.5/evalenv_transform_standardize.pickle create mode 100644 pickle_testcases/0.5/evalenv_transform_te.pickle create mode 100644 pickle_testcases/0.5/linear_constraint.pickle create mode 100644 pickle_testcases/0.5/model_desc.pickle create mode 100644 pickle_testcases/0.5/na_action.pickle create mode 100644 pickle_testcases/0.5/origin.pickle create mode 100644 pickle_testcases/0.5/subterm_info.pickle create mode 100644 pickle_testcases/0.5/term.pickle create mode 100644 pickle_testcases/0.5/transform_bs_df3.pickle create mode 100644 pickle_testcases/0.5/transform_bs_knots_13_15_17.pickle 
create mode 100644 pickle_testcases/0.5/transform_cc_df3.pickle create mode 100644 pickle_testcases/0.5/transform_cc_knots_13_15_17.pickle create mode 100644 pickle_testcases/0.5/transform_center.pickle create mode 100644 pickle_testcases/0.5/transform_cr_df3.pickle create mode 100644 pickle_testcases/0.5/transform_cr_knots_13_15_17.pickle create mode 100644 pickle_testcases/0.5/transform_standardize_norescale.pickle create mode 100644 pickle_testcases/0.5/transform_standardize_rescale.pickle create mode 100644 pickle_testcases/0.5/transform_te_cr5.pickle create mode 100644 pickle_testcases/0.5/transform_te_cr5_center.pickle create mode 100644 pickle_testcases/0.5/varlookupdict_simple.pickle diff --git a/patsy/categorical.py b/patsy/categorical.py index 5812b39..131f932 100644 --- a/patsy/categorical.py +++ b/patsy/categorical.py @@ -47,7 +47,6 @@ pandas_Categorical_codes, safe_issubdtype, no_pickling, assert_no_pickling, check_pickle_version) -from patsy.state import StatefulTransform if have_pandas: import pandas @@ -65,17 +64,14 @@ def __getstate__(self): data = getattr(self, 'data') contrast = getattr(self, 'contrast') levels = getattr(self, 'levels') - return (0, data, contrast, levels) + return {'version': 0, 'data': data, 'contrast': contrast, + 'levels': levels} def __setstate__(self, pickle): - version, data, contrast, levels = pickle - check_pickle_version(version, 0, name=self.__class__.__name__) - self.data = data - self.contrast = contrast - self.levels = levels - - def __eq__(self, other): - return self.__dict__ == other.__dict__ + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.data = pickle['data'] + self.contrast = pickle['contrast'] + self.levels = pickle['levels'] def C(data, contrast=None, levels=None): @@ -137,19 +133,18 @@ def test_C(): assert c4.contrast == "NEW CONTRAST" assert c4.levels == "LEVELS" - # assert_no_pickling(c4) - def test_C_pickle(): from six.moves import cPickle as pickle + from patsy.util import 
assert_pickled_equals c1 = C("asdf") - assert c1 == pickle.loads(pickle.dumps(c1)) + assert_pickled_equals(c1, pickle.loads(pickle.dumps(c1))) c2 = C("DATA", "CONTRAST", "LEVELS") - assert c2 == pickle.loads(pickle.dumps(c2)) + assert_pickled_equals(c2, pickle.loads(pickle.dumps(c2))) c3 = C(c2, levels="NEW LEVELS") - assert c3 == pickle.loads(pickle.dumps(c3)) + assert_pickled_equals(c3, pickle.loads(pickle.dumps(c3))) c4 = C(c2, "NEW CONTRAST") - assert c4 == pickle.loads(pickle.dumps(c4)) + assert_pickled_equals(c4, pickle.loads(pickle.dumps(c4))) def guess_categorical(data): @@ -247,7 +242,7 @@ def sniff(self, data): # would be too. Otherwise we need to keep looking. return self._level_set == set([True, False]) - # __getstate__ = no_pickling + __getstate__ = no_pickling def test_CategoricalSniffer(): from patsy.missing import NAAction diff --git a/patsy/constraint.py b/patsy/constraint.py index 31288e5..a2a0bef 100644 --- a/patsy/constraint.py +++ b/patsy/constraint.py @@ -20,7 +20,7 @@ from patsy.origin import Origin from patsy.util import (atleast_2d_column_default, repr_pretty_delegate, repr_pretty_impl, - no_pickling, assert_no_pickling) + no_pickling, assert_no_pickling, check_pickle_version) from patsy.infix_parser import Token, Operator, infix_parse from patsy.parse_formula import _parsing_error_test @@ -69,10 +69,16 @@ def _repr_pretty_(self, p, cycle): return repr_pretty_impl(p, self, [self.variable_names, self.coefs, self.constants]) - def __eq__(self, other): - return self.__dict__ == other.__dict__ + def __getstate__(self): + return {'version': 0, 'variable_names': self.variable_names, + 'coefs': self.coefs, 'constants': self.constants} + + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.variable_names = pickle['variable_names'] + self.coefs = pickle['coefs'] + self.constants = pickle['constants'] - # __getstate__ = no_pickling @classmethod def combine(cls, constraints): @@ -128,8 +134,6 
@@ def test_LinearConstraint(): assert_raises(ValueError, LinearConstraint, ["a", "b"], np.zeros((0, 2))) - # assert_no_pickling(lc) - def test_LinearConstraint_combine(): comb = LinearConstraint.combine([LinearConstraint(["a", "b"], [1, 0]), LinearConstraint(["a", "b"], [0, 1], [1])]) diff --git a/patsy/contrasts.py b/patsy/contrasts.py index ea3d0bc..4173f2e 100644 --- a/patsy/contrasts.py +++ b/patsy/contrasts.py @@ -75,7 +75,6 @@ def test_ContrastMatrix(): from nose.tools import assert_raises assert_raises(PatsyError, ContrastMatrix, [[1], [0]], ["a", "b"]) - # assert_no_pickling(cm) # This always produces an object of the type that Python calls 'str' (whether # that be a Python 2 string-of-bytes or a Python 3 string-of-unicode). It does diff --git a/patsy/desc.py b/patsy/desc.py index de0263c..10b9844 100644 --- a/patsy/desc.py +++ b/patsy/desc.py @@ -66,14 +66,12 @@ def name(self): return "Intercept" def __getstate__(self): - return (0, self.factors) + return {'version': 0, 'factors': self.factors} def __setstate__(self, pickle): - version, factors = pickle - check_pickle_version(version, 0, name=self.__class__.__name__) - self.factors = factors + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.factors = pickle['factors'] - # __getstate__ = no_pickling INTERCEPT = Term([]) @@ -85,12 +83,6 @@ def __init__(self, name): def name(self): return self._name - def __eq__(self, other): - return self.__dict__ == other.__dict__ - - def __hash__(self): - return hash((_MockFactor, str(self._name))) - def test_Term(): assert Term([1, 2, 1]).factors == (1, 2) @@ -102,11 +94,12 @@ def test_Term(): assert Term([f2, f1]).name() == "b:a" assert Term([]).name() == "Intercept" - # assert_no_pickling(Term([])) - from six.moves import cPickle as pickle + from patsy.util import assert_pickled_equals t = Term([f1, f2]) - assert t == pickle.loads(pickle.dumps(t, pickle.HIGHEST_PROTOCOL)) + t2 = pickle.loads(pickle.dumps(t, pickle.HIGHEST_PROTOCOL)) + 
assert_pickled_equals(t, t2) + class ModelDesc(object): """A simple container representing the termlists parsed from a formula. @@ -168,7 +161,7 @@ def term_code(term): if term != INTERCEPT] result += " + ".join(term_names) return result - + @classmethod def from_formula(cls, tree_or_string): """Construct a :class:`ModelDesc` from a formula string. @@ -186,10 +179,15 @@ def from_formula(cls, tree_or_string): assert isinstance(value, cls) return value - def __eq__(self, other): - return self.__dict__ == other.__dict__ + def __getstate__(self): + return {'version': 0, 'lhs_termlist': self.lhs_termlist, + 'rhs_termlist': self.rhs_termlist} + + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.lhs_termlist = pickle['lhs_termlist'] + self.rhs_termlist = pickle['rhs_termlist'] - # __getstate__ = no_pickling def test_ModelDesc(): f1 = _MockFactor("a") @@ -202,7 +200,9 @@ def test_ModelDesc(): # assert_no_pickling(m) from six.moves import cPickle as pickle - assert m == pickle.loads(pickle.dumps(m, pickle.HIGHEST_PROTOCOL)) + from patsy.util import assert_pickled_equals + m2 = pickle.loads(pickle.dumps(m, pickle.HIGHEST_PROTOCOL)) + assert_pickled_equals(m, m2) assert ModelDesc([], []).describe() == "~ 0" assert ModelDesc([INTERCEPT], []).describe() == "1 ~ 0" @@ -234,7 +234,12 @@ def _pretty_repr_(self, p, cycle): # pragma: no cover [self.intercept, self.intercept_origin, self.intercept_removed, self.terms]) - # __getstate__ = no_pickling + __getstate__ = no_pickling + + +def test_IntermediateExpr_smoke(): + assert_no_pickling(IntermediateExpr(False, None, True, [])) + def _maybe_add_intercept(doit, terms): if doit: diff --git a/patsy/design_info.py b/patsy/design_info.py index 5a755af..885503f 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -121,16 +121,18 @@ def __repr__(self): kwlist.append(("categories", self.categories)) repr_pretty_impl(p, self, [], kwlist) - def __eq__(self, other): - return 
self.__dict__ == other.__dict__ + def __getstate__(self): + return {'version': 0, 'factor': self.factor, 'type': self.type, + 'state': self.state, 'num_columns': self.num_columns, + 'categories': self.categories} - def __hash__(self): - if not self.categories: - categories = 'NoCategories' - else: - categories = frozenset(self.categories) - return hash((FactorInfo, str(self.factor), str(self.type), - str(self.state), str(self.num_columns), categories)) + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.factor = pickle['factor'] + self.type = pickle['type'] + self.state = pickle['state'] + self.num_columns = pickle['num_columns'] + self.categories = pickle['categories'] def test_FactorInfo(): @@ -245,10 +247,17 @@ def _repr_pretty_(self, p, cycle): ("contrast_matrices", self.contrast_matrices), ("num_columns", self.num_columns)]) - def __eq__(self, other): - return self.__dict__ == other.__dict__ + def __getstate__(self): + return {'version': 0, 'factors': self.factors, + 'contrast_matrices': self.contrast_matrices, + 'num_columns': self.num_columns} + + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.factors = pickle['factors'] + self.contrast_matrices = pickle['contrast_matrices'] + self.num_columns = pickle['num_columns'] - # __getstate__ = no_pickling def test_SubtermInfo(): cm = ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]) @@ -706,21 +715,19 @@ def from_array(cls, array_like, default_column_prefix="column"): return DesignInfo(column_names) def __getstate__(self): - return (0, self.column_name_indexes, self.factor_infos, - self.term_codings, self.term_slices, self.term_name_slices) + return {'version': 0, 'column_name_indexes': self.column_name_indexes, + 'factor_infos': self.factor_infos, + 'term_codings': self.term_codings, + 'term_slices': self.term_slices, + 'term_name_slices': self.term_name_slices} def __setstate__(self, pickle): - 
(version, column_name_indexes, factor_infos, term_codings, - term_slices, term_name_slices) = pickle - check_pickle_version(version, 0, self.__class__.__name__) - self.column_name_indexes = column_name_indexes - self.factor_infos = factor_infos - self.term_codings = term_codings - self.term_slices = term_slices - self.term_name_slices = term_name_slices - - def __eq__(self, other): - return self.__dict__ == other.__dict__ + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.column_name_indexes = pickle['column_name_indexes'] + self.factor_infos = pickle['factor_infos'] + self.term_codings = pickle['term_codings'] + self.term_slices = pickle['term_slices'] + self.term_name_slices = pickle['term_name_slices'] class _MockFactor(object): @@ -772,9 +779,12 @@ def test_DesignInfo(): # smoke test repr(di) - from six.moves import cPickle as pickle - assert di == pickle.loads(pickle.dumps(di, pickle.HIGHEST_PROTOCOL)) + # Pickling check + from six.moves import cPickle as pickle + from patsy.util import assert_pickled_equals + di2 = pickle.loads(pickle.dumps(di, pickle.HIGHEST_PROTOCOL)) + assert_pickled_equals(di, di2) # One without term objects di = DesignInfo(["a1", "a2", "a3", "b"]) @@ -795,7 +805,8 @@ def test_DesignInfo(): assert di.slice("a3") == slice(2, 3) assert di.slice("b") == slice(3, 4) - assert di == pickle.loads(pickle.dumps(di, pickle.HIGHEST_PROTOCOL)) + di2 = pickle.loads(pickle.dumps(di, pickle.HIGHEST_PROTOCOL)) + assert_pickled_equals(di, di2) # Check intercept handling in describe() assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b" diff --git a/patsy/eval.py b/patsy/eval.py index aa7a576..a0d5913 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -10,7 +10,7 @@ # for __future__ flags! 
# These are made available in the patsy.* namespace -__all__ = ["EvalEnvironment", "EvalFactor", "VarLookupDict"] +__all__ = ["EvalEnvironment", "EvalFactor"] import sys import __future__ @@ -62,9 +62,6 @@ def __contains__(self, key): else: return True - def __eq__(self, other): - return self.__dict__ == other.__dict__ - def get(self, key, default=None): try: return self[key] @@ -98,7 +95,6 @@ def test_VarLookupDict(): assert ds.get("c") is None assert isinstance(repr(ds), six.string_types) - # assert_no_pickling(ds) def ast_names(code): """Iterator that yields all the (ast) names in a Python expression. @@ -255,8 +251,7 @@ def _namespace_ids(self): def __eq__(self, other): return (isinstance(other, EvalEnvironment) and self.flags == other.flags - and self.namespace == other.namespace) - # and self._namespace_ids() == other._namespace_ids()) + and self._namespace_ids() == other._namespace_ids()) def __ne__(self, other): return not self == other @@ -382,7 +377,6 @@ def test_EvalEnvironment_capture_namespace(): assert_raises(TypeError, EvalEnvironment.capture, 1.2) - # assert_no_pickling(EvalEnvironment.capture()) def test_EvalEnvironment_capture_flags(): if sys.version_info >= (3,): @@ -649,15 +643,15 @@ def eval(self, memorize_state, data): data) def __getstate__(self): - return (0, self.code, self.origin) + return {'version': 0, 'code': self.code, 'origin': self.origin} - def __setstate__(self, state): - (version, code, origin) = state - check_pickle_version(version, 0, self.__class__.__name__) - self.code = code - self.origin = origin + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.code = pickle['code'] + self.origin = pickle['origin'] def test_EvalFactor_pickle_saves_origin(): + from patsy.util import assert_pickled_equals # The pickling tests use object equality before and after pickling # to test that pickling worked correctly. 
But EvalFactor's origin field # is not used in equality comparisons, so we need a separate test to @@ -667,7 +661,7 @@ def test_EvalFactor_pickle_saves_origin(): new_f = pickle.loads(pickle.dumps(f)) assert f.origin is not None - assert f.origin == new_f.origin + assert_pickled_equals(f, new_f) def test_EvalFactor_basics(): e = EvalFactor("a+b") diff --git a/patsy/infix_parser.py b/patsy/infix_parser.py index f6ac31e..bdf395d 100644 --- a/patsy/infix_parser.py +++ b/patsy/infix_parser.py @@ -44,7 +44,7 @@ def __init__(self, print_as): def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._print_as) - # __getstate__ = no_pickling + __getstate__ = no_pickling class Token(object): """A token with possible payload. @@ -70,7 +70,7 @@ def _repr_pretty_(self, p, cycle): kwargs = [("extra", self.extra)] return repr_pretty_impl(p, self, [self.type, self.origin], kwargs) - # __getstate__ = no_pickling + __getstate__ = no_pickling class ParseNode(object): def __init__(self, type, token, args, origin): @@ -83,7 +83,7 @@ def __init__(self, type, token, args, origin): def _repr_pretty_(self, p, cycle): return repr_pretty_impl(p, self, [self.type, self.token, self.args]) - # __getstate__ = no_pickling + __getstate__ = no_pickling class Operator(object): def __init__(self, token_type, arity, precedence): @@ -95,14 +95,14 @@ def __repr__(self): return "%s(%r, %r, %r)" % (self.__class__.__name__, self.token_type, self.arity, self.precedence) - # __getstate__ = no_pickling + __getstate__ = no_pickling class _StackOperator(object): def __init__(self, op, token): self.op = op self.token = token - # __getstate__ = no_pickling + __getstate__ = no_pickling _open_paren = Operator(Token.LPAREN, -1, -9999999) @@ -115,7 +115,7 @@ def __init__(self, unary_ops, binary_ops, atomic_types, trace): self.atomic_types = atomic_types self.trace = trace - # __getstate__ = no_pickling + __getstate__ = no_pickling def _read_noun_context(token, c): if token.type == Token.LPAREN: diff --git 
a/patsy/mgcv_cubic_splines.py b/patsy/mgcv_cubic_splines.py index 7770cf7..11ec055 100644 --- a/patsy/mgcv_cubic_splines.py +++ b/patsy/mgcv_cubic_splines.py @@ -10,8 +10,9 @@ import numpy as np from patsy.util import (have_pandas, atleast_2d_column_default, - no_pickling, assert_no_pickling, safe_string_eq) -from patsy.state import stateful_transform, StatefulTransform + no_pickling, assert_no_pickling, safe_string_eq, + check_pickle_version) +from patsy.state import stateful_transform if have_pandas: import pandas @@ -541,7 +542,7 @@ def _get_centering_constraint_from_dmatrix(design_matrix): return design_matrix.mean(axis=0).reshape((1, design_matrix.shape[1])) -class CubicRegressionSpline(StatefulTransform): +class CubicRegressionSpline(object): """Base class for cubic regression spline stateful transforms This class contains all the functionality for the following stateful @@ -685,7 +686,7 @@ def transform(self, x, df=None, knots=None, dm.index = x_orig.index return dm - # __getstate__ = no_pickling + __getstate__ = no_pickling class CR(CubicRegressionSpline): @@ -716,6 +717,18 @@ class CR(CubicRegressionSpline): def __init__(self): CubicRegressionSpline.__init__(self, name='cr', cyclic=False) + def __getstate__(self): + return {'version': 0, 'name': self._name, 'cyclic': self._cyclic, + 'all_knots': self._all_knots, 'constraints': self._constraints} + + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self._name = pickle['name'] + self._cyclic = pickle['cyclic'] + self._all_knots = pickle['all_knots'] + self._constraints = pickle['constraints'] + + cr = stateful_transform(CR) cr.__qualname__ = 'cr' cr.__name__ = 'cr' @@ -748,6 +761,18 @@ class CC(CubicRegressionSpline): def __init__(self): CubicRegressionSpline.__init__(self, name='cc', cyclic=True) + def __getstate__(self): + return {'version': 0, 'name': self._name, 'cyclic': self._cyclic, + 'all_knots': self._all_knots, 'constraints': self._constraints} 
+ + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self._name = pickle['name'] + self._cyclic = pickle['cyclic'] + self._all_knots = pickle['all_knots'] + self._constraints = pickle['constraints'] + + cc = stateful_transform(CC) cc.__qualname__ = 'cc' cc.__name__ = 'cc' @@ -855,7 +880,7 @@ def test_crs_with_specific_constraint(): assert np.allclose(result1, result2, rtol=1e-12, atol=0.) -class TE(StatefulTransform): +class TE(object): """te(s1, .., sn, constraints=None) Generates smooth of several covariates as a tensor product of the bases @@ -944,7 +969,13 @@ def transform(self, *args, **kwargs): return _get_te_dmatrix(args_2d, self._constraints) - # __getstate__ = no_pickling + def __getstate__(self): + return {'version': 0, 'constraints': self._constraints} + + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self._constraints = pickle['constraints'] + te = stateful_transform(TE) te.__qualname__ = 'te' diff --git a/patsy/missing.py b/patsy/missing.py index 2bbd1c2..7481c57 100644 --- a/patsy/missing.py +++ b/patsy/missing.py @@ -39,7 +39,7 @@ import numpy as np from patsy import PatsyError from patsy.util import (safe_isnan, safe_scalar_isnan, - no_pickling, assert_no_pickling) + no_pickling, assert_no_pickling, check_pickle_version) # These are made available in the patsy.* namespace __all__ = ["NAAction"] @@ -180,10 +180,15 @@ def _handle_NA_drop(self, values, is_NAs, origins): # "..." to handle 1- versus 2-dim indexing return [v[good_mask, ...] 
for v in values] - def __eq__(self, other): - return self.__dict__ == other.__dict__ + def __getstate__(self): + return {'version': 0, 'NA_types': self.NA_types, + 'on_NA': self.on_NA} + + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.NA_types = pickle['NA_types'] + self.on_NA = pickle['on_NA'] - # __getstate__ = no_pickling def test_NAAction_basic(): from nose.tools import assert_raises @@ -191,7 +196,6 @@ def test_NAAction_basic(): assert_raises(ValueError, NAAction, NA_types=("NaN", "asdf")) assert_raises(ValueError, NAAction, NA_types="NaN") - # assert_no_pickling(NAAction()) def test_NAAction_NA_types_numerical(): for NA_types in [[], ["NaN"], ["None"], ["NaN", "None"]]: @@ -236,7 +240,7 @@ def test_NAAction_drop(): assert np.array_equal(out_values[0], [2, 4]) assert np.array_equal(out_values[1], [20.0, 40.0]) assert np.array_equal(out_values[2], [[3.0, 4.0], [6.0, 7.0]]) - + def test_NAAction_raise(): action = NAAction(on_NA="raise") diff --git a/patsy/origin.py b/patsy/origin.py index 2859313..20be583 100644 --- a/patsy/origin.py +++ b/patsy/origin.py @@ -10,6 +10,7 @@ # These are made available in the patsy.* namespace __all__ = ["Origin"] + class Origin(object): """This represents the origin of some object in some string. 
@@ -118,6 +119,18 @@ def __getstate__(self): raise NotImplementedError """ + def __getstate__(self): + return {'version': 0, 'code': self.code, 'start': self.start, + 'end': self.end} + + def __setstate__(self, pickle): + from patsy.util import check_pickle_version + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.code = pickle['code'] + self.start = pickle['start'] + self.end = pickle['end'] + + def test_Origin(): o1 = Origin("012345", 2, 4) o2 = Origin("012345", 4, 5) @@ -140,5 +153,4 @@ def __init__(self, origin=None): assert Origin.combine([ObjWithOrigin(), ObjWithOrigin()]) is None - # from patsy.util import assert_no_pickling - # assert_no_pickling(Origin("", 0, 0)) + from patsy.util import assert_no_pickling diff --git a/patsy/redundancy.py b/patsy/redundancy.py index 9fa78a8..415fa96 100644 --- a/patsy/redundancy.py +++ b/patsy/redundancy.py @@ -73,7 +73,7 @@ def __repr__(self): suffix = "-" return "%r%s" % (self.factor, suffix) - # __getstate__ = no_pickling + __getstate__ = no_pickling class _Subterm(object): "Also immutable." diff --git a/patsy/splines.py b/patsy/splines.py index 8a8abee..96668fc 100644 --- a/patsy/splines.py +++ b/patsy/splines.py @@ -9,8 +9,9 @@ import numpy as np -from patsy.util import have_pandas, no_pickling, assert_no_pickling -from patsy.state import stateful_transform, StatefulTransform +from patsy.util import (have_pandas, no_pickling, assert_no_pickling, + check_pickle_version) +from patsy.state import stateful_transform if have_pandas: import pandas @@ -74,7 +75,7 @@ def t(x, prob, expected): t([10, 20], [0.3, 0.7], [13, 17]) t(list(range(10)), [0.3, 0.7], [2.7, 6.3]) -class BS(StatefulTransform): +class BS(object): """bs(x, df=None, knots=None, degree=3, include_intercept=False, lower_bound=None, upper_bound=None) Generates a B-spline basis for ``x``, allowing non-linear fits. 
The usual @@ -245,12 +246,21 @@ def transform(self, x, df=None, knots=None, degree=3, basis.index = x.index return basis - # __getstate__ = no_pickling + def __getstate__(self): + return {'version': 0, 'degree': self._degree, + 'all_knots': self._all_knots} + + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self._degree = pickle['degree'] + self._all_knots = pickle['all_knots'] + bs = stateful_transform(BS) bs.__qualname__ = 'bs' bs.__name__ = 'bs' + def test_bs_compat(): from patsy.test_state import check_stateful from patsy.test_splines_bs_data import (R_bs_test_x, diff --git a/patsy/state.py b/patsy/state.py index 7d759a6..a45ed72 100644 --- a/patsy/state.py +++ b/patsy/state.py @@ -32,8 +32,7 @@ no_pickling, assert_no_pickling, check_pickle_version) # These are made available in the patsy.* namespace -__all__ = ["stateful_transform", "StatefulTransform", - "center", "standardize", "scale", +__all__ = ["stateful_transform", "center", "standardize", "scale", ] def stateful_transform(class_): @@ -76,18 +75,8 @@ def stateful_transform_wrapper(*args, **kwargs): # class QuantileEstimatingTransform(NonIncrementalStatefulTransform): # def memorize_all(self, input_data, *args, **kwargs): - -class StatefulTransform(object): - def __getstate__(self): - return (0, self.__dict__) - - def __setstate__(self, pickle): - version, dicts = pickle - check_pickle_version(version, 0, name=self.__class__.__name__) - self.__dict__ = dicts - - -class Center(StatefulTransform): + +class Center(object): """center(x) A stateful transform that centers input data, i.e., subtracts the mean. 
@@ -127,7 +116,14 @@ def transform(self, x): centered = atleast_2d_column_default(x, preserve_pandas=True) - mean_val return pandas_friendly_reshape(centered, x.shape) - # __getstate__ = no_pickling + def __getstate__(self): + return {'version': 0, 'sum': self._sum, 'count': self._count} + + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self._sum = pickle['sum'] + self._count = pickle['count'] + center = stateful_transform(Center) center.__qualname__ = 'center' @@ -136,7 +132,7 @@ def transform(self, x): # See: # http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm # or page 232 of Knuth vol. 3 (3rd ed.). -class Standardize(StatefulTransform): +class Standardize(object): """standardize(x, center=True, rescale=True, ddof=0) A stateful transform that standardizes input data, i.e. it subtracts the @@ -187,7 +183,16 @@ def transform(self, x, center=True, rescale=True, ddof=0): x_2d /= np.sqrt(self.current_M2 / (self.current_n - ddof)) return pandas_friendly_reshape(x_2d, x.shape) - # __getstate__ = no_pickling + def __getstate__(self): + return {'version': 0, 'current_n': self.current_n, + 'current_mean': self.current_mean, + 'current_M2': self.current_M2} + + def __setstate__(self, pickle): + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.current_M2 = pickle['current_M2'] + self.current_mean = pickle['current_mean'] + self.current_n = pickle['current_n'] standardize = stateful_transform(Standardize) diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index e00d081..fb34667 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -6,38 +6,39 @@ import os import shutil -from patsy import EvalFactor, EvalEnvironment, VarLookupDict +from patsy import EvalFactor, EvalEnvironment import numpy as np +from patsy.eval import VarLookupDict from patsy.state import center, scale, standardize from patsy.categorical import C from patsy.splines 
import bs -from patsy.desc import Term, ModelDesc +from patsy.desc import Term, ModelDesc, _MockFactor from patsy.mgcv_cubic_splines import cc, te, cr from patsy.contrasts import ContrastMatrix from patsy.constraint import LinearConstraint from patsy.missing import NAAction from patsy.origin import Origin +from patsy.design_info import SubtermInfo +from patsy.util import assert_pickled_equals -PICKE_TESTCASES_ROOTDIR = os.path.join(os.path.dirname(__file__), '..', 'pickle_testcases') +PICKLE_TESTCASES_ROOTDIR = os.path.join(os.path.dirname(__file__), '..', + 'pickle_testcases') +f1 = _MockFactor("a") +f2 = _MockFactor("b") -class _MockFactor(object): - def __init__(self, name): - self._name = name - - def name(self): - return self._name +cm = ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]) +si = SubtermInfo(["a", "x"], {"a": cm}, 4) - def __eq__(self, other): - return self.__dict__ == other.__dict__ - def __hash__(self): - return hash((_MockFactor, str(self._name))) +def _unwrap_stateful_function(function, *args, **kwargs): + obj = function.__patsy_stateful_transform__() + obj.memorize_chunk(*args, **kwargs) + obj.memorize_finish() + return (obj, args, kwargs) -f1 = _MockFactor("a") -f2 = _MockFactor("b") pickling_testcases = { "evalfactor_simple": EvalFactor("a+b"), @@ -48,7 +49,7 @@ def __hash__(self): "evalenv_transform_standardize": EvalEnvironment([{ 'standardize': standardize }]), - "evalenv_transform_catgorical": EvalEnvironment([{'C': C}]), + "evalenv_transform_categorical": EvalEnvironment([{'C': C}]), "evalenv_transform_bs": EvalEnvironment([{'cs': bs}]), "evalenv_transform_te": EvalEnvironment([{'te': te}]), "evalenv_transform_cr": EvalEnvironment([{'cs': cr}]), @@ -56,25 +57,80 @@ def __hash__(self): "evalenv_pickle": EvalEnvironment([{'np': np}]), "term": Term([1, 2, 1]), "contrast_matrix": ContrastMatrix([[1, 0], [0, 1]], ["a", "b"]), + "subterm_info": si, "linear_constraint": LinearConstraint(["a"], [[0]]), "model_desc": ModelDesc([Term([]), 
Term([f1])], [Term([f1]), Term([f1, f2])]), "na_action": NAAction(NA_types=["NaN", "None"]), - "origin": Origin("012345", 2, 5) + "origin": Origin("012345", 2, 5), + "transform_center": _unwrap_stateful_function(center, + np.arange(10, 20, 0.1)), + "transform_standardize_norescale": _unwrap_stateful_function( + standardize, + np.arange(10, 20, 0.1), + ), + "transform_standardize_rescale": _unwrap_stateful_function( + standardize, + np.arange(10, 20, 0.1), + rescale=True + ), + "transform_bs_df3": _unwrap_stateful_function( + bs, + np.arange(10, 20, 0.1), + df=3 + ), + "transform_bs_knots_13_15_17": _unwrap_stateful_function( + bs, + np.arange(10, 20, 0.1), + knots=[13, 15, 17] + ), + "transform_cc_df3": _unwrap_stateful_function( + cc, + np.arange(10, 20, 0.1), + df=3 + ), + "transform_cc_knots_13_15_17": _unwrap_stateful_function( + cc, + np.arange(10, 20, 0.1), + knots=[13, 15, 17] + ), + "transform_cr_df3": _unwrap_stateful_function( + cr, + np.arange(10, 20, 0.1), + df=3 + ), + "transform_cr_knots_13_15_17": _unwrap_stateful_function( + cr, + np.arange(10, 20, 0.1), + knots=[13, 15, 17] + ), + "transform_te_cr5": _unwrap_stateful_function( + te, + cr(np.arange(10, 20, 0.1), df=5) + ), + "transform_te_cr5_center": _unwrap_stateful_function( + te, + cr(np.arange(10, 20, 0.1), df=5), + constraint='center' + ), } def test_pickling_same_version_roundtrips(): for obj in six.itervalues(pickling_testcases): - yield (check_pickling_same_version_roundtrips, obj) + if isinstance(obj, tuple): + yield (check_pickling_same_version_roundtrips, obj[0]) + else: + yield (check_pickling_same_version_roundtrips, obj) def check_pickling_same_version_roundtrips(obj): - assert obj == pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) + pickled_obj = pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) + assert_pickled_equals(obj, pickled_obj) def test_pickling_old_versions_still_work(): - for (dirpath, dirnames, filenames) in os.walk(PICKE_TESTCASES_ROOTDIR): + for 
(dirpath, dirnames, filenames) in os.walk(PICKLE_TESTCASES_ROOTDIR): for fname in filenames: if os.path.splitext(fname)[1] == '.pickle': yield check_pickling_old_versions_still_work, os.path.join(dirpath, fname) @@ -88,7 +144,26 @@ def check_pickling_old_versions_still_work(pickle_filename): # equal to any instance of a previous version. How do we handle # that? # Maybe adding a minimum version requirement to each test? - assert pickling_testcases[testcase_name] == pickle.load(f) + obj = pickling_testcases[testcase_name] + if isinstance(obj, tuple): + assert_pickled_equals(pickling_testcases[testcase_name][0], + pickle.load(f)) + else: + assert_pickled_equals(pickling_testcases[testcase_name], + pickle.load(f)) + + +def test_pickling_transforms(): + for obj in six.itervalues(pickling_testcases): + if isinstance(obj, tuple): + obj, args, kwargs = obj + yield (check_pickling_transforms, obj, args, kwargs) + + +def check_pickling_transforms(obj, args, kwargs): + pickled_obj = pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) + np.testing.assert_allclose(obj.transform(*args, **kwargs), + pickled_obj.transform(*args, **kwargs)) def test_unpickling_future_gives_sensible_error_msg(): @@ -102,7 +177,7 @@ def create_pickles(version): # TODO Add safety check that said force=True option will still give an # error when trying to remove pickles for a released version, by # comparing the version argument here with patsy.__version__. - pickle_testcases_dir = os.path.join(PICKE_TESTCASES_ROOTDIR, version) + pickle_testcases_dir = os.path.join(PICKLE_TESTCASES_ROOTDIR, version) if os.path.exists(pickle_testcases_dir): raise OSError("{} already exists. 
Aborting.".format(pickle_testcases_dir)) pickle_testcases_tempdir = pickle_testcases_dir + "_inprogress" @@ -110,6 +185,8 @@ def create_pickles(version): try: for name, obj in six.iteritems(pickling_testcases): + if isinstance(obj, tuple): + obj = obj[0] with open(os.path.join(pickle_testcases_tempdir, "{}.pickle".format(name)), "wb") as f: pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) except Exception: diff --git a/patsy/util.py b/patsy/util.py index b3a4dad..70909c8 100644 --- a/patsy/util.py +++ b/patsy/util.py @@ -20,7 +20,8 @@ "safe_issubdtype", "no_pickling", "assert_no_pickling", - "safe_string_eq", + "assert_pickled_equals", + "safe_string_eq" ] import sys @@ -28,7 +29,7 @@ import numpy as np import six from six.moves import cStringIO as StringIO -from .compat import optional_dep_ok +from patsy.compat import optional_dep_ok try: import pandas @@ -763,9 +764,125 @@ def check_pickle_version(version, required_version, name=""): error_msg += "." # TODO Use a better exception than ValueError. 
- raise ValueError(error_msg) + raise PickleError(error_msg) def test_check_pickle_version(): - assert_raises(ValueError, check_pickle_version, 0, 1) - assert_raises(ValueError, check_pickle_version, 1, 0) + assert_raises(PickleError, check_pickle_version, 0, 1) + assert_raises(PickleError, check_pickle_version, 1, 0) check_pickle_version(0, 0) + + +def assert_pickled_equals(obj1, obj2): + def _walk_dict(obj): + _dict = {key: obj.__dict__[key] for key in six.iterkeys(obj.__dict__)} + for key in six.iterkeys(_dict): + if isinstance(_dict[key], dict): + newdict = {} + for key2 in six.iterkeys(_dict[key]): + if hasattr(key2, '__dict__'): + newkey = [str(key2.__dict__[i])[:6] for i in + sorted(six.iterkeys(key2.__dict__))] + newkey = str(newkey) + newdict[newkey] = _dict[key][key2] + else: + newdict[key2] = _dict[key][key2] + for key2 in six.iterkeys(newdict): + if isinstance(newdict[key2], (list, tuple)): + newdict[key2] = [_walk_dict(i) if + hasattr(i, '__dict__') else i for i in + newdict[key2]] + _dict[key] = {key2: _walk_dict(newdict[key2]) if + hasattr(newdict[key2], '__dict__') else + newdict[key2] for key2 in six.iterkeys(newdict)} + elif hasattr(_dict[key], '__dict__'): + _dict[key] = _walk_dict(_dict[key]) + if isinstance(_dict[key], (list, tuple)): + _dict[key] = [_walk_dict(i) if hasattr(i, '__dict__') else i + for i in _dict[key]] + return _dict + + if hasattr(obj1, '__dict__') and hasattr(obj2, '__dict__'): + obj1 = _walk_dict(obj1) + obj2 = _walk_dict(obj2) + + def _walk_dict_numpy_equals(_obj1, _obj2): + for key in six.iterkeys(_obj1): + if isinstance(_obj1[key], np.ndarray): + np.testing.assert_allclose(_obj1[key], _obj2[key]) + elif isinstance(_obj1[key], dict): + _walk_dict_numpy_equals(_obj1[key], _obj2[key]) + + def _walk_dict_remove_numpy(_obj1): + for key in six.iterkeys(_obj1): + if isinstance(_obj1[key], np.ndarray): + _obj1[key] = 0 + elif isinstance(_obj1[key], dict): + _walk_dict_remove_numpy(_obj1[key]) + + _walk_dict_numpy_equals(obj1, 
obj2) + _walk_dict_numpy_equals(obj2, obj1) + _walk_dict_remove_numpy(obj1) + _walk_dict_remove_numpy(obj2) + + assert obj1 == obj2 + + +def test_assert_pickled_equals(): + class _MockObject(object): + def __init__(self, foo): + self.foo = foo + + obj1 = _MockObject('bar') + obj2 = _MockObject('bar') + + assert_pickled_equals(obj1, obj2) + + obj3 = _MockObject('baz') + + assert_raises(AssertionError, assert_pickled_equals, obj1, obj3) + + obj4 = _MockObject(obj1) + obj5 = _MockObject(obj2) + + assert_pickled_equals(obj4, obj5) + + obj6 = _MockObject(_MockObject(np.array([[1, 2], [3, 4]]))) + obj7 = _MockObject(_MockObject(np.array([[1, 2], [3, 4]]))) + + assert_pickled_equals(obj6, obj7) + + obj8 = _MockObject(_MockObject(np.array([[1, 2], [3, 5]]))) + + assert_raises(AssertionError, assert_pickled_equals, obj6, obj8) + + obj9 = _MockObject([_MockObject('a'), _MockObject('b')]) + obj10 = _MockObject([_MockObject('a'), _MockObject('b')]) + + assert_pickled_equals(obj9, obj10) + + obj11 = _MockObject({_MockObject('a'): _MockObject('c')}) + obj12 = _MockObject({_MockObject('a'): _MockObject('c')}) + + assert_pickled_equals(obj11, obj12) + + obj13 = _MockObject({_MockObject('a'): _MockObject('d')}) + + assert_raises(AssertionError, assert_pickled_equals, obj11, obj13) + + +class PickleError(Exception): + """This is the error type for pickle problems. + + For ordinary display to the user with default formatting, use + ``str(exc)``. If you want to do something cleverer, you can use the + ``.message`` attribute directly. 
+ """ + def __init__(self, message): + Exception.__init__(self, message) + self.message = message + + def __str__(self): + if self.origin is None: + return self.message + else: + return self.message diff --git a/pickle_testcases/0.5/contrast_matrix.pickle b/pickle_testcases/0.5/contrast_matrix.pickle new file mode 100644 index 0000000000000000000000000000000000000000..0d63acd1d751148ab69e119a9c00cdade3449416 GIT binary patch literal 224 zcmXYru?oUK5JYoPNh50GKWLN6(k`8cDeOfq=D>i)aM^(Av``VYbNn>FNAHqMu{$hp zn7#HtU#}I93q@re?$0?HRZk5bz+Sy3j7QeqL^BUdfcnQ#2c=cr(OKsh!fp zlvgk%gQ17VKPf9Uxx^zsCndEAqM&w)Mh{DNW?ssa9=6>4l+v8kDH%LHta+um1(j2J VSn?8cfdXu$NyUlY4DC~r^Z<%&FTnr+ literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/evalenv_simple.pickle b/pickle_testcases/0.5/evalenv_simple.pickle new file mode 100644 index 0000000000000000000000000000000000000000..7b8258a0eb3d9eb7b09ce279adf20ab2bc0d595e GIT binary patch literal 67 zcmZo*nQFlR0XQeqL^BUdfcnQ#2c=cr(OKsh!fp Rm^j6ou{hD2p?yk{9stk*7a0Hm literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/evalenv_transform_bs.pickle b/pickle_testcases/0.5/evalenv_transform_bs.pickle new file mode 100644 index 0000000000000000000000000000000000000000..547608f749616cdabd5aadff158b9a814410eee1 GIT binary patch literal 89 zcmZo*nd-*?0XQeqL^BUdfcnQ#2c=cr(OKsh!fp hlw3TehZmx}xF9DpFBQmVN-72_Do*rfXrGd#2LM)lAprmY literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/evalenv_transform_categorical.pickle b/pickle_testcases/0.5/evalenv_transform_categorical.pickle new file mode 100644 index 0000000000000000000000000000000000000000..33d403c63b468c16bd43a4febbf3b4d614236184 GIT binary patch literal 89 zcmZo*nd-*?0XQeqL^BUdfcnQ#2c=cr(OKsh!fp i=scxI5TZIcu_QG;zbG>qs40VO@|5C4Z-(|MNqPWF3m|9! 
literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/evalenv_transform_cc.pickle b/pickle_testcases/0.5/evalenv_transform_cc.pickle new file mode 100644 index 0000000000000000000000000000000000000000..217977d3cb19195fc35b3188463b56092a966b2a GIT binary patch literal 97 zcmZo*nHt6b0XQeqL^BUdfcnQ#2c=cr(OKsh!fp rl$<=JM*^ZeH$Ay5KDjh0GdaGvASW{~wRlPf+vF+5iQWwDQQeqL^BUdfcnQ#2c=cr(OKsh!fp hmJAe0Et=B94bf6ul30>DC4+79l;T8hhW05*dH^zYAV&ZI literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/evalenv_transform_cr.pickle b/pickle_testcases/0.5/evalenv_transform_cr.pickle new file mode 100644 index 0000000000000000000000000000000000000000..550190cec70165730a8f02759ff092ac53d91249 GIT binary patch literal 100 zcmZo*nHtFe0XQeqL^BUdfcnQ#2c=cr(OKsh!fp slw3TeM*^ZeH$Ay5KDjh0GdaGvASW{~6)44&Tm)2KoaoKaJ|#&H0N`pRhyVZp literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/evalenv_transform_scale.pickle b/pickle_testcases/0.5/evalenv_transform_scale.pickle new file mode 100644 index 0000000000000000000000000000000000000000..bd2ea177a20ad1aaa82fd26528e4bc1e69e71008 GIT binary patch literal 87 zcmZo*nd-v;0XQeqL^BUdfcnQ#2c=cr(OKsh!fp hTAZAilRBk`8=|AQB(WrQN(S5HDaDE24DC~r^Z*Z~AIty% literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/evalenv_transform_standardize.pickle b/pickle_testcases/0.5/evalenv_transform_standardize.pickle new file mode 100644 index 0000000000000000000000000000000000000000..95f90304a2fa24f29e4e56972efcf3d5cf07a6bc GIT binary patch literal 99 zcmZo*nHs?W0XQeqL^BUdfcnQ#2c=cr(OKsh!fp oU0jlwmy%eNl3A4sQeqL^BUdfcnQ#2c=cr(OKsh!fp rRFXQSM*^ZeH$Ay5KDjh0GdaGvASW{~wRlPf+vF+5iQWwDQd&dkr7 j;?2;*lANECI;Dp-Q9)ZFX-W@Ueon@4pInOs0h>L`S)r* z3eg!DnBn){V_wd`)=~_`%Zj{f!mKpTThhjZmk-(~veECU#qBGevEk`SS*J^bSn%bE ztfus+c7_V&@C23w;vg&4!GW($T~xiZRbf#dy(Vj^0nc|)t3D-P{mr=fr>WEM2P$1}vH$=8 literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/model_desc.pickle b/pickle_testcases/0.5/model_desc.pickle new file mode 100644 index 
0000000000000000000000000000000000000000..5286dde5e20ca4928cd8bf5b62f04c0fe060880d GIT binary patch literal 241 zcmZo*nfi`sSyk=D2{^lc#7lPN|)u(ZgPrT2!2wpEt#u zp@%0Yqd2}KwJ0|yv$$kR>=ca*h8~s>ARn$ggB7TZJuNZ0B)_P5ie_n21}DTEkVFO# zP(63NZ+>#N8(0NQTMuh|USe+Qlpe;!DaA>xQ%aLkdw7b_?Lsw45Tp#vbioV(ux%N_ QJ&Z{}liENgl_u!{0G6p(V*mgE literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/na_action.pickle b/pickle_testcases/0.5/na_action.pickle new file mode 100644 index 0000000000000000000000000000000000000000..dc5b370aab137e600d0d1e87f37d5a02a5f67dd5 GIT binary patch literal 102 zcmZo*nHtRi0X@70i6zCAdbyd!#hH2OQ+hc3937KOGV}AMOrE0IIHh)qMh|;gYEdyr w#G3&m9bZyekXk&YhuJUDZ%Pk~Uw&Tdl(s27toeEIevUwal%o8CDWyqz00f&QEdT%j literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/origin.pickle b/pickle_testcases/0.5/origin.pickle new file mode 100644 index 0000000000000000000000000000000000000000..4759d5e1a088a877f1fbfac7fb3d38b62162cb67 GIT binary patch literal 92 zcmZo*nHtCd0X;kgi6zCAdih0}>6v*`df5CSjLB0p8>iGx(dc0>OD!tS%+H(R&CtV= moS%{kRAXRhWNczOrH8e+B(bPuiZ@dab82476mQnjBs~C)Zy$vK literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/subterm_info.pickle b/pickle_testcases/0.5/subterm_info.pickle new file mode 100644 index 0000000000000000000000000000000000000000..3b301ff634be2cec051232ba5da05a3ec019bf2d GIT binary patch literal 361 zcmZ{gze~hW49DC1L5CCN5O>FODd^-NICRp79dtZud2L_4!u7rKe#mtYbnp%#+r|IW z|3K4LM*~R+`FxX**U9Ij@AU@rhFjByg)r5VOe#XxQ-ACCy>`T}Po%4x zHq4b4VeQto;#%_-;ULw5z&czK>6#ft>R4l@HYx+=}p M+SSq^o473e0k1%Zp8x;= literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/term.pickle b/pickle_testcases/0.5/term.pickle new file mode 100644 index 0000000000000000000000000000000000000000..3c4e48e50bc2b107653290344e8288a3439b77ba GIT binary patch literal 70 zcmZo*nQF}d0X#Z!8ioPwuJo}$?}rFM!&4|`c^QE_H|-V|?! 
z9=4R!^rF<%Dc;OIoQXL(@!5I#B|!C}d8N4pm3qnfMX7qZr8y;;iA6<;l~a0n;)_y~ z^Ye;Jib|79fF|~^f)!5bVb4o}D47g2wRMU!W73rNDM3>-ycxYYTBl@i_pqjvR2HOy zv@oSv0M&RiLgbly7;UEb`T2SM2LdqR%}_EWsngj3;uRH~bW9o$JTrE{MNa`5Q<|g) E0G2mnqW}N^ literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/transform_bs_knots_13_15_17.pickle b/pickle_testcases/0.5/transform_bs_knots_13_15_17.pickle new file mode 100644 index 0000000000000000000000000000000000000000..21252318aab5c004206e704d1bb050cf8ff2b978 GIT binary patch literal 303 zcmZo*nX1Cb00uq01&Jlam3qYmIhlE>#Z!8ioPwuJo}$?}rFM!&4|`c^QE_H|-V|?! z9=4R!^rF<%Dc;OIoQXL(@!5I#B|!C}d8N4pm3qnfMX7qZr8y;;iA6<;l~a0n;)_y~ z^Ye;Jib|79fF|~^f)!5bVb4o}D47g2wRMU!W73rNDM3>-ycxZ@Tc>1j_pqjvR2HOy zv@oSv0M&RiLgbly7;UEb`T2SM2LdqR%}_EWsnae*_!82o= L^c0}+rAc}K&9!A8 literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/transform_cc_df3.pickle b/pickle_testcases/0.5/transform_cc_df3.pickle new file mode 100644 index 0000000000000000000000000000000000000000..064b17b9636a083dc83e8428fcac53475d53e9a8 GIT binary patch literal 284 zcmXYsF;BxV5QXECQXybqKw@NIC=3x23j;%>Y*@;`Tq$y{E?78DWIL5SAi+=p(rrBk z{vdw?+=Oqq8{WIScdzN!2}=g>tP|data*fW1%0ica-CL2I>Jd&(3URW=!NDuTTAOy zYltV98qvt80}$>*0Ih&N@IYu?J)73Y(NohmUC5xdGHZJ6m9SO>!lTMcXpQr>2OpQ? 
zbTAUm%yP7B@vU|BVcVC!Xrh i;(UDYBB$>iSO5JwU#ucOH#tQc9*${*GBI}9*V!K}OmF`H literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/transform_cc_knots_13_15_17.pickle b/pickle_testcases/0.5/transform_cc_knots_13_15_17.pickle new file mode 100644 index 0000000000000000000000000000000000000000..15af1463b7a1fd855c34496113438f3873322c52 GIT binary patch literal 292 zcmXX>u}Z{15Y3(!4^c}&dy8nXAT|~jfpi8^SPNk@x2KDnP1tM>wh**9RG99@U%cPY zHG8j^SG@OT-n`F$E?6=F&wJqp&)a8M*Dy8;>Y>-l$boQH6ttzQ4|<~v7i(z;)fp-i zoEy=~paT#df(NaDzVJwBUB8;nh3J_X+un2NtmN&eof6gxPk2&W37r|79l?d=m`+B* zg;_?+HXPVc!5aFcN6Jc8rW<-W#dPU>FaIO6)m`wEEb0fy?y0J(hu?r{l!>F}yU5~$ fmwDvtJ@Y-^oZ;P$tx`#)kg^52|Th literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/transform_center.pickle b/pickle_testcases/0.5/transform_center.pickle new file mode 100644 index 0000000000000000000000000000000000000000..f98f8d26057ba5e397af150337c1f414b72e1874 GIT binary patch literal 218 zcmXAhKa0XZ6vVS?RP@^W4qlVm!a_*rLJC`pBY##5m`&JyE2I##5D{MI9QU#A>)3g# znPTA0{N9`fKYu|do;E4jHnNnG0uN)QNg3Snb;c1h9vzgiWvw9t?ps%ZKeelxHp*+G zqRQ>5OpQrf@W;1Nd95uOmy;jh@C|}Ty6BL*AGshF8Ln7kCWRCi>?Rx*)Hdq%(%a5G mBSqKuSuBtw$?EC#HW?7ITReWelFjcD{^T&PohHb_Wzj$Xs#+ZY literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/transform_cr_df3.pickle b/pickle_testcases/0.5/transform_cr_df3.pickle new file mode 100644 index 0000000000000000000000000000000000000000..3f7e1d3ab4da70f77a5e4a6a8f4cfae3d2539b80 GIT binary patch literal 276 zcmXX>yH3ME5cK6i6aghtK7b+=xS^q-h~$owT%<;_&YKIIIG?S1M7jbAiU>v8F3T_G zH^N~v#cnY(JF}mYpE=8V;Gz@W#k_fiT?L_5P`OSkBOT$WSksa2zUYININM3#uC+m#~O&4=$t<0OCy%N@nNO)CQ39WJ72Jq=PPJ1EY z%xnh9F$L_YU^RWynwFgL@t)qUak}xblYf@c_AzNV8~8D@Co0SG`JB)Xo_VSd#bS8y acJ;DNINks6hZQ9nFNYRFnN+t7b^Z&QkZV`~ literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/transform_cr_knots_13_15_17.pickle b/pickle_testcases/0.5/transform_cr_knots_13_15_17.pickle new file mode 100644 index 0000000000000000000000000000000000000000..512b8ab6e093915e89be75643ad0583f3640d595 GIT binary patch 
literal 292 zcmXX>Jxjzu6wIC%4^c}&dy8nXAT|~jfpjjUxLOFCy**vrY{GsVY$0fI$ieIG`4{~g zx+XWpn__0(oA)vQz9QKOxE#25k#;X)(}>V2(YQe?BOSu5TH+pWKJgt3SZt(qsyE0o zm>b^7qzj91ETYg#;0{i>*3FygeTrU~&<&A_-b&g9?G?9{M})J+O3@qVZ4f>!hkP_5 zEX-=M?9+iQR;0x*Tw=ip%eVM?4*AN*LHxblFWYUj-JYrqfPrnJ{V425uR~6HP f*X6{w2c`$UE1%a1KlddjJDg6fxH74J9a{PaECgwM literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/transform_standardize_norescale.pickle b/pickle_testcases/0.5/transform_standardize_norescale.pickle new file mode 100644 index 0000000000000000000000000000000000000000..72182430b551310540da2f8c38da2a288f2ab511 GIT binary patch literal 287 zcmYjLyGq1B6wRp1x|(B(o%7CSh_%h#+XOh;q9<{*~Y1 zZ|F>9!Bbo~-1Bb9lA$wo!->*Er&#vGpPq+JS a&QVUwgHTc){;xRkmah;p`4}uNOMU?nWpbAQ literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/transform_standardize_rescale.pickle b/pickle_testcases/0.5/transform_standardize_rescale.pickle new file mode 100644 index 0000000000000000000000000000000000000000..72182430b551310540da2f8c38da2a288f2ab511 GIT binary patch literal 287 zcmYjLyGq1B6wRp1x|(B(o%7CSh_%h#+XOh;q9<{*~Y1 zZ|F>9!Bbo~-1Bb9lA$wo!->*Er&#vGpPq+JS a&QVUwgHTc){;xRkmah;p`4}uNOMU?nWpbAQ literal 0 HcmV?d00001 diff --git a/pickle_testcases/0.5/transform_te_cr5.pickle b/pickle_testcases/0.5/transform_te_cr5.pickle new file mode 100644 index 0000000000000000000000000000000000000000..3c641bcd460619b9b78d9363131e265b0ec2194d GIT binary patch literal 81 zcmZo*nd-&>0X-50i6zCAdb#PzW%0?SNtwy<#RWN;d8x%ydYD37r%axr**K+kibfB6 gS!z*nW`5ojZ-yT30X-50i6zCAdb#PzW%0?SNtwy<#RWN;d8x%ydYD37r%axr**K+kibfB6 gS!z*nW`5ojZ-yT3p^7FGx3tTdjOQuYoqS-jbn;~|JM(vc^ gDLssdQ@k0AYo};ruzNH0FeXj$W-d)_o06ml02jF%S^xk5 literal 0 HcmV?d00001 From 019094db05bbf124010f2c3346978a666ff4349e Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Thu, 6 Apr 2017 20:15:02 -0500 Subject: [PATCH 16/17] Clean the EvalEnvironment before pickling to removing patsy's stateful transforms which have different names/qualnames from expected. 
--- patsy/eval.py | 31 +++++++++++++++++- patsy/mgcv_cubic_splines.py | 6 ---- patsy/splines.py | 2 -- patsy/state.py | 6 ---- .../0.5/evalenv_transform_bs.pickle | Bin 89 -> 60 bytes .../0.5/evalenv_transform_cc.pickle | Bin 97 -> 60 bytes .../0.5/evalenv_transform_center.pickle | Bin 88 -> 60 bytes .../0.5/evalenv_transform_cr.pickle | Bin 100 -> 60 bytes .../0.5/evalenv_transform_scale.pickle | Bin 87 -> 60 bytes .../0.5/evalenv_transform_standardize.pickle | Bin 99 -> 60 bytes .../0.5/evalenv_transform_te.pickle | Bin 97 -> 60 bytes 11 files changed, 30 insertions(+), 15 deletions(-) diff --git a/patsy/eval.py b/patsy/eval.py index a0d5913..c853988 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -262,6 +262,7 @@ def __hash__(self): tuple(self._namespace_ids()))) def __getstate__(self): + self.clean() namespaces = self._namespaces namespaces = _replace_un_pickleable(namespaces) return (0, namespaces, self.flags) @@ -272,6 +273,17 @@ def __setstate__(self, pickle): self.flags = flags self._namespaces = _return_un_pickleable(namespaces) + def clean(self): + """The EvalEnvironment doesn't need the stateful transformation + functions once the design matrix has been built. This will delete + it. Called by __getstate__ to prepare for pickling.""" + namespaces = [] + for namespace in self._namespaces: + ns = {key: namespace[key] for key in six.iterkeys(namespace) if not + hasattr(namespace[key], '__patsy_stateful_transform__')} + namespaces.append(ns) + self._namespaces = namespaces + class ObjectHolder(object): def __init__(self, kind, module, name): @@ -489,7 +501,23 @@ def test_EvalEnvironment_eq(): capture_local_env = lambda: EvalEnvironment.capture(0) env3 = capture_local_env() env4 = capture_local_env() - assert env3 != env4 # This fails... 
+ assert env3 != env4 + + +def test_EvalEnvironment_clean(): + from patsy.state import center, standardize + from patsy.splines import bs + + env1 = EvalEnvironment([{'center': center}]) + env2 = EvalEnvironment([{'standardize': standardize}]) + env3 = EvalEnvironment([{'bs': bs}]) + env1.clean() + env2.clean() + env3.clean() + + env1._namespaces == [{}] + env2._namespaces == [{}] + env3._namespaces == [{}] _builtins_dict = {} six.exec_("from patsy.builtins import *", {}, _builtins_dict) @@ -650,6 +678,7 @@ def __setstate__(self, pickle): self.code = pickle['code'] self.origin = pickle['origin'] + def test_EvalFactor_pickle_saves_origin(): from patsy.util import assert_pickled_equals # The pickling tests use object equality before and after pickling diff --git a/patsy/mgcv_cubic_splines.py b/patsy/mgcv_cubic_splines.py index 11ec055..f1f53b2 100644 --- a/patsy/mgcv_cubic_splines.py +++ b/patsy/mgcv_cubic_splines.py @@ -730,8 +730,6 @@ def __setstate__(self, pickle): cr = stateful_transform(CR) -cr.__qualname__ = 'cr' -cr.__name__ = 'cr' class CC(CubicRegressionSpline): @@ -774,8 +772,6 @@ def __setstate__(self, pickle): cc = stateful_transform(CC) -cc.__qualname__ = 'cc' -cc.__name__ = 'cc' def test_crs_errors(): @@ -978,8 +974,6 @@ def __setstate__(self, pickle): te = stateful_transform(TE) -te.__qualname__ = 'te' -te.__name__ = 'te' def test_te_errors(): diff --git a/patsy/splines.py b/patsy/splines.py index 96668fc..0cba98c 100644 --- a/patsy/splines.py +++ b/patsy/splines.py @@ -257,8 +257,6 @@ def __setstate__(self, pickle): bs = stateful_transform(BS) -bs.__qualname__ = 'bs' -bs.__name__ = 'bs' def test_bs_compat(): diff --git a/patsy/state.py b/patsy/state.py index a45ed72..1d59d5b 100644 --- a/patsy/state.py +++ b/patsy/state.py @@ -126,8 +126,6 @@ def __setstate__(self, pickle): center = stateful_transform(Center) -center.__qualname__ = 'center' -center.__name__ = 'center' # See: # 
http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm @@ -196,9 +194,5 @@ def __setstate__(self, pickle): standardize = stateful_transform(Standardize) -standardize.__qualname__ = 'standardize' -standardize.__name__ = 'standardize' # R compatibility: scale = standardize -scale.__qualname__ = 'scale' -scale.__name__ = 'scale' diff --git a/pickle_testcases/0.5/evalenv_transform_bs.pickle b/pickle_testcases/0.5/evalenv_transform_bs.pickle index 547608f749616cdabd5aadff158b9a814410eee1..1d48cbd684bd13abf9c9d4aa0e85da001e5316b3 100644 GIT binary patch delta 18 Zcma#FVQFBQYB-U_kUi0xp?yk{9sn(@1qc8D delta 47 zcmcDCWNBcT>NkN}CeP`QUKIW?~&wP;EYcR^xFaiv~yNn%Orlnl1XQ;HM48QP~L=>Y(7 C0uY%1 diff --git a/pickle_testcases/0.5/evalenv_transform_cr.pickle b/pickle_testcases/0.5/evalenv_transform_cr.pickle index 550190cec70165730a8f02759ff092ac53d91249..1d48cbd684bd13abf9c9d4aa0e85da001e5316b3 100644 GIT binary patch delta 18 ZcmYeXVQFBQYB-U_kUi0xp?yk{9sn+K1rq=O delta 58 zcmcCXVQFBQ8aa{0P``&Mxp+#CL_uOnaiv~vdU9EOa%oa#a(rLzJXv*X% M#fjbw?NgHU0A-&Q@&Et; diff --git a/pickle_testcases/0.5/evalenv_transform_scale.pickle b/pickle_testcases/0.5/evalenv_transform_scale.pickle index bd2ea177a20ad1aaa82fd26528e4bc1e69e71008..1d48cbd684bd13abf9c9d4aa0e85da001e5316b3 100644 GIT binary patch delta 18 ZcmWHyVQFBQYB-U_kUi0xp?yk{9sn(f1qJ{B delta 45 zcmcBsXK7%W>NAnWP^pKtI5{yVbxIF+L1Ia9rCxDKVoB Date: Sat, 8 Apr 2017 07:07:52 +0000 Subject: [PATCH 17/17] Getting closer on pickling * Now run subset and clean the eval_env after removing bare funcalls * Had some straggling tuples * Altered test so that stateful_transforms won't be saved in eval_env * Removed the clean method on EvalEnvironment --- patsy/contrasts.py | 17 ++--- patsy/design_info.py | 6 -- patsy/eval.py | 64 ++++++------------ patsy/test_pickling.py | 13 ---- patsy/util.py | 2 +- pickle_testcases/0.5/contrast_matrix.pickle | Bin 224 -> 279 bytes pickle_testcases/0.5/evalenv_pickle.pickle | Bin 128 -> 0 bytes 
pickle_testcases/0.5/evalenv_simple.pickle | Bin 67 -> 0 bytes .../0.5/evalenv_transform_bs.pickle | Bin 60 -> 0 bytes .../0.5/evalenv_transform_categorical.pickle | Bin 89 -> 0 bytes .../0.5/evalenv_transform_cc.pickle | Bin 60 -> 0 bytes .../0.5/evalenv_transform_center.pickle | Bin 60 -> 0 bytes .../0.5/evalenv_transform_cr.pickle | Bin 60 -> 0 bytes .../0.5/evalenv_transform_scale.pickle | Bin 60 -> 0 bytes .../0.5/evalenv_transform_standardize.pickle | Bin 60 -> 0 bytes .../0.5/evalenv_transform_te.pickle | Bin 60 -> 0 bytes pickle_testcases/0.5/subterm_info.pickle | Bin 361 -> 392 bytes pickle_testcases/0.5/transform_center.pickle | Bin 218 -> 227 bytes .../transform_standardize_norescale.pickle | Bin 287 -> 304 bytes .../0.5/transform_standardize_rescale.pickle | Bin 287 -> 304 bytes .../0.5/varlookupdict_simple.pickle | Bin 80 -> 0 bytes 21 files changed, 26 insertions(+), 76 deletions(-) delete mode 100644 pickle_testcases/0.5/evalenv_pickle.pickle delete mode 100644 pickle_testcases/0.5/evalenv_simple.pickle delete mode 100644 pickle_testcases/0.5/evalenv_transform_bs.pickle delete mode 100644 pickle_testcases/0.5/evalenv_transform_categorical.pickle delete mode 100644 pickle_testcases/0.5/evalenv_transform_cc.pickle delete mode 100644 pickle_testcases/0.5/evalenv_transform_center.pickle delete mode 100644 pickle_testcases/0.5/evalenv_transform_cr.pickle delete mode 100644 pickle_testcases/0.5/evalenv_transform_scale.pickle delete mode 100644 pickle_testcases/0.5/evalenv_transform_standardize.pickle delete mode 100644 pickle_testcases/0.5/evalenv_transform_te.pickle delete mode 100644 pickle_testcases/0.5/varlookupdict_simple.pickle diff --git a/patsy/contrasts.py b/patsy/contrasts.py index 4173f2e..4d13d20 100644 --- a/patsy/contrasts.py +++ b/patsy/contrasts.py @@ -49,20 +49,13 @@ def _repr_pretty_(self, p, cycle): def __getstate__(self): - return (0, self.matrix, self.column_suffixes) + return {'version': 0, 'matrix': self.matrix, + 'column_suffixes': 
self.column_suffixes} def __setstate__(self, pickle): - version, matrix, column_suffixes = pickle - check_pickle_version(version, 0, name=self.__class__.__name__) - self.matrix = matrix - self.column_suffixes = column_suffixes - - def __eq__(self, other): - if self.column_suffixes != other.column_suffixes: - return False - if not np.array_equal(self.matrix, other.matrix): - return False - return True + check_pickle_version(pickle['version'], 0, name=self.__class__.__name__) + self.matrix = pickle['matrix'] + self.column_suffixes = pickle['column_suffixes'] def test_ContrastMatrix(): diff --git a/patsy/design_info.py b/patsy/design_info.py index 885503f..b6e2fa9 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -737,12 +737,6 @@ def __init__(self, name): def name(self): return self._name - def __eq__(self, other): - return self.__dict__ == other.__dict__ - - def __hash__(self): - return hash((_MockFactor, str(self._name))) - def test_DesignInfo(): from nose.tools import assert_raises diff --git a/patsy/eval.py b/patsy/eval.py index c853988..cab3dc6 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -25,6 +25,7 @@ from patsy.tokens import (pretty_untokenize, normalize_token_spacing, python_tokenize) from patsy.compat import call_and_wrap_exc +from nose.tools import assert_raises def _all_future_flags(): flags = 0 @@ -71,13 +72,8 @@ def get(self, key, default=None): def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._dicts) - def __getstate__(self): - return (0, self._dicts) + __getstate__ = no_pickling - def __setstate__(self, pickle): - version, dicts = pickle - check_pickle_version(version, 0, name=self.__class__.__name__) - self._dicts = dicts def test_VarLookupDict(): d1 = {"a": 1} @@ -262,27 +258,15 @@ def __hash__(self): tuple(self._namespace_ids()))) def __getstate__(self): - self.clean() + # self.clean() namespaces = self._namespaces namespaces = _replace_un_pickleable(namespaces) - return (0, namespaces, self.flags) + return 
{'version': 0, 'namespaces': namespaces, 'flags': self.flags} def __setstate__(self, pickle): - version, namespaces, flags = pickle - check_pickle_version(version, 0, self.__class__.__name__) - self.flags = flags - self._namespaces = _return_un_pickleable(namespaces) - - def clean(self): - """The EvalEnvironment doesn't need the stateful transformation - functions once the design matrix has been built. This will delete - it. Called by __getstate__ to prepare for pickling.""" - namespaces = [] - for namespace in self._namespaces: - ns = {key: namespace[key] for key in six.iterkeys(namespace) if not - hasattr(namespace[key], '__patsy_stateful_transform__')} - namespaces.append(ns) - self._namespaces = namespaces + check_pickle_version(pickle['version'], 0, self.__class__.__name__) + self.flags = pickle['flags'] + self._namespaces = _return_un_pickleable(pickle['namespaces']) class ObjectHolder(object): @@ -504,21 +488,6 @@ def test_EvalEnvironment_eq(): assert env3 != env4 -def test_EvalEnvironment_clean(): - from patsy.state import center, standardize - from patsy.splines import bs - - env1 = EvalEnvironment([{'center': center}]) - env2 = EvalEnvironment([{'standardize': standardize}]) - env3 = EvalEnvironment([{'bs': bs}]) - env1.clean() - env2.clean() - env3.clean() - - env1._namespaces == [{}] - env2._namespaces == [{}] - env3._namespaces == [{}] - _builtins_dict = {} six.exec_("from patsy.builtins import *", {}, _builtins_dict) # This is purely to make the existence of patsy.builtins visible to systems @@ -576,10 +545,6 @@ def memorize_passes_needed(self, state, eval_env): eval_env = eval_env.with_outer_namespace(_builtins_dict) env_namespace = eval_env.namespace - subset_names = [name for name in ast_names(self.code) - if name in env_namespace] - eval_env = eval_env.subset(subset_names) - state["eval_env"] = eval_env # example code: == "2 * center(x)" i = [0] @@ -596,6 +561,12 @@ def new_name_maker(token): # example eval_code: == "2 * 
_patsy_stobj0__center__.transform(x)" eval_code = replace_bare_funcalls(self.code, new_name_maker) state["eval_code"] = eval_code + + subset_names = [name for name in ast_names(eval_code) + if name in env_namespace] + eval_env = eval_env.subset(subset_names) + state["eval_env"] = eval_env + # paranoia: verify that none of our new names appeared anywhere in the # original code if has_bare_variable_reference(state["transforms"], self.code): @@ -716,7 +687,10 @@ def test_EvalFactor_memorize_passes_needed(): print(state) assert passes == 2 for name in ["foo", "bar", "quux"]: - assert state["eval_env"].namespace[name] is locals()[name] + # name should be locally defined, but since its a stateful_transform, + # its unnecessary to keep it in eval_env + assert name in locals() + assert_raises(KeyError, state["eval_env"].namespace.__getitem__, name) for name in ["w", "x", "y", "z", "e", "state"]: assert name not in state["eval_env"].namespace assert state["transforms"] == {"_patsy_stobj0__foo__": "FOO-OBJ", @@ -772,7 +746,9 @@ def test_EvalFactor_end_to_end(): print(passes) print(state) assert passes == 2 - assert state["eval_env"].namespace["foo"] is foo + # We don't want to save the stateful transforms in the eval_env, actually. 
+ # Just + assert_raises(KeyError, state["eval_env"].namespace.__getitem__, 'foo') for name in ["x", "y", "e", "state"]: assert name not in state["eval_env"].namespace import numpy as np diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index fb34667..b666d6d 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -42,19 +42,6 @@ def _unwrap_stateful_function(function, *args, **kwargs): pickling_testcases = { "evalfactor_simple": EvalFactor("a+b"), - "varlookupdict_simple": VarLookupDict([{"a": 1}, {"a": 2, "b": 3}]), - "evalenv_simple": EvalEnvironment([{"a": 1}]), - "evalenv_transform_center": EvalEnvironment([{'center': center}]), - "evalenv_transform_scale": EvalEnvironment([{'scale': scale}]), - "evalenv_transform_standardize": EvalEnvironment([{ - 'standardize': standardize - }]), - "evalenv_transform_categorical": EvalEnvironment([{'C': C}]), - "evalenv_transform_bs": EvalEnvironment([{'cs': bs}]), - "evalenv_transform_te": EvalEnvironment([{'te': te}]), - "evalenv_transform_cr": EvalEnvironment([{'cs': cr}]), - "evalenv_transform_cc": EvalEnvironment([{'cc': cc}]), - "evalenv_pickle": EvalEnvironment([{'np': np}]), "term": Term([1, 2, 1]), "contrast_matrix": ContrastMatrix([[1, 0], [0, 1]], ["a", "b"]), "subterm_info": si, diff --git a/patsy/util.py b/patsy/util.py index 70909c8..afcbc0c 100644 --- a/patsy/util.py +++ b/patsy/util.py @@ -29,7 +29,7 @@ import numpy as np import six from six.moves import cStringIO as StringIO -from patsy.compat import optional_dep_ok +from .compat import optional_dep_ok try: import pandas diff --git a/pickle_testcases/0.5/contrast_matrix.pickle b/pickle_testcases/0.5/contrast_matrix.pickle index 0d63acd1d751148ab69e119a9c00cdade3449416..44ac6f109fa5789bbd4e5cffafcf05def4d9ef4d 100644 GIT binary patch delta 131 zcmaFBIGu^Lfn_QW<3v_HiP|X|J?v$vMa7xQeqL^BUdfcnQ#2c=cr(OKsh!fp zlvgk%gQ17VKPf9Uxx^zsCndEAqM&w)Mh{DNW?ssa9=6>4l+v8kDH%LHta+um1(j2J VSn?8cfdXu$NyUlY4DC~r^Z<%&FTnr+ diff --git 
a/pickle_testcases/0.5/evalenv_simple.pickle b/pickle_testcases/0.5/evalenv_simple.pickle deleted file mode 100644 index 7b8258a0eb3d9eb7b09ce279adf20ab2bc0d595e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 67 zcmZo*nQFlR0XQeqL^BUdfcnQ#2c=cr(OKsh!fp Rm^j6ou{hD2p?yk{9stk*7a0Hm diff --git a/pickle_testcases/0.5/evalenv_transform_bs.pickle b/pickle_testcases/0.5/evalenv_transform_bs.pickle deleted file mode 100644 index 1d48cbd684bd13abf9c9d4aa0e85da001e5316b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 60 zcmZo*nQF)Y0XQeqL^BUdfcnQ#2c=cr(OKshyJO K&Cot2Ne=)szZB5` diff --git a/pickle_testcases/0.5/evalenv_transform_categorical.pickle b/pickle_testcases/0.5/evalenv_transform_categorical.pickle deleted file mode 100644 index 33d403c63b468c16bd43a4febbf3b4d614236184..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 89 zcmZo*nd-*?0XQeqL^BUdfcnQ#2c=cr(OKsh!fp i=scxI5TZIcu_QG;zbG>qs40VO@|5C4Z-(|MNqPWF3m|9! 
diff --git a/pickle_testcases/0.5/evalenv_transform_cc.pickle b/pickle_testcases/0.5/evalenv_transform_cc.pickle deleted file mode 100644 index 1d48cbd684bd13abf9c9d4aa0e85da001e5316b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 60 zcmZo*nQF)Y0XQeqL^BUdfcnQ#2c=cr(OKshyJO K&Cot2Ne=)szZB5` diff --git a/pickle_testcases/0.5/evalenv_transform_center.pickle b/pickle_testcases/0.5/evalenv_transform_center.pickle deleted file mode 100644 index 1d48cbd684bd13abf9c9d4aa0e85da001e5316b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 60 zcmZo*nQF)Y0XQeqL^BUdfcnQ#2c=cr(OKshyJO K&Cot2Ne=)szZB5` diff --git a/pickle_testcases/0.5/evalenv_transform_cr.pickle b/pickle_testcases/0.5/evalenv_transform_cr.pickle deleted file mode 100644 index 1d48cbd684bd13abf9c9d4aa0e85da001e5316b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 60 zcmZo*nQF)Y0XQeqL^BUdfcnQ#2c=cr(OKshyJO K&Cot2Ne=)szZB5` diff --git a/pickle_testcases/0.5/evalenv_transform_scale.pickle b/pickle_testcases/0.5/evalenv_transform_scale.pickle deleted file mode 100644 index 1d48cbd684bd13abf9c9d4aa0e85da001e5316b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 60 zcmZo*nQF)Y0XQeqL^BUdfcnQ#2c=cr(OKshyJO K&Cot2Ne=)szZB5` diff --git a/pickle_testcases/0.5/evalenv_transform_standardize.pickle b/pickle_testcases/0.5/evalenv_transform_standardize.pickle deleted file mode 100644 index 1d48cbd684bd13abf9c9d4aa0e85da001e5316b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 60 zcmZo*nQF)Y0XQeqL^BUdfcnQ#2c=cr(OKshyJO K&Cot2Ne=)szZB5` diff --git a/pickle_testcases/0.5/evalenv_transform_te.pickle b/pickle_testcases/0.5/evalenv_transform_te.pickle deleted file mode 100644 index 1d48cbd684bd13abf9c9d4aa0e85da001e5316b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 60 
zcmZo*nQF)Y0XQeqL^BUdfcnQ#2c=cr(OKshyJO K&Cot2Ne=)szZB5` diff --git a/pickle_testcases/0.5/subterm_info.pickle b/pickle_testcases/0.5/subterm_info.pickle index 3b301ff634be2cec051232ba5da05a3ec019bf2d..a09808c748731b4f2978ff24db6cb65fa08dc137 100644 GIT binary patch delta 94 zcmaFK)WOWsz%sRVBFjX9+9?_ttlkVgY`KXgMVS>-CQc7#6rK1w+@gm+IX|Z~H!r@p tG%YQ&BDHu*>=cb2=4iv%DL~RFc1mh#QgIJ=UTJPTL}Bq1Z delta 63 zcmeBRe#ykrz%n&%BFjW3Z-$9WgBgV<{tQ=-oubjh9Bmjo1xOmjPDyQ_l2qKoomZM0 SpPZjlnwwWV#haxxNe=+N*A~D4 diff --git a/pickle_testcases/0.5/transform_center.pickle b/pickle_testcases/0.5/transform_center.pickle index f98f8d26057ba5e397af150337c1f414b72e1874..369a585eb2a26181f807a788eccd5374652093ab 100644 GIT binary patch delta 55 zcmcb`_?VHUfo1BAi7ZoDn9~f+CeBe&5>Pnw9|-R6;&fmDgDE9bl6qK^^Gowerg*26 HCg}kHP%ISo delta 46 zcmaFNc#Dyxfo1CXi7Zo@nbIsK&Q+1(`0*bIqNh7dDVdVg!3B~y}0lk@3B~y}0lk@p^7FGx3tTdjOQuYoqS-jbn;~|JM(vc^ gDLssdQ@k0AYo};ruzNH0FeXj$W-d)_o06ml02jF%S^xk5