From 470c997f4f2bbd0381e7ed88edd8c43b2ab2498a Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Wed, 28 Dec 2016 22:25:26 -0600 Subject: [PATCH 1/6] ENH: Support for var_names which are missing from environment --- patsy/build.py | 35 +++++++++++++++++++++++++---------- patsy/desc.py | 17 ++++++++++++++++- patsy/eval.py | 26 ++++++++++++++++++++++++++ patsy/test_build.py | 28 ++++++++++++++++++++++++++++ patsy/user_util.py | 4 ++++ 5 files changed, 99 insertions(+), 11 deletions(-) diff --git a/patsy/build.py b/patsy/build.py index 470a83d..fca541f 100644 --- a/patsy/build.py +++ b/patsy/build.py @@ -344,7 +344,7 @@ def test__subterm_column_names_iter_and__build_subterm(): mat3) assert np.allclose(mat3, 1) -def _factors_memorize(factors, data_iter_maker, eval_env): +def _factors_memorize(factors, data_iter_maker, eval_env, var_names): # First, start off the memorization process by setting up each factor's # state and finding out how many passes it will need: factor_states = {} @@ -362,7 +362,7 @@ def _factors_memorize(factors, data_iter_maker, eval_env): memorize_needed.add(factor) which_pass = 0 while memorize_needed: - for data in data_iter_maker(): + for data in safe_data_maker(data_iter_maker, var_names): for factor in memorize_needed: state = factor_states[factor] factor.memorize_chunk(state, which_pass, data) @@ -373,6 +373,15 @@ def _factors_memorize(factors, data_iter_maker, eval_env): which_pass += 1 return factor_states + +def safe_data_maker(data_iter_maker, var_names): + var_names = list(var_names) + try: + return data_iter_maker(var_names) + except: + return data_iter_maker() + + def test__factors_memorize(): class MockFactor(object): def __init__(self, requested_passes, token): @@ -408,7 +417,7 @@ def __call__(self): f1 = MockFactor(1, "f1") f2a = MockFactor(2, "f2a") f2b = MockFactor(2, "f2b") - factor_states = _factors_memorize(set([f0, f1, f2a, f2b]), data, {}) + factor_states = _factors_memorize(set([f0, f1, f2a, f2b]), data, {}, []) assert data.calls == 2 mem_chunks0 = [("memorize_chunk", 0)] * data.CHUNKS mem_chunks1 = [("memorize_chunk", 1)] * data.CHUNKS @@ -434,11 +443,12 @@ def __call__(self): } assert factor_states == expected -def _examine_factor_types(factors, factor_states, data_iter_maker, NA_action): +def _examine_factor_types(factors, factor_states, data_iter_maker, NA_action, + var_names): num_column_counts = {} cat_sniffers = {} examine_needed = set(factors) - for data in data_iter_maker(): + for data in safe_data_maker(data_iter_maker, var_names): for factor in list(examine_needed): value = factor.eval(factor_states[factor], data) if factor in cat_sniffers or guess_categorical(value): @@ -519,9 +529,10 @@ def next(self): } it = DataIterMaker() + var_names = [] (num_column_counts, cat_levels_contrasts, ) = _examine_factor_types(factor_states.keys(), factor_states, it, - NAAction()) + NAAction(), var_names) assert it.i == 2 iterations = 0 assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4} @@ -537,7 +548,7 @@ def next(self): no_read_necessary = [num_1dim, num_1col, num_4col, categ_1col, bool_1col] (num_column_counts, cat_levels_contrasts, ) = _examine_factor_types(no_read_necessary, factor_states, it, - NAAction()) + NAAction(), var_names) assert it.i == 0 assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4} assert cat_levels_contrasts == { @@ -562,7 +573,7 @@ def next(self): it = DataIterMaker() try: _examine_factor_types([illegal_factor], illegal_factor_states, it, - NAAction()) + NAAction(), var_names) except PatsyError as 
e: assert e.origin is illegal_factor.origin else: @@ -686,14 +697,18 @@ def design_matrix_builders(termlists, data_iter_maker, eval_env, for termlist in termlists: for term in termlist: all_factors.update(term.factors) - factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env) + var_names = {i for f in all_factors + for i in f.var_names(eval_env=eval_env)} + factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env, + var_names) # Now all the factors have working eval methods, so we can evaluate them # on some data to find out what type of data they return. (num_column_counts, cat_levels_contrasts) = _examine_factor_types(all_factors, factor_states, data_iter_maker, - NA_action) + NA_action, + var_names) # Now we need the factor infos, which encapsulate the knowledge of # how to turn any given factor into a chunk of data: factor_infos = {} diff --git a/patsy/desc.py b/patsy/desc.py index 8842b8b..0f80941 100644 --- a/patsy/desc.py +++ b/patsy/desc.py @@ -65,6 +65,15 @@ def name(self): else: return "Intercept" + def var_names(self, eval_env=0): + if not eval_env: + eval_env = EvalEnvironment.capture(0) + if self.factors: + return {i for f in self.factors + for i in f.var_names(eval_env=eval_env)} + else: + return {} + __getstate__ = no_pickling INTERCEPT = Term([]) @@ -76,6 +85,9 @@ def __init__(self, name): def name(self): return self._name + def var_names(self, eval_env=0): + return {'{}_var'.format(self._name)} + def test_Term(): assert Term([1, 2, 1]).factors == (1, 2) assert Term([1, 2]) == Term([2, 1]) @@ -85,6 +97,9 @@ def test_Term(): assert Term([f1, f2]).name() == "a:b" assert Term([f2, f1]).name() == "b:a" assert Term([]).name() == "Intercept" + assert Term([f1]).var_names() == {'a_var'} + assert Term([f1, f2]).var_names() == {'a_var', 'b_var'} + assert Term([]).var_names() == {} assert_no_pickling(Term([])) @@ -148,7 +163,7 @@ def term_code(term): if term != INTERCEPT] result += " + ".join(term_names) return result - + @classmethod def from_formula(cls, tree_or_string): """Construct a :class:`ModelDesc` from a formula string. 
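For orientation, the hunks above give Term (and the mock factors used in the tests) a var_names() method that reports which names a term expects to come from data rather than from the evaluation environment. The sketch below illustrates the intended behaviour once the whole series is applied; the formula and the rescale() helper are invented for the example and are not part of the patch.

import numpy as np
from patsy import ModelDesc

def rescale(x):
    # Defined in the calling namespace, so var_names() should not report it
    # as a data variable.
    return x / np.max(x)

desc = ModelDesc.from_formula("y ~ rescale(a) + np.log(b)")
for term in desc.rhs_termlist:
    print(term.name(), term.var_names())
# Expected, roughly:
#   Intercept {}          (no factors, nothing needed from data)
#   rescale(a) {'a'}      ('rescale' and 'np' resolve in this module)
#   np.log(b) {'b'}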
diff --git a/patsy/eval.py b/patsy/eval.py index d4ed83f..9d54ce0 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -448,6 +448,15 @@ def __init__(self, code, origin=None): self.code = normalize_token_spacing(code) self.origin = origin + def var_names(self, eval_env=0): + if not eval_env: + eval_env = EvalEnvironment.capture(eval_env) + eval_env = eval_env.with_outer_namespace(_builtins_dict) + env_namespace = eval_env.namespace + names = set(name for name in ast_names(self.code) + if name not in env_namespace) + return names + def name(self): return self.code @@ -691,6 +700,23 @@ def test_EvalFactor_end_to_end(): "y": np.array([10, 11, 100, 3])}) == [254, 256, 355, 236]) + +def test_EvalFactor_varnames(): + e = EvalFactor('a + b') + assert e.var_names() == {'a', 'b'} + from patsy.state import stateful_transform + + class bar(object): + pass + + foo = stateful_transform(lambda: "FOO-OBJ") + zed = stateful_transform(lambda: "ZED-OBJ") + bah = stateful_transform(lambda: "BAH-OBJ") + eval_env = EvalEnvironment.capture(0) + e = EvalFactor('foo(a) + bar.qux(b) + zed(bah(c))+ d') + assert e.var_names(eval_env=eval_env) == {'a', 'b', 'c', 'd'} + + def annotated_tokens(code): prev_was_dot = False it = PushbackAdapter(python_tokenize(code)) diff --git a/patsy/test_build.py b/patsy/test_build.py index c843f9f..6a4f782 100644 --- a/patsy/test_build.py +++ b/patsy/test_build.py @@ -740,3 +740,31 @@ def t(which_terms, variables, columns): min_di_subset = min_di.subset(["c", "a"]) assert min_di_subset.column_names == ["c", "a"] assert min_di_subset.terms is None + + +def test_safe_data_maker(): + from patsy.build import safe_data_maker + if not have_pandas: + return + from pandas.util.testing import assert_frame_equal + data = pandas.DataFrame({'a': [1, 2, 3], + 'b': [4, 5, 6], + 'c': [7, 8, 9]}) + + def iter_maker(): + for i in range(0, 3, 2): + yield data.iloc[i:i+2] + d = safe_data_maker(iter_maker, ['a', 'b']) + d2 = next(d) + assert_frame_equal(d2, data.iloc[:2]) + d2 = next(d) + assert_frame_equal(d2, data.iloc[2:]) + + def iter_maker(var_names): + for i in range(0, 3, 2): + yield data[var_names].iloc[i:i+2] + d = safe_data_maker(iter_maker, ['a', 'b']) + d2 = next(d) + assert_frame_equal(d2, data[['a', 'b']].iloc[:2]) + d2 = next(d) + assert_frame_equal(d2, data[['a', 'b']].iloc[2:]) diff --git a/patsy/user_util.py b/patsy/user_util.py index b0aa7e8..bf8746e 100644 --- a/patsy/user_util.py +++ b/patsy/user_util.py @@ -183,6 +183,9 @@ def __init__(self, varname, def name(self): return self._varname + def var_names(self, eval_env=0): + return {'{}_var'.format(self._varname)} + def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._varname) @@ -220,6 +223,7 @@ def eval(self, memorize_state, data): def test_LookupFactor(): l_a = LookupFactor("a") assert l_a.name() == "a" + assert l_a.var_names() == {'a_var'} assert l_a == LookupFactor("a") assert l_a != LookupFactor("b") assert hash(l_a) == hash(LookupFactor("a")) From 2e94be592a0f294664a927e2f7b739f08da15d53 Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Thu, 29 Dec 2016 15:05:51 -0600 Subject: [PATCH 2/6] DOC: Fixes --- patsy/build.py | 5 ++++- patsy/desc.py | 17 ++++++++++++++++- patsy/design_info.py | 33 +++++++++++++++++++++++++++++++++ patsy/eval.py | 18 +++++++++++++++++- 4 files changed, 70 insertions(+), 3 deletions(-) diff --git a/patsy/build.py b/patsy/build.py index fca541f..74b13aa 100644 --- a/patsy/build.py +++ b/patsy/build.py @@ -375,10 +375,13 @@ def _factors_memorize(factors, data_iter_maker, eval_env, 
var_names): def safe_data_maker(data_iter_maker, var_names): + """Call `data_iter_maker` with `var_names` if it accepts them as an + argument; otherwise fall back to calling it with no arguments. + """ var_names = list(var_names) try: return data_iter_maker(var_names) - except: + except TypeError: return data_iter_maker() diff --git a/patsy/desc.py index 0f80941..40d0bbf 100644 --- a/patsy/desc.py +++ b/patsy/desc.py @@ -66,8 +66,23 @@ def name(self): return "Intercept" def var_names(self, eval_env=0): + """Returns a set of variable names that are used in the :class:`Term`, + but not available in the current evaluation environment. These are + likely to be provided by data. + + :arg eval_env: Either a :class:`EvalEnvironment` which will be used to + look up any variables referenced in the :class:`Term` that cannot be + found in :class:`EvalEnvironment`, or else a depth represented as an + integer which will be passed to :meth:`EvalEnvironment.capture`. + ``eval_env=0`` means to use the context of the function calling + :meth:`var_names` for lookups. If calling this function from a + library, you probably want ``eval_env=1``, which means that variables + should be resolved in *your* caller's namespace. + + :returns: A set of strings of the potential variable names. + """ if not eval_env: - eval_env = EvalEnvironment.capture(0) + eval_env = EvalEnvironment.capture(eval_env, reference=1) if self.factors: return {i for f in self.factors for i in f.var_names(eval_env=eval_env)} diff --git a/patsy/design_info.py index 438a23c..f4b5822 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -659,6 +659,31 @@ def subset(self, which_terms): factor_infos=new_factor_infos, term_codings=new_term_codings) + def var_names(self, eval_env=0): + """Returns a set of variable names that are used in the + :class:`DesignInfo`, but not available in the current evaluation + environment. These are likely to be provided by data. + + :arg eval_env: Either a :class:`EvalEnvironment` which will be used to + look up any variables referenced in the :class:`DesignInfo` that + cannot be found in :class:`EvalEnvironment`, or else a depth + represented as an integer which will be passed to + :meth:`EvalEnvironment.capture`. ``eval_env=0`` means to use the + context of the function calling :meth:`var_names` for lookups. + If calling this function from a library, you probably want + ``eval_env=1``, which means that variables should be resolved in + *your* caller's namespace. + + :returns: A set of strings of the potential variable names. + """ + if not eval_env: + from patsy.eval import EvalEnvironment + eval_env = EvalEnvironment.capture(eval_env, reference=1) + if self.terms: + return {i for t in self.terms for i in t.var_names(eval_env)} + else: + return {} + @classmethod def from_array(cls, array_like, default_column_prefix="column"): """Find or construct a DesignInfo appropriate for a given array_like.
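As a usage sketch of what safe_data_maker() enables (not part of the patch): an incremental build whose data_iter_maker optionally accepts the variable names the formula needs, so only those columns have to be read per chunk. The file name, column names, and chunk size below are invented, and patsy's existing incr_dbuilder() is assumed to route the iterator through the new code path once the series is applied.

import pandas as pd
from patsy import incr_dbuilder

def iter_maker(var_names=None):
    # The patched code first tries iter_maker(var_names); an iter_maker
    # written without the extra argument keeps working via the TypeError
    # fallback in safe_data_maker().
    cols = sorted(var_names) if var_names is not None else None
    for chunk in pd.read_csv("big_table.csv", usecols=cols, chunksize=10000):
        yield chunk

design_info = incr_dbuilder("x1 + x2 + x1:x2", iter_maker)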
@@ -701,6 +726,10 @@ def __init__(self, name): def name(self): return self._name + + def var_names(self, eval_env=0): + return {'{}_var'.format(self._name)} + f_x = _MockFactor("x") f_y = _MockFactor("y") t_x = Term([f_x]) @@ -735,6 +764,8 @@ def name(self): # smoke test repr(di) + assert di.var_names() == {'x_var', 'y_var'} + assert_no_pickling(di) # One without term objects @@ -756,6 +787,8 @@ def name(self): assert di.slice("a3") == slice(2, 3) assert di.slice("b") == slice(3, 4) + assert di.var_names() == {} + # Check intercept handling in describe() assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b" diff --git a/patsy/eval.py index 9d54ce0..a9c7d66 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -449,8 +449,24 @@ def __init__(self, code, origin=None): self.origin = origin def var_names(self, eval_env=0): + """Returns a set of variable names that are used in the + :class:`EvalFactor`, but not available in the current evaluation + environment. These are likely to be provided by data. + + :arg eval_env: Either a :class:`EvalEnvironment` which will be used to + look up any variables referenced in the :class:`EvalFactor` that + cannot be found in :class:`EvalEnvironment`, or else a depth + represented as an integer which will be passed to + :meth:`EvalEnvironment.capture`. ``eval_env=0`` means to use the + context of the function calling :meth:`var_names` for lookups. + If calling this function from a library, you probably want + ``eval_env=1``, which means that variables should be resolved in + *your* caller's namespace. + + :returns: A set of strings of the potential variable names. + """ if not eval_env: - eval_env = EvalEnvironment.capture(eval_env) + eval_env = EvalEnvironment.capture(eval_env, reference=1) eval_env = eval_env.with_outer_namespace(_builtins_dict) env_namespace = eval_env.namespace names = set(name for name in ast_names(self.code) From 5f662a9b40e063124b8c7256ddcb3ea94714d6a3 Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Sat, 4 Mar 2017 16:15:40 -0600 Subject: [PATCH 3/6] Added partial function --- patsy/design_info.py | 103 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/patsy/design_info.py index f4b5822..2f98968 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -36,6 +36,7 @@ from patsy.constraint import linear_constraint from patsy.contrasts import ContrastMatrix from patsy.desc import ModelDesc, Term +from collections import OrderedDict class FactorInfo(object): """A FactorInfo object is a simple class that provides some metadata about @@ -684,6 +685,49 @@ def var_names(self, eval_env=0): else: return {} + def partial(self, columns, product=False): + """Returns a partial prediction array where only the variables in the + dict ``columns`` are transformed per the :class:`DesignInfo` + transformations. The terms that are not influenced by ``columns`` + return as zero. + + This is useful to perform a partial prediction on unseen data and to + view marginal differences in factors. + + :arg columns: A dict whose keys are the column names for which marginal + predictions are desired and whose values are the values to be predicted. + + :arg product: When `True`, the returned numpy array represents the + Cartesian product of the values in ``columns``. + + :returns: A numpy array of the partial design matrix.
+ """ + from .highlevel import dmatrix + if product: + columns = _column_product(columns) + rows = None + for col in columns: + if rows and rows != len(columns[col]): + raise ValueError('all columns must be of same length') + rows = len(columns[col]) + parts = [] + for term, subterm in six.iteritems(self.term_codings): + term_vars = term.var_names() + present = True + for term_var in term_vars: + if term_var not in columns: + present = False + if present and (term.name() != 'Intercept'): + # This seems like an inelegent way to not having the Intercept + # in the output + di = self.subset('0 + {}'.format(term.name())) + parts.append(dmatrix(di, columns)) + else: + num_columns = np.sum(s.num_columns for s in subterm) + dm = np.zeros((rows, num_columns)) + parts.append(dm) + return np.hstack(parts) + @classmethod def from_array(cls, array_like, default_column_prefix="column"): """Find or construct a DesignInfo appropriate for a given array_like. @@ -1230,3 +1274,62 @@ def test_design_matrix(): repr(DesignMatrix(np.zeros((1, 0)))) repr(DesignMatrix(np.zeros((0, 1)))) repr(DesignMatrix(np.zeros((0, 0)))) + + +def test_DesignInfo_partial(): + from .highlevel import dmatrix + from numpy.testing import assert_allclose + a = np.array(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a']) + b = np.array([1, 3, 2, 4, 1, 3, 1, 1]) + c = np.array([4, 3, 2, 1, 6, 4, 2, 1]) + dm = dmatrix('a + bs(b, df=3, degree=3) + np.log(c)') + x = np.zeros((3, 6)) + x[1, 1] = 1 + y = dm.design_info.partial({'a': ['a', 'b', 'a']}) + assert_allclose(x, y) + + x = np.zeros((2, 6)) + x[1, 1] = 1 + x[1, 5] = np.log(3) + p = OrderedDict([('a', ['a', 'b']), ('c', [1, 3])]) + y = dm.design_info.partial(p) + assert_allclose(x, y) + + x = np.zeros((4, 6)) + x[2, 1] = 1 + x[3, 1] = 1 + x[1, 5] = np.log(3) + x[3, 5] = np.log(3) + y = dm.design_info.partial(p, product=True) + assert_allclose(x, y) + + dm = dmatrix('a * c') + y = dm.design_info.partial(p) + x = np.array([[0, 0, 1, 0], [0, 1, 3, 3]]) + assert_allclose(x, y) + + from nose.tools import assert_raises + assert_raises(ValueError, dm.design_info.partial, {'a': ['a', 'b'], + 'b': [1, 2, 3]}) + + +def _column_product(columns): + from itertools import product + cols = [] + values = [] + for col, value in six.iteritems(columns): + cols.append(col) + values.append(value) + values = [value for value in product(*values)] + values = [value for value in zip(*values)] + return OrderedDict([(col, list(value)) + for col, value in zip(cols, values)]) + + +def test_column_product(): + x = OrderedDict([('a', [1, 2, 3]), ('b', ['a', 'b'])]) + y = OrderedDict([('a', [1, 1, 2, 2, 3, 3]), + ('b', ['a', 'b', 'a', 'b', 'a', 'b'])]) + x = _column_product(x) + assert x['a'] == y['a'] + assert x['b'] == y['b'] From 807cc93ee0fa1603394bd9ece7887f5381d52c2c Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Sat, 8 Apr 2017 18:53:05 -0500 Subject: [PATCH 4/6] Added logic to handle modules and user-defined functions --- patsy/design_info.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/patsy/design_info.py b/patsy/design_info.py index 2f98968..7cd74a0 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -685,7 +685,7 @@ def var_names(self, eval_env=0): else: return {} - def partial(self, columns, product=False): + def partial(self, columns, product=False, eval_env=0): """Returns a partial prediction array where only the variables in the dict ``columns`` are tranformed per the :class:`DesignInfo` transformations. 
The terms that are not influenced by ``columns`` @@ -703,6 +703,18 @@ def partial(self, columns, product=False): :returns: A numpy array of the partial design matrix. """ from .highlevel import dmatrix + from types import ModuleType + + if not eval_env: + from patsy.eval import EvalEnvironment + eval_env = EvalEnvironment.capture(eval_env, reference=1) + + # We need to get rid of the non-callable items from the eval_env + namespaces = [{key: value} for ns in eval_env._namespaces + for key, value in six.iteritems(ns) + if callable(value) or isinstance(value, ModuleType)] + eval_env._namespaces = namespaces + if product: columns = _column_product(columns) rows = None @@ -712,7 +724,7 @@ def partial(self, columns, product=False): rows = len(columns[col]) parts = [] for term, subterm in six.iteritems(self.term_codings): - term_vars = term.var_names() + term_vars = term.var_names(eval_env) present = True for term_var in term_vars: if term_var not in columns: @@ -1312,6 +1324,16 @@ def test_DesignInfo_partial(): assert_raises(ValueError, dm.design_info.partial, {'a': ['a', 'b'], 'b': [1, 2, 3]}) + def some_function(x): + return np.where(x > 2, 1, 2) + + dm = dmatrix('1 + some_function(c)') + x = np.array([[0, 2], + [0, 2], + [0, 1]]) + y = dm.design_info.partial({'c': np.array([1, 2, 3])}) + assert_allclose(x, y) + def _column_product(columns): from itertools import product From ac612d084cfd5cfc7c4b620fa072b5d188e8f2a7 Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Sat, 3 Nov 2018 14:16:04 -0500 Subject: [PATCH 5/6] Use sum instead of np.sum on a generator --- patsy/design_info.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/patsy/design_info.py b/patsy/design_info.py index 7cd74a0..75e84b9 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -735,7 +735,7 @@ def partial(self, columns, product=False, eval_env=0): di = self.subset('0 + {}'.format(term.name())) parts.append(dmatrix(di, columns)) else: - num_columns = np.sum(s.num_columns for s in subterm) + num_columns = sum(s.num_columns for s in subterm) dm = np.zeros((rows, num_columns)) parts.append(dm) return np.hstack(parts) @@ -1063,7 +1063,7 @@ def _format_float_column(precision, col): else: break return col_strs - + def test__format_float_column(): def t(precision, numbers, expected): got = _format_float_column(precision, np.asarray(numbers)) @@ -1188,7 +1188,7 @@ def max_width(col): + np.sum(column_widths)) print_numbers = (total_width < MAX_TOTAL_WIDTH) else: - print_numbers = False + print_numbers = False p.begin_group(INDENT, "DesignMatrix with shape %s" % (self.shape,)) p.breakable("\n" + " " * p.indentation) From 544effd68909f8beea21789799018aff66fcd4d5 Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Sat, 3 Nov 2018 17:57:44 -0500 Subject: [PATCH 6/6] Improve test coverage --- patsy/design_info.py | 13 +++++++++++++ patsy/eval.py | 8 ++++++++ patsy/test_build.py | 46 ++++++++++++++++++++++---------------------- 3 files changed, 44 insertions(+), 23 deletions(-) diff --git a/patsy/design_info.py b/patsy/design_info.py index 75e84b9..69d460e 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -774,8 +774,11 @@ def from_array(cls, array_like, default_column_prefix="column"): __getstate__ = no_pickling + def test_DesignInfo(): from nose.tools import assert_raises + from patsy.eval import EvalEnvironment + class _MockFactor(object): def __init__(self, name): self._name = name @@ -821,6 +824,8 @@ def var_names(self, eval_env=0): repr(di) assert di.var_names() == {'x_var', 'y_var'} + 
eval_env = EvalEnvironment.capture(0) + assert di.var_names(eval_env) == {'x_var', 'y_var'} assert_no_pickling(di) @@ -844,6 +849,8 @@ def var_names(self, eval_env=0): assert di.slice("b") == slice(3, 4) assert di.var_names() == {} + eval_env = EvalEnvironment.capture(0) + assert di.var_names(eval_env) == {} # Check intercept handling in describe() assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b" @@ -1291,6 +1298,8 @@ def test_design_matrix(): def test_DesignInfo_partial(): from .highlevel import dmatrix from numpy.testing import assert_allclose + from patsy.eval import EvalEnvironment + eval_env = EvalEnvironment.capture(0) a = np.array(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a']) b = np.array([1, 3, 2, 4, 1, 3, 1, 1]) c = np.array([4, 3, 2, 1, 6, 4, 2, 1]) @@ -1299,6 +1308,8 @@ def test_DesignInfo_partial(): x[1, 1] = 1 y = dm.design_info.partial({'a': ['a', 'b', 'a']}) assert_allclose(x, y) + y = dm.design_info.partial({'a': ['a', 'b', 'a']}, eval_env=eval_env) + assert_allclose(x, y) x = np.zeros((2, 6)) x[1, 1] = 1 @@ -1306,6 +1317,8 @@ def test_DesignInfo_partial(): p = OrderedDict([('a', ['a', 'b']), ('c', [1, 3])]) y = dm.design_info.partial(p) assert_allclose(x, y) + y = dm.design_info.partial(p, eval_env=eval_env) + assert_allclose(x, y) x = np.zeros((4, 6)) x[2, 1] = 1 diff --git a/patsy/eval.py b/patsy/eval.py index a9c7d66..bac2c65 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -730,6 +730,14 @@ class bar(object): bah = stateful_transform(lambda: "BAH-OBJ") eval_env = EvalEnvironment.capture(0) e = EvalFactor('foo(a) + bar.qux(b) + zed(bah(c))+ d') + state = {} + eval_env = EvalEnvironment.capture(0) + passes = e.memorize_passes_needed(state, eval_env) + print(passes) + print(state) + assert passes == 2 + for name in ["foo", "bah", "zed"]: + assert state["eval_env"].namespace[name] is locals()[name] assert e.var_names(eval_env=eval_env) == {'a', 'b', 'c', 'd'} diff --git a/patsy/test_build.py b/patsy/test_build.py index 6a4f782..5a24c37 100644 --- a/patsy/test_build.py +++ b/patsy/test_build.py @@ -31,7 +31,7 @@ def assert_full_rank(m): u, s, v = np.linalg.svd(m) rank = np.sum(s > 1e-10) assert rank == m.shape[1] - + def test_assert_full_rank(): assert_full_rank(np.eye(10)) assert_full_rank([[1, 0], [1, 0], [1, 0], [1, 1]]) @@ -44,7 +44,7 @@ def test_assert_full_rank(): # col1 + col2 = col3 assert_raises(AssertionError, assert_full_rank, [[1, 2, 3], [1, 5, 6], [1, 6, 7]]) - + def make_termlist(*entries): terms = [] for entry in entries: @@ -116,11 +116,11 @@ def test_simple(): [1, 0, x1[1], 0], [0, 1, x1[2], x1[2]], [0, 1, x1[3], x1[3]]]) - + m = make_matrix(data, 3, [["x1"], ["x2"], ["x2", "x1"]], column_names=["x1", "x2", "x2:x1"]) assert np.allclose(m, np.column_stack((x1, x2, x1 * x2))) - + def test_R_bugs(): data = balanced(a=2, b=2, c=2) data["x"] = np.linspace(0, 1, len(data["a"])) @@ -253,7 +253,7 @@ def test_return_type(): def iter_maker(): yield data builder = design_matrix_builders([make_termlist("x")], iter_maker, 0)[0] - + # Check explicitly passing return_type="matrix" works mat = build_design_matrices([builder], data, return_type="matrix")[0] assert isinstance(mat, DesignMatrix) @@ -298,7 +298,7 @@ def iter_maker(): assert mat.shape == (2, 3) # According to this (and only this) function, NaN == NaN. 
np.testing.assert_array_equal(mat, [[1.0, 0.0, 10.0], [0.0, 1.0, np.nan]]) - + # NA_action="raise" assert_raises(PatsyError, build_design_matrices, @@ -596,7 +596,7 @@ def iter_maker(): def test_contrast(): from patsy.contrasts import ContrastMatrix, Sum values = ["a1", "a3", "a1", "a2"] - + # No intercept in model, full-rank coding of 'a' m = make_matrix({"a": C(values)}, 3, [["a"]], column_names=["a[a1]", "a[a2]", "a[a3]"]) @@ -605,7 +605,7 @@ def test_contrast(): [0, 0, 1], [1, 0, 0], [0, 1, 0]]) - + for s in (Sum, Sum()): m = make_matrix({"a": C(values, s)}, 3, [["a"]], column_names=["a[mean]", "a[S.a1]", "a[S.a2]"]) @@ -614,7 +614,7 @@ def test_contrast(): [1,-1, -1], [1, 1, 0], [1, 0, 1]]) - + m = make_matrix({"a": C(values, Sum(omit=0))}, 3, [["a"]], column_names=["a[mean]", "a[S.a2]", "a[S.a3]"]) # Output from R @@ -631,7 +631,7 @@ def test_contrast(): [1, 0, 1], [1, 0, 0], [1, 1, 0]]) - + for s in (Sum, Sum()): m = make_matrix({"a": C(values, s)}, 3, [[], ["a"]], column_names=["Intercept", "a[S.a1]", "a[S.a2]"]) @@ -640,7 +640,7 @@ def test_contrast(): [1,-1, -1], [1, 1, 0], [1, 0, 1]]) - + m = make_matrix({"a": C(values, Sum(omit=0))}, 3, [[], ["a"]], column_names=["Intercept", "a[S.a2]", "a[S.a3]"]) # Output from R @@ -747,24 +747,24 @@ def test_safe_data_maker(): if not have_pandas: return from pandas.util.testing import assert_frame_equal - data = pandas.DataFrame({'a': [1, 2, 3], - 'b': [4, 5, 6], - 'c': [7, 8, 9]}) + data = pandas.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9], + 'b': [4, 5, 6, 7, 8, 9, 1, 2, 3], + 'c': [7, 8, 9, 1, 2, 3, 4, 5, 6]}) def iter_maker(): - for i in range(0, 3, 2): - yield data.iloc[i:i+2] + yield data.iloc[:4] + yield data.iloc[4:] d = safe_data_maker(iter_maker, ['a', 'b']) d2 = next(d) - assert_frame_equal(d2, data.iloc[:2]) + assert_frame_equal(d2, data.iloc[:4]) d2 = next(d) - assert_frame_equal(d2, data.iloc[2:]) + assert_frame_equal(d2, data.iloc[4:]) - def iter_maker(var_names): - for i in range(0, 3, 2): - yield data[var_names].iloc[i:i+2] + def iter_maker(varnames): + yield data[varnames].iloc[:4] + yield data[varnames].iloc[4:] d = safe_data_maker(iter_maker, ['a', 'b']) d2 = next(d) - assert_frame_equal(d2, data[['a', 'b']].iloc[:2]) + assert_frame_equal(d2, data[['a', 'b']].iloc[:4]) d2 = next(d) - assert_frame_equal(d2, data[['a', 'b']].iloc[2:]) + assert_frame_equal(d2, data[['a', 'b']].iloc[4:])
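Taken together, patches 3-5 give DesignInfo a partial() method for partial (marginal) predictions. A short usage sketch, assuming the whole series is applied; the data frame, formula, and values below are invented for illustration.

import numpy as np
import pandas as pd
from collections import OrderedDict
from patsy import dmatrix

df = pd.DataFrame({"a": ["a1", "a2", "a1", "a2"],
                   "x": [1.0, 2.0, 3.0, 4.0]})
dm = dmatrix("a + np.log(x)", df)

# Only 'x' is supplied, so the Intercept and 'a' columns come back as zeros
# and the result isolates the contribution of the np.log(x) term.
marginal_x = dm.design_info.partial({"x": [1.0, 2.0, 4.0]})

# product=True expands the Cartesian product of the supplied values via the
# _column_product() helper added in patch 3:
# ('a1', 1.0), ('a1', 2.0), ('a2', 1.0), ('a2', 2.0)
grid = OrderedDict([("a", ["a1", "a2"]), ("x", [1.0, 2.0])])
marginal_grid = dm.design_info.partial(grid, product=True)
print(marginal_grid.shape)  # (4, 3): Intercept, a[T.a2], np.log(x)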