From 470c997f4f2bbd0381e7ed88edd8c43b2ab2498a Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Wed, 28 Dec 2016 22:25:26 -0600 Subject: [PATCH 1/6] ENH: Support for var_names which are missing from environment --- patsy/build.py | 35 +++++++++++++++++++++++++---------- patsy/desc.py | 17 ++++++++++++++++- patsy/eval.py | 26 ++++++++++++++++++++++++++ patsy/test_build.py | 28 ++++++++++++++++++++++++++++ patsy/user_util.py | 4 ++++ 5 files changed, 99 insertions(+), 11 deletions(-) diff --git a/patsy/build.py b/patsy/build.py index 470a83d..fca541f 100644 --- a/patsy/build.py +++ b/patsy/build.py @@ -344,7 +344,7 @@ def test__subterm_column_names_iter_and__build_subterm(): mat3) assert np.allclose(mat3, 1) -def _factors_memorize(factors, data_iter_maker, eval_env): +def _factors_memorize(factors, data_iter_maker, eval_env, var_names): # First, start off the memorization process by setting up each factor's # state and finding out how many passes it will need: factor_states = {} @@ -362,7 +362,7 @@ def _factors_memorize(factors, data_iter_maker, eval_env): memorize_needed.add(factor) which_pass = 0 while memorize_needed: - for data in data_iter_maker(): + for data in safe_data_maker(data_iter_maker, var_names): for factor in memorize_needed: state = factor_states[factor] factor.memorize_chunk(state, which_pass, data) @@ -373,6 +373,15 @@ def _factors_memorize(factors, data_iter_maker, eval_env): which_pass += 1 return factor_states + +def safe_data_maker(data_iter_maker, var_names): + var_names = list(var_names) + try: + return data_iter_maker(var_names) + except: + return data_iter_maker() + + def test__factors_memorize(): class MockFactor(object): def __init__(self, requested_passes, token): @@ -408,7 +417,7 @@ def __call__(self): f1 = MockFactor(1, "f1") f2a = MockFactor(2, "f2a") f2b = MockFactor(2, "f2b") - factor_states = _factors_memorize(set([f0, f1, f2a, f2b]), data, {}) + factor_states = _factors_memorize(set([f0, f1, f2a, f2b]), data, {}, []) assert data.calls == 2 mem_chunks0 = [("memorize_chunk", 0)] * data.CHUNKS mem_chunks1 = [("memorize_chunk", 1)] * data.CHUNKS @@ -434,11 +443,12 @@ def __call__(self): } assert factor_states == expected -def _examine_factor_types(factors, factor_states, data_iter_maker, NA_action): +def _examine_factor_types(factors, factor_states, data_iter_maker, NA_action, + var_names): num_column_counts = {} cat_sniffers = {} examine_needed = set(factors) - for data in data_iter_maker(): + for data in safe_data_maker(data_iter_maker, var_names): for factor in list(examine_needed): value = factor.eval(factor_states[factor], data) if factor in cat_sniffers or guess_categorical(value): @@ -519,9 +529,10 @@ def next(self): } it = DataIterMaker() + var_names = [] (num_column_counts, cat_levels_contrasts, ) = _examine_factor_types(factor_states.keys(), factor_states, it, - NAAction()) + NAAction(), var_names) assert it.i == 2 iterations = 0 assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4} @@ -537,7 +548,7 @@ def next(self): no_read_necessary = [num_1dim, num_1col, num_4col, categ_1col, bool_1col] (num_column_counts, cat_levels_contrasts, ) = _examine_factor_types(no_read_necessary, factor_states, it, - NAAction()) + NAAction(), var_names) assert it.i == 0 assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4} assert cat_levels_contrasts == { @@ -562,7 +573,7 @@ def next(self): it = DataIterMaker() try: _examine_factor_types([illegal_factor], illegal_factor_states, it, - NAAction()) + NAAction(), var_names) except PatsyError as 
e: assert e.origin is illegal_factor.origin else: @@ -686,14 +697,18 @@ def design_matrix_builders(termlists, data_iter_maker, eval_env, for termlist in termlists: for term in termlist: all_factors.update(term.factors) - factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env) + var_names = {i for f in all_factors + for i in f.var_names(eval_env=eval_env)} + factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env, + var_names) # Now all the factors have working eval methods, so we can evaluate them # on some data to find out what type of data they return. (num_column_counts, cat_levels_contrasts) = _examine_factor_types(all_factors, factor_states, data_iter_maker, - NA_action) + NA_action, + var_names) # Now we need the factor infos, which encapsulate the knowledge of # how to turn any given factor into a chunk of data: factor_infos = {} diff --git a/patsy/desc.py b/patsy/desc.py index 8842b8b..0f80941 100644 --- a/patsy/desc.py +++ b/patsy/desc.py @@ -65,6 +65,15 @@ def name(self): else: return "Intercept" + def var_names(self, eval_env=0): + if not eval_env: + eval_env = EvalEnvironment.capture(0) + if self.factors: + return {i for f in self.factors + for i in f.var_names(eval_env=eval_env)} + else: + return {} + __getstate__ = no_pickling INTERCEPT = Term([]) @@ -76,6 +85,9 @@ def __init__(self, name): def name(self): return self._name + def var_names(self, eval_env=0): + return {'{}_var'.format(self._name)} + def test_Term(): assert Term([1, 2, 1]).factors == (1, 2) assert Term([1, 2]) == Term([2, 1]) @@ -85,6 +97,9 @@ def test_Term(): assert Term([f1, f2]).name() == "a:b" assert Term([f2, f1]).name() == "b:a" assert Term([]).name() == "Intercept" + assert Term([f1]).var_names() == {'a_var'} + assert Term([f1, f2]).var_names() == {'a_var', 'b_var'} + assert Term([]).var_names() == {} assert_no_pickling(Term([])) @@ -148,7 +163,7 @@ def term_code(term): if term != INTERCEPT] result += " + ".join(term_names) return result - + @classmethod def from_formula(cls, tree_or_string): """Construct a :class:`ModelDesc` from a formula string. 
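For orientation, the hunks above give Term (and the mock factors used in the tests) a var_names() method that reports which names a term expects to come from data rather than from the evaluation environment. The sketch below illustrates the intended behaviour once the whole series is applied; the formula and the rescale() helper are invented for the example and are not part of the patch.

import numpy as np
from patsy import ModelDesc

def rescale(x):
    # Defined in the calling namespace, so var_names() should not report it
    # as a data variable.
    return x / np.max(x)

desc = ModelDesc.from_formula("y ~ rescale(a) + np.log(b)")
for term in desc.rhs_termlist:
    print(term.name(), term.var_names())
# Expected, roughly:
#   Intercept {}          (no factors, nothing needed from data)
#   rescale(a) {'a'}      ('rescale' and 'np' resolve in this module)
#   np.log(b) {'b'}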
diff --git a/patsy/eval.py b/patsy/eval.py index d4ed83f..9d54ce0 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -448,6 +448,15 @@ def __init__(self, code, origin=None): self.code = normalize_token_spacing(code) self.origin = origin + def var_names(self, eval_env=0): + if not eval_env: + eval_env = EvalEnvironment.capture(eval_env) + eval_env = eval_env.with_outer_namespace(_builtins_dict) + env_namespace = eval_env.namespace + names = set(name for name in ast_names(self.code) + if name not in env_namespace) + return names + def name(self): return self.code @@ -691,6 +700,23 @@ def test_EvalFactor_end_to_end(): "y": np.array([10, 11, 100, 3])}) == [254, 256, 355, 236]) + +def test_EvalFactor_varnames(): + e = EvalFactor('a + b') + assert e.var_names() == {'a', 'b'} + from patsy.state import stateful_transform + + class bar(object): + pass + + foo = stateful_transform(lambda: "FOO-OBJ") + zed = stateful_transform(lambda: "ZED-OBJ") + bah = stateful_transform(lambda: "BAH-OBJ") + eval_env = EvalEnvironment.capture(0) + e = EvalFactor('foo(a) + bar.qux(b) + zed(bah(c))+ d') + assert e.var_names(eval_env=eval_env) == {'a', 'b', 'c', 'd'} + + def annotated_tokens(code): prev_was_dot = False it = PushbackAdapter(python_tokenize(code)) diff --git a/patsy/test_build.py b/patsy/test_build.py index c843f9f..6a4f782 100644 --- a/patsy/test_build.py +++ b/patsy/test_build.py @@ -740,3 +740,31 @@ def t(which_terms, variables, columns): min_di_subset = min_di.subset(["c", "a"]) assert min_di_subset.column_names == ["c", "a"] assert min_di_subset.terms is None + + +def test_safe_data_maker(): + from patsy.build import safe_data_maker + if not have_pandas: + return + from pandas.util.testing import assert_frame_equal + data = pandas.DataFrame({'a': [1, 2, 3], + 'b': [4, 5, 6], + 'c': [7, 8, 9]}) + + def iter_maker(): + for i in range(0, 3, 2): + yield data.iloc[i:i+2] + d = safe_data_maker(iter_maker, ['a', 'b']) + d2 = next(d) + assert_frame_equal(d2, data.iloc[:2]) + d2 = next(d) + assert_frame_equal(d2, data.iloc[2:]) + + def iter_maker(var_names): + for i in range(0, 3, 2): + yield data[var_names].iloc[i:i+2] + d = safe_data_maker(iter_maker, ['a', 'b']) + d2 = next(d) + assert_frame_equal(d2, data[['a', 'b']].iloc[:2]) + d2 = next(d) + assert_frame_equal(d2, data[['a', 'b']].iloc[2:]) diff --git a/patsy/user_util.py b/patsy/user_util.py index b0aa7e8..bf8746e 100644 --- a/patsy/user_util.py +++ b/patsy/user_util.py @@ -183,6 +183,9 @@ def __init__(self, varname, def name(self): return self._varname + def var_names(self, eval_env=0): + return {'{}_var'.format(self._varname)} + def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._varname) @@ -220,6 +223,7 @@ def eval(self, memorize_state, data): def test_LookupFactor(): l_a = LookupFactor("a") assert l_a.name() == "a" + assert l_a.var_names() == {'a_var'} assert l_a == LookupFactor("a") assert l_a != LookupFactor("b") assert hash(l_a) == hash(LookupFactor("a")) From 2e94be592a0f294664a927e2f7b739f08da15d53 Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Thu, 29 Dec 2016 15:05:51 -0600 Subject: [PATCH 2/6] DOC: Fixes --- patsy/build.py | 5 ++++- patsy/desc.py | 17 ++++++++++++++++- patsy/design_info.py | 33 +++++++++++++++++++++++++++++++++ patsy/eval.py | 18 +++++++++++++++++- 4 files changed, 70 insertions(+), 3 deletions(-) diff --git a/patsy/build.py b/patsy/build.py index fca541f..74b13aa 100644 --- a/patsy/build.py +++ b/patsy/build.py @@ -375,10 +375,13 @@ def _factors_memorize(factors, data_iter_maker, eval_env, 
var_names): def safe_data_maker(data_iter_maker, var_names): + """Call `data_iter_maker` with `var_names` if it accepts them as an + argument; otherwise fall back to calling it with no arguments. + """ var_names = list(var_names) try: return data_iter_maker(var_names) - except: + except TypeError: return data_iter_maker() diff --git a/patsy/desc.py index 0f80941..40d0bbf 100644 --- a/patsy/desc.py +++ b/patsy/desc.py @@ -66,8 +66,23 @@ def name(self): return "Intercept" def var_names(self, eval_env=0): + """Returns a set of variable names that are used in the :class:`Term`, + but not available in the current evaluation environment. These are + likely to be provided by data. + + :arg eval_env: Either a :class:`EvalEnvironment` which will be used to + look up any variables referenced in the :class:`Term` that cannot be + found in :class:`EvalEnvironment`, or else a depth represented as an + integer which will be passed to :meth:`EvalEnvironment.capture`. + ``eval_env=0`` means to use the context of the function calling + :meth:`var_names` for lookups. If calling this function from a + library, you probably want ``eval_env=1``, which means that variables + should be resolved in *your* caller's namespace. + + :returns: A set of strings of the potential variable names. + """ if not eval_env: - eval_env = EvalEnvironment.capture(0) + eval_env = EvalEnvironment.capture(eval_env, reference=1) if self.factors: return {i for f in self.factors for i in f.var_names(eval_env=eval_env)} diff --git a/patsy/design_info.py index 438a23c..f4b5822 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -659,6 +659,31 @@ def subset(self, which_terms): factor_infos=new_factor_infos, term_codings=new_term_codings) + def var_names(self, eval_env=0): + """Returns a set of variable names that are used in the + :class:`DesignInfo`, but not available in the current evaluation + environment. These are likely to be provided by data. + + :arg eval_env: Either a :class:`EvalEnvironment` which will be used to + look up any variables referenced in the :class:`DesignInfo` that + cannot be found in :class:`EvalEnvironment`, or else a depth + represented as an integer which will be passed to + :meth:`EvalEnvironment.capture`. ``eval_env=0`` means to use the + context of the function calling :meth:`var_names` for lookups. + If calling this function from a library, you probably want + ``eval_env=1``, which means that variables should be resolved in + *your* caller's namespace. + + :returns: A set of strings of the potential variable names. + """ + if not eval_env: + from patsy.eval import EvalEnvironment + eval_env = EvalEnvironment.capture(eval_env, reference=1) + if self.terms: + return {i for t in self.terms for i in t.var_names(eval_env)} + else: + return {} + @classmethod def from_array(cls, array_like, default_column_prefix="column"): """Find or construct a DesignInfo appropriate for a given array_like.
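As a usage sketch of what safe_data_maker() enables (not part of the patch): an incremental build whose data_iter_maker optionally accepts the variable names the formula needs, so only those columns have to be read per chunk. The file name, column names, and chunk size below are invented, and patsy's existing incr_dbuilder() is assumed to route the iterator through the new code path once the series is applied.

import pandas as pd
from patsy import incr_dbuilder

def iter_maker(var_names=None):
    # The patched code first tries iter_maker(var_names); an iter_maker
    # written without the extra argument keeps working via the TypeError
    # fallback in safe_data_maker().
    cols = sorted(var_names) if var_names is not None else None
    for chunk in pd.read_csv("big_table.csv", usecols=cols, chunksize=10000):
        yield chunk

design_info = incr_dbuilder("x1 + x2 + x1:x2", iter_maker)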
@@ -701,6 +726,10 @@ def __init__(self, name): def name(self): return self._name + + def var_names(self, eval_env=0): + return {'{}_var'.format(self._name)} + f_x = _MockFactor("x") f_y = _MockFactor("y") t_x = Term([f_x]) @@ -735,6 +764,8 @@ def name(self): # smoke test repr(di) + assert di.var_names() == {'x_var', 'y_var'} + assert_no_pickling(di) # One without term objects @@ -756,6 +787,8 @@ def name(self): assert di.slice("a3") == slice(2, 3) assert di.slice("b") == slice(3, 4) + assert di.var_names() == {} + # Check intercept handling in describe() assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b" diff --git a/patsy/eval.py index 9d54ce0..a9c7d66 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -449,8 +449,24 @@ def __init__(self, code, origin=None): self.origin = origin def var_names(self, eval_env=0): + """Returns a set of variable names that are used in the + :class:`EvalFactor`, but not available in the current evaluation + environment. These are likely to be provided by data. + + :arg eval_env: Either a :class:`EvalEnvironment` which will be used to + look up any variables referenced in the :class:`EvalFactor` that + cannot be found in :class:`EvalEnvironment`, or else a depth + represented as an integer which will be passed to + :meth:`EvalEnvironment.capture`. ``eval_env=0`` means to use the + context of the function calling :meth:`var_names` for lookups. + If calling this function from a library, you probably want + ``eval_env=1``, which means that variables should be resolved in + *your* caller's namespace. + + :returns: A set of strings of the potential variable names. + """ if not eval_env: - eval_env = EvalEnvironment.capture(eval_env) + eval_env = EvalEnvironment.capture(eval_env, reference=1) eval_env = eval_env.with_outer_namespace(_builtins_dict) env_namespace = eval_env.namespace names = set(name for name in ast_names(self.code) From 5f662a9b40e063124b8c7256ddcb3ea94714d6a3 Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Sat, 4 Mar 2017 16:15:40 -0600 Subject: [PATCH 3/6] Added partial function --- patsy/design_info.py | 103 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/patsy/design_info.py index f4b5822..2f98968 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -36,6 +36,7 @@ from patsy.constraint import linear_constraint from patsy.contrasts import ContrastMatrix from patsy.desc import ModelDesc, Term +from collections import OrderedDict class FactorInfo(object): """A FactorInfo object is a simple class that provides some metadata about @@ -684,6 +685,49 @@ def var_names(self, eval_env=0): else: return {} + def partial(self, columns, product=False): + """Returns a partial prediction array where only the variables in the + dict ``columns`` are transformed per the :class:`DesignInfo` + transformations. The terms that are not influenced by ``columns`` + return as zero. + + This is useful to perform a partial prediction on unseen data and to + view marginal differences in factors. + + :arg columns: A dict whose keys are the column names for which marginal + predictions are desired and whose values are the values to be predicted. + + :arg product: When `True`, the returned numpy array represents the + Cartesian product of the values in ``columns``. + + :returns: A numpy array of the partial design matrix.
+ """ + from .highlevel import dmatrix + if product: + columns = _column_product(columns) + rows = None + for col in columns: + if rows and rows != len(columns[col]): + raise ValueError('all columns must be of same length') + rows = len(columns[col]) + parts = [] + for term, subterm in six.iteritems(self.term_codings): + term_vars = term.var_names() + present = True + for term_var in term_vars: + if term_var not in columns: + present = False + if present and (term.name() != 'Intercept'): + # This seems like an inelegent way to not having the Intercept + # in the output + di = self.subset('0 + {}'.format(term.name())) + parts.append(dmatrix(di, columns)) + else: + num_columns = np.sum(s.num_columns for s in subterm) + dm = np.zeros((rows, num_columns)) + parts.append(dm) + return np.hstack(parts) + @classmethod def from_array(cls, array_like, default_column_prefix="column"): """Find or construct a DesignInfo appropriate for a given array_like. @@ -1230,3 +1274,62 @@ def test_design_matrix(): repr(DesignMatrix(np.zeros((1, 0)))) repr(DesignMatrix(np.zeros((0, 1)))) repr(DesignMatrix(np.zeros((0, 0)))) + + +def test_DesignInfo_partial(): + from .highlevel import dmatrix + from numpy.testing import assert_allclose + a = np.array(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a']) + b = np.array([1, 3, 2, 4, 1, 3, 1, 1]) + c = np.array([4, 3, 2, 1, 6, 4, 2, 1]) + dm = dmatrix('a + bs(b, df=3, degree=3) + np.log(c)') + x = np.zeros((3, 6)) + x[1, 1] = 1 + y = dm.design_info.partial({'a': ['a', 'b', 'a']}) + assert_allclose(x, y) + + x = np.zeros((2, 6)) + x[1, 1] = 1 + x[1, 5] = np.log(3) + p = OrderedDict([('a', ['a', 'b']), ('c', [1, 3])]) + y = dm.design_info.partial(p) + assert_allclose(x, y) + + x = np.zeros((4, 6)) + x[2, 1] = 1 + x[3, 1] = 1 + x[1, 5] = np.log(3) + x[3, 5] = np.log(3) + y = dm.design_info.partial(p, product=True) + assert_allclose(x, y) + + dm = dmatrix('a * c') + y = dm.design_info.partial(p) + x = np.array([[0, 0, 1, 0], [0, 1, 3, 3]]) + assert_allclose(x, y) + + from nose.tools import assert_raises + assert_raises(ValueError, dm.design_info.partial, {'a': ['a', 'b'], + 'b': [1, 2, 3]}) + + +def _column_product(columns): + from itertools import product + cols = [] + values = [] + for col, value in six.iteritems(columns): + cols.append(col) + values.append(value) + values = [value for value in product(*values)] + values = [value for value in zip(*values)] + return OrderedDict([(col, list(value)) + for col, value in zip(cols, values)]) + + +def test_column_product(): + x = OrderedDict([('a', [1, 2, 3]), ('b', ['a', 'b'])]) + y = OrderedDict([('a', [1, 1, 2, 2, 3, 3]), + ('b', ['a', 'b', 'a', 'b', 'a', 'b'])]) + x = _column_product(x) + assert x['a'] == y['a'] + assert x['b'] == y['b'] From 807cc93ee0fa1603394bd9ece7887f5381d52c2c Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Sat, 8 Apr 2017 18:53:05 -0500 Subject: [PATCH 4/6] Added logic to handle modules and user-defined functions --- patsy/design_info.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/patsy/design_info.py b/patsy/design_info.py index 2f98968..7cd74a0 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -685,7 +685,7 @@ def var_names(self, eval_env=0): else: return {} - def partial(self, columns, product=False): + def partial(self, columns, product=False, eval_env=0): """Returns a partial prediction array where only the variables in the dict ``columns`` are tranformed per the :class:`DesignInfo` transformations. 
The terms that are not influenced by ``columns`` @@ -703,6 +703,18 @@ def partial(self, columns, product=False): :returns: A numpy array of the partial design matrix. """ from .highlevel import dmatrix + from types import ModuleType + + if not eval_env: + from patsy.eval import EvalEnvironment + eval_env = EvalEnvironment.capture(eval_env, reference=1) + + # We need to get rid of the non-callable items from the eval_env + namespaces = [{key: value} for ns in eval_env._namespaces + for key, value in six.iteritems(ns) + if callable(value) or isinstance(value, ModuleType)] + eval_env._namespaces = namespaces + if product: columns = _column_product(columns) rows = None @@ -712,7 +724,7 @@ def partial(self, columns, product=False): rows = len(columns[col]) parts = [] for term, subterm in six.iteritems(self.term_codings): - term_vars = term.var_names() + term_vars = term.var_names(eval_env) present = True for term_var in term_vars: if term_var not in columns: @@ -1312,6 +1324,16 @@ def test_DesignInfo_partial(): assert_raises(ValueError, dm.design_info.partial, {'a': ['a', 'b'], 'b': [1, 2, 3]}) + def some_function(x): + return np.where(x > 2, 1, 2) + + dm = dmatrix('1 + some_function(c)') + x = np.array([[0, 2], + [0, 2], + [0, 1]]) + y = dm.design_info.partial({'c': np.array([1, 2, 3])}) + assert_allclose(x, y) + def _column_product(columns): from itertools import product From ac612d084cfd5cfc7c4b620fa072b5d188e8f2a7 Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Sat, 3 Nov 2018 14:16:04 -0500 Subject: [PATCH 5/6] Use sum instead of np.sum on a generator --- patsy/design_info.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/patsy/design_info.py b/patsy/design_info.py index 7cd74a0..75e84b9 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -735,7 +735,7 @@ def partial(self, columns, product=False, eval_env=0): di = self.subset('0 + {}'.format(term.name())) parts.append(dmatrix(di, columns)) else: - num_columns = np.sum(s.num_columns for s in subterm) + num_columns = sum(s.num_columns for s in subterm) dm = np.zeros((rows, num_columns)) parts.append(dm) return np.hstack(parts) @@ -1063,7 +1063,7 @@ def _format_float_column(precision, col): else: break return col_strs - + def test__format_float_column(): def t(precision, numbers, expected): got = _format_float_column(precision, np.asarray(numbers)) @@ -1188,7 +1188,7 @@ def max_width(col): + np.sum(column_widths)) print_numbers = (total_width < MAX_TOTAL_WIDTH) else: - print_numbers = False + print_numbers = False p.begin_group(INDENT, "DesignMatrix with shape %s" % (self.shape,)) p.breakable("\n" + " " * p.indentation) From 544effd68909f8beea21789799018aff66fcd4d5 Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Sat, 3 Nov 2018 17:57:44 -0500 Subject: [PATCH 6/6] Improve test coverage --- patsy/design_info.py | 13 +++++++++++++ patsy/eval.py | 8 ++++++++ patsy/test_build.py | 46 ++++++++++++++++++++++---------------------- 3 files changed, 44 insertions(+), 23 deletions(-) diff --git a/patsy/design_info.py b/patsy/design_info.py index 75e84b9..69d460e 100644 --- a/patsy/design_info.py +++ b/patsy/design_info.py @@ -774,8 +774,11 @@ def from_array(cls, array_like, default_column_prefix="column"): __getstate__ = no_pickling + def test_DesignInfo(): from nose.tools import assert_raises + from patsy.eval import EvalEnvironment + class _MockFactor(object): def __init__(self, name): self._name = name @@ -821,6 +824,8 @@ def var_names(self, eval_env=0): repr(di) assert di.var_names() == {'x_var', 'y_var'} + 
eval_env = EvalEnvironment.capture(0) + assert di.var_names(eval_env) == {'x_var', 'y_var'} assert_no_pickling(di) @@ -844,6 +849,8 @@ def var_names(self, eval_env=0): assert di.slice("b") == slice(3, 4) assert di.var_names() == {} + eval_env = EvalEnvironment.capture(0) + assert di.var_names(eval_env) == {} # Check intercept handling in describe() assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b" @@ -1291,6 +1298,8 @@ def test_design_matrix(): def test_DesignInfo_partial(): from .highlevel import dmatrix from numpy.testing import assert_allclose + from patsy.eval import EvalEnvironment + eval_env = EvalEnvironment.capture(0) a = np.array(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a']) b = np.array([1, 3, 2, 4, 1, 3, 1, 1]) c = np.array([4, 3, 2, 1, 6, 4, 2, 1]) @@ -1299,6 +1308,8 @@ def test_DesignInfo_partial(): x[1, 1] = 1 y = dm.design_info.partial({'a': ['a', 'b', 'a']}) assert_allclose(x, y) + y = dm.design_info.partial({'a': ['a', 'b', 'a']}, eval_env=eval_env) + assert_allclose(x, y) x = np.zeros((2, 6)) x[1, 1] = 1 @@ -1306,6 +1317,8 @@ def test_DesignInfo_partial(): p = OrderedDict([('a', ['a', 'b']), ('c', [1, 3])]) y = dm.design_info.partial(p) assert_allclose(x, y) + y = dm.design_info.partial(p, eval_env=eval_env) + assert_allclose(x, y) x = np.zeros((4, 6)) x[2, 1] = 1 diff --git a/patsy/eval.py b/patsy/eval.py index a9c7d66..bac2c65 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -730,6 +730,14 @@ class bar(object): bah = stateful_transform(lambda: "BAH-OBJ") eval_env = EvalEnvironment.capture(0) e = EvalFactor('foo(a) + bar.qux(b) + zed(bah(c))+ d') + state = {} + eval_env = EvalEnvironment.capture(0) + passes = e.memorize_passes_needed(state, eval_env) + print(passes) + print(state) + assert passes == 2 + for name in ["foo", "bah", "zed"]: + assert state["eval_env"].namespace[name] is locals()[name] assert e.var_names(eval_env=eval_env) == {'a', 'b', 'c', 'd'} diff --git a/patsy/test_build.py b/patsy/test_build.py index 6a4f782..5a24c37 100644 --- a/patsy/test_build.py +++ b/patsy/test_build.py @@ -31,7 +31,7 @@ def assert_full_rank(m): u, s, v = np.linalg.svd(m) rank = np.sum(s > 1e-10) assert rank == m.shape[1] - + def test_assert_full_rank(): assert_full_rank(np.eye(10)) assert_full_rank([[1, 0], [1, 0], [1, 0], [1, 1]]) @@ -44,7 +44,7 @@ def test_assert_full_rank(): # col1 + col2 = col3 assert_raises(AssertionError, assert_full_rank, [[1, 2, 3], [1, 5, 6], [1, 6, 7]]) - + def make_termlist(*entries): terms = [] for entry in entries: @@ -116,11 +116,11 @@ def test_simple(): [1, 0, x1[1], 0], [0, 1, x1[2], x1[2]], [0, 1, x1[3], x1[3]]]) - + m = make_matrix(data, 3, [["x1"], ["x2"], ["x2", "x1"]], column_names=["x1", "x2", "x2:x1"]) assert np.allclose(m, np.column_stack((x1, x2, x1 * x2))) - + def test_R_bugs(): data = balanced(a=2, b=2, c=2) data["x"] = np.linspace(0, 1, len(data["a"])) @@ -253,7 +253,7 @@ def test_return_type(): def iter_maker(): yield data builder = design_matrix_builders([make_termlist("x")], iter_maker, 0)[0] - + # Check explicitly passing return_type="matrix" works mat = build_design_matrices([builder], data, return_type="matrix")[0] assert isinstance(mat, DesignMatrix) @@ -298,7 +298,7 @@ def iter_maker(): assert mat.shape == (2, 3) # According to this (and only this) function, NaN == NaN. 
np.testing.assert_array_equal(mat, [[1.0, 0.0, 10.0], [0.0, 1.0, np.nan]]) - + # NA_action="raise" assert_raises(PatsyError, build_design_matrices, @@ -596,7 +596,7 @@ def iter_maker(): def test_contrast(): from patsy.contrasts import ContrastMatrix, Sum values = ["a1", "a3", "a1", "a2"] - + # No intercept in model, full-rank coding of 'a' m = make_matrix({"a": C(values)}, 3, [["a"]], column_names=["a[a1]", "a[a2]", "a[a3]"]) @@ -605,7 +605,7 @@ def test_contrast(): [0, 0, 1], [1, 0, 0], [0, 1, 0]]) - + for s in (Sum, Sum()): m = make_matrix({"a": C(values, s)}, 3, [["a"]], column_names=["a[mean]", "a[S.a1]", "a[S.a2]"]) @@ -614,7 +614,7 @@ def test_contrast(): [1,-1, -1], [1, 1, 0], [1, 0, 1]]) - + m = make_matrix({"a": C(values, Sum(omit=0))}, 3, [["a"]], column_names=["a[mean]", "a[S.a2]", "a[S.a3]"]) # Output from R @@ -631,7 +631,7 @@ def test_contrast(): [1, 0, 1], [1, 0, 0], [1, 1, 0]]) - + for s in (Sum, Sum()): m = make_matrix({"a": C(values, s)}, 3, [[], ["a"]], column_names=["Intercept", "a[S.a1]", "a[S.a2]"]) @@ -640,7 +640,7 @@ def test_contrast(): [1,-1, -1], [1, 1, 0], [1, 0, 1]]) - + m = make_matrix({"a": C(values, Sum(omit=0))}, 3, [[], ["a"]], column_names=["Intercept", "a[S.a2]", "a[S.a3]"]) # Output from R @@ -747,24 +747,24 @@ def test_safe_data_maker(): if not have_pandas: return from pandas.util.testing import assert_frame_equal - data = pandas.DataFrame({'a': [1, 2, 3], - 'b': [4, 5, 6], - 'c': [7, 8, 9]}) + data = pandas.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9], + 'b': [4, 5, 6, 7, 8, 9, 1, 2, 3], + 'c': [7, 8, 9, 1, 2, 3, 4, 5, 6]}) def iter_maker(): - for i in range(0, 3, 2): - yield data.iloc[i:i+2] + yield data.iloc[:4] + yield data.iloc[4:] d = safe_data_maker(iter_maker, ['a', 'b']) d2 = next(d) - assert_frame_equal(d2, data.iloc[:2]) + assert_frame_equal(d2, data.iloc[:4]) d2 = next(d) - assert_frame_equal(d2, data.iloc[2:]) + assert_frame_equal(d2, data.iloc[4:]) - def iter_maker(var_names): - for i in range(0, 3, 2): - yield data[var_names].iloc[i:i+2] + def iter_maker(varnames): + yield data[varnames].iloc[:4] + yield data[varnames].iloc[4:] d = safe_data_maker(iter_maker, ['a', 'b']) d2 = next(d) - assert_frame_equal(d2, data[['a', 'b']].iloc[:2]) + assert_frame_equal(d2, data[['a', 'b']].iloc[:4]) d2 = next(d) - assert_frame_equal(d2, data[['a', 'b']].iloc[2:]) + assert_frame_equal(d2, data[['a', 'b']].iloc[4:])
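Taken together, patches 3-5 give DesignInfo a partial() method for partial (marginal) predictions. A short usage sketch, assuming the whole series is applied; the data frame, formula, and values below are invented for illustration.

import numpy as np
import pandas as pd
from collections import OrderedDict
from patsy import dmatrix

df = pd.DataFrame({"a": ["a1", "a2", "a1", "a2"],
                   "x": [1.0, 2.0, 3.0, 4.0]})
dm = dmatrix("a + np.log(x)", df)

# Only 'x' is supplied, so the Intercept and 'a' columns come back as zeros
# and the result isolates the contribution of the np.log(x) term.
marginal_x = dm.design_info.partial({"x": [1.0, 2.0, 4.0]})

# product=True expands the Cartesian product of the supplied values via the
# _column_product() helper added in patch 3:
# ('a1', 1.0), ('a1', 2.0), ('a2', 1.0), ('a2', 2.0)
grid = OrderedDict([("a", ["a1", "a2"]), ("x", [1.0, 2.0])])
marginal_grid = dm.design_info.partial(grid, product=True)
print(marginal_grid.shape)  # (4, 3): Intercept, a[T.a2], np.log(x)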