From 7dc23bd93ec523b5ed5a8c7b72af1b8d4332547e Mon Sep 17 00:00:00 2001
From: thequackdaddy <pquack@gmail.com>
Date: Wed, 14 Sep 2016 20:33:27 -0500
Subject: [PATCH 1/6] ENH: R poly compatibility

Travis fixes
---
 doc/API-reference.rst           |   5 +
 patsy/__init__.py               |   3 +
 patsy/poly.py                   | 179 ++++++++++++++++++++++++++++++++
 patsy/test_poly_data.py         |  37 +++++++
 tools/get-R-poly-test-vectors.R |  62 +++++++++++
 5 files changed, 286 insertions(+)
 create mode 100644 patsy/poly.py
 create mode 100644 patsy/test_poly_data.py
 create mode 100644 tools/get-R-poly-test-vectors.R

diff --git a/doc/API-reference.rst b/doc/API-reference.rst
index 7b6b411..07a80b9 100644
--- a/doc/API-reference.rst
+++ b/doc/API-reference.rst
@@ -198,6 +198,11 @@ Spline regression
 .. autofunction:: cc
 .. autofunction:: te
 
+Orthogonal Polynomial
+---------------------
+
+.. autofunction:: poly
+
 Working with formulas programmatically
 --------------------------------------
 
diff --git a/patsy/__init__.py b/patsy/__init__.py
index 29722d0..6a37b0a 100644
--- a/patsy/__init__.py
+++ b/patsy/__init__.py
@@ -113,5 +113,8 @@ def _reexport(mod):
 import patsy.mgcv_cubic_splines
 _reexport(patsy.mgcv_cubic_splines)
 
+import patsy.poly
+_reexport(patsy.poly)
+
 # XX FIXME: we aren't exporting any of the explicit parsing interface
 # yet. Need to figure out how to do that.
diff --git a/patsy/poly.py b/patsy/poly.py
new file mode 100644
index 0000000..a20402b
--- /dev/null
+++ b/patsy/poly.py
@@ -0,0 +1,179 @@
+# This file is part of Patsy
+# Copyright (C) 2012-2013 Nathaniel Smith <njs@pobox.com>
+# See file LICENSE.txt for license information.
+
+# R-compatible poly function
+
+# These are made available in the patsy.* namespace
+__all__ = ["poly"]
+
+import numpy as np
+
+from patsy.util import have_pandas, no_pickling, assert_no_pickling
+from patsy.state import stateful_transform
+
+if have_pandas:
+    import pandas
+
+class Poly(object):
+    """poly(x, degree=1, raw=False)
+
+    Generates an orthogonal polynomial transformation of x of degree.
+    Generic usage is something along the lines of::
+
+      y ~ 1 + poly(x, 4)
+
+    to fit ``y`` as a function of ``x``, with a 4th degree polynomial.
+
+    :arg degree: The number of degrees for the polynomial expansion.
+    :arg raw: When raw is False (the default), will return orthogonal
+      polynomials.
+
+    .. versionadded:: 0.4.1
+    """
+    def __init__(self):
+        self._tmp = {}
+        self._degree = None
+        self._raw = None
+
+    def memorize_chunk(self, x, degree=3, raw=False):
+        args = {"degree": degree,
+                "raw": raw
+                }
+        self._tmp["args"] = args
+        # XX: check whether we need x values before saving them
+        x = np.atleast_1d(x)
+        if x.ndim == 2 and x.shape[1] == 1:
+            x = x[:, 0]
+        if x.ndim > 1:
+            raise ValueError("input to 'poly' must be 1-d, "
+                             "or a 2-d column vector")
+        # There's no better way to compute exact quantiles than memorizing
+        # all data.
+        x = np.array(x, dtype=float)
+        self._tmp.setdefault("xs", []).append(x)
+
+    def memorize_finish(self):
+        tmp = self._tmp
+        args = tmp["args"]
+        del self._tmp
+
+        if args["degree"] < 1:
+            raise ValueError("degree must be greater than 0 (not %r)"
+                             % (args["degree"],))
+        if int(args["degree"]) != args["degree"]:
+            raise ValueError("degree must be an integer (not %r)"
+                             % (self._degree,))
+
+        # These are guaranteed to all be 1d vectors by the code above
+        scores = np.concatenate(tmp["xs"])
+        scores_mean = scores.mean()
+        # scores -= scores_mean
+        self.scores_mean = scores_mean
+        n = args['degree']
+        self.degree = n
+        raw_poly = scores.reshape((-1, 1)) ** np.arange(n + 1).reshape((1, -1))
+        raw = args['raw']
+        self.raw = raw
+        if not raw:
+            q, r = np.linalg.qr(raw_poly)
+            # Q is now orthognoal of degree n. To match what R is doing, we
+            # need to use the three-term recurrence technique to calculate
+            # new alpha, beta, and norm.
+
+            self.alpha = (np.sum(scores.reshape((-1, 1)) * q[:, :n] ** 2,
+                                 axis=0) /
+                          np.sum(q[:, :n] ** 2, axis=0))
+
+            # For reasons I don't understand, the norms R uses are based off
+            # of the diagonal of the r upper triangular matrix.
+
+            self.norm = np.linalg.norm(q * np.diag(r), axis=0)
+            self.beta = (self.norm[1:] / self.norm[:n]) ** 2
+
+    def transform(self, x, degree=3, raw=False):
+        if have_pandas:
+            if isinstance(x, (pandas.Series, pandas.DataFrame)):
+                to_pandas = True
+                idx = x.index
+            else:
+                to_pandas = False
+        else:
+            to_pandas = False
+        x = np.array(x, ndmin=1).flatten()
+
+        if self.raw:
+            n = self.degree
+            p = x.reshape((-1, 1)) ** np.arange(n + 1).reshape((1, -1))
+        else:
+            # This is where the three-term recurrance technique is unwound.
+
+            p = np.empty((x.shape[0], self.degree + 1))
+            p[:, 0] = 1
+
+            for i in np.arange(self.degree):
+                p[:, i + 1] = (x - self.alpha[i]) * p[:, i]
+                if i > 0:
+                    p[:, i + 1] = (p[:, i + 1] -
+                                   (self.beta[i - 1] * p[:, i - 1]))
+            p /= self.norm
+
+        p = p[:, 1:]
+        if to_pandas:
+            p = pandas.DataFrame(p)
+            p.index = idx
+        return p
+
+    __getstate__ = no_pickling
+
+poly = stateful_transform(Poly)
+
+
+def test_poly_compat():
+    from patsy.test_state import check_stateful
+    from patsy.test_poly_data import (R_poly_test_x,
+                                      R_poly_test_data,
+                                      R_poly_num_tests)
+    lines = R_poly_test_data.split("\n")
+    tests_ran = 0
+    start_idx = lines.index("--BEGIN TEST CASE--")
+    while True:
+        if not lines[start_idx] == "--BEGIN TEST CASE--":
+            break
+        start_idx += 1
+        stop_idx = lines.index("--END TEST CASE--", start_idx)
+        block = lines[start_idx:stop_idx]
+        test_data = {}
+        for line in block:
+            key, value = line.split("=", 1)
+            test_data[key] = value
+        # Translate the R output into Python calling conventions
+        kwargs = {
+            # integer
+            "degree": int(test_data["degree"]),
+            # boolen
+            "raw": (test_data["raw"] == 'TRUE')
+            }
+        # Special case: in R, setting intercept=TRUE increases the effective
+        # dof by 1. Adjust our arguments to match.
+        # if kwargs["df"] is not None and kwargs["include_intercept"]:
+        #     kwargs["df"] += 1
+        output = np.asarray(eval(test_data["output"]))
+        # Do the actual test
+        check_stateful(Poly, False, R_poly_test_x, output, **kwargs)
+        tests_ran += 1
+        # Set up for the next one
+        start_idx = stop_idx + 1
+    assert tests_ran == R_poly_num_tests
+
+
+def test_poly_errors():
+    from nose.tools import assert_raises
+    x = np.arange(27)
+    # Invalid input shape
+    assert_raises(ValueError, poly, x.reshape((3, 3, 3)))
+    assert_raises(ValueError, poly, x.reshape((3, 3, 3)), raw=True)
+    # Invalid degree
+    assert_raises(ValueError, poly, x, degree=-1)
+    assert_raises(ValueError, poly, x, degree=0)
+    assert_raises(ValueError, poly, x, degree=3.5)
diff --git a/patsy/test_poly_data.py b/patsy/test_poly_data.py
new file mode 100644
index 0000000..cb4481b
--- /dev/null
+++ b/patsy/test_poly_data.py
@@ -0,0 +1,37 @@
+# This file auto-generated by tools/get-R-bs-test-vectors.R
+# Using: R version 3.2.4 Revised (2016-03-16 r70336)
+import numpy as np
+R_poly_test_x = np.array([1, 1.5, 2.25, 3.375, 5.0625, 7.59375, 11.390625, 17.0859375, 25.62890625, 38.443359375, 57.6650390625, 86.49755859375, 129.746337890625, 194.6195068359375, 291.92926025390625, 437.89389038085938, 656.84083557128906, 985.26125335693359, 1477.8918800354004, 2216.8378200531006, ])
+R_poly_test_data = """
+--BEGIN TEST CASE--
+degree=1
+raw=TRUE
+output=np.array([1, 1.5, 2.25, 3.375, 5.0625, 7.59375, 11.390625, 17.0859375, 25.62890625, 38.443359375, 57.6650390625, 86.49755859375, 129.746337890625, 194.6195068359375, 291.92926025390625, 437.89389038085938, 656.84083557128906, 985.26125335693359, 1477.8918800354004, 2216.8378200531006, ]).reshape((20, 1, ), order="F")
+--END TEST CASE--
+--BEGIN TEST CASE--
+degree=1
+raw=FALSE
+output=np.array([-0.12865949508274149, -0.12846539500908838, -0.12817424489860868, -0.12773751973288924, -0.12708243198431005, -0.12609980036144131, -0.12462585292713815, -0.12241493177568342, -0.11909855004850137, -0.11412397745772825, -0.10666211857156857, -0.095469330242329037, -0.07868014774846975, -0.053496374007680828, -0.015720713396497447, 0.040942777520277619, 0.12593801389544024, 0.25343086845818413, 0.4446701503023, 0.73152907306847381, ]).reshape((20, 1, ), order="F")
+--END TEST CASE--
+--BEGIN TEST CASE--
+degree=3
+raw=TRUE
+output=np.array([1, 1.5, 2.25, 3.375, 5.0625, 7.59375, 11.390625, 17.0859375, 25.62890625, 38.443359375, 57.6650390625, 86.49755859375, 129.746337890625, 194.6195068359375, 291.92926025390625, 437.89389038085938, 656.84083557128906, 985.26125335693359, 1477.8918800354004, 2216.8378200531006, 1, 2.25, 5.0625, 11.390625, 25.62890625, 57.6650390625, 129.746337890625, 291.92926025390625, 656.84083557128906, 1477.8918800354004, 3325.2567300796509, 7481.8276426792145, 16834.112196028233, 37876.752441063523, 85222.692992392927, 191751.05923288409, 431439.8832739892, 970739.73736647563, 2184164.4090745705, 4914369.9204177829, 1, 3.375, 11.390625, 38.443359375, 129.746337890625, 437.89389038085938, 1477.8918800354004, 4987.8850951194763, 16834.112196028233, 56815.128661595285, 191751.05923288409, 647159.82491098379, 2184164.4090745705, 7371554.8806266747, 24878997.722115029, 83966617.312138215, 283387333.4284665, 956432250.32107437, 3227958844.8336263, 10894361101.313488, ]).reshape((20, 3, ), order="F")
+--END TEST CASE--
+--BEGIN TEST CASE--
+degree=3
+raw=FALSE
+output=np.array([-0.12865949508274149, -0.12846539500908838, -0.12817424489860868, -0.12773751973288924, -0.12708243198431005, -0.12609980036144131, -0.12462585292713815, -0.12241493177568342, -0.11909855004850137, -0.11412397745772825, -0.10666211857156857, -0.095469330242329037, -0.07868014774846975, -0.053496374007680828, -0.015720713396497447, 0.040942777520277619, 0.12593801389544024, 0.25343086845818413, 0.4446701503023, 0.73152907306847381, 0.11682670564764953, 0.11622774987820758, 0.1153299112445243, 0.11398449209008393, 0.11196937564961051, 0.10895347864407183, 0.10444488285989936, 0.097716301062945765, 0.087700630095951776, 0.072850827534442664, 0.05096695744238839, 0.019020528242278005, -0.026920519697452645, -0.091380250921070119, -0.1780532062130448, -0.28552519567824058, -0.39602393206231051, -0.44767622905753701, -0.26843910749340033, 0.57802660073100254, -0.11560888340228653, -0.11436481217184656, -0.11250218782662975, -0.10971608089390825, -0.10555451667328646, -0.099351692934324679, -0.090136150925525155, -0.076511614727544461, -0.05651941299388475, -0.027522538371457093, 0.013772191900731716, 0.070864671671751547, 0.14593497036033168, 0.23591981919395397, 0.32391016867398448, 0.36336942185480259, 0.25890497941187346, -0.11572025100301592, -0.66076386903314166, 0.27159578788942196, ]).reshape((20, 3, ), order="F")
+--END TEST CASE--
+--BEGIN TEST CASE--
+degree=5
+raw=TRUE
+output=np.array([1, 1.5, 2.25, 3.375, 5.0625, 7.59375, 11.390625, 17.0859375, 25.62890625, 38.443359375, 57.6650390625, 86.49755859375, 129.746337890625, 194.6195068359375, 291.92926025390625, 437.89389038085938, 656.84083557128906, 985.26125335693359, 1477.8918800354004, 2216.8378200531006, 1, 2.25, 5.0625, 11.390625, 25.62890625, 57.6650390625, 129.746337890625, 291.92926025390625, 656.84083557128906, 1477.8918800354004, 3325.2567300796509, 7481.8276426792145, 16834.112196028233, 37876.752441063523, 85222.692992392927, 191751.05923288409, 431439.8832739892, 970739.73736647563, 2184164.4090745705, 4914369.9204177829, 1, 3.375, 11.390625, 38.443359375, 129.746337890625, 437.89389038085938, 1477.8918800354004, 4987.8850951194763, 16834.112196028233, 56815.128661595285, 191751.05923288409, 647159.82491098379, 2184164.4090745705, 7371554.8806266747, 24878997.722115029, 83966617.312138215, 283387333.4284665, 956432250.32107437, 3227958844.8336263, 10894361101.313488, 1, 5.0625, 25.62890625, 129.746337890625, 656.84083557128906, 3325.2567300796509, 16834.112196028233, 85222.692992392927, 431439.8832739892, 2184164.4090745705, 11057332.320940012, 55977744.87475881, 283387333.4284665, 1434648375.4816115, 7262907400.875659, 36768468716.933022, 186140372879.47342, 942335637702.33411, 4770574165868.0674, 24151031714707.086, 1, 7.59375, 57.6650390625, 437.89389038085938, 3325.2567300796509, 25251.168294042349, 191751.05923288409, 1456109.6060497134, 11057332.320940012, 83966617.31213823, 637621500.21404958, 4841938267.2504387, 36768468716.933022, 279210559319.21014, 2120255184830.252, 16100687809804.727, 122264598055704.64, 928446791485507, 7050392822843070, 53538920498464552, ]).reshape((20, 5, ), order="F")
+--END TEST CASE--
+--BEGIN TEST CASE--
+degree=5
+raw=FALSE
+output=np.array([-0.12865949508274149, -0.12846539500908838, -0.12817424489860868, -0.12773751973288924, -0.12708243198431005, -0.12609980036144131, -0.12462585292713815, -0.12241493177568342, -0.11909855004850137, -0.11412397745772825, -0.10666211857156857, -0.095469330242329037, -0.07868014774846975, -0.053496374007680828, -0.015720713396497447, 0.040942777520277619, 0.12593801389544024, 0.25343086845818413, 0.4446701503023, 0.73152907306847381, 0.11682670564764953, 0.11622774987820758, 0.1153299112445243, 0.11398449209008393, 0.11196937564961051, 0.10895347864407183, 0.10444488285989936, 0.097716301062945765, 0.087700630095951776, 0.072850827534442664, 0.05096695744238839, 0.019020528242278005, -0.026920519697452645, -0.091380250921070119, -0.1780532062130448, -0.28552519567824058, -0.39602393206231051, -0.44767622905753701, -0.26843910749340033, 0.57802660073100254, -0.11560888340228653, -0.11436481217184656, -0.11250218782662975, -0.10971608089390825, -0.10555451667328646, -0.099351692934324679, -0.090136150925525155, -0.076511614727544461, -0.05651941299388475, -0.027522538371457093, 0.013772191900731716, 0.070864671671751547, 0.14593497036033168, 0.23591981919395397, 0.32391016867398448, 0.36336942185480259, 0.25890497941187346, -0.11572025100301592, -0.66076386903314166, 0.27159578788942196, 0.11925766326375063, 0.11701962699862156, 0.11367531238125347, 0.10868744714732725, 0.10126981942884175, 0.090287103769210786, 0.074134201646975206, 0.050620044131431986, 0.016933017097416861, -0.030116712154368355, -0.093138533517390085, -0.17160263551697441, -0.25618209006285081, -0.3183631162695052, -0.29707753517866498, -0.10102478727647804, 0.30185248746535442, 0.55289166632880227, -0.46108564710186972, 0.081962667419115426, -0.12626707822019206, -0.12250155553682644, -0.11689136915447108, -0.10856147160045609, -0.096257598068575617, -0.078227654788373013, -0.052128116579684983, -0.015063001240831148, 0.035988153544508683, 0.10280803884977513, 0.18263307034840112, 0.26144732880503613, 0.30325203347309243, 0.24116709207723347, -0.00082575540196283526, -0.37830141983168153, -0.42887161757203512, 0.55207091753656046, -0.17171017635275559, 0.016240179713238136, ]).reshape((20, 5, ), order="F")
+--END TEST CASE--
+"""
+R_poly_num_tests = 6
diff --git a/tools/get-R-poly-test-vectors.R b/tools/get-R-poly-test-vectors.R
new file mode 100644
index 0000000..d97c84e
--- /dev/null
+++ b/tools/get-R-poly-test-vectors.R
@@ -0,0 +1,62 @@
+cat("# This file auto-generated by tools/get-R-bs-test-vectors.R\n")
+cat(sprintf("# Using: %s\n", R.Version()$version.string))
+cat("import numpy as np\n")
+
+options(digits=20)
+library(splines)
+x <- (1.5)^(0:19)
+
+MISSING <- "MISSING"
+
+is.missing <- function(obj) {
+  length(obj) == 1 && obj == MISSING
+}
+
+pyprint <- function(arr) {
+  if (is.missing(arr)) {
+    cat("None\n")
+  } else {
+    cat("np.array([")
+    for (val in arr) {
+      cat(val)
+      cat(", ")
+    }
+    cat("])")
+    if (!is.null(dim(arr))) {
+      cat(".reshape((")
+      for (size in dim(arr)) {
+        cat(sprintf("%s, ", size))
+      }
+      cat("), order=\"F\")")
+    }
+    cat("\n")
+  }
+}
+
+num.tests <- 0
+dump.poly <- function(degree, raw) {
+  cat("--BEGIN TEST CASE--\n")
+  cat(sprintf("degree=%s\n", degree))
+  cat(sprintf("raw=%s\n", raw))
+  
+  args <- list(x=x, degree=degree, raw=raw)
+
+  result <- do.call(poly, args)
+  
+  cat("output=")
+  pyprint(result)
+  cat("--END TEST CASE--\n")
+  assign("num.tests", num.tests + 1, envir=.GlobalEnv)
+}
+
+cat("R_poly_test_x = ")
+pyprint(x)
+cat("R_poly_test_data = \"\"\"\n")
+
+for (degree in c(1, 3, 5)) {
+  for (raw in c(TRUE, FALSE)) {
+    dump.poly(degree, raw)
+  }
+}
+cat("\"\"\"\n")
+cat(sprintf("R_poly_num_tests = %s\n", num.tests))

From 35e092312a2e8df13dfce71b75df3f6a6ac24d96 Mon Sep 17 00:00:00 2001
From: thequackdaddy <pquack@gmail.com>
Date: Tue, 25 Oct 2016 22:23:34 -0500
Subject: [PATCH 2/6] Use poly to calculate poly contrast. Add ability to use
 standarize in addition to qr decomposition. Added all numpy polynomial types.

remove poly
---
 doc/API-reference.rst             |   4 +-
 patsy/__init__.py                 |   4 +-
 patsy/contrasts.py                |   9 +-
 patsy/{poly.py => polynomials.py} | 157 ++++++++++++++++++++++--------
 patsy/test_poly_data.py           |   2 +-
 tools/get-R-poly-test-vectors.R   |   2 +-
 6 files changed, 125 insertions(+), 53 deletions(-)
 rename patsy/{poly.py => polynomials.py} (50%)

diff --git a/doc/API-reference.rst b/doc/API-reference.rst
index 07a80b9..1d3ada9 100644
--- a/doc/API-reference.rst
+++ b/doc/API-reference.rst
@@ -198,8 +198,8 @@ Spline regression
 .. autofunction:: cc
 .. autofunction:: te
 
-Orthogonal Polynomial
----------------------
+Polynomial
+----------
 
 .. autofunction:: poly
 
diff --git a/patsy/__init__.py b/patsy/__init__.py
index 6a37b0a..a4ec751 100644
--- a/patsy/__init__.py
+++ b/patsy/__init__.py
@@ -113,8 +113,8 @@ def _reexport(mod):
 import patsy.mgcv_cubic_splines
 _reexport(patsy.mgcv_cubic_splines)
 
-import patsy.poly
-_reexport(patsy.poly)
+import patsy.polynomials
+_reexport(patsy.polynomials)
 
 # XX FIXME: we aren't exporting any of the explicit parsing interface
 # yet. Need to figure out how to do that.
diff --git a/patsy/contrasts.py b/patsy/contrasts.py
index 3f1cf54..469aac0 100644
--- a/patsy/contrasts.py
+++ b/patsy/contrasts.py
@@ -17,6 +17,7 @@
 from patsy.util import (repr_pretty_delegate, repr_pretty_impl,
                         safe_issubdtype,
                         no_pickling, assert_no_pickling)
+from patsy.polynomials import Poly as Polynomial
 
 class ContrastMatrix(object):
     """A simple container for a matrix used for coding categorical factors.
@@ -263,11 +264,9 @@ def _code_either(self, intercept, levels):
         # quadratic, etc., functions of the raw scores, and then use 'qr' to
         # orthogonalize each column against those to its left.
         scores -= scores.mean()
-        raw_poly = scores.reshape((-1, 1)) ** np.arange(n).reshape((1, -1))
-        q, r = np.linalg.qr(raw_poly)
-        q *= np.sign(np.diag(r))
-        q /= np.sqrt(np.sum(q ** 2, axis=1))
-        # The constant term is always all 1's -- we don't normalize it.
+        raw_poly = Polynomial.vander(scores, n - 1, 'poly')
+        alpha, norm, beta = Polynomial.gen_qr(raw_poly, n - 1)
+        q = Polynomial.apply_qr(raw_poly, n - 1, alpha, norm, beta)
         q[:, 0] = 1
         names = [".Constant", ".Linear", ".Quadratic", ".Cubic"]
         names += ["^%s" % (i,) for i in range(4, n)]
diff --git a/patsy/poly.py b/patsy/polynomials.py
similarity index 50%
rename from patsy/poly.py
rename to patsy/polynomials.py
index a20402b..56ca9b5 100644
--- a/patsy/poly.py
+++ b/patsy/polynomials.py
@@ -16,7 +16,7 @@
     import pandas
 
 class Poly(object):
-    """poly(x, degree=1, raw=False)
+    """poly(x, degree=3, polytype='poly', raw=False, scaler=None)
 
     Generates an orthogonal polynomial transformation of x of degree.
     Generic usage is something along the lines of::
@@ -26,19 +26,29 @@ class Poly(object):
     to fit ``y`` as a function of ``x``, with a 4th degree polynomial.
 
     :arg degree: The number of degrees for the polynomial expansion.
+    :arg polytype: Either poly (the default), legendre, laguerre, hermite, or
+      hermanite_e.
     :arg raw: When raw is False (the default), will return orthogonal
       polynomials.
+    :arg scaler: Choice of 'qr' (default when raw=False) for QR-
+      decomposition or 'standardize'. 
 
     .. versionadded:: 0.4.1
     """
     def __init__(self):
         self._tmp = {}
-        self._degree = None
-        self._raw = None
 
-    def memorize_chunk(self, x, degree=3, raw=False):
+    def memorize_chunk(self, x, degree=3, polytype='poly', raw=False,
+                       scaler=None):
+        if not raw and (scaler is None):
+            scaler = 'qr'
+        if scaler not in ('qr', 'standardize', None):
+            raise ValueError('input to \'scaler\' %s is not a valid '
+                             'scaling technique' % scaler)
         args = {"degree": degree,
-                "raw": raw
+                "raw": raw,
+                "scaler": scaler,
+                'polytype': polytype
                 }
         self._tmp["args"] = args
         # XX: check whether we need x values before saving them
@@ -63,35 +73,27 @@ def memorize_finish(self):
                              % (args["degree"],))
         if int(args["degree"]) != args["degree"]:
             raise ValueError("degree must be an integer (not %r)"
-                             % (self._degree,))
+                             % (args['degree'],))
 
         # These are guaranteed to all be 1d vectors by the code above
         scores = np.concatenate(tmp["xs"])
-        scores_mean = scores.mean()
-        # scores -= scores_mean
-        self.scores_mean = scores_mean
+
         n = args['degree']
         self.degree = n
-        raw_poly = scores.reshape((-1, 1)) ** np.arange(n + 1).reshape((1, -1))
-        raw = args['raw']
-        self.raw = raw
-        if not raw:
-            q, r = np.linalg.qr(raw_poly)
-            # Q is now orthognoal of degree n. To match what R is doing, we
-            # need to use the three-term recurrence technique to calculate
-            # new alpha, beta, and norm.
-
-            self.alpha = (np.sum(scores.reshape((-1, 1)) * q[:, :n] ** 2,
-                                 axis=0) /
-                          np.sum(q[:, :n] ** 2, axis=0))
-
-            # For reasons I don't understand, the norms R uses are based off
-            # of the diagonal of the r upper triangular matrix.
-
-            self.norm = np.linalg.norm(q * np.diag(r), axis=0)
-            self.beta = (self.norm[1:] / self.norm[:n]) ** 2
-
-    def transform(self, x, degree=3, raw=False):
+        self.scaler = args['scaler']
+        self.raw = args['raw']
+        self.polytype = args['polytype']
+
+        if self.scaler is not None:
+            raw_poly = self.vander(scores, n, self.polytype)
+
+        if self.scaler == 'qr':
+            self.alpha, self.norm, self.beta = self.gen_qr(raw_poly, n)
+
+        if self.scaler == 'standardize':
+            self.mean, self.var = self.gen_standardize(raw_poly)
+
+    def transform(self, x, degree=3, polytype='poly', raw=False, scaler=None):
         if have_pandas:
             if isinstance(x, (pandas.Series, pandas.DataFrame)):
                 to_pandas = True
@@ -102,21 +104,14 @@ def transform(self, x, degree=3, raw=False):
             to_pandas = False
         x = np.array(x, ndmin=1).flatten()
 
-        if self.raw:
-            n = self.degree
-            p = x.reshape((-1, 1)) ** np.arange(n + 1).reshape((1, -1))
-        else:
-            # This is where the three-term recurrance technique is unwound.
+        n = self.degree
+        p = self.vander(x, n, self.polytype)
 
-            p = np.empty((x.shape[0], self.degree + 1))
-            p[:, 0] = 1
+        if self.scaler == 'qr':
+            p = self.apply_qr(p, n, self.alpha, self.norm, self.beta)
 
-            for i in np.arange(self.degree):
-                p[:, i + 1] = (x - self.alpha[i]) * p[:, i]
-                if i > 0:
-                    p[:, i + 1] = (p[:, i + 1] -
-                                   (self.beta[i - 1] * p[:, i - 1]))
-            p /= self.norm
+        if self.scaler == 'standardize':
+            p = self.apply_standardize(p, self.mean, self.var)
 
         p = p[:, 1:]
         if to_pandas:
@@ -124,6 +119,60 @@ def transform(self, x, degree=3, raw=False):
             p.index = idx
         return p
 
+    @staticmethod
+    def vander(x, n, polytype):
+        v_func = {'poly': np.polynomial.polynomial.polyvander,
+                  'cheb': np.polynomial.chebyshev.chebvander,
+                  'legendre': np.polynomial.legendre.legvander,
+                  'laguerre': np.polynomial.laguerre.lagvander,
+                  'hermite': np.polynomial.hermite.hermvander,
+                  'hermite_e': np.polynomial.hermite_e.hermevander}
+        raw_poly = v_func[polytype](x, n)
+        return raw_poly
+
+    @staticmethod
+    def gen_qr(raw_poly, n):
+        # Q is now orthognoal of degree n. To match what R is doing, we
+        # need to use the three-term recurrence technique to calculate
+        # new alpha, beta, and norm.
+        x = raw_poly[:, 1]
+        q, r = np.linalg.qr(raw_poly)
+        alpha = (np.sum(x.reshape((-1, 1)) * q[:, :n] ** 2, axis=0) /
+                 np.sum(q[:, :n] ** 2, axis=0))
+
+        # For reasons I don't understand, the norms R uses are based off
+        # of the diagonal of the r upper triangular matrix.
+
+        norm = np.linalg.norm(q * np.diag(r), axis=0)
+        beta = (norm[1:] / norm[:n]) ** 2
+        return alpha, norm, beta
+
+    @staticmethod
+    def gen_standardize(raw_poly):
+        return raw_poly.mean(axis=0), raw_poly.var(axis=0)
+
+    @staticmethod
+    def apply_qr(x, n, alpha, norm, beta):
+        # This is where the three-term recurrence is unwound for the QR
+        # decomposition.
+        if np.ndim(x) == 2:
+            x = x[:, 1]
+        p = np.empty((x.shape[0], n + 1))
+        p[:, 0] = 1
+
+        for i in np.arange(n):
+            p[:, i + 1] = (x - alpha[i]) * p[:, i]
+            if i > 0:
+                p[:, i + 1] = (p[:, i + 1] - (beta[i - 1] * p[:, i - 1]))
+        p /= norm
+        return p
+
+    @staticmethod
+    def apply_standardize(x, mean, var):
+        x[:, 1:] = ((x[:, 1:] - mean[1:]) / (var[1:] ** 0.5))
+        return x
+        
+
     __getstate__ = no_pickling
 
 poly = stateful_transform(Poly)
@@ -166,6 +215,24 @@ def test_poly_compat():
         start_idx = stop_idx + 1
     assert tests_ran == R_poly_num_tests
 
+def test_poly_smoke():
+    # Test that standardized values match.
+    x = np.arange(27)
+    vanders = ['poly', 'cheb', 'legendre', 'laguerre', 'hermite', 'hermite_e']
+    scalers = ['raw', 'qr', 'standardize']
+    for v in vanders:
+        p1 = poly(x, polytype=v, scaler='standardize')
+        p2 = poly(x, polytype=v, raw=True)
+        p2 = (p2 - p2.mean(axis=0)) / p2.std(axis=0)
+        np.testing.assert_allclose(p1, p2)
+
+    # Don't have tests for all this... so just make sure it works.
+    for v in vanders:
+        for s in scalers:
+            if s == 'raw':
+                poly(x, raw=True, polytype=v)
+            else:
+                poly(x, scaler=s, polytype=v)
 
 def test_poly_errors():
     from nose.tools import assert_raises
@@ -177,3 +244,9 @@ def test_poly_errors():
     assert_raises(ValueError, poly, x, degree=-1)
     assert_raises(ValueError, poly, x, degree=0)
     assert_raises(ValueError, poly, x, degree=3.5)
+
+    #Invalid Poly Type
+    assert_raises(KeyError, poly, x, polytype='foo')
+
+    #Invalid scaling type
+    assert_raises(ValueError, poly, x, scaler='bar')
diff --git a/patsy/test_poly_data.py b/patsy/test_poly_data.py
index cb4481b..668ee28 100644
--- a/patsy/test_poly_data.py
+++ b/patsy/test_poly_data.py
@@ -1,4 +1,4 @@
-# This file auto-generated by tools/get-R-bs-test-vectors.R
+# This file auto-generated by tools/get-R-poly-test-vectors.R
 # Using: R version 3.2.4 Revised (2016-03-16 r70336)
 import numpy as np
 R_poly_test_x = np.array([1, 1.5, 2.25, 3.375, 5.0625, 7.59375, 11.390625, 17.0859375, 25.62890625, 38.443359375, 57.6650390625, 86.49755859375, 129.746337890625, 194.6195068359375, 291.92926025390625, 437.89389038085938, 656.84083557128906, 985.26125335693359, 1477.8918800354004, 2216.8378200531006, ])
diff --git a/tools/get-R-poly-test-vectors.R b/tools/get-R-poly-test-vectors.R
index d97c84e..68e9678 100644
--- a/tools/get-R-poly-test-vectors.R
+++ b/tools/get-R-poly-test-vectors.R
@@ -1,4 +1,4 @@
-cat("# This file auto-generated by tools/get-R-bs-test-vectors.R\n")
+cat("# This file auto-generated by tools/get-R-poly-test-vectors.R\n")
 cat(sprintf("# Using: %s\n", R.Version()$version.string))
 cat("import numpy as np\n")
 

From becad9a7a94726c014c43f6dcd4e2f8289a0307b Mon Sep 17 00:00:00 2001
From: thequackdaddy <pquack@gmail.com>
Date: Thu, 3 Nov 2016 19:54:06 -0500
Subject: [PATCH 3/6] Removing other polytypes, standardize procedure.

---
 patsy/contrasts.py   |  2 +-
 patsy/polynomials.py | 84 ++++++++------------------------------------
 2 files changed, 15 insertions(+), 71 deletions(-)

diff --git a/patsy/contrasts.py b/patsy/contrasts.py
index 469aac0..010afb3 100644
--- a/patsy/contrasts.py
+++ b/patsy/contrasts.py
@@ -264,7 +264,7 @@ def _code_either(self, intercept, levels):
         # quadratic, etc., functions of the raw scores, and then use 'qr' to
         # orthogonalize each column against those to its left.
         scores -= scores.mean()
-        raw_poly = Polynomial.vander(scores, n - 1, 'poly')
+        raw_poly = Polynomial.vander(scores, n - 1)
         alpha, norm, beta = Polynomial.gen_qr(raw_poly, n - 1)
         q = Polynomial.apply_qr(raw_poly, n - 1, alpha, norm, beta)
         q[:, 0] = 1
diff --git a/patsy/polynomials.py b/patsy/polynomials.py
index 56ca9b5..9137864 100644
--- a/patsy/polynomials.py
+++ b/patsy/polynomials.py
@@ -5,18 +5,19 @@
 # R-compatible poly function
 
 # These are made available in the patsy.* namespace
-__all__ = ["poly"]
-
 import numpy as np
 
 from patsy.util import have_pandas, no_pickling, assert_no_pickling
 from patsy.state import stateful_transform
 
+__all__ = ["poly"]
+
 if have_pandas:
     import pandas
 
+
 class Poly(object):
-    """poly(x, degree=3, polytype='poly', raw=False, scaler=None)
+    """poly(x, degree=3, raw=False)
 
     Generates an orthogonal polynomial transformation of x of degree.
     Generic usage is something along the lines of::
@@ -26,29 +27,17 @@ class Poly(object):
     to fit ``y`` as a function of ``x``, with a 4th degree polynomial.
 
     :arg degree: The number of degrees for the polynomial expansion.
-    :arg polytype: Either poly (the default), legendre, laguerre, hermite, or
-      hermanite_e.
     :arg raw: When raw is False (the default), will return orthogonal
       polynomials.
-    :arg scaler: Choice of 'qr' (default when raw=False) for QR-
-      decomposition or 'standardize'. 
 
     .. versionadded:: 0.4.1
     """
     def __init__(self):
         self._tmp = {}
 
-    def memorize_chunk(self, x, degree=3, polytype='poly', raw=False,
-                       scaler=None):
-        if not raw and (scaler is None):
-            scaler = 'qr'
-        if scaler not in ('qr', 'standardize', None):
-            raise ValueError('input to \'scaler\' %s is not a valid '
-                             'scaling technique' % scaler)
+    def memorize_chunk(self, x, degree=3, raw=False):
         args = {"degree": degree,
-                "raw": raw,
-                "scaler": scaler,
-                'polytype': polytype
+                "raw": raw
                 }
         self._tmp["args"] = args
         # XX: check whether we need x values before saving them
@@ -80,19 +69,12 @@ def memorize_finish(self):
 
         n = args['degree']
         self.degree = n
-        self.scaler = args['scaler']
         self.raw = args['raw']
-        self.polytype = args['polytype']
-
-        if self.scaler is not None:
-            raw_poly = self.vander(scores, n, self.polytype)
 
-        if self.scaler == 'qr':
+        if not self.raw:
+            raw_poly = self.vander(scores, n)
             self.alpha, self.norm, self.beta = self.gen_qr(raw_poly, n)
 
-        if self.scaler == 'standardize':
-            self.mean, self.var = self.gen_standardize(raw_poly)
-
     def transform(self, x, degree=3, polytype='poly', raw=False, scaler=None):
         if have_pandas:
             if isinstance(x, (pandas.Series, pandas.DataFrame)):
@@ -120,23 +102,17 @@ def transform(self, x, degree=3, polytype='poly', raw=False, scaler=None):
         return p
 
     @staticmethod
-    def vander(x, n, polytype):
-        v_func = {'poly': np.polynomial.polynomial.polyvander,
-                  'cheb': np.polynomial.chebyshev.chebvander,
-                  'legendre': np.polynomial.legendre.legvander,
-                  'laguerre': np.polynomial.laguerre.lagvander,
-                  'hermite': np.polynomial.hermite.hermvander,
-                  'hermite_e': np.polynomial.hermite_e.hermevander}
-        raw_poly = v_func[polytype](x, n)
+    def vander(x, n):
+        raw_poly = np.polynomial.polynomial.polyvander(x, n)
         return raw_poly
 
     @staticmethod
     def gen_qr(raw_poly, n):
+        x = raw_poly[:, 1]
+        q, r = np.linalg.qr(raw_poly)
         # Q is now orthognoal of degree n. To match what R is doing, we
         # need to use the three-term recurrence technique to calculate
         # new alpha, beta, and norm.
-        x = raw_poly[:, 1]
-        q, r = np.linalg.qr(raw_poly)
         alpha = (np.sum(x.reshape((-1, 1)) * q[:, :n] ** 2, axis=0) /
                  np.sum(q[:, :n] ** 2, axis=0))
 
@@ -147,10 +123,6 @@ def gen_qr(raw_poly, n):
         beta = (norm[1:] / norm[:n]) ** 2
         return alpha, norm, beta
 
-    @staticmethod
-    def gen_standardize(raw_poly):
-        return raw_poly.mean(axis=0), raw_poly.var(axis=0)
-
     @staticmethod
     def apply_qr(x, n, alpha, norm, beta):
         # This is where the three-term recurrence is unwound for the QR
@@ -166,13 +138,6 @@ def apply_qr(x, n, alpha, norm, beta):
                 p[:, i + 1] = (p[:, i + 1] - (beta[i - 1] * p[:, i - 1]))
         p /= norm
         return p
-
-    @staticmethod
-    def apply_standardize(x, mean, var):
-        x[:, 1:] = ((x[:, 1:] - mean[1:]) / (var[1:] ** 0.5))
-        return x
-        
-
     __getstate__ = no_pickling
 
 poly = stateful_transform(Poly)
@@ -215,24 +180,6 @@ def test_poly_compat():
         start_idx = stop_idx + 1
     assert tests_ran == R_poly_num_tests
 
-def test_poly_smoke():
-    # Test that standardized values match.
-    x = np.arange(27)
-    vanders = ['poly', 'cheb', 'legendre', 'laguerre', 'hermite', 'hermite_e']
-    scalers = ['raw', 'qr', 'standardize']
-    for v in vanders:
-        p1 = poly(x, polytype=v, scaler='standardize')
-        p2 = poly(x, polytype=v, raw=True)
-        p2 = (p2 - p2.mean(axis=0)) / p2.std(axis=0)
-        np.testing.assert_allclose(p1, p2)
-
-    # Don't have tests for all this... so just make sure it works.
-    for v in vanders:
-        for s in scalers:
-            if s == 'raw':
-                poly(x, raw=True, polytype=v)
-            else:
-                poly(x, scaler=s, polytype=v)
 
 def test_poly_errors():
     from nose.tools import assert_raises
@@ -245,8 +192,5 @@ def test_poly_errors():
     assert_raises(ValueError, poly, x, degree=0)
     assert_raises(ValueError, poly, x, degree=3.5)
 
-    #Invalid Poly Type
-    assert_raises(KeyError, poly, x, polytype='foo')
-
-    #Invalid scaling type
-    assert_raises(ValueError, poly, x, scaler='bar')
+    p = poly(np.arange(1, 10), degree=3)
+    assert_no_pickling(p)

From 0cd4299024101885a3c414a2c0489ec6755b264a Mon Sep 17 00:00:00 2001
From: thequackdaddy <pquack@gmail.com>
Date: Thu, 3 Nov 2016 20:03:39 -0500
Subject: [PATCH 4/6] Missed a few other places where polytype/scaler were.

---
 patsy/polynomials.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/patsy/polynomials.py b/patsy/polynomials.py
index 9137864..25d18f7 100644
--- a/patsy/polynomials.py
+++ b/patsy/polynomials.py
@@ -75,7 +75,7 @@ def memorize_finish(self):
             raw_poly = self.vander(scores, n)
             self.alpha, self.norm, self.beta = self.gen_qr(raw_poly, n)
 
-    def transform(self, x, degree=3, polytype='poly', raw=False, scaler=None):
+    def transform(self, x, degree=3, raw=False):
         if have_pandas:
             if isinstance(x, (pandas.Series, pandas.DataFrame)):
                 to_pandas = True
@@ -87,14 +87,11 @@ def transform(self, x, degree=3, polytype='poly', raw=False, scaler=None):
         x = np.array(x, ndmin=1).flatten()
 
         n = self.degree
-        p = self.vander(x, n, self.polytype)
+        p = self.vander(x, n)
 
-        if self.scaler == 'qr':
+        if not self.raw:
             p = self.apply_qr(p, n, self.alpha, self.norm, self.beta)
 
-        if self.scaler == 'standardize':
-            p = self.apply_standardize(p, self.mean, self.var)
-
         p = p[:, 1:]
         if to_pandas:
             p = pandas.DataFrame(p)

From 0cb862bd0cc57140e8c67b97ca781bd699667bb2 Mon Sep 17 00:00:00 2001
From: thequackdaddy <pquack@gmail.com>
Date: Thu, 3 Nov 2016 20:18:04 -0500
Subject: [PATCH 5/6] assert_no_pickling again

---
 patsy/polynomials.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/patsy/polynomials.py b/patsy/polynomials.py
index 25d18f7..85a35d1 100644
--- a/patsy/polynomials.py
+++ b/patsy/polynomials.py
@@ -189,5 +189,4 @@ def test_poly_errors():
     assert_raises(ValueError, poly, x, degree=0)
     assert_raises(ValueError, poly, x, degree=3.5)
 
-    p = poly(np.arange(1, 10), degree=3)
-    assert_no_pickling(p)
+    assert_no_pickling(Poly())

From 4d107c634ee13d2392c85ab5ddb8c6a390b5ba94 Mon Sep 17 00:00:00 2001
From: thequackdaddy <pquack@gmail.com>
Date: Sat, 3 Nov 2018 19:55:14 -0500
Subject: [PATCH 6/6] Increase test coverage

---
 patsy/polynomials.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/patsy/polynomials.py b/patsy/polynomials.py
index 85a35d1..f8d54c2 100644
--- a/patsy/polynomials.py
+++ b/patsy/polynomials.py
@@ -32,6 +32,7 @@ class Poly(object):
 
     .. versionadded:: 0.4.1
     """
+
     def __init__(self):
         self._tmp = {}
 
@@ -110,8 +111,8 @@ def gen_qr(raw_poly, n):
         # Q is now orthognoal of degree n. To match what R is doing, we
         # need to use the three-term recurrence technique to calculate
         # new alpha, beta, and norm.
-        alpha = (np.sum(x.reshape((-1, 1)) * q[:, :n] ** 2, axis=0) /
-                 np.sum(q[:, :n] ** 2, axis=0))
+        alpha = (np.sum(x.reshape((-1, 1)) * q[:, :n] ** 2, axis=0)
+                 / np.sum(q[:, :n] ** 2, axis=0))
 
         # For reasons I don't understand, the norms R uses are based off
         # of the diagonal of the r upper triangular matrix.
@@ -137,6 +138,7 @@ def apply_qr(x, n, alpha, norm, beta):
         return p
     __getstate__ = no_pickling
 
+
 poly = stateful_transform(Poly)
 
 
@@ -145,6 +147,8 @@ def test_poly_compat():
     from patsy.test_poly_data import (R_poly_test_x,
                                       R_poly_test_data,
                                       R_poly_num_tests)
+    from numpy.testing import assert_allclose
+
     lines = R_poly_test_data.split("\n")
     tests_ran = 0
     start_idx = lines.index("--BEGIN TEST CASE--")
@@ -172,6 +176,14 @@ def test_poly_compat():
         output = np.asarray(eval(test_data["output"]))
         # Do the actual test
         check_stateful(Poly, False, R_poly_test_x, output, **kwargs)
+        raw_poly = Poly.vander(R_poly_test_x, kwargs['degree'])
+        if kwargs['raw']:
+            actual = raw_poly[:, 1:]
+        else:
+            alpha, norm, beta = Poly.gen_qr(raw_poly, kwargs['degree'])
+            actual = Poly.apply_qr(R_poly_test_x, kwargs['degree'], alpha,
+                                   norm, beta)[:, 1:]
+        assert_allclose(actual, output)
         tests_ran += 1
         # Set up for the next one
         start_idx = stop_idx + 1