vene · vene · Feb 10, 2019 · Feb 10, 2019 · Feb 10, 2019 · Feb 10, 2019
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,28 @@
+notifications:
+  email: false
+
+language: python
+
+matrix:
+    include:
+        - os: linux
+          python: 3.7
+          dist: xenial
+          sudo: true
+
+install:
+  - wget https://github.com/andre-martins/AD3/archive/2.2.1.tar.gz
+  - tar zxvf 2.2.1.tar.gz
+  # Compile AD3 inside its source tree (passed as AD3_DIR to setup.py below).
+  # `make -C` propagates a build failure instead of hiding it behind `cd ..`.
+  - make -C AD3-2.2.1
+  - pip install pytest numpy ad3==2.2.1 cython
+  - pip install https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp37-cp37m-linux_x86_64.whl
+  - AD3_DIR=AD3-2.2.1/ python setup.py bdist_wheel
+  - pip install --pre --no-index --find-links dist/ sparsemap
+
+script:
+  - echo "Running tests"
+  # Run from an empty directory (presumably so the installed wheel is tested).
+  # Chain with `&&` so pytest's exit status decides the build result.
+  - mkdir empty_folder && (cd empty_folder && pytest -vs --pyargs sparsemap)
diff --git a/README.md b/README.md
@@ -2,6 +2,8 @@
 
 ![SparseMAP cartoon](sparsemap.png?raw=true "SparseMAP cartoon")
 
+[![Build Status](https://travis-ci.org/vene/sparsemap.svg?branch=master)](https://travis-ci.org/vene/sparsemap)
+
 SparseMAP is a new method for **sparse structured inference,**
 able to automatically select only a few global structures:
 it is  situated between MAP inference, which picks a single structure, 
@@ -29,28 +31,27 @@ to the `cpp` folder for an implementation, and see our paper,
 ## Current state of the codebase
 
 We are working to slowly provide useful implementations. At the moment,
-the codebase provides a generic pytorch layer supporting version 0.2,
-as well as particular instantiations for sequence, matching, and tree layers.
+the codebase provides a generic pytorch 1.0 layer, as well as particular 
+instantiations for sequence, matching, and tree layers.
 
 Dynet custom layers, as well as the SparseMAP loss, are on the way.
 
 
 ## Python Setup
 
-Requirements: numpy, scipy, Cython, pytorch=0.2, and ad3 >= 2.2
+Requirements: numpy, scipy, Cython, pytorch>=1.0, and ad3 >= 2.2
 
 1. Set the `AD3_DIR` environment variable to point to the
-   [AD3](https://github.com/andre-martins/ad3) source directory.
+   [AD3](https://github.com/andre-martins/ad3) source directory,
+   where you have compiled AD3.
 
-2. Inside the `python` dir, run  `python setup.py build_ext --inplace`.
+2. Run `pip install .` (optionally with the `-e` flag).
 
 
 ### Notes on testing
 
-The implemented layers pass numerical tests. However, the pytorch
-gradcheck (as of version 0.2) has a very strict "reentrant" test, which we fail
-due to tiny numerical differences. To reliably check gradients, please comment
-out the `if not reentrant: ...` part of pytorch's gradcheck.py.
+Because of slight numerical differences, we had to relax the reentrancy
+test from pytorch's gradcheck.
 
 ## Dynet (c++) setup:
 

diff --git a/python/setup.py b/python/setup.py
diff --git a/python/sparsemap/__init__.py b/python/sparsemap/__init__.py
@@ -1 +1,3 @@
 from ._sparsemap import sparsemap
+from . import layers_pt
+from . import fw_solver
diff --git a/python/sparsemap/_sparsemap.pyx b/python/sparsemap/_sparsemap.pyx
@@ -46,7 +46,7 @@ cpdef sparsemap(PGenericFactor f,
         vector[Configuration] active_set_c
         vector[double] distribution
         vector[double] inverse_A
-        vector[double] M, Madd
+        vector[double] M,N
 
         GenericFactor* gf
 
@@ -69,27 +69,27 @@ cpdef sparsemap(PGenericFactor f,
 
     active_set_c = gf.GetQPActiveSet()
     distribution = gf.GetQPDistribution()
-    inverse_A = gf.GetQPInvA()
-    gf.GetCorrespondence(&M, &Madd)
+    inverse = gf.GetQPInvA()
+    gf.GetCorrespondence(&M, &N)
 
     n_active = active_set_c.size()
     n_add = post_additionals.size()
 
     post_unaries_np = asfloatvec(post_unaries.data(), n_var)
     post_additionals_np = asfloatvec(post_additionals.data(), n_add)
     distribution_np = asfloatvec(distribution.data(), n_active)
-    invA_np = asfloatarray(inverse_A.data(), 1 + n_active, 1 + n_active)
+    inv_np = asfloatarray(inverse.data(), 1 + n_active, 1 + n_active)
     M_np = asfloatarray(M.data(), n_active, n_var)
-    Madd_np = asfloatarray(Madd.data(), n_active, n_add)
+    N_np = asfloatarray(N.data(), n_active, n_add)
 
     active_set_py = [f._cast_configuration(x) for x in active_set_c]
 
     solver_data = {
         'active_set': active_set_py,
         'distribution': distribution_np,
-        'inverse_A': invA_np,
+        'inverse': inv_np,
         'M': M_np,
-        'Madd': Madd_np
+        'N': N_np
     }
 
     return post_unaries_np, post_additionals_np, solver_data
diff --git a/python/sparsemap/fw_solver.py b/python/sparsemap/fw_solver.py
@@ -10,9 +10,7 @@
 from collections import defaultdict
 
 import numpy as np
-from numpy.testing import assert_allclose
 
-import pytest
 
 class SparseMAPFW(object):
 
@@ -213,145 +211,3 @@ def solve(self, eta_u, eta_v, full_path=False):
             return u, v, active_set, objs, size
         else:
             return u, v, active_set
-
-
-@pytest.mark.parametrize('variant', ('vanilla', 'pairwise', 'away-step'))
-def test_pairwise_factor(variant):
-
-    class PairwiseFactor(object):
-        """A factor with two binary variables and a coupling between them."""
-
-        def vertex(self, y):
-
-            # y is a tuple (0, 0), (0, 1), (1, 0) or (1, 1)
-            u = np.array(y, dtype=np.float)
-            v = np.atleast_1d(np.prod(u))
-            return u, v
-
-        def map_oracle(self, eta_u, eta_v):
-
-            best_score = -np.inf
-            best_y = None
-            for x1 in (0, 1):
-                for x2 in (0, 1):
-                    y = (x1, x2)
-                    u, v = self.vertex(y)
-
-                    score = np.dot(u, eta_u) + np.dot(v, eta_v)
-                    if score > best_score:
-                        best_score = score
-                        best_y = y
-            return best_y
-
-        def qp(self, eta_u, eta_v):
-            """Prop 6.5 in Andre Martins' thesis"""
-
-            c1, c2, c12 = eta_u[0], eta_u[1], eta_v[0]
-
-            flip_sign = False
-            if c12 < 0:
-                flip_sign = True
-                c1, c2, c12 = c1 + c12, 1 - c2, -c12
-
-            if c1 > c2 + c12:
-                u = [c1, c2 + c12]
-            elif c2 > c1 + c12:
-                u = [c1 + c12, c2]
-            else:
-                uu = (c1 + c2 + c12) / 2
-                u = [uu, uu]
-
-            u = np.clip(np.array(u), 0, 1)
-            v = np.atleast_1d(np.min(u))
-
-            if flip_sign:
-                u[1] = 1 - u[1]
-                v[0] = u[0] - v[0]
-
-            return u, v
-
-    pw = PairwiseFactor()
-    fw = SparseMAPFW(pw, max_iter=10000, tol=1e-12, variant=variant)
-
-    params = [
-        (np.array([0, 0]), np.array([0])),
-        (np.array([100, 0]), np.array([0])),
-        (np.array([0, 100]), np.array([0])),
-        (np.array([100, 0]), np.array([-100])),
-        (np.array([0, 100]), np.array([-100]))
-    ]
-
-    rng = np.random.RandomState(0)
-    for _ in range(20):
-        eta_u = rng.randn(2)
-        eta_v = rng.randn(1)
-        params.append((eta_u, eta_v))
-
-    for eta_u, eta_v in params:
-
-        u, v, active_set = fw.solve(eta_u, eta_v)
-        ustar, vstar = pw.qp(eta_u, eta_v)
-
-        uv = np.concatenate([u, v])
-        uvstar = np.concatenate([ustar, vstar])
-
-        assert_allclose(uv, uvstar, atol=1e-10)
-
-
-@pytest.mark.parametrize('variant', ('vanilla', 'pairwise', 'away-step'))
-@pytest.mark.parametrize('k', (1, 4, 20))
-def test_xor(variant, k):
-    class XORFactor(object):
-        """A one-of-K factor"""
-
-        def __init__(self, k):
-            self.k = k
-
-        def vertex(self, y):
-            # y is an integer between 0 and k-1
-            u = np.zeros(k)
-            u[y] = 1
-            v = np.array(())
-
-            return u, v
-
-        def map_oracle(self, eta_u, eta_v):
-            return np.argmax(eta_u)
-
-        def qp(self, eta_u, eta_v):
-            """Projection onto the simplex"""
-            z = 1
-            v = np.array(eta_u)
-            n_features = v.shape[0]
-            u = np.sort(v)[::-1]
-            cssv = np.cumsum(u) - z
-            ind = np.arange(n_features) + 1
-            cond = u - cssv / ind > 0
-            rho = ind[cond][-1]
-            theta = cssv[cond][-1] / float(rho)
-            uu = np.maximum(v - theta, 0)
-            vv = np.array(())
-            return uu, vv
-
-    xor = XORFactor(k)
-    fw = SparseMAPFW(xor, max_iter=10000, tol=1e-12, variant=variant)
-
-    params = [np.zeros(k), np.ones(k), np.full(k, -1)]
-
-    rng = np.random.RandomState(0)
-    for _ in range(20):
-        eta_u = rng.randn(k)
-        params.append(eta_u)
-
-    for eta_u in params:
-
-        # try different ways of supplying empty eta_v
-        for eta_v in (np.array(()), [], 0, None):
-
-            u, v, active_set = fw.solve(eta_u, eta_v)
-            ustar, vstar = xor.qp(eta_u, eta_v)
-
-            uv = np.concatenate([u, v])
-            uvstar = np.concatenate([ustar, vstar])
-
-            assert_allclose(uv, uvstar, atol=1e-10)
diff --git a/python/sparsemap/layers_pt/__init__.py b/python/sparsemap/layers_pt/__init__.py
@@ -0,0 +1,6 @@
+# from .matching_layer import MatchingSparseMarginals
+# from .seq_layer import SequenceSparseMarginals
+# from .seq_layer import SequenceDistanceSparseMarginals
+# from .seq_layer import StationarySequencePotentials
+# from .tree_layer import TreeSparseMarginals
+# from .tree_layer import TreeSparseMarginalsFast