Pass both a Text and a Matrix instance into Skimmer#build().

davidmcclure · davidmcclure · commit a4e2862ddf70 · 2015-04-29T19:44:15.000-07:00
diff --git a/test/matrix/test_anchored_pairs.py b/test/matrix/test_anchored_pairs.py
@@ -1,7 +1,7 @@
 
 
 from textplot.text import Text
-from textplot.matrix import TextMatrix
+from textplot.matrix import Matrix
 
 
 def test_anchored_pairs():
@@ -12,9 +12,9 @@ def test_anchored_pairs():
     """
 
     t = Text('aa bb cc dd')
-    m = TextMatrix(t)
+    m = Matrix()
 
-    m.index()
+    m.index(t)
 
     pairs = m.anchored_pairs('aa')
 
diff --git a/test/matrix/test_index.py b/test/matrix/test_index.py
@@ -1,7 +1,7 @@
 
 
 from textplot.text import Text
-from textplot.matrix import TextMatrix
+from textplot.matrix import Matrix
 
 
 def test_index():
@@ -11,9 +11,9 @@ def test_index():
     """
 
     t = Text('aa bb cc')
-    m = TextMatrix(t)
+    m = Matrix()
 
-    m.index()
+    m.index(t)
 
     assert m.get_pair('aa', 'bb') == t.score_braycurtis('aa', 'bb')
     assert m.get_pair('aa', 'cc') == t.score_braycurtis('aa', 'cc')
@@ -27,9 +27,9 @@ def test_term_subset():
     """
 
     t = Text('aa bb cc')
-    m = TextMatrix(t)
+    m = Matrix()
 
-    m.index(['aa', 'bb'])
+    m.index(t, ['aa', 'bb'])
 
     # Should index 'aa' and 'bb'.
     assert m.get_pair('aa', 'bb') == t.score_braycurtis('aa', 'bb')
diff --git a/textplot/__init__.py b/textplot/__init__.py
@@ -1,8 +1,8 @@
 
 
 from textplot.text import Text
-from textplot.matrix import TextMatrix
 from textplot.graphs import Skimmer
+from textplot.matrix import Matrix
 
 
 def frequent(path, term_depth=500, skim_depth=10, d_weights=False, **kwargs):
@@ -13,14 +13,14 @@ def frequent(path, term_depth=500, skim_depth=10, d_weights=False, **kwargs):
 
     print('Tokenizing text...')
     t = Text.from_file(path)
-    m = TextMatrix(t)
+    m = Matrix()
 
     print('Indexing terms:')
-    m.index(t.most_frequent_terms(term_depth), **kwargs)
+    m.index(t, t.most_frequent_terms(term_depth), **kwargs)
 
     g = Skimmer()
 
     print('Generating graph:')
-    g.build(m, skim_depth, d_weights)
+    g.build(t, m, skim_depth, d_weights)
 
     return g
diff --git a/textplot/graphs.py b/textplot/graphs.py
@@ -3,7 +3,6 @@
 import networkx as nx
 import matplotlib.pyplot as plt
 
-from textplot.matrix import TextMatrix
 from abc import ABCMeta, abstractmethod
 from clint.textui.progress import bar
 
@@ -70,7 +69,7 @@ def write_graphml(self, path):
 class Skimmer(Graph):
 
 
-    def build(self, matrix, skim_depth=10, d_weights=False):
+    def build(self, text, matrix, skim_depth=10, d_weights=False):
 
         """
         1. For each term in the passed matrix, score its KDE similarity with
@@ -80,14 +79,15 @@ def build(self, matrix, skim_depth=10, d_weights=False):
         pairs and add them as edges.
 
         Args:
-            matrix (TextMatrix): An indexed term matrix.
+            text (Text): The source text instance.
+            matrix (Matrix): An indexed term matrix.
             skim_depth (int): The number of siblings for each term.
             d_weights (bool): If true, give "close" words low edge weights.
         """
 
         for anchor in bar(matrix.keys):
 
-            n1 = matrix.text.unstem(anchor)
+            n1 = text.unstem(anchor)
 
             # Heaviest pair scores:
             pairs = matrix.anchored_pairs(anchor).items()
@@ -97,7 +97,7 @@ def build(self, matrix, skim_depth=10, d_weights=False):
                 # score, so that similar words are connected by "short" edges.
                 if d_weights: weight = 1-weight
 
-                n2 = matrix.text.unstem(term)
+                n2 = text.unstem(term)
 
                 # NetworkX does not handle numpy types when writing graphml,
                 # so we cast the weight to a regular float.
diff --git a/textplot/matrix.py b/textplot/matrix.py
@@ -80,23 +80,7 @@ def get_pair(self, term1, term2):
         return self.pairs.get(key, None)
 
 
-class TextMatrix(Matrix):
-
-
-    def __init__(self, text):
-
-        """
-        Set the source text instance.
-
-        Args:
-            text (Text): The source text.
-        """
-
-        super().__init__()
-        self.text = text
-
-
-    def index(self, terms=None, **kwargs):
+    def index(self, text, terms=None, **kwargs):
 
         """
         Index all term pair distances.
@@ -109,15 +93,15 @@ def index(self, terms=None, **kwargs):
         self.clear()
 
         # By default, use all terms.
-        terms = terms or self.text.terms.keys()
+        terms = terms or text.terms.keys()
 
         pairs = combinations(terms, 2)
         count = comb(len(terms), 2)
 
         for t1, t2 in bar(pairs, expected_size=count, every=1000):
 
             # Set the Bray-Curtis distance.
-            score = self.text.score_braycurtis(t1, t2, **kwargs)
+            score = text.score_braycurtis(t1, t2, **kwargs)
             self.set_pair(t1, t2, score)