Skip to content

Commit a4e2862

Browse files
committed
Pass both a Text and a Matrix instance into Skimmer#build().
1 parent eb89154 commit a4e2862

File tree

5 files changed

+20
-36
lines changed

5 files changed

+20
-36
lines changed

test/matrix/test_anchored_pairs.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22

33
from textplot.text import Text
4-
from textplot.matrix import TextMatrix
4+
from textplot.matrix import Matrix
55

66

77
def test_anchored_pairs():
@@ -12,9 +12,9 @@ def test_anchored_pairs():
1212
"""
1313

1414
t = Text('aa bb cc dd')
15-
m = TextMatrix(t)
15+
m = Matrix()
1616

17-
m.index()
17+
m.index(t)
1818

1919
pairs = m.anchored_pairs('aa')
2020

test/matrix/test_index.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22

33
from textplot.text import Text
4-
from textplot.matrix import TextMatrix
4+
from textplot.matrix import Matrix
55

66

77
def test_index():
@@ -11,9 +11,9 @@ def test_index():
1111
"""
1212

1313
t = Text('aa bb cc')
14-
m = TextMatrix(t)
14+
m = Matrix()
1515

16-
m.index()
16+
m.index(t)
1717

1818
assert m.get_pair('aa', 'bb') == t.score_braycurtis('aa', 'bb')
1919
assert m.get_pair('aa', 'cc') == t.score_braycurtis('aa', 'cc')
@@ -27,9 +27,9 @@ def test_term_subset():
2727
"""
2828

2929
t = Text('aa bb cc')
30-
m = TextMatrix(t)
30+
m = Matrix()
3131

32-
m.index(['aa', 'bb'])
32+
m.index(t, ['aa', 'bb'])
3333

3434
# Should index 'aa' and 'bb'.
3535
assert m.get_pair('aa', 'bb') == t.score_braycurtis('aa', 'bb')

textplot/__init__.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11

22

33
from textplot.text import Text
4-
from textplot.matrix import TextMatrix
54
from textplot.graphs import Skimmer
5+
from textplot.matrix import Matrix
66

77

88
def frequent(path, term_depth=500, skim_depth=10, d_weights=False, **kwargs):
@@ -13,14 +13,14 @@ def frequent(path, term_depth=500, skim_depth=10, d_weights=False, **kwargs):
1313

1414
print('Tokenizing text...')
1515
t = Text.from_file(path)
16-
m = TextMatrix(t)
16+
m = Matrix()
1717

1818
print('Indexing terms:')
19-
m.index(t.most_frequent_terms(term_depth), **kwargs)
19+
m.index(t, t.most_frequent_terms(term_depth), **kwargs)
2020

2121
g = Skimmer()
2222

2323
print('Generating graph:')
24-
g.build(m, skim_depth, d_weights)
24+
g.build(t, m, skim_depth, d_weights)
2525

2626
return g

textplot/graphs.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import networkx as nx
44
import matplotlib.pyplot as plt
55

6-
from textplot.matrix import TextMatrix
76
from abc import ABCMeta, abstractmethod
87
from clint.textui.progress import bar
98

@@ -70,7 +69,7 @@ def write_graphml(self, path):
7069
class Skimmer(Graph):
7170

7271

73-
def build(self, matrix, skim_depth=10, d_weights=False):
72+
def build(self, text, matrix, skim_depth=10, d_weights=False):
7473

7574
"""
7675
1. For each term in the passed matrix, score its KDE similarity with
@@ -80,14 +79,15 @@ def build(self, matrix, skim_depth=10, d_weights=False):
8079
pairs and add them as edges.
8180
8281
Args:
83-
matrix (TextMatrix): An indexed term matrix.
82+
text (Text): The source text instance.
83+
matrix (Matrix): An indexed term matrix.
8484
skim_depth (int): The number of siblings for each term.
8585
d_weights (bool): If true, give "close" words low edge weights.
8686
"""
8787

8888
for anchor in bar(matrix.keys):
8989

90-
n1 = matrix.text.unstem(anchor)
90+
n1 = text.unstem(anchor)
9191

9292
# Heaviest pair scores:
9393
pairs = matrix.anchored_pairs(anchor).items()
@@ -97,7 +97,7 @@ def build(self, matrix, skim_depth=10, d_weights=False):
9797
# score, so that similar words are connected by "short" edges.
9898
if d_weights: weight = 1-weight
9999

100-
n2 = matrix.text.unstem(term)
100+
n2 = text.unstem(term)
101101

102102
# NetworkX does not handle numpy types when writing graphml,
103103
# so we cast the weight to a regular float.

textplot/matrix.py

+3-19
Original file line numberDiff line numberDiff line change
@@ -80,23 +80,7 @@ def get_pair(self, term1, term2):
8080
return self.pairs.get(key, None)
8181

8282

83-
class TextMatrix(Matrix):
84-
85-
86-
def __init__(self, text):
87-
88-
"""
89-
Set the source text instance.
90-
91-
Args:
92-
text (Text): The source text.
93-
"""
94-
95-
super().__init__()
96-
self.text = text
97-
98-
99-
def index(self, terms=None, **kwargs):
83+
def index(self, text, terms=None, **kwargs):
10084

10185
"""
10286
Index all term pair distances.
@@ -109,15 +93,15 @@ def index(self, terms=None, **kwargs):
10993
self.clear()
11094

11195
# By default, use all terms.
112-
terms = terms or self.text.terms.keys()
96+
terms = terms or text.terms.keys()
11397

11498
pairs = combinations(terms, 2)
11599
count = comb(len(terms), 2)
116100

117101
for t1, t2 in bar(pairs, expected_size=count, every=1000):
118102

119103
# Set the Bray-Curtis distance.
120-
score = self.text.score_braycurtis(t1, t2, **kwargs)
104+
score = text.score_braycurtis(t1, t2, **kwargs)
121105
self.set_pair(t1, t2, score)
122106

123107

0 commit comments

Comments
 (0)