Skip to content

Commit da998dd

Browse files
committed
Run modernizer on the code.
1 parent 07f6881 commit da998dd

24 files changed (+61 / -27 lines changed)

setup.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from __future__ import absolute_import
12
from setuptools import setup, find_packages
23
from setuptools.command.install import install
34

talon/__init__.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from __future__ import absolute_import
12
from talon.quotations import register_xpath_extensions
23
try:
34
from talon import signature

talon/constants.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from __future__ import absolute_import
12
import regex as re
23

34

talon/html_quotations.py

+1
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
messages (without quoted messages) from html
44
"""
55

6+
from __future__ import absolute_import
67
import regex as re
78

89

talon/quotations.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
original messages (without quoted messages)
66
"""
77

8+
from __future__ import absolute_import
89
import regex as re
910
import logging
1011
from copy import deepcopy
@@ -13,6 +14,7 @@
1314

1415
from talon.utils import get_delimiter, html_to_text
1516
from talon import html_quotations
17+
from six.moves import range
1618

1719

1820
log = logging.getLogger(__name__)
@@ -207,7 +209,7 @@ def mark_message_lines(lines):
207209
if splitter:
208210
# append as many splitter markers as lines in splitter
209211
splitter_lines = splitter.group().splitlines()
210-
for j in xrange(len(splitter_lines)):
212+
for j in range(len(splitter_lines)):
211213
markers[i + j] = 's'
212214

213215
# skip splitter lines
@@ -388,7 +390,7 @@ def extract_from_html(msg_body):
388390
lines_were_deleted, first_deleted, last_deleted = return_flags
389391
if lines_were_deleted:
390392
#collect checkpoints from deleted lines
391-
for i in xrange(first_deleted, last_deleted):
393+
for i in range(first_deleted, last_deleted):
392394
for checkpoint in line_checkpoints[i]:
393395
quotation_checkpoints[checkpoint] = True
394396
else:

talon/signature/__init__.py

+1
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
* signature/data/classifier
2121
"""
2222

23+
from __future__ import absolute_import
2324
import os
2425

2526
from . import extraction

talon/signature/bruteforce.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from __future__ import absolute_import
12
import logging
23

34
import regex as re
@@ -111,7 +112,7 @@ def extract_signature(msg_body):
111112

112113
return (stripped_body.strip(),
113114
signature.strip())
114-
except Exception, e:
115+
except Exception as e:
115116
log.exception('ERROR extracting signature')
116117
return (msg_body, None)
117118

talon/signature/extraction.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3+
from __future__ import absolute_import
34
import logging
45

56
import regex as re

talon/signature/learning/classifier.py

+1
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
body belongs to the signature.
66
"""
77

8+
from __future__ import absolute_import
89
from numpy import genfromtxt
910
from sklearn.svm import LinearSVC
1011
from sklearn.externals import joblib

talon/signature/learning/dataset.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,13 @@
1616
suffix which should be `_sender`.
1717
"""
1818

19+
from __future__ import absolute_import
1920
import os
2021
import regex as re
2122

2223
from talon.signature.constants import SIGNATURE_MAX_LINES
2324
from talon.signature.learning.featurespace import build_pattern, features
25+
from six.moves import range
2426

2527

2628
SENDER_SUFFIX = '_sender'
@@ -144,7 +146,7 @@ def build_extraction_dataset(folder, dataset_filename,
144146
if not sender or not msg:
145147
continue
146148
lines = msg.splitlines()
147-
for i in xrange(1, min(SIGNATURE_MAX_LINES,
149+
for i in range(1, min(SIGNATURE_MAX_LINES,
148150
len(lines)) + 1):
149151
line = lines[-i]
150152
label = -1

talon/signature/learning/featurespace.py

+3
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,12 @@
77
applying features to them.
88
"""
99

10+
from __future__ import absolute_import
1011
from talon.signature.constants import (SIGNATURE_MAX_LINES,
1112
TOO_LONG_SIGNATURE_LINE)
1213
from talon.signature.learning.helpers import *
14+
from six.moves import zip
15+
from functools import reduce
1316

1417

1518
def features(sender=''):

talon/signature/learning/helpers.py

+1
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
77
"""
88

9+
from __future__ import absolute_import
910
import unicodedata
1011
import regex as re
1112

talon/utils.py

+8-6
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
# coding:utf-8
22

3+
from __future__ import absolute_import
34
import logging
45
from random import shuffle
56
import chardet
@@ -10,6 +11,7 @@
1011
from lxml.cssselect import CSSSelector
1112

1213
from talon.constants import RE_DELIMITER
14+
import six
1315

1416

1517
def safe_format(format_string, *args, **kwargs):
@@ -28,7 +30,7 @@ def safe_format(format_string, *args, **kwargs):
2830
except (UnicodeEncodeError, UnicodeDecodeError):
2931
format_string = to_utf8(format_string)
3032
args = [to_utf8(p) for p in args]
31-
kwargs = {k: to_utf8(v) for k, v in kwargs.iteritems()}
33+
kwargs = {k: to_utf8(v) for k, v in six.iteritems(kwargs)}
3234
return format_string.format(*args, **kwargs)
3335

3436
# ignore other errors
@@ -47,7 +49,7 @@ def to_unicode(str_or_unicode, precise=False):
4749
"""
4850
encoding = quick_detect_encoding(str_or_unicode) if precise else 'utf-8'
4951
if isinstance(str_or_unicode, str):
50-
return unicode(str_or_unicode, encoding, 'replace')
52+
return six.text_type(str_or_unicode, encoding, 'replace')
5153
return str_or_unicode
5254

5355

@@ -61,7 +63,7 @@ def detect_encoding(string):
6163
detected = chardet.detect(string)
6264
if detected:
6365
return detected.get('encoding') or 'utf-8'
64-
except Exception, e:
66+
except Exception as e:
6567
pass
6668
return 'utf-8'
6769

@@ -76,7 +78,7 @@ def quick_detect_encoding(string):
7678
detected = cchardet.detect(string)
7779
if detected:
7880
return detected.get('encoding') or detect_encoding(string)
79-
except Exception, e:
81+
except Exception as e:
8082
pass
8183
return detect_encoding(string)
8284

@@ -87,7 +89,7 @@ def to_utf8(str_or_unicode):
8789
>>> utils.to_utf8(u'hi')
8890
'hi'
8991
"""
90-
if isinstance(str_or_unicode, unicode):
92+
if isinstance(str_or_unicode, six.text_type):
9193
return str_or_unicode.encode("utf-8", "ignore")
9294
return str(str_or_unicode)
9395

@@ -173,7 +175,7 @@ def _rm_excessive_newlines(s):
173175
def _encode_utf8(s):
174176
"""Encode in 'utf-8' if unicode
175177
"""
176-
return s.encode('utf-8') if isinstance(s, unicode) else s
178+
return s.encode('utf-8') if isinstance(s, six.text_type) else s
177179

178180

179181
_UTF8_DECLARATION = ('<meta http-equiv="Content-Type" content="text/html;'

tests/__init__.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from __future__ import absolute_import
12
from nose.tools import *
23
from mock import *
34

tests/html_quotations_test.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3+
from __future__ import absolute_import
34
from . import *
45
from . fixtures import *
56

tests/quotations_test.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3+
from __future__ import absolute_import
34
from . import *
45
from . fixtures import *
56

tests/signature/bruteforce_test.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3+
from __future__ import absolute_import
34
from .. import *
45

56
from talon.signature import bruteforce

tests/signature/extraction_test.py

+11-9
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3+
from __future__ import absolute_import
34
from .. import *
45

56
import os
@@ -8,6 +9,7 @@
89
from talon import signature
910
from talon.signature import extraction as e
1011
from talon.signature import bruteforce
12+
from six.moves import range
1113

1214

1315
def test_message_shorter_SIGNATURE_MAX_LINES():
@@ -127,20 +129,20 @@ def test_mark_lines():
127129

128130
def test_process_marked_lines():
129131
# no signature found
130-
eq_((range(5), None), e._process_marked_lines(range(5), 'telt'))
132+
eq_((list(range(5)), None), e._process_marked_lines(list(range(5)), 'telt'))
131133

132134
# signature in the middle of the text
133-
eq_((range(9), None), e._process_marked_lines(range(9), 'tesestelt'))
135+
eq_((list(range(9)), None), e._process_marked_lines(list(range(9)), 'tesestelt'))
134136

135137
# long line splits signature
136-
eq_((range(7), [7, 8]),
137-
e._process_marked_lines(range(9), 'tsslsless'))
138+
eq_((list(range(7)), [7, 8]),
139+
e._process_marked_lines(list(range(9)), 'tsslsless'))
138140

139-
eq_((range(20), [20]),
140-
e._process_marked_lines(range(21), 'ttttttstttesllelelets'))
141+
eq_((list(range(20)), [20]),
142+
e._process_marked_lines(list(range(21)), 'ttttttstttesllelelets'))
141143

142144
# some signature lines could be identified as text
143-
eq_(([0], range(1, 9)), e._process_marked_lines(range(9), 'tsetetest'))
145+
eq_(([0], list(range(1, 9))), e._process_marked_lines(list(range(9)), 'tsetetest'))
144146

145-
eq_(([], range(5)),
146-
e._process_marked_lines(range(5), "ststt"))
147+
eq_(([], list(range(5))),
148+
e._process_marked_lines(list(range(5)), "ststt"))

tests/signature/learning/dataset_test.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3+
from __future__ import absolute_import
34
from ... import *
45
import os
56

tests/signature/learning/featurespace_test.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3+
from __future__ import absolute_import
34
from ... import *
45

56
from talon.signature.learning import featurespace as fs

tests/signature/learning/helpers_test.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
11
# -*- coding: utf-8 -*-
22

3+
from __future__ import absolute_import
34
from ... import *
45

56
import regex as re
67

78
from talon.signature.learning import helpers as h
89
from talon.signature.learning.helpers import *
10+
from six.moves import range
911

1012
# First testing regex constants.
1113
VALID = '''
@@ -154,7 +156,7 @@ def test_extract_names():
154156
# check that extracted names could be compiled
155157
try:
156158
re.compile("|".join(extracted_names))
157-
except Exception, e:
159+
except Exception as e:
158160
ok_(False, ("Failed to compile extracted names {}"
159161
"\n\nReason: {}").format(extracted_names, e))
160162
if expected_names:
@@ -204,7 +206,7 @@ def test_has_signature():
204206
205207
assert_false(h.has_signature('http://www.example.com/555-555-5555',
206208
207-
long_line = ''.join(['q' for e in xrange(28)])
209+
long_line = ''.join(['q' for e in range(28)])
208210
assert_false(h.has_signature(long_line + ' sender', 'sender@example.com'))
209211
# wont crash on an empty string
210212
assert_false(h.has_signature('', ''))

tests/text_quotations_test.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,15 @@
11
# -*- coding: utf-8 -*-
22

3+
from __future__ import absolute_import
34
from . import *
45
from . fixtures import *
56

67
import os
78

89
import email.iterators
910
from talon import quotations
11+
import six
12+
from six.moves import range
1013

1114

1215
@patch.object(quotations, 'MAX_LINES_COUNT', 1)
@@ -138,7 +141,7 @@ def _check_pattern_original_message(original_message_indicator):
138141
-----{}-----
139142
140143
Test"""
141-
eq_('Test reply', quotations.extract_from_plain(msg_body.format(unicode(original_message_indicator))))
144+
eq_('Test reply', quotations.extract_from_plain(msg_body.format(six.text_type(original_message_indicator))))
142145

143146
def test_english_original_message():
144147
_check_pattern_original_message('Original Message')
@@ -669,7 +672,7 @@ def test_standard_replies():
669672
continue
670673
with open(filename) as f:
671674
message = email.message_from_file(f)
672-
body = email.iterators.typed_subpart_iterator(message, subtype='plain').next()
675+
body = next(email.iterators.typed_subpart_iterator(message, subtype='plain'))
673676
text = ''.join(email.iterators.body_line_iterator(body, True))
674677

675678
stripped_text = quotations.extract_from_plain(text)

tests/utils_test.py

+6-4
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
# coding:utf-8
22

3+
from __future__ import absolute_import
34
from . import *
45

56
from talon import utils as u
67
import cchardet
8+
import six
79

810

911
def test_get_delimiter():
@@ -14,10 +16,10 @@ def test_get_delimiter():
1416

1517
def test_unicode():
1618
eq_ (u'hi', u.to_unicode('hi'))
17-
eq_ (type(u.to_unicode('hi')), unicode )
18-
eq_ (type(u.to_unicode(u'hi')), unicode )
19-
eq_ (type(u.to_unicode('привет')), unicode )
20-
eq_ (type(u.to_unicode(u'привет')), unicode )
19+
eq_ (type(u.to_unicode('hi')), six.text_type )
20+
eq_ (type(u.to_unicode(u'hi')), six.text_type )
21+
eq_ (type(u.to_unicode('привет')), six.text_type )
22+
eq_ (type(u.to_unicode(u'привет')), six.text_type )
2123
eq_ (u"привет", u.to_unicode('привет'))
2224
eq_ (u"привет", u.to_unicode(u'привет'))
2325
# some latin1 stuff

train.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from __future__ import absolute_import
12
from talon.signature import EXTRACTOR_FILENAME, EXTRACTOR_DATA
23
from talon.signature.learning.classifier import train, init
34

0 commit comments

Comments
 (0)