diff --git a/abydos/distance/_aline.py b/abydos/distance/_aline.py
index aa57a7460..a876f6e77 100644
--- a/abydos/distance/_aline.py
+++ b/abydos/distance/_aline.py
@@ -1459,7 +1459,10 @@ def _record(score: float, out: List[Tuple[str, str]]) -> None:
                     ):
                         loc_out = deepcopy(out)
                         loc_out.append(
-                            (src_tok[i - 1], tar_tok[j - 2] + tar_tok[j - 1],)
+                            (
+                                src_tok[i - 1],
+                                tar_tok[j - 2] + tar_tok[j - 1],
+                            )
                         )
                         _retrieve(
                             i - 1,
@@ -1500,7 +1503,10 @@ def _record(score: float, out: List[Tuple[str, str]]) -> None:
                     ):
                         loc_out = deepcopy(out)
                         loc_out.append(
-                            (src_tok[i - 2] + src_tok[i - 1], tar_tok[j - 1],)
+                            (
+                                src_tok[i - 2] + src_tok[i - 1],
+                                tar_tok[j - 1],
+                            )
                         )
                         _retrieve(
                             i - 2,
diff --git a/abydos/distance/_synoname.py b/abydos/distance/_synoname.py
index 64a2bc5f2..fccd32502 100644
--- a/abydos/distance/_synoname.py
+++ b/abydos/distance/_synoname.py
@@ -740,13 +740,15 @@ def _approx_c() -> Tuple[bool, float]:
                     (
                         initial_diff
                         == self._lev.dist_abs(
-                            src_initials_str, tar_initials_str,
+                            src_initials_str,
+                            tar_initials_str,
                         )
                     )
                     or (
                         initial_diff
                         == self._lev.dist_abs(
-                            src_initials_str, tar_initials_str,
+                            src_initials_str,
+                            tar_initials_str,
                         )
                     )
                 ):
diff --git a/abydos/phonetic/_henry_early.py b/abydos/phonetic/_henry_early.py
index 3de17a947..97d7a6f09 100644
--- a/abydos/phonetic/_henry_early.py
+++ b/abydos/phonetic/_henry_early.py
@@ -197,10 +197,15 @@ def encode(self, word: str) -> str:
             # IId
             elif char == 'H' and prev in self._uc_c_set:
                 continue
-            elif char in self._uc_c_set - {
-                'L',
-                'R',
-            } and nxch in self._uc_c_set - {'L', 'R'}:
+            elif (
+                char
+                in self._uc_c_set
+                - {
+                    'L',
+                    'R',
+                }
+                and nxch in self._uc_c_set - {'L', 'R'}
+            ):
                 continue
             elif char == 'L' and nxch in {'M', 'N'}:
                 continue
diff --git a/abydos/stemmer/_paice_husk.py b/abydos/stemmer/_paice_husk.py
index a35797d86..6ea5ae857 100644
--- a/abydos/stemmer/_paice_husk.py
+++ b/abydos/stemmer/_paice_husk.py
@@ -240,9 +240,12 @@ def stem(self, word: str) -> str:
             if word[-n:] in self._rule_table[n]:
                 accept = False
                 for rule in self._rule_table[n][word[-n:]]:
-                    (word, accept, intact, terminate,) = self._apply_rule(
-                        word, rule, intact, terminate
-                    )
+                    (
+                        word,
+                        accept,
+                        intact,
+                        terminate,
+                    ) = self._apply_rule(word, rule, intact, terminate)
 
                     if accept:
                         break
diff --git a/abydos/tokenizer/_character.py b/abydos/tokenizer/_character.py
index 6c2f3285c..1718798b0 100644
--- a/abydos/tokenizer/_character.py
+++ b/abydos/tokenizer/_character.py
@@ -33,7 +33,8 @@ class CharacterTokenizer(_Tokenizer):
     """
 
     def __init__(
-        self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
+        self,
+        scaler: Optional[Union[str, Callable[[float], float]]] = None,
     ) -> None:
         """Initialize tokenizer.
 
diff --git a/abydos/tokenizer/_legalipy.py b/abydos/tokenizer/_legalipy.py
index 5bbffe7da..11b759aaf 100644
--- a/abydos/tokenizer/_legalipy.py
+++ b/abydos/tokenizer/_legalipy.py
@@ -24,13 +24,13 @@
 from ._tokenizer import _Tokenizer
 
 try:
-    from syllabipy.legalipy import LegaliPy
-    from syllabipy.legalipy import getOnsets as gen_onsets  # noqa: N813
+    from nltk.tokenize import LegalitySyllableTokenizer
+    from nltk.tokenize.legality_principle import find_legal_onsets
 except ImportError:  # pragma: no cover
     # If the system lacks the SyllabiPy library, that's fine, but SyllabiPy
     # tokenization won't be supported.
-    LegaliPy = None  # type: ignore
-    gen_onsets = None  # type: ignore
+    LegalitySyllableTokenizer = None  # type: ignore
+    find_legal_onsets = None  # type: ignore
 
 
 class LegaliPyTokenizer(_Tokenizer):
@@ -40,7 +40,8 @@ class LegaliPyTokenizer(_Tokenizer):
     """
 
     def __init__(
-        self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
+        self,
+        scaler: Optional[Union[str, Callable[[float], float]]] = None,
     ) -> None:
         """Initialize Tokenizer.
 
@@ -65,12 +66,11 @@ def __init__(
         .. versionadded:: 0.4.0
 
         """
-        if LegaliPy is None:
+        if LegalitySyllableTokenizer is None:
             raise TypeError(  # pragma: no cover
                 'LegaliPy tokenizer requires installation of SyllabiPy'
                 + ' package.'
             )
-
         super(LegaliPyTokenizer, self).__init__(scaler)
 
         self._onsets = ['']
@@ -99,7 +99,7 @@ def train_onsets(
         .. versionadded:: 0.4.0
 
         """
-        new_onsets = gen_onsets(text, threshold, clean)
+        new_onsets = self.tokenizer.legal_onsets
         if append:
            self._onsets = list(set(self._onsets + new_onsets))
         else:
@@ -132,8 +132,10 @@ def tokenize(self, string: str, ipa: bool = False) -> 'LegaliPyTokenizer':
         self._string = string
         self._ordered_tokens = []
 
-        for word in string.split():
-            self._ordered_tokens += LegaliPy(word, self._onsets)
+        words = string.split()
+        self.tokenizer = LegalitySyllableTokenizer(words)
+        for word in words:
+            self._ordered_tokens += self.tokenizer.tokenize(word, self._onsets)
 
         if not self._ordered_tokens:
             self._ordered_tokens = [self._string]
diff --git a/abydos/tokenizer/_saps.py b/abydos/tokenizer/_saps.py
index cf0c1a2d4..05fb79817 100644
--- a/abydos/tokenizer/_saps.py
+++ b/abydos/tokenizer/_saps.py
@@ -33,7 +33,8 @@ class SAPSTokenizer(_Tokenizer):
     """
 
     def __init__(
-        self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
+        self,
+        scaler: Optional[Union[str, Callable[[float], float]]] = None,
     ) -> None:
         """Initialize Tokenizer.
 
diff --git a/abydos/tokenizer/_sonoripy.py b/abydos/tokenizer/_sonoripy.py
index 694668c95..f6689b9c1 100644
--- a/abydos/tokenizer/_sonoripy.py
+++ b/abydos/tokenizer/_sonoripy.py
@@ -38,7 +38,8 @@ class SonoriPyTokenizer(_Tokenizer):
     """
 
     def __init__(
-        self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
+        self,
+        scaler: Optional[Union[str, Callable[[float], float]]] = None,
     ) -> None:
         """Initialize Tokenizer.
 
diff --git a/tests/distance/test_distance_jaccard.py b/tests/distance/test_distance_jaccard.py
index 8a40b4cc7..9f02219de 100644
--- a/tests/distance/test_distance_jaccard.py
+++ b/tests/distance/test_distance_jaccard.py
@@ -164,7 +164,8 @@ def test_jaccard_tanimoto_coeff(self):
             self.cmp_q2.tanimoto_coeff('', 'neilsen'), float('-inf')
         )
         self.assertAlmostEqual(
-            self.cmp_q2.tanimoto_coeff('nelson', 'neilsen'), log2(4 / 11),
+            self.cmp_q2.tanimoto_coeff('nelson', 'neilsen'),
+            log2(4 / 11),
         )
 
         # supplied q-gram tests
diff --git a/tests/phonetic/test_phonetic_daitch_mokotoff.py b/tests/phonetic/test_phonetic_daitch_mokotoff.py
index 0d1f0a313..77ea19766 100644
--- a/tests/phonetic/test_phonetic_daitch_mokotoff.py
+++ b/tests/phonetic/test_phonetic_daitch_mokotoff.py
@@ -103,7 +103,8 @@ def test_daitch_mokotoff(self):
             '68',
         )
         self.assertEqual(
-            DaitchMokotoff(max_length=0, zero_pad=False).encode('Niall'), '68',
+            DaitchMokotoff(max_length=0, zero_pad=False).encode('Niall'),
+            '68',
         )
         self.assertEqual(
             DaitchMokotoff(max_length=0, zero_pad=True).encode('Niall'),
diff --git a/tests/regression b/tests/regression
deleted file mode 160000
index 94fd925e2..000000000
--- a/tests/regression
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 94fd925e22c6bfde0ee7e26df4c2e301e956a148
diff --git a/tox.ini b/tox.ini
index 24fba9db0..2f9782b3a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,7 +1,7 @@
 [tox]
 envlist =
     black
-    py37
+    py38
     doctest
     regression
     fuzz
@@ -28,7 +28,7 @@ deps =
 commands = nosetests []
 
 [testenv:doctest]
-basepython = python3.7
+basepython = python3.8
 setenv =
     NOSE_WITH_COVERAGE=0
     NOSE_WITH_DOCTEST=1
@@ -44,7 +44,7 @@ passenv = {[testenv:doctest]passenv}
 commands = {[testenv:doctest]commands}
 
 [testenv:regression]
-basepython = python3.7
+basepython = python3.8
 commands = nosetests {toxinidir}/tests/regression --processes=-1 \
     --process-timeout=1200 --process-restartworker -c=0 -v []
 
@@ -55,7 +55,7 @@ deps =
 commands = {[testenv:regression]commands}
 
 [testenv:fuzz]
-basepython = python3.7
+basepython = python3.8
 commands = nosetests {toxinidir}/tests/fuzz --processes=-1 \
     --process-timeout=1200 --process-restartworker -c=0 -v []
 
@@ -68,32 +68,32 @@ commands = {[testenv:fuzz]commands}
 [testenv:black]
 depends =
 changedir = {toxinidir}
-basepython = python3.7
+basepython = python3.8
 skip_install = true
 deps = black
 commands = black .
 
 [testenv:pylint]
-basepython = python3.7
+basepython = python3.8
 skip_install = true
 deps = pylint
 commands = {toxinidir}/helpers/call_and_write_log.py \
     "pylint --rcfile=setup.cfg abydos" 0
 
 [testenv:pydocstyle]
-basepython = python3.7
+basepython = python3.8
 skip_install = true
 deps = pydocstyle
 commands = {toxinidir}/helpers/call_and_write_log.py "pydocstyle --count ." 0
 
 [testenv:pycodestyle]
-basepython = python3.7
+basepython = python3.8
 skip_install = true
 deps = pycodestyle
 commands = {toxinidir}/helpers/call_and_write_log.py "pycodestyle ." 0
 
 [testenv:flake8]
-basepython = python3.7
+basepython = python3.8
 skip_install = true
 deps =
     flake8
@@ -134,7 +134,7 @@ commands = {toxinidir}/helpers/call_and_write_log.py \
     "flake8 {toxinidir} --htmldir={toxinidir}/flake8" 0
 
 [testenv:mypy]
-basepython = python3.7
+basepython = python3.8
 deps =
     mypy
     lxml
@@ -145,14 +145,14 @@ mypy_paths =
 commands = mypy {posargs:{[testenv:mypy]mypy_paths}} --txt-report mypy --html-report mypy
 
 [testenv:doc8]
-basepython = python3.7
+basepython = python3.8
 skip_install = true
 deps = doc8
 commands = {toxinidir}/helpers/call_and_write_log.py "doc8 {toxinidir}" 0
 
 [testenv:docs]
 changedir = docs
-basepython = python3.7
+basepython = python3.8
 whitelist_externals=make
 deps =
     sphinx
@@ -176,7 +176,7 @@ commands = {toxinidir}/helpers/call_and_write_log.py \
     "sloccount --datadir .tox/sloccount/tmp ./abydos" 0
 
 [testenv:badges]
-basepython = python3.7
+basepython = python3.8
 depends =
     pylint
     pydocstyle
@@ -188,7 +188,7 @@ deps =
 commands = python {toxinidir}/badge_update.py
 
 [testenv:build]
-basepython = python3.7
+basepython = python3.8
 skip_install = true
 deps =
     wheel
@@ -199,7 +199,7 @@ commands =
     twine check dist/*
 
 [testenv:release]
-basepython = python3.7
+basepython = python3.8
 skip_install = true
 deps =
     {[testenv:build]deps}
@@ -208,7 +208,7 @@ commands =
     twine upload --skip-existing --repository pypi dist/*
 
 [testenv:release-test]
-basepython = python3.7
+basepython = python3.8
 skip_install = true
 deps =
     {[testenv:build]deps}