
First cut/attempt at removing syllabipy dependency #269


Open · wants to merge 2 commits into base: master

10 changes: 8 additions & 2 deletions abydos/distance/_aline.py
@@ -1459,7 +1459,10 @@ def _record(score: float, out: List[Tuple[str, str]]) -> None:
                ):
                    loc_out = deepcopy(out)
                    loc_out.append(
-                        (src_tok[i - 1], tar_tok[j - 2] + tar_tok[j - 1],)
+                        (
+                            src_tok[i - 1],
+                            tar_tok[j - 2] + tar_tok[j - 1],
+                        )
                    )
                    _retrieve(
                        i - 1,
@@ -1500,7 +1503,10 @@ def _record(score: float, out: List[Tuple[str, str]]) -> None:
                ):
                    loc_out = deepcopy(out)
                    loc_out.append(
-                        (src_tok[i - 2] + src_tok[i - 1], tar_tok[j - 1],)
+                        (
+                            src_tok[i - 2] + src_tok[i - 1],
+                            tar_tok[j - 1],
+                        )
                    )
                    _retrieve(
                        i - 2,

6 changes: 4 additions & 2 deletions abydos/distance/_synoname.py
@@ -740,13 +740,15 @@ def _approx_c() -> Tuple[bool, float]:
                (
                    initial_diff
                    == self._lev.dist_abs(
-                        src_initials_str, tar_initials_str,
+                        src_initials_str,
+                        tar_initials_str,
                    )
                )
                or (
                    initial_diff
                    == self._lev.dist_abs(
-                        src_initials_str, tar_initials_str,
+                        src_initials_str,
+                        tar_initials_str,
                    )
                )
            ):

13 changes: 9 additions & 4 deletions abydos/phonetic/_henry_early.py
@@ -197,10 +197,15 @@ def encode(self, word: str) -> str:
            # IId
            elif char == 'H' and prev in self._uc_c_set:
                continue
-            elif char in self._uc_c_set - {
-                'L',
-                'R',
-            } and nxch in self._uc_c_set - {'L', 'R'}:
+            elif (
+                char
+                in self._uc_c_set
+                - {
+                    'L',
+                    'R',
+                }
+                and nxch in self._uc_c_set - {'L', 'R'}
+            ):
                continue
            elif char == 'L' and nxch in {'M', 'N'}:
                continue

9 changes: 6 additions & 3 deletions abydos/stemmer/_paice_husk.py
@@ -240,9 +240,12 @@ def stem(self, word: str) -> str:
            if word[-n:] in self._rule_table[n]:
                accept = False
                for rule in self._rule_table[n][word[-n:]]:
-                    (word, accept, intact, terminate,) = self._apply_rule(
-                        word, rule, intact, terminate
-                    )
+                    (
+                        word,
+                        accept,
+                        intact,
+                        terminate,
+                    ) = self._apply_rule(word, rule, intact, terminate)
                    if accept:
                        break

3 changes: 2 additions & 1 deletion abydos/tokenizer/_character.py
@@ -33,7 +33,8 @@ class CharacterTokenizer(_Tokenizer):
    """

    def __init__(
-        self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
+        self,
+        scaler: Optional[Union[str, Callable[[float], float]]] = None,
    ) -> None:
        """Initialize tokenizer.

22 changes: 12 additions & 10 deletions abydos/tokenizer/_legalipy.py
@@ -24,13 +24,13 @@
from ._tokenizer import _Tokenizer

try:
-    from syllabipy.legalipy import LegaliPy
-    from syllabipy.legalipy import getOnsets as gen_onsets  # noqa: N813
+    from nltk.tokenize import LegalitySyllableTokenizer
+    from nltk.tokenize.legality_principle import find_legal_onsets
except ImportError:  # pragma: no cover
    # If the system lacks the SyllabiPy library, that's fine, but SyllabiPy
    # tokenization won't be supported.
-    LegaliPy = None  # type: ignore
-    gen_onsets = None  # type: ignore
+    LegalitySyllableTokenizer = None  # type: ignore
+    find_legal_onsets = None  # type: ignore


class LegaliPyTokenizer(_Tokenizer):
@@ -40,7 +40,8 @@ class LegaliPyTokenizer(_Tokenizer):
    """

    def __init__(
-        self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
+        self,
+        scaler: Optional[Union[str, Callable[[float], float]]] = None,
    ) -> None:
        """Initialize Tokenizer.

@@ -65,12 +66,11 @@ def __init__(
        .. versionadded:: 0.4.0

        """
-        if LegaliPy is None:
+        if LegalitySyllableTokenizer is None:
            raise TypeError(  # pragma: no cover
                'LegaliPy tokenizer requires installation of SyllabiPy'
                + ' package.'
            )
-
        super(LegaliPyTokenizer, self).__init__(scaler)

        self._onsets = ['']
@@ -99,7 +99,7 @@ def train_onsets(
        .. versionadded:: 0.4.0

        """
-        new_onsets = gen_onsets(text, threshold, clean)
+        new_onsets = self.tokenizer.legal_onsets
        if append:
            self._onsets = list(set(self._onsets + new_onsets))
        else:
@@ -132,8 +132,10 @@ def tokenize(self, string: str, ipa: bool = False) -> 'LegaliPyTokenizer':
        self._string = string

        self._ordered_tokens = []
-        for word in string.split():
-            self._ordered_tokens += LegaliPy(word, self._onsets)
+        words = string.split()
+        self.tokenizer = LegalitySyllableTokenizer(words)
+        for word in words:
+            self._ordered_tokens += self.tokenizer.tokenize(word, self._onsets)
        if not self._ordered_tokens:
            self._ordered_tokens = [self._string]

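For reference, a minimal standalone sketch of the NLTK API this PR swaps in for syllabipy. It is an illustration only: it assumes nltk is installed, and the tiny training word list stands in for whatever text a caller would actually train on.

from nltk.tokenize import LegalitySyllableTokenizer

# LegalitySyllableTokenizer learns legal syllable onsets from a tokenized
# source text at construction time (toy corpus here, purely illustrative).
training_words = ['wonderful', 'sentence', 'legality', 'principle']
tokenizer = LegalitySyllableTokenizer(training_words)

# The onsets judged legal from the training text; this attribute is what
# train_onsets() above now reads in place of syllabipy's getOnsets().
print(sorted(tokenizer.legal_onsets))

# tokenize() takes a single word and returns a list of its syllables;
# the exact split depends on the trained onsets.
print(tokenizer.tokenize('wonderful'))
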
3 changes: 2 additions & 1 deletion abydos/tokenizer/_saps.py
@@ -33,7 +33,8 @@ class SAPSTokenizer(_Tokenizer):
    """

    def __init__(
-        self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
+        self,
+        scaler: Optional[Union[str, Callable[[float], float]]] = None,
    ) -> None:
        """Initialize Tokenizer.

3 changes: 2 additions & 1 deletion abydos/tokenizer/_sonoripy.py
@@ -38,7 +38,8 @@ class SonoriPyTokenizer(_Tokenizer):
    """

    def __init__(
-        self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
+        self,
+        scaler: Optional[Union[str, Callable[[float], float]]] = None,
    ) -> None:
        """Initialize Tokenizer.

3 changes: 2 additions & 1 deletion tests/distance/test_distance_jaccard.py
@@ -164,7 +164,8 @@ def test_jaccard_tanimoto_coeff(self):
            self.cmp_q2.tanimoto_coeff('', 'neilsen'), float('-inf')
        )
        self.assertAlmostEqual(
-            self.cmp_q2.tanimoto_coeff('nelson', 'neilsen'), log2(4 / 11),
+            self.cmp_q2.tanimoto_coeff('nelson', 'neilsen'),
+            log2(4 / 11),
        )

        # supplied q-gram tests

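As a sanity check on the expected value in the test above: with q=2 q-grams and single '$' start and '#' stop markers (assumed to match QGrams' defaults), 'nelson' and 'neilsen' share 4 of 11 distinct bigrams, so the Tanimoto coefficient is log2(4/11). A small sketch:

from math import log2

def bigrams(word):
    w = '$' + word + '#'  # assumed default padding, one marker each side
    return {w[i:i + 2] for i in range(len(w) - 1)}

a, b = bigrams('nelson'), bigrams('neilsen')
print(len(a & b), len(a | b))         # 4 11
print(log2(len(a & b) / len(a | b)))  # equals log2(4 / 11)
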
3 changes: 2 additions & 1 deletion tests/phonetic/test_phonetic_daitch_mokotoff.py
@@ -103,7 +103,8 @@ def test_daitch_mokotoff(self):
            '68',
        )
        self.assertEqual(
-            DaitchMokotoff(max_length=0, zero_pad=False).encode('Niall'), '68',
+            DaitchMokotoff(max_length=0, zero_pad=False).encode('Niall'),
+            '68',
        )
        self.assertEqual(
            DaitchMokotoff(max_length=0, zero_pad=True).encode('Niall'),

1 change: 0 additions & 1 deletion tests/regression
Submodule regression deleted from 94fd92
32 changes: 16 additions & 16 deletions tox.ini
@@ -1,7 +1,7 @@
[tox]
envlist =
black
-py37
+py38
doctest
regression
fuzz
Expand All @@ -28,7 +28,7 @@ deps =
commands = nosetests []

[testenv:doctest]
-basepython = python3.7
+basepython = python3.8
setenv =
NOSE_WITH_COVERAGE=0
NOSE_WITH_DOCTEST=1
Expand All @@ -44,7 +44,7 @@ passenv = {[testenv:doctest]passenv}
commands = {[testenv:doctest]commands}

[testenv:regression]
-basepython = python3.7
+basepython = python3.8
commands = nosetests {toxinidir}/tests/regression --processes=-1 \
--process-timeout=1200 --process-restartworker -c=0 -v []

Expand All @@ -55,7 +55,7 @@ deps =
commands = {[testenv:regression]commands}

[testenv:fuzz]
-basepython = python3.7
+basepython = python3.8
commands = nosetests {toxinidir}/tests/fuzz --processes=-1 \
--process-timeout=1200 --process-restartworker -c=0 -v []

Expand All @@ -68,32 +68,32 @@ commands = {[testenv:fuzz]commands}
[testenv:black]
depends =
changedir = {toxinidir}
-basepython = python3.7
+basepython = python3.8
skip_install = true
deps = black
commands = black .

[testenv:pylint]
-basepython = python3.7
+basepython = python3.8
skip_install = true
deps = pylint
commands = {toxinidir}/helpers/call_and_write_log.py \
"pylint --rcfile=setup.cfg abydos" 0

[testenv:pydocstyle]
-basepython = python3.7
+basepython = python3.8
skip_install = true
deps = pydocstyle
commands = {toxinidir}/helpers/call_and_write_log.py "pydocstyle --count ." 0

[testenv:pycodestyle]
-basepython = python3.7
+basepython = python3.8
skip_install = true
deps = pycodestyle
commands = {toxinidir}/helpers/call_and_write_log.py "pycodestyle ." 0

[testenv:flake8]
-basepython = python3.7
+basepython = python3.8
skip_install = true
deps =
flake8
Expand Down Expand Up @@ -134,7 +134,7 @@ commands = {toxinidir}/helpers/call_and_write_log.py \
"flake8 {toxinidir} --htmldir={toxinidir}/flake8" 0

[testenv:mypy]
-basepython = python3.7
+basepython = python3.8
deps =
mypy
lxml
Expand All @@ -145,14 +145,14 @@ mypy_paths =
commands = mypy {posargs:{[testenv:mypy]mypy_paths}} --txt-report mypy --html-report mypy

[testenv:doc8]
-basepython = python3.7
+basepython = python3.8
skip_install = true
deps = doc8
commands = {toxinidir}/helpers/call_and_write_log.py "doc8 {toxinidir}" 0

[testenv:docs]
changedir = docs
-basepython = python3.7
+basepython = python3.8
whitelist_externals=make
deps =
sphinx
Expand All @@ -176,7 +176,7 @@ commands = {toxinidir}/helpers/call_and_write_log.py \
"sloccount --datadir .tox/sloccount/tmp ./abydos" 0

[testenv:badges]
-basepython = python3.7
+basepython = python3.8
depends =
pylint
pydocstyle
Expand All @@ -188,7 +188,7 @@ deps =
commands = python {toxinidir}/badge_update.py

[testenv:build]
-basepython = python3.7
+basepython = python3.8
skip_install = true
deps =
wheel
Expand All @@ -199,7 +199,7 @@ commands =
twine check dist/*

[testenv:release]
-basepython = python3.7
+basepython = python3.8
skip_install = true
deps =
{[testenv:build]deps}
Expand All @@ -208,7 +208,7 @@ commands =
twine upload --skip-existing --repository pypi dist/*

[testenv:release-test]
-basepython = python3.7
+basepython = python3.8
skip_install = true
deps =
{[testenv:build]deps}