Skip to content

Commit

Permalink
End of day
Browse files Browse the repository at this point in the history
  • Loading branch information
apmoore1 committed Oct 6, 2021
1 parent 3fb79ae commit 61dd1ec
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 15 deletions.
8 changes: 4 additions & 4 deletions pymusas/lexicon_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def add_lexicon_entry(self, value: LexiconEntry,

@staticmethod
def from_tsv(tsv_file_path: Union[PathLike, str], include_pos: bool = True
) -> "LexiconCollection":
) -> Dict[str, List[str]]:
'''
If `include_pos` is True and the TSV file does not contain a
`pos` field heading then this will return a LexiconCollection that is
Expand All @@ -130,8 +130,8 @@ def from_tsv(tsv_file_path: Union[PathLike, str], include_pos: bool = True
adding the `LexiconEntry` into the returned
`LexiconCollection`. For more information on this
see the `add_lexicon_entry` method.
:returns: A `LexiconCollection` that has been created from the data
within the TSV file.
:returns: A dictionary object that can be used to create a
`LexiconCollection`
:raises: ValueError if the minimum field headings, lemma and
semantic_tags, do not exist in the given TSV file.
'''
Expand Down Expand Up @@ -175,4 +175,4 @@ def from_tsv(tsv_file_path: Union[PathLike, str], include_pos: bool = True
collection_from_tsv.add_lexicon_entry(LexiconEntry(**row_data),
include_pos=include_pos)

return collection_from_tsv
return collection_from_tsv.data
17 changes: 17 additions & 0 deletions pymusas/spacy_lexicon_collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from os import PathLike
from typing import Optional, Dict, List, Union

import spacy

from .lexicon_collection import LexiconCollection



@spacy.util.registry.misc('lexicon_collection')
def lexicon_collection(data: Optional[Dict[str, List[str]]] = None,
                       tsv_file_path: Optional[Union[PathLike, str]] = None
                       ) -> LexiconCollection:
    '''
    Creates a `LexiconCollection` either from an in-memory dictionary or
    from a TSV lexicon file. Registered in the spaCy `misc` registry under
    the name `lexicon_collection` so it can be referenced from a config.

    :param data: A dictionary mapping lexicon keys to lists of semantic
                 tags; used directly to build the collection when given.
    :param tsv_file_path: A path to a TSV lexicon file; used only when
                          `data` is not given.
    :returns: A `LexiconCollection` built from `data` if provided,
              otherwise from the TSV file at `tsv_file_path`.
    :raises ValueError: If neither `data` nor `tsv_file_path` is given.
    '''
    if data is not None:
        return LexiconCollection(data)
    if tsv_file_path is None:
        raise ValueError('Either `data` or `tsv_file_path` has to be '
                         'given, both cannot be None.')
    # `LexiconCollection.from_tsv` returns a plain dictionary, so wrap it
    # to honour the declared `LexiconCollection` return type.
    return LexiconCollection(LexiconCollection.from_tsv(tsv_file_path))
17 changes: 9 additions & 8 deletions pymusas/spacy_tagger.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, List, Iterable, Callable
from typing import Optional, List, Iterable, Callable, Dict

from spacy.training import Example
from spacy.language import Language
Expand All @@ -13,6 +13,8 @@ def __init__(self, nlp: Language, lexicon_lookup: Optional[LexiconCollection] =
lexicon_lemma_lookup: Optional[LexiconCollection] = None
) -> None:
print(nlp.pipe_names)
self.lexicon_lookup = lexicon_lookup
self.lexicon_lemma_lookup = lexicon_lemma_lookup
if lexicon_lookup is None:
self.lexicon_lookup: LexiconCollection = LexiconCollection()
if lexicon_lemma_lookup is None:
Expand All @@ -27,21 +29,21 @@ def tag_token(text: str, lemma: str, pos: str,
if pos == 'punc':
return ["PUNCT"]

text_pos = f"{text}_{pos}"
text_pos = f"{text}|{pos}"
if text_pos in lexicon_lookup:
return lexicon_lookup[text_pos]

lemma_pos = f"{lemma}_{pos}"
lemma_pos = f"{lemma}|{pos}"
if lemma_pos in lexicon_lookup:
return lexicon_lookup[lemma_pos]

text_lower = text.lower()
text_pos_lower = f"{text_lower}_{pos}"
text_pos_lower = f"{text_lower}|{pos}"
if text_pos_lower in lexicon_lookup:
return lexicon_lookup[text_pos_lower]

lemma_lower = lemma.lower()
lemma_pos_lower = f"{lemma_lower}_{pos}"
lemma_pos_lower = f"{lemma_lower}|{pos}"
if lemma_pos_lower in lexicon_lookup:
return lexicon_lookup[lemma_pos_lower]

Expand Down Expand Up @@ -102,6 +104,5 @@ def initialize(self, get_examples: Optional[Callable[[], Iterable[Example]]] = N

@Language.factory("usas_tagger")
def create_spacy_rule_based_tagger_component(nlp: Language, name: str,
lexicon_lookup: Optional[LexiconCollection] = None,
lexicon_lemma_lookup: Optional[LexiconCollection] = None):
return SpacyRuleBasedTagger(nlp, lexicon_lookup, lexicon_lemma_lookup)
lexicon_lookup: Optional[Dict[str, List[str]]] = None):
return SpacyRuleBasedTagger(nlp, lexicon_lookup, None)
17 changes: 14 additions & 3 deletions test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
import spacy

from pymusas.lexicon_collection import LexiconCollection, LexiconEntry
from pymusas.spacy_tagger import create_spacy_rule_based_tagger_component
from pymusas.spacy_lexicon_collection import lexicon_collection
'''
from __future__ import annotations
from spacy.language import Language
from spacy.tokens import Doc
Expand All @@ -14,7 +20,12 @@ def create_spacy_rule_based_tagger_component(nlp: Language, name: str):
class SpacyRuleBasedTagger:
def __init__(self, nlp: Language):
pass

'''
# Add the component to the pipeline and configure it
nlp = spacy.blank("en")
nlp.add_pipe("ucrel")
a_collection = lexicon_collection(data={'London|PROPN': ['Z2']}).data
print(a_collection)
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("usas_tagger", last=True, config={'lexicon_lookup':a_collection})
doc = nlp('London is great today')
for token in doc:
print(f'{token.text} {token._.usas_tags}')

0 comments on commit 61dd1ec

Please sign in to comment.