
make leaf_strategy return tokens instead of text

Nikos Atlas, 2 years ago
parent
commit b8878b43d9

+ 14 - 2
word_processor/generators/synonym.py

@@ -2,6 +2,8 @@ import nltk
 from nltk.corpus import wordnet
 import os
 
+from nltk.corpus.reader import WordNetError
+
 print("Loading nltk", os.path.dirname(__file__) + '/../../nltk_data/')
 nltk.data.path.append(os.path.dirname(__file__) + '/../../nltk_data/')
 
@@ -9,7 +11,12 @@ nltk.data.path.append(os.path.dirname(__file__) + '/../../nltk_data/')
 def get_synonyms(word):
     synonyms = []
 
-    for syn in wordnet.synsets(word):
+    try:
+        synsets_words = wordnet.synsets(word)
+    except WordNetError:
+        return []
+
+    for syn in synsets_words:
         for lemma in syn.lemmas():
             synonyms.append(lemma.name())
 
@@ -19,7 +26,12 @@ def get_synonyms(word):
 def get_antonyms(word):
     antonyms = []
 
-    for syn in wordnet.synsets(word):
+    try:
+        synsets_words = wordnet.synsets(word)
+    except WordNetError:
+        return []
+
+    for syn in synsets_words:
         for lemma in syn.lemmas():
             if lemma.antonyms():
                 for antonym in lemma.antonyms():
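
A minimal usage sketch of the new fallback (assuming the word_processor package is importable and NLTK is installed; "cat" is just an example word): when wordnet.synsets raises WordNetError, both lookups now return an empty list instead of propagating the error.

    # Sketch only; mirrors the guarded lookup added above.
    from word_processor.generators.synonym import get_antonyms, get_synonyms

    print(get_synonyms("cat"))   # lemma names, or [] if WordNetError was raised
    print(get_antonyms("cat"))   # antonym lemma names, or [] on the same error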

+ 1 - 1
word_processor/parsers/javascript.py

@@ -7,7 +7,7 @@ class JavascriptParser(Parser):
         super().__init__(**kwargs)
 
     def parse_word(self, word: str):
-        return word.title()
+        return word.text.title()
 
     def parse_words(self, words: [str]):
         parsed_words = super().parse_words(words)
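
A quick sketch of the updated contract (the spaCy model name en_core_web_sm and the import path are assumptions): parse_word now receives a spaCy Token, so the parser reads the surface string via .text before title-casing it.

    import spacy
    from word_processor.parsers.javascript import JavascriptParser

    nlp = spacy.load("en_core_web_sm")   # assumed model
    doc = nlp("fetch user data")

    parser = JavascriptParser()          # assumes no required constructor args
    print(parser.parse_word(doc[1]))     # Token "user" -> "User"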

+ 9 - 5
word_processor/parsers/parser.py

@@ -1,17 +1,21 @@
+from spacy.tokens import Token
+
+
 class Parser:
     suggestions = None
+    parsed_suggestions = None
 
     def __init__(self, suggestions=None):
         self.suggestions = suggestions
 
-    def parse_word(self, word: str):
-        return word
+    def parse_word(self, word: Token):
+        return word.text
 
-    def parse_words(self, words: [str]):
+    def parse_words(self, words: [Token]):
         return [self.parse_word(word) for word in words]
 
     def parse(self):
-        self.suggestions = [self.parse_words(words) for words in self.suggestions]
+        self.parsed_suggestions = [self.parse_words(words) for words in self.suggestions]
 
         return self
 
@@ -26,4 +30,4 @@ class Parser:
     def present(self):
         self.parse()
 
-        return [self.present_words(words) for words in self.suggestions]
+        return [self.present_words(words) for words in self.parsed_suggestions]
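
A small usage sketch of the reworked base class (model name is an assumption): parse() now writes to parsed_suggestions, so the original Token lists in suggestions stay untouched and present() reads from the parsed copy.

    import spacy
    from word_processor.parsers.parser import Parser

    nlp = spacy.load("en_core_web_sm")         # assumed model
    doc = nlp("load config file")

    parser = Parser(suggestions=[list(doc)])   # one suggestion = a list of Tokens
    parser.parse()
    print(parser.suggestions)                  # still spaCy Tokens
    print(parser.parsed_suggestions)           # [['load', 'config', 'file']]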

+ 16 - 0
word_processor/strategies/leaf_no_preps_strategy.py

@@ -0,0 +1,16 @@
+from .leaf_strategy import leaf_strategy
+
+
+def leaf_no_prep_strategy(doc):
+    """
+    Should return an array of variable names based on the leaf strategy, without the preps (at, for)
+
+    :param doc: spacy document
+    :return Array of strings
+    """
+
+    ## TODO don't account for `for`, `at`, `in` based on token._dep type
+    suggestions = leaf_strategy(doc)
+    return suggestions
+
+
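
leaf_no_prep_strategy is currently a pass-through; one possible shape of the TODO (purely a sketch, not what this commit implements) would filter prepositional tokens out of each suggestion, for example by dependency label or surface form.

    # Hypothetical follow-up to the TODO above; the PREPOSITIONS set and the
    # dep_ check are assumptions, not part of this commit.
    from word_processor.strategies.leaf_strategy import leaf_strategy

    PREPOSITIONS = {"at", "for", "in"}

    def leaf_no_prep_strategy(doc):
        suggestions = leaf_strategy(doc)
        return [
            [token for token in suggestion
             if token.dep_ != "prep" and token.text.lower() not in PREPOSITIONS]
            for suggestion in suggestions
        ]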

+ 2 - 4
word_processor/strategies/leaf_strategy.py

@@ -1,5 +1,3 @@
-import itertools
-
 
 def leaf_strategy(doc):
     """
@@ -17,7 +15,7 @@ def dfs(graph, result=[], output=[]):
     flag = False
     for u in graph.children:
         flag = True
-        dfs(u, [*result, graph.text], output)
+        dfs(u, [*result, graph], output)
     if flag is False:
-        output.append([*result, graph.text])
+        output.append([*result, graph])
     return output
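
With this change the strategy output carries full spaCy Tokens rather than plain strings, so downstream code keeps .text but also gains access to .dep_, .pos_, and so on. A minimal sketch (model name is an assumption):

    import spacy
    from word_processor.strategies.leaf_strategy import leaf_strategy

    nlp = spacy.load("en_core_web_sm")   # assumed model
    doc = nlp("create user at login")

    for suggestion in leaf_strategy(doc):
        # Each suggestion is now a list of Tokens, not strings.
        print([(token.text, token.dep_) for token in suggestion])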

+ 1 - 1
word_processor/tests/snapshots/test_antonyms/test_antonyms/cat/antonyms

@@ -1 +1 @@
-["keep_down"]
+[]

+ 1 - 1
word_processor/tests/snapshots/test_synonyms/test_synonyms/cat/synonyms

@@ -1 +1 @@
-["cat", "true_cat", "guy", "cat", "hombre", "bozo", "cat", "kat", "khat", "qat", "quat", "cat", "Arabian_tea", "African_tea", "cat-o'-nine-tails", "cat", "Caterpillar", "cat", "big_cat", "cat", "computerized_tomography", "computed_tomography", "CT", "computerized_axial_tomography", "computed_axial_tomography", "CAT", "cat", "vomit", "vomit_up", "purge", "cast", "sick", "cat", "be_sick", "disgorge", "regorge", "retch", "puke", "barf", "spew", "spue", "chuck", "upchuck", "honk", "regurgitate", "throw_up"]
+[]

+ 1 - 1
word_processor/tests/snapshots/test_synonyms/test_synonyms/dog/synonyms

@@ -1 +1 @@
-["dog", "domestic_dog", "Canis_familiaris", "frump", "dog", "dog", "cad", "bounder", "blackguard", "dog", "hound", "heel", "frank", "frankfurter", "hotdog", "hot_dog", "dog", "wiener", "wienerwurst", "weenie", "pawl", "detent", "click", "dog", "andiron", "firedog", "dog", "dog-iron", "chase", "chase_after", "trail", "tail", "tag", "give_chase", "dog", "go_after", "track"]
+[]
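
The three snapshot fixtures above collapse to an empty list, presumably because wordnet.synsets raises WordNetError in the environment where the snapshots were regenerated and the new guard in get_synonyms/get_antonyms turns that into [].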

+ 5 - 2
word_processor/tests/test_leaf_strategy.py

@@ -3,7 +3,6 @@ import json
 import pytest
 
 import spacy
-from spacy import displacy
 
 from word_processor.generators import Generator
 from word_processor.strategies import leaf_strategy
@@ -20,4 +19,8 @@ def test_leaf_strategy(snapshot, text, expected):
 
     results = generator.suggest()
 
-    snapshot.assert_match(json.dumps(results), 'leaf_strategy')
+    text_results = []
+    for suggestion in results:
+        text_results.append([token.text for token in suggestion])
+
+    snapshot.assert_match(json.dumps(text_results), 'leaf_strategy')
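
Since spaCy Token objects are not JSON-serializable, the test flattens each suggestion back to surface strings before snapshotting; an equivalent one-liner (a sketch, not part of the commit) would be:

    text_results = [[token.text for token in suggestion] for suggestion in results]
    snapshot.assert_match(json.dumps(text_results), 'leaf_strategy')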