Quellcode durchsuchen

adds adverbial, compound and prepend strategies

Nikos Atlas vor 2 Jahren
Ursprung
Commit
b58b81acbc

+ 2 - 2
word_processor/playground.py

@@ -10,5 +10,5 @@ def test_something(text):
     displacy.serve(doc, style="dep", port=5001)
 
 
-# test_something('Regex for redacted phone numbers with extra info for Baan-kaidee (TO BE REMOVED).')
-test_something('makes for mobile phones a diagonal number')
+test_something('Regex for redacted phone numbers with extra info for Baan-kaidee (TO BE REMOVED).')
+#test_something('Stick element after scroll has passed the element')

+ 6 - 2
word_processor/strategies/__init__.py

@@ -1,9 +1,13 @@
 from .leaf_strategy import leaf_strategy
 from .leaf_no_preps_strategy import leaf_no_prep_strategy
 from .adverbial_strategy import adverbial_strategy
+from .compound_strategy import compound_strategy
+from .flip_prep_pobj_strategy import flip_prep_pobj_strategy
 
 __all__ = [
     'leaf_strategy',
-    'adverbial_strategy',
     'leaf_no_prep_strategy',
-]
+    'adverbial_strategy',
+    'compound_strategy',
+    'flip_prep_pobj_strategy',
+]

+ 37 - 10
word_processor/strategies/adverbial_strategy.py

@@ -6,26 +6,53 @@ from word_processor.types import DEP_TYPES
 
 def adverbial_strategy(doc) -> [[Token]]:
     """
-    Should return an arrays of variable names based on leaf strategy
+    Should return an array of variable names based on the adverbial strategy.
+    When a verb has children in both directions, the verb itself is dropped: a valid left
+    child is added and traversal continues into a right child, for all (left, right) combinations.
 
     :param doc: spacy document
     :return Array of strings
     """
-
+    suggestions = []
     for token in doc:
         if token.dep_ == DEP_TYPES['ROOT']:
             suggestions = process_adverbial_clauses(token)
             break
 
-    return suggestions or []
+    return suggestions
+
+
+INVALID_DEP = ['aux', 'prep']
+INVALID_POS = ['DET', 'AUX', 'ADP']
+
+
+def is_valid(node):
+    return not (node.pos_ in INVALID_POS) and not (node.dep_ in INVALID_DEP)
 
 
 # todo - this is dfs
-def process_adverbial_clauses(graph, result=[], output=[]):
-    flag = False
-    for u in graph.children:
-        flag = True
-        dfs(u, [*result, graph], output)
-    if flag is False:
-        output.append([*result, graph])
+def process_adverbial_clauses(node, result=None, output=None):
+    if output is None:
+        output = []
+    if result is None:
+        result = []
+
+    has_children = (node.n_lefts + node.n_rights) > 0
+    has_both_directions = node.n_lefts > 0 and node.n_rights > 0
+
+    if node.pos_ == 'VERB' and has_both_directions:
+        for lefty in node.lefts:
+            for righty in node.rights:
+                if is_valid(lefty):
+                    process_adverbial_clauses(righty, [*result, lefty], output)
+                # if is_valid(righty):
+                #     process_adverbial_clauses(lefty, [*result, righty], output)
+    elif has_children:
+        for u in node.children:
+            valid_results = [*result, node] if is_valid(node) else result
+            process_adverbial_clauses(u, valid_results, output)
+    else:
+        valid_results = [*result, node] if is_valid(node) else result
+        output.append(valid_results)
+
     return output

+ 75 - 0
word_processor/strategies/compound_strategy.py

@@ -0,0 +1,75 @@
+from spacy.tokens import Token
+
+from word_processor.types import DEP_TYPES
+
+
+def compound_strategy(doc) -> [[Token]]:
+    """
+    Should return an array of variable names based on the compound strategy.
+    Builds on the adverbial strategy and also places compound/amod modifiers before their head noun.
+
+    e.g.
+    Reads black phone numbers
+    Will treat PhoneNumbers as a single entity.
+
+    :param doc: spacy document
+    :return Array of strings
+    """
+    suggestions = []
+    for token in doc:
+        if token.dep_ == DEP_TYPES['ROOT']:
+            suggestions = compound_dfs(token)
+            break
+
+    return suggestions
+
+
+INVALID_DEP = ['aux', 'prep']
+INVALID_POS = ['DET', 'AUX', 'ADP']
+FLIP_DEP = ['compound', 'amod']
+
+
+def is_valid(node, existing=None):
+    return (
+            not (node.pos_ in INVALID_POS)
+            and not (node.dep_ in INVALID_DEP)
+            and not (node in existing if existing else False)
+    )
+
+
+def should_flip(node):
+    return node.dep_ in FLIP_DEP
+
+
+# todo - this is dfs
+def compound_dfs(node, result=None, output=None):
+    if output is None:
+        output = []
+    if result is None:
+        result = []
+
+    has_parent = node.head != node
+    has_children = (node.n_lefts + node.n_rights) > 0
+    has_both_directions = node.n_lefts > 0 and node.n_rights > 0
+
+    if should_flip(node) and has_parent and is_valid(node, result):
+        compound_dfs(node.head, [*result[:-1], node], output)
+        # dunno if i should return here
+        return output
+
+    if node.pos_ == 'VERB' and has_both_directions:
+        for lefty in node.lefts:
+            for righty in node.rights:
+                if is_valid(lefty, result):
+                    compound_dfs(righty, [*result, lefty], output)
+                # if is_valid(righty):
+                #     compound_dfs(lefty, [*result, righty], output)
+    elif has_children:
+        for u in node.children:
+            valid_results = [*result, node] if is_valid(node, result) else result
+            compound_dfs(u, valid_results, output)
+    else:
+        valid_results = [*result, node] if is_valid(node, result) else result
+        output.append(valid_results)
+
+    return output

+ 89 - 0
word_processor/strategies/flip_prep_pobj_strategy.py

@@ -0,0 +1,89 @@
+from spacy.tokens import Token
+
+from word_processor.types import DEP_TYPES
+
+
+def flip_prep_pobj_strategy(doc) -> [[Token]]:
+    """
+    Should return an array of variable names based on the flip prep/pobj strategy.
+    Behaves like the compound strategy, but the sub-tree of a `pobj` token is prepended to the tokens collected so far.
+
+    e.g.
+    Reads black phone numbers
+    Will treat PhoneNumbers as a single entity.
+
+    :param doc: spacy document
+    :return Array of strings
+    """
+    suggestions = []
+    for token in doc:
+        if token.dep_ == DEP_TYPES['ROOT']:
+            suggestions = prep_dfs(token)
+            break
+
+    return suggestions
+
+
+INVALID_DEP = ['aux', 'prep']
+INVALID_POS = ['DET', 'AUX', 'ADP']
+FLIP_DEP = ['compound', 'amod']
+PREPEND_DEP = ['pobj']
+
+
+def is_valid(node, existing=None):
+    return (
+            not (node.pos_ in INVALID_POS)
+            and not (node.dep_ in INVALID_DEP)
+            and not (node in existing if existing else False)
+    )
+
+
+def should_flip(node):
+    return node.dep_ in FLIP_DEP
+
+
+def should_prepend(node):
+    return node.dep_ in PREPEND_DEP
+
+
+def prep_dfs(node, result=None, output=None, root=None):
+    if output is None:
+        output = []
+    if result is None:
+        result = []
+
+    has_parent = node.head != node and root != node
+    has_children = (node.n_lefts + node.n_rights) > 0
+    has_both_directions = node.n_lefts > 0 and node.n_rights > 0
+
+    if should_flip(node) and has_parent and is_valid(node, result):
+        prep_dfs(node.head, [*result[:-1], node], output, root=root)
+        # dunno if i should return here
+        return output
+
+    if has_parent and should_prepend(node) and is_valid(node, result):
+        sub_tree_outputs = prep_dfs(node, [node], [[node]] if not has_children else None, root=node)
+        try:
+            output.remove(result)
+        except ValueError:
+            pass
+        for sub_output in sub_tree_outputs:
+            output.append([*sub_output, *result])
+        return output
+
+    if node.pos_ == 'VERB' and has_both_directions:
+        for lefty in node.lefts:
+            for righty in node.rights:
+                if is_valid(lefty, result):
+                    prep_dfs(righty, [*result, lefty], output, root=root)
+                # if is_valid(righty):
+                #     prep_dfs(lefty, [*result, righty], output, root=root)
+    elif has_children:
+        for u in node.children:
+            valid_results = [*result, node] if is_valid(node, result) else result
+            prep_dfs(u, valid_results, output, root=root)
+    else:
+        valid_results = [*result, node] if is_valid(node, result) else result
+        output.append(valid_results)
+
+    return output

+ 6 - 1
word_processor/strategies/leaf_strategy.py

@@ -19,7 +19,12 @@ def leaf_strategy(doc) -> [[Token]]:
     return suggestions or []
 
 
-def dfs(graph, result=[], output=[]):
+def dfs(graph, result=None, output=None):
+    if output is None:
+        output = []
+    if result is None:
+        result = []
+
     flag = False
     for u in graph.children:
         flag = True

+ 62 - 0
word_processor/tests/__snapshots__/test_adverbial_strategy.ambr

@@ -0,0 +1,62 @@
+# name: test_adverbial_strategy[Regex for redacted phone numbers with extra info for PlayStation]
+  <class 'list'> [
+    <class 'list'> [
+      'regex',
+      'number',
+      'redact',
+    ],
+    <class 'list'> [
+      'regex',
+      'number',
+      'phone',
+    ],
+    <class 'list'> [
+      'regex',
+      'number',
+      'info',
+      'extra',
+    ],
+    <class 'list'> [
+      'regex',
+      'number',
+      'info',
+      'PlayStation',
+    ],
+  ]
+---
+# name: test_adverbial_strategy[Stick element after scroll has passed the element0]
+  <class 'list'> [
+    <class 'list'> [
+      'stick',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'after',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'scroll',
+      'element',
+    ],
+  ]
+---
+# name: test_adverbial_strategy[Stick element after scroll has passed the element1]
+  <class 'list'> [
+    <class 'list'> [
+      'stick',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'after',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'scroll',
+      'element',
+    ],
+  ]
+---

+ 187 - 0
word_processor/tests/__snapshots__/test_compound_strategy.ambr

@@ -0,0 +1,187 @@
+# name: test_compound_strategy[Regex for redacted phone numbers with extra info for PlayStation]
+  <class 'list'> [
+    <class 'list'> [
+      'regex',
+      'redact',
+      'number',
+    ],
+    <class 'list'> [
+      'regex',
+      'redact',
+      'phone',
+      'number',
+    ],
+    <class 'list'> [
+      'regex',
+      'redact',
+      'phone',
+      'number',
+    ],
+    <class 'list'> [
+      'regex',
+      'redact',
+      'phone',
+      'number',
+      'extra',
+      'info',
+    ],
+    <class 'list'> [
+      'regex',
+      'redact',
+      'phone',
+      'number',
+      'extra',
+      'info',
+      'PlayStation',
+    ],
+    <class 'list'> [
+      'regex',
+      'redact',
+      'phone',
+      'number',
+      'info',
+      'PlayStation',
+    ],
+    <class 'list'> [
+      'regex',
+      'redact',
+      'number',
+      'extra',
+      'info',
+    ],
+    <class 'list'> [
+      'regex',
+      'redact',
+      'number',
+      'extra',
+      'info',
+      'PlayStation',
+    ],
+    <class 'list'> [
+      'regex',
+      'redact',
+      'number',
+      'info',
+      'PlayStation',
+    ],
+    <class 'list'> [
+      'regex',
+      'phone',
+      'redact',
+      'number',
+    ],
+    <class 'list'> [
+      'regex',
+      'phone',
+      'redact',
+      'number',
+    ],
+    <class 'list'> [
+      'regex',
+      'phone',
+      'redact',
+      'number',
+      'extra',
+      'info',
+    ],
+    <class 'list'> [
+      'regex',
+      'phone',
+      'redact',
+      'number',
+      'extra',
+      'info',
+      'PlayStation',
+    ],
+    <class 'list'> [
+      'regex',
+      'phone',
+      'redact',
+      'number',
+      'info',
+      'PlayStation',
+    ],
+    <class 'list'> [
+      'regex',
+      'phone',
+      'number',
+    ],
+    <class 'list'> [
+      'regex',
+      'phone',
+      'number',
+      'extra',
+      'info',
+    ],
+    <class 'list'> [
+      'regex',
+      'phone',
+      'number',
+      'extra',
+      'info',
+      'PlayStation',
+    ],
+    <class 'list'> [
+      'regex',
+      'phone',
+      'number',
+      'info',
+      'PlayStation',
+    ],
+    <class 'list'> [
+      'regex',
+      'number',
+      'extra',
+      'info',
+    ],
+    <class 'list'> [
+      'regex',
+      'number',
+      'extra',
+      'info',
+      'PlayStation',
+    ],
+    <class 'list'> [
+      'regex',
+      'number',
+      'info',
+      'PlayStation',
+    ],
+  ]
+---
+# name: test_compound_strategy[Stick element after scroll has passed the element0]
+  <class 'list'> [
+    <class 'list'> [
+      'stick',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'after',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'scroll',
+      'element',
+    ],
+  ]
+---
+# name: test_compound_strategy[Stick element after scroll has passed the element1]
+  <class 'list'> [
+    <class 'list'> [
+      'stick',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'after',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'scroll',
+      'element',
+    ],
+  ]
+---

+ 546 - 0
word_processor/tests/__snapshots__/test_flip_prep_pobj_strategy.ambr

@@ -0,0 +1,546 @@
+# name: test_flip_prep_pobj_strategy[Regex for redacted phone numbers with extra info for PlayStation0]
+  <class 'list'> [
+    <class 'list'> [
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'number',
+      'regex',
+    ],
+  ]
+---
+# name: test_flip_prep_pobj_strategy[Regex for redacted phone numbers with extra info for PlayStation1]
+  <class 'list'> [
+    <class 'list'> [
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'number',
+      'regex',
+    ],
+  ]
+---
+# name: test_flip_prep_pobj_strategy[Regex for redacted phone numbers with extra info for PlayStation2]
+  <class 'list'> [
+    <class 'list'> [
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'number',
+      'regex',
+    ],
+  ]
+---
+# name: test_flip_prep_pobj_strategy[Stick element after scroll has passed the element0]
+  <class 'list'> [
+    <class 'list'> [
+      'stick',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'after',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'scroll',
+      'element',
+    ],
+  ]
+---
+# name: test_flip_prep_pobj_strategy[Stick element after scroll has passed the element1]
+  <class 'list'> [
+    <class 'list'> [
+      'stick',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'after',
+      'element',
+    ],
+    <class 'list'> [
+      'stick',
+      'scroll',
+      'element',
+    ],
+  ]
+---

+ 4 - 4
word_processor/tests/__snapshots__/test_javascript_parser.ambr

@@ -1,8 +1,8 @@
 # name: test_javascript_parser[Regex for redacted phone numbers with extra info for PlayStation]
   <class 'list'> [
-    'RedactedNumbersForRegex',
-    'PhoneNumbersForRegex',
-    'ExtraInfoWithNumbersForRegex',
-    'PlaystationForInfoWithNumbersForRegex',
+    'RegexForNumberRedact',
+    'RegexForNumberPhone',
+    'RegexForNumberWithInfoExtra',
+    'RegexForNumberWithInfoForPlaystation',
   ]
 ---

+ 26 - 0
word_processor/tests/__snapshots__/test_leaf_no_prep_strategy.ambr

@@ -0,0 +1,26 @@
+# name: test_leaf_no_prep_strategy[Regex for redacted phone numbers with extra info for PlayStation-expected0]
+  <class 'list'> [
+    <class 'list'> [
+      'Regex',
+      'numbers',
+      'redacted',
+    ],
+    <class 'list'> [
+      'Regex',
+      'numbers',
+      'phone',
+    ],
+    <class 'list'> [
+      'Regex',
+      'numbers',
+      'info',
+      'extra',
+    ],
+    <class 'list'> [
+      'Regex',
+      'numbers',
+      'info',
+      'PlayStation',
+    ],
+  ]
+---

+ 0 - 29
word_processor/tests/__snapshots__/test_leaf_strategy.ambr

@@ -29,34 +29,5 @@
       'for',
       'PlayStation',
     ],
-    <class 'list'> [
-      'Regex',
-      'for',
-      'numbers',
-      'redacted',
-    ],
-    <class 'list'> [
-      'Regex',
-      'for',
-      'numbers',
-      'phone',
-    ],
-    <class 'list'> [
-      'Regex',
-      'for',
-      'numbers',
-      'with',
-      'info',
-      'extra',
-    ],
-    <class 'list'> [
-      'Regex',
-      'for',
-      'numbers',
-      'with',
-      'info',
-      'for',
-      'PlayStation',
-    ],
   ]
 ---

+ 0 - 8
word_processor/tests/__snapshots__/test_parser.ambr

@@ -4,13 +4,5 @@
     'Regexfornumbersphone',
     'Regexfornumberswithinfoextra',
     'RegexfornumberswithinfoforPlayStation',
-    'Regexfornumbersredacted',
-    'Regexfornumbersphone',
-    'Regexfornumberswithinfoextra',
-    'RegexfornumberswithinfoforPlayStation',
-    'Regexfornumbersredacted',
-    'Regexfornumbersphone',
-    'Regexfornumberswithinfoextra',
-    'RegexfornumberswithinfoforPlayStation',
   ]
 ---

+ 2 - 1
word_processor/tests/test_adverbial_strategy.py

@@ -11,7 +11,8 @@ nlp = spacy.load("en_core_web_trf")
 
 
 @pytest.mark.parametrize('text', [
-    # 'Regex for redacted phone numbers with extra info for PlayStation',
+    'Regex for redacted phone numbers with extra info for PlayStation',
+    'Stick element after scroll has passed the element',
     'Stick element after scroll has passed the element',
 ])
 def test_adverbial_strategy(snapshot, text):

+ 27 - 0
word_processor/tests/test_compound_strategy.py

@@ -0,0 +1,27 @@
+import json
+
+import pytest
+
+import spacy
+
+from word_processor.generators import Generator
+from word_processor.strategies import compound_strategy
+
+nlp = spacy.load("en_core_web_trf")
+
+
+@pytest.mark.parametrize('text', [
+    'Regex for redacted phone numbers with extra info for PlayStation',
+    'Stick element after scroll has passed the element',
+    'Stick element after scroll has passed the element',
+])
+def test_compound_strategy(snapshot, text):
+    generator = Generator(text=text, strategy=compound_strategy)
+
+    results = generator.suggest()
+
+    text_results = []
+    for suggestion in results:
+        text_results.append([token.lemma_ for token in suggestion])
+
+    assert text_results == snapshot

+ 29 - 0
word_processor/tests/test_flip_prep_pobj_strategy.py

@@ -0,0 +1,29 @@
+import json
+
+import pytest
+
+import spacy
+
+from word_processor.generators import Generator
+from word_processor.strategies import flip_prep_pobj_strategy
+
+nlp = spacy.load("en_core_web_trf")
+
+
+@pytest.mark.parametrize('text', [
+    'Regex for redacted phone numbers with extra info for PlayStation',
+    'Regex for redacted phone numbers with extra info for PlayStation',
+    'Regex for redacted phone numbers with extra info for PlayStation',
+    'Stick element after scroll has passed the element',
+    'Stick element after scroll has passed the element',
+])
+def test_flip_prep_pobj_strategy(snapshot, text):
+    generator = Generator(text=text, strategy=flip_prep_pobj_strategy)
+
+    results = generator.suggest()
+
+    text_results = []
+    for suggestion in results:
+        text_results.append([token.lemma_ for token in suggestion])
+
+    assert text_results == snapshot