Prechádzať zdrojové kódy

back_and_forth_strategy

Nikos Atlas 2 rokov pred
rodič
commit
e55f4abdee

+ 3 - 3
word_processor/__main__.py

@@ -2,17 +2,17 @@ import sys
 
 from word_processor.generators import Generator
 from word_processor.parsers import JavascriptParser
-from word_processor.strategies import flip_prep_pobj_strategy
+from word_processor.strategies import back_and_forth_strategy
 
 
 def calculate_names(text: str):
-    generator = Generator(text=text, strategy=flip_prep_pobj_strategy, parser=JavascriptParser())
+    generator = Generator(text=text, strategy=back_and_forth_strategy, parser=JavascriptParser())
 
     generator.suggest()
 
     presentation = generator.present()
 
-    print('\n'.join(presentation))
+    print('\n\n'.join(presentation))
 
 
 # Press the green button in the gutter to run the script.

+ 1 - 1
word_processor/playground.py

@@ -10,5 +10,5 @@ def test_something(text):
     displacy.serve(doc, style="dep", port=5001)
 
 
-test_something('Regex for redacted phone numbers with extra info for Baan-kaidee (TO BE REMOVED).')
+test_something('allow chat communication for authenticated users')
 #test_something('Stick element after scroll has passed the element')

+ 2 - 0
word_processor/strategies/__init__.py

@@ -3,6 +3,7 @@ from .leaf_no_preps_strategy import leaf_no_prep_strategy
 from .adverbial_strategy import adverbial_strategy
 from .compound_strategy import compound_strategy
 from .flip_prep_pobj_strategy import flip_prep_pobj_strategy
+from .back_and_forth_strategy import back_and_forth_strategy
 
 __all__ = [
     'leaf_strategy',
@@ -10,4 +11,5 @@ __all__ = [
     'adverbial_strategy',
     'compound_strategy',
     'flip_prep_pobj_strategy',
+    'back_and_forth_strategy',
 ]

+ 118 - 0
word_processor/strategies/back_and_forth_strategy.py

@@ -0,0 +1,118 @@
+from spacy.tokens import Token
+
+from word_processor.types import DEP_TYPES
+
+
+def back_and_forth_strategy(doc) -> [[Token]]:
+    """
+    Build name suggestions by walking the dependency tree from the ROOT token.
+
+    Scans ``doc`` for the first token whose ``dep_`` equals
+    ``DEP_TYPES['ROOT']`` and returns whatever ``prep_dfs`` produces for it.
+
+    :param doc: spacy document, iterated token by token
+    :return: list of suggestions, each one a list of tokens; an empty list
+        when no token is tagged as ROOT
+    """
+    suggestions = []
+    for token in doc:
+        if token.dep_ == DEP_TYPES['ROOT']:
+            suggestions = prep_dfs(token)
+            # Only the first ROOT token is used; later ones are never visited.
+            break
+
+    return suggestions
+
+
+# Dependency labels and part-of-speech tags that is_valid() rejects.
+INVALID_DEP = ['aux', 'prep']
+INVALID_POS = ['DET', 'AUX', 'ADP']
+# Dependency labels checked by should_flip() and should_prepend() respectively.
+FLIP_DEP = ['compound', 'amod']
+PREPEND_DEP = ['pobj', 'dobj']
+
+
+def is_valid(node, existing=None):
+    """
+    Tell whether ``node`` may be added to a suggestion.
+
+    A node is rejected when its ``pos_`` is listed in ``INVALID_POS``, when
+    its ``dep_`` is listed in ``INVALID_DEP``, or when it is already part of
+    ``existing``.
+
+    :param node: token to check
+    :param existing: optional collection of tokens already used; ``None`` or
+        an empty collection disables the duplicate check
+    :return: True when none of the rejection rules apply
+    """
+    return (
+            not (node.pos_ in INVALID_POS)
+            and not (node.dep_ in INVALID_DEP)
+            and not (node in existing if existing else False)
+    )
+
+
+def should_flip(node):
+    """Return True when the node's ``dep_`` label is listed in ``FLIP_DEP``."""
+    return node.dep_ in FLIP_DEP
+
+
+def should_prepend(node):
+    """Return True when the node's ``dep_`` label is listed in ``PREPEND_DEP``."""
+    return node.dep_ in PREPEND_DEP
+
+
+def merge_outputs(output, merge_outputs=None, append=None, clear=None):
+    """
+    Extend ``output`` in place with combined suggestions and return it.
+
+    One entry equal to ``clear`` is removed from ``output`` when present.
+    Then, for every ``sub_output`` in ``merge_outputs``, a new list
+    ``[*sub_output, *append]`` is appended to ``output``.
+
+    :param output: list of suggestions; mutated in place
+    :param merge_outputs: suggestions to merge in, defaults to an empty list
+        (inside the body this name shadows the function itself)
+    :param append: tokens added to the end of every merged suggestion,
+        defaults to an empty list
+    :param clear: suggestion to drop from ``output`` before merging
+    :return: the same ``output`` list that was passed in
+    """
+    if merge_outputs is None:
+        merge_outputs = []
+    if append is None:
+        append = []
+    try:
+        output.remove(clear)
+    except ValueError:
+        # Nothing equal to ``clear`` was in ``output``; that is not an error.
+        pass
+    for sub_output in merge_outputs:
+        output.append([*sub_output, *append])
+    return output
+
+
+def prep_dfs(node, result=None, output=None, root=None):
+    """
+    Recursively walk the dependency tree around ``node`` and collect suggestions.
+
+    Each suggestion is a list of tokens. Depending on the node, the walk
+    moves up to ``node.head`` (flip), combines the node's subtree with the
+    subtrees of its siblings (prepend), pairs left and right children of a
+    verb, descends into every child, or - for a node without children -
+    stores the path built so far.
+
+    :param node: token currently visited
+    :param result: tokens gathered on the current path, defaults to empty
+    :param output: list of finished suggestions, defaults to empty
+    :param root: token the current walk started from, defaults to ``node``
+    :return: the ``output`` list
+    """
+    if root is None:
+        root = node
+    if output is None:
+        output = []
+    if result is None:
+        result = []
+
+    # The token a walk started from is treated as parentless, even when the
+    # tree gives it a head.
+    has_parent = node.head != node and root != node
+    has_children = (node.n_lefts + node.n_rights) > 0
+    has_both_directions = node.n_lefts > 0 and node.n_rights > 0
+
+    # Flip: continue from the head, with this node taking the place of the
+    # last token collected so far.
+    if should_flip(node) and has_parent and is_valid(node, result):
+        prep_dfs(node.head, [*result[:-1], node], output, root=root)
+        # dunno if i should return here
+        return output
+
+    # Prepend: explore this node's own subtree as a fresh walk (root=node),
+    # then combine it with the subtree of every sibling under the same head.
+    if has_parent and should_prepend(node) and is_valid(node, result):
+        sub_tree_output = prep_dfs(node, [node], [[node]] if not has_children else None, root=node)
+        for child in node.head.children:
+            if child != node:
+                child_tree_output = prep_dfs(
+                    child,
+                    [child] if is_valid(child) else None,
+                    [[child]] if (child.n_lefts+child.n_rights) and is_valid(child) else None,
+                    root=child
+                )
+                # Each new suggestion is: own subtree + sibling suggestion +
+                # path so far. An entry equal to the bare path is dropped.
+                for child_output in child_tree_output:
+                    merge_outputs(output, [*sub_tree_output], append=[*child_output, *result], clear=result)
+        # Also keep the variants that carry no sibling part.
+        merge_outputs(output, [*sub_tree_output], append=[*result], clear=result)
+        return output
+
+    # Verb with children on both sides: every valid left child is added to
+    # the path before walking each right child.
+    if node.pos_ == 'VERB' and has_both_directions:
+        for lefty in node.lefts:
+            for righty in node.rights:
+                if is_valid(lefty, result):
+                    prep_dfs(righty, [*result, lefty], output, root=root)
+                # if is_valid(righty):
+                #     prep_dfs(lefty, [*result, righty], output, root=root)
+    elif has_children:
+        sub_trees = []
+
+        # for child in node.children:
+        #     valid_results = [*result, node] if is_valid(node, result) else result
+        #     prep_dfs(child, valid_results, output, root=root)
+        # Walk each child on its own copy of ``output`` and merge afterwards.
+        # NOTE(review): the copies still contain the entries already present
+        # in ``output``, so the merge below appends those again - this looks
+        # like the source of repeated suggestions; confirm it is intended.
+        for child in node.children:
+            valid_results = [*result, node] if is_valid(node, result) else result
+            sub_trees.append(prep_dfs(child, valid_results, [*output], root=root))
+
+        for sub_tree in sub_trees:
+            merge_outputs(output, [*sub_tree])
+    else:
+        # No children: store the path, including this node when it is valid.
+        valid_results = [*result, node] if is_valid(node, result) else result
+        output.append(valid_results)
+
+    return output

+ 475 - 0
word_processor/tests/__snapshots__/test_back_and_forth_strategy.ambr

@@ -0,0 +1,475 @@
+# name: test_back_and_forth_strategy[Regex for redacted phone numbers with extra info for PlayStation]
+  <class 'list'> [
+    <class 'list'> [
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'extra',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'extra',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'extra',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'redact',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'extra',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'phone',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'extra',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'extra',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'number',
+      'regex',
+    ],
+    <class 'list'> [
+      'PlayStation',
+      'info',
+      'number',
+      'regex',
+    ],
+  ]
+---
+# name: test_back_and_forth_strategy[Stick element after scroll has passed the element]
+  <class 'list'> [
+    <class 'list'> [
+      'element',
+      'pass',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'pass',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'after',
+      'pass',
+      'after',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'after',
+      'pass',
+      'after',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'scroll',
+      'pass',
+      'after',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'scroll',
+      'pass',
+      'after',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'pass',
+      'after',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'pass',
+      'after',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'pass',
+      'after',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'pass',
+      'after',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'after',
+      'pass',
+      'scroll',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'after',
+      'pass',
+      'scroll',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'scroll',
+      'pass',
+      'scroll',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'scroll',
+      'pass',
+      'scroll',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'pass',
+      'scroll',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'pass',
+      'scroll',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'pass',
+      'scroll',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'element',
+      'pass',
+      'scroll',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'stick',
+    ],
+    <class 'list'> [
+      'element',
+      'after',
+      'stick',
+      'after',
+    ],
+    <class 'list'> [
+      'element',
+      'scroll',
+      'stick',
+      'after',
+    ],
+    <class 'list'> [
+      'element',
+      'stick',
+      'after',
+    ],
+    <class 'list'> [
+      'element',
+      'stick',
+      'after',
+    ],
+    <class 'list'> [
+      'element',
+      'after',
+      'stick',
+      'scroll',
+    ],
+    <class 'list'> [
+      'element',
+      'scroll',
+      'stick',
+      'scroll',
+    ],
+    <class 'list'> [
+      'element',
+      'stick',
+      'scroll',
+    ],
+    <class 'list'> [
+      'element',
+      'stick',
+      'scroll',
+    ],
+  ]
+---
+# name: test_back_and_forth_strategy[allow chat communication for authenticated users]
+  <class 'list'> [
+    <class 'list'> [
+      'chat',
+      'communication',
+      'authenticate',
+      'user',
+      'allow',
+    ],
+    <class 'list'> [
+      'chat',
+      'communication',
+      'allow',
+    ],
+    <class 'list'> [
+      'authenticate',
+      'user',
+      'allow',
+    ],
+  ]
+---
+# name: test_back_and_forth_strategy[generate an authentication token for offline users]
+  <class 'list'> [
+    <class 'list'> [
+      'token',
+      'generate',
+    ],
+    <class 'list'> [
+      'authentication',
+      'token',
+      'generate',
+    ],
+    <class 'list'> [
+      'authentication',
+      'token',
+      'generate',
+    ],
+    <class 'list'> [
+      'offline',
+      'user',
+      'authentication',
+      'token',
+      'generate',
+    ],
+    <class 'list'> [
+      'offline',
+      'user',
+      'token',
+      'generate',
+    ],
+  ]
+---
+# name: test_back_and_forth_strategy[serializer for contact roles]
+  <class 'list'> [
+    <class 'list'> [
+      'contact',
+      'role',
+      'serializer',
+    ],
+  ]
+---

+ 34 - 347
word_processor/tests/__snapshots__/test_flip_prep_pobj_strategy.ambr

@@ -1,4 +1,4 @@
-# name: test_flip_prep_pobj_strategy[Regex for redacted phone numbers with extra info for PlayStation0]
+# name: test_flip_prep_pobj_strategy[Regex for redacted phone numbers with extra info for PlayStation]
   <class 'list'> [
     <class 'list'> [
       'redact',
@@ -168,379 +168,66 @@
     ],
   ]
 ---
-# name: test_flip_prep_pobj_strategy[Regex for redacted phone numbers with extra info for PlayStation1]
+# name: test_flip_prep_pobj_strategy[Stick element after scroll has passed the element]
   <class 'list'> [
     <class 'list'> [
-      'redact',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'redact',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'redact',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'redact',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'redact',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'phone',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'phone',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'phone',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'phone',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'phone',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'number',
-      'regex',
+      'stick',
+      'element',
     ],
     <class 'list'> [
-      'PlayStation',
-      'info',
-      'number',
-      'regex',
+      'stick',
+      'after',
+      'element',
     ],
     <class 'list'> [
-      'PlayStation',
-      'info',
-      'number',
-      'regex',
+      'stick',
+      'scroll',
+      'element',
     ],
   ]
 ---
-# name: test_flip_prep_pobj_strategy[Regex for redacted phone numbers with extra info for PlayStation2]
+# name: test_flip_prep_pobj_strategy[allow chat communication for authenticated users]
   <class 'list'> [
     <class 'list'> [
-      'redact',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'redact',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'redact',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'redact',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'redact',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'phone',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'phone',
-      'redact',
-      'number',
-      'regex',
+      'allow',
+      'chat',
+      'communication',
     ],
     <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'phone',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'phone',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'phone',
-      'redact',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'phone',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'extra',
-      'info',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'number',
-      'regex',
-    ],
-    <class 'list'> [
-      'PlayStation',
-      'info',
-      'number',
-      'regex',
+      'authenticate',
+      'user',
+      'allow',
     ],
   ]
 ---
-# name: test_flip_prep_pobj_strategy[Stick element after scroll has passed the element0]
+# name: test_flip_prep_pobj_strategy[generate an authentication token for offline users]
   <class 'list'> [
     <class 'list'> [
-      'stick',
-      'element',
+      'generate',
+      'authentication',
+      'token',
     ],
     <class 'list'> [
-      'stick',
-      'after',
-      'element',
+      'offline',
+      'user',
+      'generate',
+      'authentication',
+      'token',
     ],
     <class 'list'> [
-      'stick',
-      'scroll',
-      'element',
+      'offline',
+      'user',
+      'generate',
+      'token',
     ],
   ]
 ---
-# name: test_flip_prep_pobj_strategy[Stick element after scroll has passed the element1]
+# name: test_flip_prep_pobj_strategy[serializer for contact roles]
   <class 'list'> [
     <class 'list'> [
-      'stick',
-      'element',
-    ],
-    <class 'list'> [
-      'stick',
-      'after',
-      'element',
-    ],
-    <class 'list'> [
-      'stick',
-      'scroll',
-      'element',
+      'contact',
+      'role',
+      'serializer',
     ],
   ]
 ---

+ 29 - 0
word_processor/tests/test_back_and_forth_strategy.py

@@ -0,0 +1,29 @@
+import json
+
+import pytest
+
+import spacy
+
+from word_processor.generators import Generator
+from word_processor.strategies import back_and_forth_strategy
+
+nlp = spacy.load("en_core_web_trf")
+
+
+@pytest.mark.parametrize('text', [
+    'allow chat communication for authenticated users',
+    'Regex for redacted phone numbers with extra info for PlayStation',
+    'generate an authentication token for offline users',
+    'serializer for contact roles',
+    'Stick element after scroll has passed the element',
+])
+def test_back_and_forth_strategy(snapshot, text):
+    """
+    Snapshot-test the suggestions of ``back_and_forth_strategy`` for ``text``.
+
+    :param snapshot: snapshot fixture the lemma lists are compared against
+    :param text: input sentence supplied by the parametrization above
+    """
+    generator = Generator(text=text, strategy=back_and_forth_strategy)
+
+    results = generator.suggest()
+
+    # Compare lemmas rather than token objects so the snapshot stays plain text.
+    text_results = []
+    for suggestion in results:
+        text_results.append([token.lemma_ for token in suggestion])
+
+    assert text_results == snapshot

+ 3 - 3
word_processor/tests/test_flip_prep_pobj_strategy.py

@@ -11,10 +11,10 @@ nlp = spacy.load("en_core_web_trf")
 
 
 @pytest.mark.parametrize('text', [
+    'allow chat communication for authenticated users',
     'Regex for redacted phone numbers with extra info for PlayStation',
-    'Regex for redacted phone numbers with extra info for PlayStation',
-    'Regex for redacted phone numbers with extra info for PlayStation',
-    'Stick element after scroll has passed the element',
+    'generate an authentication token for offline users',
+    'serializer for contact roles',
     'Stick element after scroll has passed the element',
 ])
 def test_flip_prep_pobj_strategy(snapshot, text):