|
@@ -0,0 +1,118 @@
|
|
|
|
+from spacy.tokens import Token
|
|
|
|
+
|
|
|
|
+from word_processor.types import DEP_TYPES
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def back_and_forth_strategy(doc) -> [[Token]]:
    """
    Build variable-name suggestions for a parsed phrase.

    Locates the first token whose dependency label equals
    ``DEP_TYPES['ROOT']`` and hands it to ``prep_dfs``, which walks the
    dependency tree from there.

    Example given by the original author: for "Reads black phone numbers"
    the compound "PhoneNumbers" is treated as a single entity.

    :param doc: spacy document (iterated token by token)
    :return: whatever ``prep_dfs`` produces for the root token (a list of
        token lists), or an empty list when no root token is found
    """
    root_token = next(
        (candidate for candidate in doc if candidate.dep_ == DEP_TYPES['ROOT']),
        None,
    )
    if root_token is None:
        # No token carries the ROOT label: nothing to suggest.
        return []
    return prep_dfs(root_token)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
# Dependency labels that disqualify a token in is_valid().
INVALID_DEP = [
    "aux",
    "prep",
]
# Part-of-speech tags that disqualify a token in is_valid().
INVALID_POS = [
    "DET",
    "AUX",
    "ADP",
]
# Dependency labels checked by should_flip().
FLIP_DEP = [
    "compound",
    "amod",
]
# Dependency labels checked by should_prepend().
PREPEND_DEP = [
    "pobj",
    "dobj",
]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def is_valid(node, existing=None):
    """
    Tell whether ``node`` may be added to a suggestion.

    :param node: object exposing ``pos_`` and ``dep_``
    :param existing: optional collection of nodes already collected; a node
        found in it is rejected. ``None`` or an empty collection skips the check.
    :return: ``False`` when the node's ``pos_`` is in ``INVALID_POS``, its
        ``dep_`` is in ``INVALID_DEP``, or it is already in ``existing``;
        ``True`` otherwise
    """
    if node.pos_ in INVALID_POS:
        return False
    if node.dep_ in INVALID_DEP:
        return False
    if existing and node in existing:
        return False
    return True
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def should_flip(node):
    """
    Report whether the node's dependency label is listed in ``FLIP_DEP``.

    :param node: object exposing ``dep_``
    :return: ``True`` when ``node.dep_`` is one of ``FLIP_DEP``
    """
    dependency_label = node.dep_
    return dependency_label in FLIP_DEP
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def should_prepend(node):
    """
    Report whether the node's dependency label is listed in ``PREPEND_DEP``.

    :param node: object exposing ``dep_``
    :return: ``True`` when ``node.dep_`` is one of ``PREPEND_DEP``
    """
    dependency_label = node.dep_
    return dependency_label in PREPEND_DEP
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def merge_outputs(output, merge_outputs=None, append=None, clear=None):
    """
    Extend ``output`` in place with suffixed copies of the given entries.

    :param output: list of lists; mutated in place and also returned
    :param merge_outputs: sequences to add to ``output``; ``None`` adds nothing.
        (The parameter shadows this function's own name inside the body; the
        name is kept so keyword callers keep working.)
    :param append: items placed after the items of every added entry;
        ``None`` adds no suffix
    :param clear: entry whose first occurrence is removed from ``output``
        before merging; ``None`` (the default) removes nothing
    :return: the same ``output`` list object
    """
    # Only attempt a removal when the caller actually asked for one. With the
    # default of None there is nothing to clear, so we neither raise-and-catch
    # on every call nor drop a genuine None entry from ``output``.
    if clear is not None:
        try:
            output.remove(clear)
        except ValueError:
            # Best effort: ``clear`` simply was not present.
            pass

    suffix = [] if append is None else append
    if merge_outputs is not None:
        # Build the new entries first so that passing ``output`` itself as
        # ``merge_outputs`` cannot grow the list while it is being iterated.
        output.extend([[*sub_output, *suffix] for sub_output in merge_outputs])
    return output
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def prep_dfs(node, result=None, output=None, root=None):
    """
    Recursively walk a dependency tree and collect node sequences.

    Each entry appended to ``output`` is a list of nodes. Which nodes are
    kept, and in what order, is decided by ``is_valid``, ``should_flip`` and
    ``should_prepend``.

    :param node: current node; must expose ``head``, ``children``, ``lefts``,
        ``rights``, ``n_lefts``, ``n_rights``, ``pos_`` and ``dep_``
        (presumably a spacy ``Token`` -- confirm against callers)
    :param result: nodes collected so far on the current path; defaults to
        an empty list
    :param output: accumulator of finished sequences; mutated in place;
        defaults to a new empty list
    :param root: node treated as the top of the current walk; defaults to
        ``node`` itself on the initial call
    :return: the ``output`` list
    """
    if root is None:
        root = node
    if output is None:
        output = []
    if result is None:
        result = []

    # A node counts as having a parent only if it is neither its own head
    # nor the node this walk was started from.
    has_parent = node.head != node and root != node
    has_children = (node.n_lefts + node.n_rights) > 0
    has_both_directions = node.n_lefts > 0 and node.n_rights > 0

    # Flip case: continue from the head, with this node taking the place of
    # the last collected element of ``result``.
    if should_flip(node) and has_parent and is_valid(node, result):
        prep_dfs(node.head, [*result[:-1], node], output, root=root)
        # dunno if i should return here
        return output

    # Prepend case: the node's own sub-tree results are placed in front of
    # what its siblings produce and of the path collected so far.
    if has_parent and should_prepend(node) and is_valid(node, result):
        # Re-walk this node as its own root; with root == node, has_parent is
        # False in that call, so neither this branch nor the flip branch
        # is re-entered for it.
        sub_tree_output = prep_dfs(node, [node], [[node]] if not has_children else None, root=node)
        for child in node.head.children:
            # Siblings only: skip the node itself.
            if child != node:
                child_tree_output = prep_dfs(
                    child,
                    [child] if is_valid(child) else None,
                    [[child]] if (child.n_lefts+child.n_rights) and is_valid(child) else None,
                    root=child
                )
                for child_output in child_tree_output:
                    # Adds sub-tree entry + sibling entry + current path, and
                    # removes the first entry equal to ``result`` from output.
                    merge_outputs(output, [*sub_tree_output], append=[*child_output, *result], clear=result)
        # NOTE(review): nesting here was reconstructed from a flattened
        # listing -- confirm this call sits after the sibling loop.
        merge_outputs(output, [*sub_tree_output], append=[*result], clear=result)
        return output

    if node.pos_ == 'VERB' and has_both_directions:
        # For every (left child, right child) pair, descend into the right
        # child with the left child appended to the path, if it is valid.
        for lefty in node.lefts:
            for righty in node.rights:
                if is_valid(lefty, result):
                    prep_dfs(righty, [*result, lefty], output, root=root)
                # if is_valid(righty):
                #     prep_dfs(lefty, [*result, righty], output, root=root)
    elif has_children:
        sub_trees = []

        # for child in node.children:
        #     valid_results = [*result, node] if is_valid(node, result) else result
        #     prep_dfs(child, valid_results, output, root=root)
        for child in node.children:
            valid_results = [*result, node] if is_valid(node, result) else result
            # Each child is walked with a *copy* of output, so the returned
            # list starts with the entries output already held.
            sub_trees.append(prep_dfs(child, valid_results, [*output], root=root))

        # NOTE(review): every returned sub_tree includes the copied entries,
        # so merging appends them to output again -- confirm the resulting
        # duplicates are intended.
        for sub_tree in sub_trees:
            merge_outputs(output, [*sub_tree])
    else:
        # Leaf: record the path, including this node when it is valid.
        # When it is not valid, ``result`` itself (not a copy) is appended.
        valid_results = [*result, node] if is_valid(node, result) else result
        output.append(valid_results)

    return output
|