back_and_forth_strategy.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. from spacy.tokens import Token
  2. from word_processor.types import DEP_TYPES
  3. def back_and_forth_strategy(doc) -> [[Token]]:
  4. """
  5. Should return an arrays of variable names based on compound strategy
  6. Uses adverbial strategy and also adds compounds to nouns
  7. e.g.
  8. Reads black phone numbers
  9. Will treat PhoneNumbers as a single entity.
  10. :param doc: spacy document
  11. :return Array of strings
  12. """
  13. suggestions = []
  14. for token in doc:
  15. if token.dep_ == DEP_TYPES['ROOT']:
  16. suggestions = prep_dfs(token)
  17. break
  18. return suggestions
  19. INVALID_DEP = ['aux', 'prep']
  20. INVALID_POS = ['DET', 'AUX', 'ADP']
  21. FLIP_DEP = ['compound', 'amod']
  22. PREPEND_DEP = ['pobj', 'dobj']
  23. def is_valid(node, existing=None):
  24. return (
  25. not (node.pos_ in INVALID_POS)
  26. and not (node.dep_ in INVALID_DEP)
  27. and not (node in existing if existing else False)
  28. )
  29. def should_flip(node):
  30. return node.dep_ in FLIP_DEP
  31. def should_prepend(node):
  32. return node.dep_ in PREPEND_DEP
  33. def merge_outputs(output, merge_outputs=None, append=None, clear=None):
  34. if merge_outputs is None:
  35. merge_outputs = []
  36. if append is None:
  37. append = []
  38. try:
  39. output.remove(clear)
  40. except ValueError:
  41. pass
  42. for sub_output in merge_outputs:
  43. output.append([*sub_output, *append])
  44. return output
  45. def prep_dfs(node, result=None, output=None, root=None):
  46. if root is None:
  47. root = node
  48. if output is None:
  49. output = []
  50. if result is None:
  51. result = []
  52. has_parent = node.head != node and root != node
  53. has_children = (node.n_lefts + node.n_rights) > 0
  54. has_both_directions = node.n_lefts > 0 and node.n_rights > 0
  55. if should_flip(node) and has_parent and is_valid(node, result):
  56. prep_dfs(node.head, [*result[:-1], node], output, root=root)
  57. # dunno if i should return here
  58. return output
  59. if has_parent and should_prepend(node) and is_valid(node, result):
  60. sub_tree_output = prep_dfs(node, [node], [[node]] if not has_children else None, root=node)
  61. for child in node.head.children:
  62. if child != node:
  63. child_tree_output = prep_dfs(
  64. child,
  65. [child] if is_valid(child) else None,
  66. [[child]] if (child.n_lefts+child.n_rights) and is_valid(child) else None,
  67. root=child
  68. )
  69. for child_output in child_tree_output:
  70. merge_outputs(output, [*sub_tree_output], append=[*child_output, *result], clear=result)
  71. merge_outputs(output, [*sub_tree_output], append=[*result], clear=result)
  72. return output
  73. if node.pos_ == 'VERB' and has_both_directions:
  74. for lefty in node.lefts:
  75. for righty in node.rights:
  76. if is_valid(lefty, result):
  77. prep_dfs(righty, [*result, lefty], output, root=root)
  78. # if is_valid(righty):
  79. # prep_dfs(lefty, [*result, righty], output, root=root)
  80. elif has_children:
  81. sub_trees = []
  82. # for child in node.children:
  83. # valid_results = [*result, node] if is_valid(node, result) else result
  84. # prep_dfs(child, valid_results, output, root=root)
  85. for child in node.children:
  86. valid_results = [*result, node] if is_valid(node, result) else result
  87. sub_trees.append(prep_dfs(child, valid_results, [*output], root=root))
  88. for sub_tree in sub_trees:
  89. merge_outputs(output, [*sub_tree])
  90. else:
  91. valid_results = [*result, node] if is_valid(node, result) else result
  92. output.append(valid_results)
  93. return output