flip_prep_pobj_strategy.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
from typing import List

from spacy.tokens import Token

from word_processor.types import DEP_TYPES
  3. def flip_prep_pobj_strategy(doc) -> [[Token]]:
  4. """
  5. Should return an arrays of variable names based on compound strategy
  6. Uses adverbial strategy and also adds compounds to nouns
  7. e.g.
  8. Reads black phone numbers
  9. Will treat PhoneNumbers as a single entity.
  10. :param doc: spacy document
  11. :return Array of strings
  12. """
  13. suggestions = []
  14. for token in doc:
  15. if token.dep_ == DEP_TYPES['ROOT']:
  16. suggestions = prep_dfs(token)
  17. break
  18. return suggestions
  19. INVALID_DEP = ['aux', 'prep']
  20. INVALID_POS = ['DET', 'AUX', 'ADP']
  21. FLIP_DEP = ['compound', 'amod']
  22. PREPEND_DEP = ['pobj']
  23. def is_valid(node, existing=None):
  24. return (
  25. not (node.pos_ in INVALID_POS)
  26. and not (node.dep_ in INVALID_DEP)
  27. and not (node in existing if existing else False)
  28. )
  29. def should_flip(node):
  30. return node.dep_ in FLIP_DEP
  31. def should_prepend(node):
  32. return node.dep_ in PREPEND_DEP
  33. def prep_dfs(node, result=None, output=None, root=None):
  34. if output is None:
  35. output = []
  36. if result is None:
  37. result = []
  38. has_parent = node.head != node and root != node
  39. has_children = (node.n_lefts + node.n_rights) > 0
  40. has_both_directions = node.n_lefts > 0 and node.n_rights > 0
  41. if should_flip(node) and has_parent and is_valid(node, result):
  42. prep_dfs(node.head, [*result[:-1], node], output, root=root)
  43. # dunno if i should return here
  44. return output
  45. if has_parent and should_prepend(node) and is_valid(node, result):
  46. sub_tree_outputs = prep_dfs(node, [node], [[node]] if not has_children else None, root=node)
  47. try:
  48. output.remove(result)
  49. except ValueError:
  50. pass
  51. for sub_output in sub_tree_outputs:
  52. output.append([*sub_output, *result])
  53. return output
  54. if node.pos_ == 'VERB' and has_both_directions:
  55. for lefty in node.lefts:
  56. for righty in node.rights:
  57. if is_valid(lefty, result):
  58. prep_dfs(righty, [*result, lefty], output, root=root)
  59. # if is_valid(righty):
  60. # prep_dfs(lefty, [*result, righty], output, root=root)
  61. elif has_children:
  62. for u in node.children:
  63. valid_results = [*result, node] if is_valid(node, result) else result
  64. prep_dfs(u, valid_results, output, root=root)
  65. else:
  66. valid_results = [*result, node] if is_valid(node, result) else result
  67. output.append(valid_results)
  68. return output