# adverbial_strategy.py (1.8 KB)
  1. from spacy.tokens import Token
  2. from word_processor.strategies.leaf_strategy import dfs
  3. from word_processor.types import DEP_TYPES
  4. def adverbial_strategy(doc) -> [[Token]]:
  5. """
  6. Should return an arrays of variable names based on adverbial strategy
  7. When a verb appears, if it has children in both directions, then remove the verb and
  8. add the a left child and a continue to the right child. Does so for all combinations (left,right)
  9. :param doc: spacy document
  10. :return Array of strings
  11. """
  12. suggestions = []
  13. for token in doc:
  14. if token.dep_ == DEP_TYPES['ROOT']:
  15. suggestions = process_adverbial_clauses(token)
  16. break
  17. return suggestions
  18. INVALID_DEP = ['aux', 'prep']
  19. INVALID_POS = ['DET', 'AUX', 'ADP']
  20. def is_valid(node):
  21. return not (node.pos_ in INVALID_POS) and not (node.dep_ in INVALID_DEP)
  22. # todo - this is dfs
  23. def process_adverbial_clauses(node, result=None, output=None):
  24. if output is None:
  25. output = []
  26. if result is None:
  27. result = []
  28. has_children = (node.n_lefts + node.n_rights) > 0
  29. has_both_directions = node.n_lefts > 0 and node.n_rights > 0
  30. if node.pos_ == 'VERB' and has_both_directions:
  31. for lefty in node.lefts:
  32. for righty in node.rights:
  33. if is_valid(lefty):
  34. process_adverbial_clauses(righty, [*result, lefty], output)
  35. # if is_valid(righty):
  36. # process_adverbial_clauses(lefty, [*result, righty], output)
  37. elif has_children:
  38. for u in node.children:
  39. valid_results = [*result, node] if is_valid(node) else result
  40. process_adverbial_clauses(u, valid_results, output)
  41. else:
  42. valid_results = [*result, node] if is_valid(node) else result
  43. output.append(valid_results)
  44. return output