[关闭]
@zsh-o 2019-06-16T21:02:10.000000Z 字数 1791 阅读 805

AC自动机

算法


  1. # coding = uts-8
  2. from collections import deque
  3. class Node:
  4. def __init__(self):
  5. self.key = None
  6. self.ending = False
  7. self.maps = dict()
  8. self.failure = None
  9. self.depth = 0
  10. # parent pointer for matched string travel
  11. self.parent = None
  12. def __repr__(self):
  13. return self.key
  14. class ACSearch:
  15. def __init__(self, keywords):
  16. self.root = Node()
  17. self.root.failure = self.root
  18. self.build(keywords)
  19. def build(self, keywords):
  20. # build trie tree
  21. for keyword in keywords:
  22. p = self.root
  23. d = 0
  24. for c in keyword:
  25. d += 1
  26. if c not in p.maps:
  27. t = Node()
  28. t.key = c
  29. t.depth = d
  30. p.maps[c] = t
  31. t.parent = p
  32. p = p.maps[c]
  33. if d == len(keyword):
  34. p.ending = True
  35. # build failure
  36. # build failure with BFS
  37. queue = deque()
  38. # 1, all 1-depth nodes' failure are root
  39. for k, v in self.root.maps.items():
  40. v.failure = self.root
  41. queue.append(v)
  42. while len(queue) != 0:
  43. p = queue.popleft()
  44. for k, v in p.maps.items():
  45. failure = p.failure
  46. while k not in failure.maps and failure is not self.root:
  47. failure = failure.failure
  48. if k in failure.maps:
  49. v.failure = failure.maps[k]
  50. else:
  51. v.failure = failure
  52. queue.append(v)
  53. def find_first(self, text):
  54. p = self.root
  55. for index, c in enumerate(text):
  56. while c not in p.maps and p is not self.root:
  57. p = p.failure
  58. if c in p.maps:
  59. p = p.maps[c]
  60. if p.ending is True:
  61. # matched
  62. keyword = []
  63. while p.parent is not None:
  64. keyword.append(p.key)
  65. p = p.parent
  66. keyword.reverse()
  67. keyword = ''.join(keyword)
  68. return index - len(keyword) + 1, keyword
  69. else:
  70. p = p.failure
  71. return -1, ''
  72. def find_all(self, text):
  73. res = []
  74. p = self.root
  75. for index, c in enumerate(text):
  76. while c not in p.maps and p is not self.root:
  77. p = p.failure
  78. if c in p.maps:
  79. p = p.maps[c]
  80. if p.ending is True:
  81. # matched
  82. keyword = []
  83. q = p
  84. while q.parent is not None:
  85. keyword.append(q.key)
  86. q = q.parent
  87. keyword.reverse()
  88. keyword = ''.join(keyword)
  89. res.append([index - len(keyword) + 1, keyword])
  90. return res
  91. if __name__ == '__main__':
  92. searcher = ACSearch(['ash', 'shex', 'bcd', 'sha'])
  93. print(searcher.find_first('secashcvashebcdashare'))
  94. print(searcher.find_all('secashcvashexbcdashare'))
  1. (3, 'ash')
  2. [[3, 'ash'], [8, 'ash'], [9, 'shex'], [13, 'bcd'], [16, 'ash'], [17, 'sha']]
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注