@zsh-o
2019-06-16T13:02:10.000000Z
字数 1791
阅读 1014
算法
# coding = uts-8from collections import dequeclass Node:def __init__(self):self.key = Noneself.ending = Falseself.maps = dict()self.failure = Noneself.depth = 0# parent pointer for matched string travelself.parent = Nonedef __repr__(self):return self.keyclass ACSearch:def __init__(self, keywords):self.root = Node()self.root.failure = self.rootself.build(keywords)def build(self, keywords):# build trie treefor keyword in keywords:p = self.rootd = 0for c in keyword:d += 1if c not in p.maps:t = Node()t.key = ct.depth = dp.maps[c] = tt.parent = pp = p.maps[c]if d == len(keyword):p.ending = True# build failure# build failure with BFSqueue = deque()# 1, all 1-depth nodes' failure are rootfor k, v in self.root.maps.items():v.failure = self.rootqueue.append(v)while len(queue) != 0:p = queue.popleft()for k, v in p.maps.items():failure = p.failurewhile k not in failure.maps and failure is not self.root:failure = failure.failureif k in failure.maps:v.failure = failure.maps[k]else:v.failure = failurequeue.append(v)def find_first(self, text):p = self.rootfor index, c in enumerate(text):while c not in p.maps and p is not self.root:p = p.failureif c in p.maps:p = p.maps[c]if p.ending is True:# matchedkeyword = []while p.parent is not None:keyword.append(p.key)p = p.parentkeyword.reverse()keyword = ''.join(keyword)return index - len(keyword) + 1, keywordelse:p = p.failurereturn -1, ''def find_all(self, text):res = []p = self.rootfor index, c in enumerate(text):while c not in p.maps and p is not self.root:p = p.failureif c in p.maps:p = p.maps[c]if p.ending is True:# matchedkeyword = []q = pwhile q.parent is not None:keyword.append(q.key)q = q.parentkeyword.reverse()keyword = ''.join(keyword)res.append([index - len(keyword) + 1, keyword])return resif __name__ == '__main__':searcher = ACSearch(['ash', 'shex', 'bcd', 'sha'])print(searcher.find_first('secashcvashebcdashare'))print(searcher.find_all('secashcvashexbcdashare'))
(3, 'ash')[[3, 'ash'], [8, 'ash'], [9, 'shex'], [13, 'bcd'], [16, 'ash'], [17, 'sha']]