96b8b260e1 2011-09-20 kinaba: //------------------------------------------------------------- 96b8b260e1 2011-09-20 kinaba: // Aho-Corasick multi pattern search 96b8b260e1 2011-09-20 kinaba: // O(|size-of-patterns|) for construction. O(1) per step. 96b8b260e1 2011-09-20 kinaba: // 96b8b260e1 2011-09-20 kinaba: // Verified by 96b8b260e1 2011-09-20 kinaba: // - SRM519 Div1 LV2 96b8b260e1 2011-09-20 kinaba: //------------------------------------------------------------- 96b8b260e1 2011-09-20 kinaba: 96b8b260e1 2011-09-20 kinaba: template<int NC=26, char BC='a'> 96b8b260e1 2011-09-20 kinaba: struct AhoCorasick 96b8b260e1 2011-09-20 kinaba: { 96b8b260e1 2011-09-20 kinaba: AhoCorasick( const vector<string>& p ) : link(NC+1) { 96b8b260e1 2011-09-20 kinaba: // Create a trie 96b8b260e1 2011-09-20 kinaba: for(int i=0; i<p.size(); ++i) { 96b8b260e1 2011-09-20 kinaba: AhoCorasick* t = this; 96b8b260e1 2011-09-20 kinaba: for(int k=0; k<p[i].size(); t=t->link[p[i][k++]-BC]) 96b8b260e1 2011-09-20 kinaba: if(!t->link[p[i][k]-BC]) 96b8b260e1 2011-09-20 kinaba: t->link[p[i][k]-BC]=new AhoCorasick; 96b8b260e1 2011-09-20 kinaba: t->final.insert(i); 96b8b260e1 2011-09-20 kinaba: } 96b8b260e1 2011-09-20 kinaba: 96b8b260e1 2011-09-20 kinaba: // Do BFS and draw failure links, and prepare for substring pattern 96b8b260e1 2011-09-20 kinaba: queue<AhoCorasick*> Q; 96b8b260e1 2011-09-20 kinaba: for(int c=0; c<NC; ++c) 96b8b260e1 2011-09-20 kinaba: if( link[c] ) { 96b8b260e1 2011-09-20 kinaba: Q.push(link[c]); 96b8b260e1 2011-09-20 kinaba: link[c]->link[NC] = this; // "c"'s suffix is "" 96b8b260e1 2011-09-20 kinaba: link[c]->final.insert(final.begin(), final.end()); 96b8b260e1 2011-09-20 kinaba: } 96b8b260e1 2011-09-20 kinaba: else 96b8b260e1 2011-09-20 kinaba: link[c] = this; 96b8b260e1 2011-09-20 kinaba: while( !Q.empty() ) { 96b8b260e1 2011-09-20 kinaba: AhoCorasick* t=Q.front(); Q.pop(); 96b8b260e1 2011-09-20 kinaba: for(int c=0; c<NC; ++c) 96b8b260e1 2011-09-20 kinaba: if( t->link[c] ) { 96b8b260e1 2011-09-20 kinaba: Q.push(t->link[c]); 96b8b260e1 2011-09-20 kinaba: AhoCorasick* r = t->link[NC]; // "r" is suffix of "t"... 96b8b260e1 2011-09-20 kinaba: while( !r->link[c] ) 96b8b260e1 2011-09-20 kinaba: r = r->link[NC]; 96b8b260e1 2011-09-20 kinaba: t->link[c]->link[NC] = r->link[c]; // then "rc" is suffix of "tc" 96b8b260e1 2011-09-20 kinaba: t->link[c]->final.insert(r->link[c]->final.begin(), r->link[c]->final.end()); 96b8b260e1 2011-09-20 kinaba: } 96b8b260e1 2011-09-20 kinaba: } 96b8b260e1 2011-09-20 kinaba: } 96b8b260e1 2011-09-20 kinaba: const AhoCorasick* start() const { return this; } 96b8b260e1 2011-09-20 kinaba: const AhoCorasick* next(char c) const { 96b8b260e1 2011-09-20 kinaba: const AhoCorasick* t = this; 96b8b260e1 2011-09-20 kinaba: while( !t->link[c-BC] ) 96b8b260e1 2011-09-20 kinaba: t = t->link[NC]; 96b8b260e1 2011-09-20 kinaba: return t->link[c-BC]; 96b8b260e1 2011-09-20 kinaba: } 96b8b260e1 2011-09-20 kinaba: const set<int>& accept() const { return final; } 96b8b260e1 2011-09-20 kinaba: ~AhoCorasick() { for(int i=0; i<NC; ++i) if(link[i]!=this) delete link[i]; } 96b8b260e1 2011-09-20 kinaba: private: 96b8b260e1 2011-09-20 kinaba: AhoCorasick() : link(NC+1) {} 96b8b260e1 2011-09-20 kinaba: vector<AhoCorasick*> link; 96b8b260e1 2011-09-20 kinaba: set<int> final; 96b8b260e1 2011-09-20 kinaba: };