Pagini recente » Cod sursa (job #692008) | Cod sursa (job #2692036) | Cod sursa (job #438191) | Cod sursa (job #51611) | Cod sursa (job #3217598)
#include <iostream>
#include <vector>
#include <map>
#include <queue>
#include <fstream>
#include <cstring>
using namespace std;
#define cin in
#define cout out
//ifstream in("grader_test4.in");
ifstream in("ahocorasick.in");
ofstream out("ahocorasick.out");
struct trie_node{ // structure of trie node
map<char, trie_node*> child;
trie_node* suffix_link;
trie_node* output_link;
int pattern_ind;
};
typedef struct trie_node node;
node* add_node(){ // to add new trie node
node* temp = new node; // allocating memory for new trie node
// assigning default values
temp ->suffix_link = nullptr;
temp ->output_link = nullptr;
temp ->pattern_ind = -1;
return temp;
}
void build_Trie(node* root, vector<string> &patterns, size_t &size){
for(int i = 0; i < patterns.size() ;i++){ // iterating over patterns
node* cur = root;
for(auto c : patterns[i]){ // iterating over characters in pattern[i]
if(cur ->child.count(c)) // if node corresponding to current character is already present, follow it
cur = cur ->child[c];
else{
node* new_child = add_node(); // if node is not present, add new node to trie
size++;
cur ->child.insert({c, new_child});
cur = new_child;
}
}
cur ->pattern_ind = i; // marking node as i-th pattern ends here
}
}
void build_suffix_and_output_links(node* root){
root ->suffix_link = root; // pointing suffix link of root back to itself
queue<node*> qu; // taking queue for breadth first search
for(auto &it : root ->child){
qu.push(it.second); // pushing nodes directly attached to root
it.second ->suffix_link = root; // setting suffix link of these nodes back to the root
}
while(qu.size()){
node* cur_state = qu.front();
qu.pop();
for(auto &it : cur_state ->child){ // iterating over all children of current node
char c = it.first;
node* temp = cur_state ->suffix_link;
while(temp ->child.count(c) == 0 && temp != root) // finding longest proper suffix
temp = temp ->suffix_link;
if(temp ->child.count(c))
it.second ->suffix_link = temp ->child[c]; // if proper suffix is found
else it.second ->suffix_link = root; // if proper suffix not found
qu.push(it.second);
}
// setting up output link
if(cur_state ->suffix_link ->pattern_ind >= 0)
cur_state ->output_link = cur_state ->suffix_link;
else cur_state ->output_link = cur_state ->suffix_link ->output_link;
}
}
struct Node{
unsigned char bitmap;
unsigned offset;
int fail_link;
int output_link;
int pattern_idx;
};
std::map<trie_node*, int> mapped_to;
std::map<int, trie_node*> mapped_back;
// Storing trie in vector, all the chilldren of a node are placed together
void build_vec(node* cur, Node vec[], size_t &size, int parent_idx /*index of cur in vector*/, size_t &idx/*next available space in vector*/){
// no children
if(!cur->child.size())return;
vec[parent_idx].offset = idx;
size_t child_idx = idx;
for(auto c : cur->child){ // iterating over children
mapped_to[c.second] = idx;
mapped_back[idx] = c.second;
vec[parent_idx].bitmap |= (1 << (c.first - 'A'));
idx++;
}
for(auto c : cur->child){
build_vec(c.second, vec, size, child_idx, idx);
child_idx++;
}
}
void fill_data(Node vec[], size_t &size){
for(int i = 0; i < size; ++i){
vec[i].fail_link = mapped_to[mapped_back[i]->suffix_link];
vec[i].output_link = mapped_to[mapped_back[i]->output_link];
vec[i].pattern_idx = mapped_back[i]->pattern_ind;
}
}
void search_pattern(Node vec[], string &text, auto &indices){
int cur = 0;
for(int i = 0; i < text.length() ;i++){
int c = text[i] - 'A';
if(vec[cur].bitmap & (1 << c)){ // if link to character exists follow it
// Apply the mask to the number to zero out bits beyond the i-th bit
unsigned char maskedNumber = vec[cur].bitmap & ((1 << (c + 1)) - 1);
// Move to child node
cur = vec[cur].offset + __builtin_popcount(maskedNumber) - 1;
if(vec[cur].pattern_idx >= 0){
indices[vec[cur].pattern_idx].push_back(i); // if this node is marked then a pattern ends here
}
int temp = vec[cur].output_link;
while(temp != 0){
indices[vec[temp].pattern_idx].push_back(i); // follow all output links to get patterns ending at this position
temp = vec[temp].output_link;
}
}
else{
while(cur != 0 && ((vec[cur].bitmap & (1 << c)) == 0)){ // follow suffix links till matching suffix or root is found
cur = vec[cur].fail_link;
}
if((vec[cur].bitmap & (1 << c)))
i--;
}
}
}
int main(){
string text; // given string in which pattern is to be searched
cin >>text;
int k; //Number of patterns
cin >> k;
vector<string> patterns(k); // vector or array of patterns
for(int i = 0; i < k ;i++)
cin >>patterns[i];
size_t size = 1;
node* root = add_node(); // allocating memory for root node
build_Trie(root, patterns, size); // building trie out of patterns
build_suffix_and_output_links(root); // creating appropriate suffix and output links
Node vec[size];
memset(vec, 0, sizeof(vec));
mapped_to[root] = 0;
mapped_back[0] = root;
size_t next_idx = 1;
build_vec(root, vec, size, 0, next_idx);
fill_data(vec, size);
vector<vector<int>> indices(k, vector<int>());
search_pattern(vec, text, indices);
std::map<string, int> M;
for(int i = 0; i < k ;i++){
M[patterns[i]] = 0;
}
for(int i = 0; i < k ;i++){
M[patterns[i]] = max(M[patterns[i]], (int)indices[i].size());
}
for(int i = 0; i < k ;i++){
cout << M[patterns[i]] << '\n';
}
return 0;
}