Cod sursa(job #3217598)

Utilizator drknss_Hehe hehe drknss_ Data 23 martie 2024 19:24:14
Problema Aho-Corasick Scor 10
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 6.06 kb
#include <iostream>
#include <vector>
#include <map>
#include <queue>
#include <fstream>
#include <cstring>
using namespace std;


#define cin in
#define cout out
 
//ifstream in("grader_test4.in");
ifstream in("ahocorasick.in");
ofstream out("ahocorasick.out");


struct trie_node{ // structure of trie node
    map<char, trie_node*> child;
    trie_node* suffix_link;
    trie_node* output_link;
    int pattern_ind;
};

typedef struct trie_node node;

node* add_node(){ // to add new trie node
    node* temp = new node; // allocating memory for new trie node

    // assigning default values
    temp ->suffix_link = nullptr;
    temp ->output_link = nullptr;
    temp ->pattern_ind = -1;

    return temp;
}

void build_Trie(node* root, vector<string> &patterns, size_t &size){
    for(int i = 0; i < patterns.size() ;i++){ // iterating over patterns
        node* cur = root;
        for(auto c : patterns[i]){ // iterating over characters in pattern[i]
            if(cur ->child.count(c)) // if node corresponding to current character is already present, follow it
                cur = cur ->child[c];
            else{
                node* new_child = add_node(); // if node is not present, add new node to trie
                size++;
                cur ->child.insert({c, new_child});
                cur = new_child;
            }
        }
        cur ->pattern_ind = i; // marking node as i-th pattern ends here
    }
}

void build_suffix_and_output_links(node* root){
    root ->suffix_link = root; // pointing suffix link of root back to itself

    queue<node*> qu; // taking queue for breadth first search

    for(auto &it : root ->child){
        qu.push(it.second); // pushing nodes directly attached to root
        it.second ->suffix_link = root; // setting suffix link of these nodes back to the root
    }

    while(qu.size()){
        node* cur_state = qu.front();
        qu.pop();

        for(auto &it : cur_state ->child){ // iterating over all children of current node
            char c = it.first;
            node* temp = cur_state ->suffix_link;

            while(temp ->child.count(c) == 0 && temp != root) // finding longest proper suffix
                temp = temp ->suffix_link;

            if(temp ->child.count(c))
                it.second ->suffix_link = temp ->child[c]; // if proper suffix is found
            else it.second ->suffix_link = root; // if proper suffix not found

            qu.push(it.second);
        }

        // setting up output link
        if(cur_state ->suffix_link ->pattern_ind >= 0)
            cur_state ->output_link = cur_state ->suffix_link; 
        else cur_state ->output_link = cur_state ->suffix_link ->output_link;
    }
}

struct Node{
    unsigned char bitmap;
    unsigned offset;
    int fail_link;
    int output_link;
    int pattern_idx;
};

std::map<trie_node*, int> mapped_to;
std::map<int, trie_node*> mapped_back;
// Storing trie in vector, all the chilldren of a node are placed together
void build_vec(node* cur, Node vec[], size_t &size, int parent_idx /*index of cur in vector*/, size_t &idx/*next available space in vector*/){

    // no children
    if(!cur->child.size())return;

    vec[parent_idx].offset = idx;
    size_t child_idx = idx;
    for(auto c : cur->child){ // iterating over children

        mapped_to[c.second] = idx;
        mapped_back[idx] = c.second;

        vec[parent_idx].bitmap |= (1 << (c.first - 'A'));

        idx++;
    }

    for(auto c : cur->child){
        build_vec(c.second, vec, size, child_idx, idx);
        child_idx++;
    }
}

void fill_data(Node vec[], size_t &size){

    for(int i = 0; i < size; ++i){
        vec[i].fail_link = mapped_to[mapped_back[i]->suffix_link];
        vec[i].output_link = mapped_to[mapped_back[i]->output_link];
        vec[i].pattern_idx = mapped_back[i]->pattern_ind;
    }
}

void search_pattern(Node vec[], string &text, auto &indices){

    int cur = 0;

    for(int i = 0; i < text.length() ;i++){
        int c = text[i] - 'A';
        if(vec[cur].bitmap & (1 << c)){ // if link to character exists follow it

            // Apply the mask to the number to zero out bits beyond the i-th bit
            unsigned char maskedNumber = vec[cur].bitmap & ((1 << (c + 1)) - 1);
            // Move to child node
            cur = vec[cur].offset + __builtin_popcount(maskedNumber) - 1;

            if(vec[cur].pattern_idx >= 0){
                indices[vec[cur].pattern_idx].push_back(i); // if this node is marked then a pattern ends here
            }
            int temp = vec[cur].output_link;
            while(temp != 0){ 
                indices[vec[temp].pattern_idx].push_back(i); // follow all output links to get patterns ending at this position
                temp = vec[temp].output_link;
            }
        }
        else{
            while(cur != 0 && ((vec[cur].bitmap & (1 << c)) == 0)){ // follow suffix links till matching suffix or root is found
                cur = vec[cur].fail_link;
            }

            if((vec[cur].bitmap & (1 << c)))
                i--;
        }
    }
}


int main(){

    string text; // given string in which pattern is to be searched
    cin >>text;

    int k; //Number of patterns
    cin >> k;
    vector<string> patterns(k); // vector or array of patterns

    for(int i = 0; i < k ;i++)
        cin >>patterns[i];

    size_t size = 1;
    node* root = add_node(); // allocating memory for root node

    build_Trie(root, patterns, size); // building trie out of patterns

    build_suffix_and_output_links(root); // creating appropriate suffix and output links


    Node vec[size];
    memset(vec, 0, sizeof(vec));

    mapped_to[root] = 0;
    mapped_back[0] = root;

    size_t next_idx = 1;
    build_vec(root, vec, size, 0, next_idx);
    fill_data(vec, size);

    vector<vector<int>> indices(k, vector<int>());
    search_pattern(vec, text, indices);


    std::map<string, int> M;
    for(int i = 0; i < k ;i++){
        M[patterns[i]] = 0;
    }
    for(int i = 0; i < k ;i++){
        M[patterns[i]] = max(M[patterns[i]], (int)indices[i].size());
    }

    for(int i = 0; i < k ;i++){
        cout <<  M[patterns[i]] << '\n';

    }

    return 0;
}