Cod sursa(job #3239464)

Utilizator _andrei4567Stan Andrei _andrei4567 Data 5 august 2024 18:16:15
Problema Aho-Corasick Scor 100
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 4.64 kb
///the idea is that we store in a trie the given set of words
///then we want to set for each prefix of the words that exist in the trie the longest sufix that is a prefix in the given set
///so let's think exactly like for the kmp algorithm
///let's suppose we've found the maximal suffix for a string 'x'
///then we add a new character 'ch'
///if in the trie x + ch exists, then this is the new maximal suffix which is a prefix of the given set of words
///otherwise we have to go to the suffix of x till it has a son of the character 'ch'

#include <fstream>
#include <queue>
#include <algorithm>

using namespace std;

ifstream cin ("ahocorasick.in");
ofstream cout ("ahocorasick.out");

const int M = 1e6;
const int N = 100;
int rez[N + 1];

bool viz[M + 1];
int val[M + 1];

vector <int> g[M + 1], v;

struct trie
{
    int nr;
    vector <int> index;
    bool isleaf;
    trie *children[26], *suff_link;///this for a word in a trie(this meaning any prefix of the given set) is a pointer to the maximal prefix which is a suffix of the current word
    trie (int x = 0)
    {
        nr = x;
        index.clear();///the index in the set given(we may have duplicates)
        isleaf = false;///this is actually where a word ends
        suff_link = nullptr;
        for (int i = 0; i < 26; ++i)
            children[i] = nullptr;
    }
};

trie *root = new trie();

string a, s;

queue <trie*> pq;

int n, poz, t;

void add_word (trie *root, string s)
{
    for (auto it : s)
    {
        int ch = it - 'a';
        if (root->children[ch] == nullptr)
            root->children[ch] = new trie(++t);
        root = root->children[ch];
    }
    root->isleaf = 1;
    root->index.push_back(poz);
}

trie* suff (trie *aux, int poz)///finds the suffix that has to connect with the new letter 'poz' (ch - 'a')
{
    if (aux == root && aux->children[poz] != nullptr)
        return aux->children[poz];
    while (aux != root)
    {
        aux = aux->suff_link;
        if (aux->children[poz] != nullptr)
            return aux->children[poz];
    }
    return aux;
}

void bfs (trie *root)///to find the suffix for the trie's words we have to do it in a bfs order
{
    queue <trie*> q;
    for (int i = 0; i < 26; ++i)
        if (root->children[i] != nullptr)
            root->children[i]->suff_link = root, q.push(root->children[i]);
    while (!q.empty())
    {
        trie *aux = q.front();
        q.pop();
        for (int i = 0; i < 26; ++i)
            if (aux->children[i] != nullptr)
            {
                aux->children[i]->suff_link = suff(aux, i);
                q.push(aux->children[i]);
            }
    }

}

void dfs (trie *root)
{
    for (int i = 0; i < 26; ++i)
        if (root->children[i] != nullptr)
        {
            dfs (root->children[i]);
        }
    if (root->suff_link != nullptr)
        g[root->nr].push_back(root->suff_link->nr);///we have to compute the DAG of the suff_links of the current trie
}

void dfs1 (int node) ///topological sort
{
    viz[node] = 1;
    for (auto it : g[node])
        if (!viz[it])
        dfs1 (it);
    v.push_back(node);
}

void dfs2 (trie *root)///find the answer
{
    if (root->isleaf)
    {
        for (auto it : root->index)
            rez[it] = val[root->nr];
    }
    for (int i = 0; i < 26; ++i)
        if (root->children[i] != nullptr)
            dfs2 (root->children[i]);
}

void solve (trie *root)
{
    for (auto it : s)///for the given string where we have to find the occurences we have to find for every prefix of s the maximal suffix that is a prefix in the trie
    {
        ///to do that is actually easy, we start from the root and then using the suffix function we add each letter one by one, maintaining the suffix
        int ch = it - 'a';
        if (root->children[ch] != nullptr)///this means the precedent best suffix can be extended
            root = root->children[ch], val[root->nr]++;
        else
            root = suff(root, ch), val[root->nr]++;///search for the suffix that can be extended with the new letter
    }
}

int main()
{
    ios_base :: sync_with_stdio(false);
    cin.tie(0);
    cin >> s >> n;
    for (int i = 1; i <= n; ++i)
    {
        cin >> a;
        ++poz;
        add_word(root, a);
    }
    bfs(root);
    solve(root);
    dfs (root);
    for (int i = 1; i <= t; ++i)
        if (!viz[i])
            dfs1(i);
    reverse (v.begin(), v.end());
    for (auto it : v)
        for (auto it1 : g[it])
            val[it1] += val[it];
    dfs2 (root);
    for (int i = 1; i <= n; ++i)
        cout << rez[i] << '\n';
    return 0;
}