Cod sursa(job #3124789)

Utilizator Vlad_NituNitu Vlad-Petru Vlad_Nitu Data 30 aprilie 2023 01:33:11
Problema Aho-Corasick Scor 0
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 3.99 kb
// Aho-Corasik Automaton - https://www.geeksforgeeks.org/aho-corasick-algorithm-pattern-searching/
using namespace std;

#include <bits/stdc++.h>

// dict size.
const int KMAX = (1e2 + 5);
// MMAX = max(|w[0]| + |w[1]| + ... + |w[k - 1]|)
const int MMAX = (int) (1e6 + 5); //
// Maximum number of characters in input alphabet
const int ALPHABET_SIZE = 26;

int out[MMAX]; // bitmask out[s] = (1 << i) | (1 << j) -> both w[i] and w[j] end in state 's'; compute w/ BFS
int f[MMAX]; // f[s] = s'; -> s' = longest proper suffix which is a proper prefix of some pattern; compute w/ BFS
//int g[MMAX][ALPHABET_SIZE]; // g[s][ch] = s'; (s --ch--> s')

unordered_map<char, int> g[MMAX];

vector<string> w;
string str;
int fv[KMAX];
string text;
int k;


int buildGoTo(const vector<string>& w, int k) {
    memset(out, 0, sizeof out);
    // Initially, we just have the 0 state
    int states = 1;

    //fill goto function: g[][] (eq. build Trie)
    for (int i = 0; i < k; ++i) {
        const string &word = w[i];
        int currentState = 0;

        for (int j = 0; j < word.size(); ++j) {
            int ch = word[j] - 'a';

            // Create new state if node does not exist
            if (g[currentState].find(ch) == g[currentState].end())
                g[currentState][ch] = states++;

            currentState = g[currentState][ch];
        }

        // Current word ends in `currentState`
        out[currentState] |= (1 << i);
    }

    // (!) For all characters which don't have an edge from root (or state 0) in Trie, add a goto edge to state 0 itself
    for (int ch = 0; ch < ALPHABET_SIZE; ++ch)
        if (g[0].find(ch) == g[0].end())
            g[0][ch] = 0;
    return states;
}

int buildAhoCorasikAutomaton(const vector<string>& w, int k) {
    int states = buildGoTo(w, k);

    // Build f[] using BFS w/ Queue

    memset(f, -1, sizeof f);

    queue<int> q;

    for (int ch = 0; ch < ALPHABET_SIZE; ++ch) {
        // All nodes 's' of depth 1 have failure f[s] = 0
        if (g[0][ch] != 0) {
            f[g[0][ch]] = 0;
            q.push(g[0][ch]);
        }
    }

    while (q.size()) {
        int state = q.front();
        q.pop();

        // For the removed state, find failure function for
        // all those characters for which goto function is
        // not defined.
        for (int ch = 0; ch <= ALPHABET_SIZE; ++ch) {
            if (g[state].find(ch) != g[state].end()) { // If goto function is defined

                // Find failure state of removed state
                int failure = f[state];

                // Find the deepest node labeled by proper
                // suffix of string from root to current
                // state.
                while (g[failure].find(ch) == g[failure].end())
                    failure = f[failure];

                failure = g[failure][ch];
                f[g[state][ch]] = failure;

                // Merge output values
                out[g[state][ch]] |= out[failure];

                q.push(g[state][ch]);
            }
        }
    }

    return states;
}

int findNextState(int currentState, char nextInput) {
    int answer = currentState;
    int ch = nextInput - 'a';

    // If goto is not defined, use failure function
    while (g[answer].find(ch) == g[answer].end())
        answer = f[answer];

    return g[answer][ch];
}

void searchWords(const vector<string>& w, int k, const string &text) {
    buildAhoCorasikAutomaton(w, k);

    int currentState = 0;

    for (auto const &ch: text) {
        currentState = findNextState(currentState, ch);

        if (out[currentState] == 0)
            continue;

        for (int j = 0; j < k; ++j) {
            if (out[currentState] & (1 << j))
                fv[j]++;

        }
    }
}

int main() {

    freopen("ahocorasick.in", "r", stdin);
    freopen("ahocorasick.out", "w", stdout);

    cin >> text;
    cin >> k;
    for (int i = 0; i < k; ++i) {
        cin >> str;
        w.push_back(str);
    }

    searchWords(w, k, text);

    for (int i = 0; i < k; ++i)
        cout << fv[i] << '\n';

    return 0;
}