Cod sursa(job #2679428)

Utilizator Mihai_BarbuMihai Barbu Mihai_Barbu Data 30 noiembrie 2020 15:52:05
Problema Aho-Corasick Scor 100
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 3.22 kb
#include <bits/stdc++.h>

using namespace std;

// Alphabet size; letters are mapped to 0..SIGMA-1 by subtracting 'a'.
#define SIGMA 26
// Capacity of wordState (one slot per dictionary word).
#define NMAX 102
// Number of rows in the automaton tables g, f and res; also the size of the
// text buffer declared in main.
const int MAX_SIZE = 1e6 + 2;
// Size of the buffer buildTrie reads each dictionary word into.
const int WSIZE = 1e4 + 2;

// Input and output file names.
// NOTE(review): these macros replace every later `in` / `out` token in the
// file (they would, for instance, break a later use of `ios::in`); less
// generic names would be safer.
#define in "ahocorasick.in"
#define out "ahocorasick.out"

// g[state][letter]: trie transition, 0 when the edge does not exist.
// State 1 is the root; new states are numbered from 2 by buildTrie.
int g[MAX_SIZE][SIGMA];
// f[state]: failure link of the state (f[1] == 1 for the root).
int f[MAX_SIZE];
// res[state]: how many text positions ended in this state; calcFailed later
// adds to it the counts of the states whose failure link points here.
int res[MAX_SIZE];

// wordState[i]: state reached after inserting the i-th dictionary word.
int wordState[NMAX];

// Trie states in BFS order starting at the root, filled by statesTraversal.
vector<int> q;

// Streams are opened during static initialization, before main runs.
ifstream fin(in);
ofstream fout(out);

/**
 * Assigns the failure link of every child of `state`.
 *
 * Reads f[state], so the caller must have set it beforehand (calcFailed
 * does this by seeding f[1] and visiting states in the order stored in q).
 *
 * @param state trie state whose outgoing edges are processed
 */
void getFailure(int state) {
    for (int letter = 0; letter < SIGMA; ++letter) {
        const int child = g[state][letter];
        if (child == 0) {
            continue;  // no edge on this letter
        }

        // Climb the failure chain of the parent until a state with an
        // outgoing edge on `letter` is found or the root (1) is reached.
        int fallback = f[state];
        while (fallback != 1 && g[fallback][letter] == 0) {
            fallback = f[fallback];
        }

        // `target == child` happens when the walk stayed at the parent
        // itself (a child of the root); such a child has no non-empty proper
        // suffix in the trie, so it links back to the root.
        const int target = g[fallback][letter];
        f[child] = (target != 0 && target != child) ? target : 1;
    }
}

/**
 * Reads `n` whitespace-separated words from `fin` and inserts each of them
 * into the trie stored in `g`, recording in wordState[i] the state reached
 * by the last character of the i-th word.
 *
 * State 1 is the root and 0 marks a missing edge, so fresh states are
 * numbered from 2.
 *
 * The extraction is width-limited to the buffer size: a token longer than
 * WSIZE - 1 characters no longer overruns `word`; its remainder is simply
 * picked up by the next read. If the input ends early, insertion stops and
 * the remaining wordState entries keep their zero-initialized value.
 *
 * NOTE(review): `n` must not exceed NMAX (capacity of wordState) and the
 * words are assumed to contain only 'a'..'z'; neither is validated here.
 *
 * @param n number of dictionary words to read
 */
void buildTrie(unsigned int n) {
    char word[WSIZE];
    int nextState = 2;

    for (unsigned int i = 0; i < n; ++i) {
        // Bounded read: at most WSIZE - 1 characters plus the terminator.
        if (!(fin >> std::setw(WSIZE) >> word)) {
            break;  // truncated or unreadable input: nothing more to insert
        }

        // Every word is inserted starting from the root.
        int currState = 1;
        for (const char *p = word; *p != '\0'; ++p) {
            const int c = *p - 'a';

            if (!g[currState][c]) {
                g[currState][c] = nextState++;
            }

            currState = g[currState][c];
        }

        wordState[i] = currState;
    }
}

/**
 * Appends to `q` the root followed by every trie state reachable from it,
 * in breadth-first (level) order.
 */
void statesTraversal() {
    q.push_back(1);

    // `q` doubles as the BFS queue: `head` is the next state to expand.
    for (std::size_t head = 0; head < q.size(); ++head) {
        // Copy the state first; push_back below may reallocate q's storage.
        const int state = q[head];

        for (int letter = 0; letter < SIGMA; ++letter) {
            const int child = g[state][letter];
            if (child != 0) {
                q.push_back(child);
            }
        }
    }
}

/**
 * Calculate the failed links for each state in q
 */
void calcFailed(char *s, int ls) {
    unsigned int i;
    int j;
    int c;

    f[1] = 1;
    for (i = 0; i < q.size(); ++i) {
        getFailure(q[i]);
    }

    int currState = 1;
    for (j = 0; j < ls; ++j) {
        c = s[j] - 'a';
        while (currState != 1 && !g[currState][c]) {
            currState = f[currState];
        }
        if (g[currState][c]) {
            currState = g[currState][c];
        }

        res[currState]++;
    }

    /**
     * Add the correspondent occurences to the failed links 
     */
    for (i = q.size() - 1; i; --i) {
        /**
         * f[q[i]] - suffix of q[i]
         *         --> all occurences of q[i] ==
         *           == occurences for f[q[i]]
         */
        res[f[q[i]]] += res[q[i]];
    }
}

/**
 * Reads the text and the dictionary from `fin`, builds the automaton and
 * writes to `fout` one line per dictionary word with its value from `res`.
 */
int main() {
    // Static storage keeps the ~1 MB buffer off the stack and guarantees it
    // starts zeroed, so a failed read leaves an empty text.
    static char s[MAX_SIZE];
    // Stays 0 if the count cannot be read, so nothing is processed.
    unsigned int n = 0;

    // Width-limited read: never stores more than MAX_SIZE - 1 characters.
    fin >> std::setw(MAX_SIZE) >> s;
    const int ls = static_cast<int>(std::strlen(s));

    fin >> n;

    buildTrie(n);

    statesTraversal();

    calcFailed(s, ls);

    for (unsigned int i = 0; i < n; ++i) {
        fout << res[wordState[i]] << '\n';
    }

    return 0;
}