Cod sursa(job #2022432)

Utilizator DorelBarbuBarbu Dorel DorelBarbu Data 16 septembrie 2017 15:49:48
Problema Aho-Corasick Scor 0
Compilator cpp Status done
Runda Arhiva educationala Marime 4.35 kb
#include <iostream>
#include <fstream>
#include <queue>
#include <vector>
#include <string>
#include <map>
using namespace std;

const int MAXN = 200, ALPHALEN = 26;


ifstream in("ahocorasick.in");
ofstream out("ahocorasick.out");

vector <string> words;
string text;
int n;
map <string,int> ap;




void read()
{
    in>>text;
    in>>n;
    for(int i = 1; i <= n; i++)
    {
        string buff;
        in>>buff;
        words.push_back(buff);
    }
}

struct node
{
    node* fail;
    vector <string> output;
    node* next[ALPHALEN];
    int index;

    node()
    {
        fail = NULL;
        for(int i = 0; i < ALPHALEN; i++)
            next[i] = NULL;
    }
};


struct finiteAutomata
{
    node* root;
    int numStates;
    finiteAutomata()
    {
        root = new node;
        root->index = 0;
        numStates = 0;
    }

    void addWord(node* &root,string subS,string s)
    {
        if(root == NULL)
        {
            root = new node;
            numStates++;
            root->index = numStates;
            //cout<<"New state created"<<' '<<subS<<' '<<s<<endl;
        }

        if(subS.size() == 1)
        {
            for(string word: root->output)
                if(word == s)
                    return;

            root->output.push_back(s);

            return;
        }

        addWord(root->next[subS[0]-'a'],subS.erase(0,1),s);
    }

    void buildTrie(vector <string> v)
    {
        for(int i = 0; i < v.size(); i++)
            addWord(root->next[v[i][0]-'a'],v[i],v[i]);
    }

    void buildFailFunction()
    {
        queue <node*> Q;

        for(int i = 0; i < ALPHALEN; i++)
            if(root->next[i] != NULL)
            {
                root->next[i]->fail = root;
                Q.push(root->next[i]);
                //printf("fail(%d) = %d\n",root->next[i]->index,0);
            }

        while(Q.empty() == false)
        {
            node* curr = Q.front();
            Q.pop();

            for(int i = 0; i < ALPHALEN; i++)
                if(curr->next[i] != NULL)
                {
                    node* state = curr->fail;
                    while(state != NULL && state->next[i] == NULL)
                        state = state->fail;

                    if(state != NULL)
                    {
                        curr->next[i]->fail = state->next[i];
                        //printf("fail(%d) = %d\n",curr->next[i]->index,state->next[i]->index);

                        node* s = curr->next[i];
                        node* fs = state->next[i];

                        for(int i = 0; i < fs->output.size(); i++)
                            s->output.push_back(fs->output[i]);
                    }
                    else
                    {
                        curr->next[i]->fail = root;
                        //printf("fail(%d) = %d\n",curr->next[i]->index,0);
                    }

                    Q.push(curr->next[i]);
                }
        }
    }

    void matchText(string text)
    {
        node* state = root;
        for(int i = 0; i < text.size(); i++)
        {
            char currChar = text[i];

            while(state != NULL && state->next[currChar-'a'] == NULL)
                state = state->fail;

            if(state != NULL)
            {
                state = state->next[currChar-'a'];
                /*
                if(state->output.empty() == false)
                {
                    for(string out: state->output)
                    {
                        ap[out]++;
                    }
                }*/
            }
            else
                state = root;
        }
    }


    void afis(node* root)
    {
        if(root == NULL)
            return;

        if(root->output.empty() == false)
        {
            for(int i = 0; i < root->output.size(); i++)
                cout<<root->output[i]<<endl;
        }

        for(int i = 0; i < ALPHALEN; i++)
            afis(root->next[i]);

    }
};




int main()
{
    finiteAutomata T;
    read();
    T.buildTrie(words);
    //T.afis(T.root);
    //cout<<"Number of states is: "<<T.numStates<<endl;
    T.buildFailFunction();
    T.matchText(text);

    //for(string word: words)
       //out<<ap[word]<<endl;

    return 0;
}