Cod sursa(job #1041793)

Utilizator sziliMandici Szilard szili Data 26 noiembrie 2013 09:33:04
Problema Aho-Corasick Scor 95
Compilator cpp Status done
Runda Arhiva educationala Marime 4 kb
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include <queue>
#include <string.h>

#define MAX_LENGTH 1000001
#define WORD_LENGTH 10001
#define MAX_WORD_NR 101

using namespace std;

typedef struct node{
    int index;
    int occurences;
    struct node* children['z' - 'a' + 2];
    struct node *failNode;

    int outputSize;
    int outputStringIndices[MAX_WORD_NR];
} NODE;

char a[MAX_LENGTH];
char words[MAX_WORD_NR][WORD_LENGTH];
int occurenceNr[MAX_WORD_NR];

int wordsNr;

NODE *trie;

void readLineAsString(char *a){
    char c;
    int length = 0;

    do {
        scanf("%c", &c);
        a[length++] = c;
    } while (c != '\n');

    length--;

    a[length] = '\0';
}

NODE * addToTrie(NODE * currentNode, char *word, int index){
    if (currentNode == NULL){
        currentNode = (NODE *) malloc(sizeof(NODE));
        currentNode->occurences = 0;
        currentNode->index = -1;
        currentNode->failNode = NULL;

        currentNode->outputSize = 0;

        for (int i=0; i<='z'-'a'; i++){
            currentNode->children[i] = NULL;
        }
    }

    if (word[0] == '\0'){
        (currentNode->occurences)++;

        currentNode->outputStringIndices[currentNode->outputSize ++] = index;
        currentNode->index = index;
    } else {
        currentNode->children[word[0] - 'a'] =
                addToTrie(currentNode->children[word[0] - 'a'], word+1, index);
    }

    return currentNode;
}


void constructTrie(){

    for (int i=0; i<wordsNr; i++){
        char * currentWord = words[i];
        trie = addToTrie(trie, currentWord, i);
    }

}

void constructFailureFunction(){

    //the root's failure node is itself
    trie->failNode = trie;

    queue<NODE *> q;
    q.push(trie);

    while (!q.empty()){
        NODE *p = q.front();
        q.pop();

        for (int i=0; i<='z' - 'a'; i++){
            if (p->children[i] != NULL){
                NODE *qq = p->children[i];
                q.push(qq);

                NODE *r = p->failNode;
                while (r != trie && r->children[i] == NULL){
                    r = r->failNode;
                }

                if (r->children[i] == NULL || r->children[i] == qq){
                    //there is no word which starts with the character the current node represents
                    qq->failNode = trie;
                } else {
                    qq->failNode = r->children[i];
                }

                //append the failnode's output to the current node
                for (int i=0; i<qq->failNode->outputSize; i++){
                    qq->outputStringIndices[qq->outputSize++] = qq->failNode->outputStringIndices[i];
                }
            }
        }

    }

}

void constructAhoCorasickAutomaton(){
    //phase 1: construct prefix tree + initialize output for each node
    constructTrie();

    //phase 2: construct the failure function and update the output for each state in the FSM
    constructFailureFunction();
}


int main()
{
    freopen("ahocorasick.in", "r", stdin);
    freopen("ahocorasick.out", "w", stdout);

    readLineAsString(a);

    scanf("%d\n", &wordsNr);
    for (int i=0; i<wordsNr; i++){
        readLineAsString(words[i]);
    }

    constructAhoCorasickAutomaton();

    int aLen = strlen(a);

    NODE *currentState = trie;
    for (int i=0; i<aLen; i++){
        char currentCar = a[i];

        while (currentState != trie && currentState->children[currentCar - 'a'] == NULL){
            currentState = currentState->failNode;
        }

        if (currentState->children[currentCar - 'a'] != NULL){
            currentState = currentState->children[currentCar - 'a'];
        }

        for (int j=0; j<currentState->outputSize; j++){
            occurenceNr[currentState->outputStringIndices[j]]++;
        }
    }

    for (int i=0; i<wordsNr; i++){
        printf("%d\n", occurenceNr[i]);
    }

    return 0;
}