Cod sursa(job #664419)

Utilizator claudiumihailClaudiu Mihail claudiumihail Data 20 ianuarie 2012 06:21:02
Problema Aho-Corasick Scor 100
Compilator cpp Status done
Runda Arhiva educationala Marime 3.56 kb
#include <fstream>
#include <iostream>
#include <cstring>
#include <vector>

#define MAX_TEXT_SIZE 1000001
#define MAX_QUEUE_SIZE 1001001
#define MAX_WORD_SIZE 10000
#define MAX_WORDS 101
#define MAX_CHILDREN 26

using namespace std;

typedef unsigned int uint32;
typedef unsigned short uint16;

template<typename MaxNumWordsType>
struct Trie
{
	Trie() :
		pFail(NULL),
		numWordsEndingHere(0)
	{
		memset(vChild, NULL, MAX_CHILDREN*sizeof(Trie<MaxNumWordsType>*));
	}

	Trie<MaxNumWordsType>* pFail;
	uint32 numWordsEndingHere;
	vector<MaxNumWordsType> IDs;
	Trie<MaxNumWordsType>* vChild[MAX_CHILDREN];
};

int uQL, uQR;
Trie<uint16>* Q[MAX_QUEUE_SIZE];
Trie<uint16> trieRoot;

inline bool empty()
{
	return (uQL >= uQR);
}

inline uint32 get_child_index(char ch)
{
	return ch - 'a';
}

inline void push(Trie<uint16>* ptr)
{
	Q[uQR] = ptr;
	uQR++;
}

inline Trie<uint16>* pop()
{
	uQL++;
	return Q[uQL-1];
}

inline void insert(const char* szWord, const uint16 uID)
{
	uint16 index = 0;
	Trie<uint16>* ptrTrie = &trieRoot;
	
	while (szWord[index] != 0)
	{
		if (ptrTrie->vChild[get_child_index(szWord[index])] == NULL)
		{
			ptrTrie->vChild[get_child_index(szWord[index])] = new (std::nothrow) Trie<uint16>;
		}

		ptrTrie = ptrTrie->vChild[get_child_index(szWord[index])];
		if (szWord[index+1] == 0)
		{
			ptrTrie->IDs.push_back(uID);
		}
		index++;
	}
}

inline void SetFailLinks()
{
	trieRoot.pFail = &trieRoot;
	push(&trieRoot);
	
	while (!empty())
	{
		Trie<uint16>* ptrNode = pop();

		for (uint32 i=0; i<26; ++i)
		{
			if (ptrNode->vChild[i] != NULL)
			{
				Trie<uint16>* ptrFail = ptrNode->pFail;
				while (ptrFail != &trieRoot && ptrFail->vChild[i] == NULL)
				{
					ptrFail = ptrFail->pFail;
				}
				
				if (ptrFail->vChild[i] != NULL && ptrFail->vChild[i] != ptrNode->vChild[i])
				{
					ptrFail = ptrFail->vChild[i];
				}
				
				ptrNode->vChild[i]->pFail = ptrFail;
				push(ptrNode->vChild[i]);
			}
		}
	}
	
	trieRoot.pFail = NULL;
	
	//cout << trieRoot.vChild[2]->vChild[0]->vChild[0]->ID << " " << trieRoot.vChild[0] << endl;
}

inline void ComputeFreq(uint32 wordFreq[])
{
	while (uQR > 0)
	{
		Trie<uint16>* ptrNode = Q[uQR-1];
		uQR--;
		
		if (ptrNode->pFail != NULL)
		{
			ptrNode->pFail->numWordsEndingHere += ptrNode->numWordsEndingHere;
		}
		
		for (uint32 i=0; i<ptrNode->IDs.size(); ++i)
		{
			wordFreq[ptrNode->IDs[i]] = ptrNode->numWordsEndingHere;
		}
	}
}

inline void AhoCorasick(const char szText[], uint32 wordFreq[])
{
	SetFailLinks();
	
	Trie<uint16>* ptrNode = &trieRoot;
	const uint32 strSize = strlen(szText);
	for (uint32 i=0; i<strSize; ++i)
	{
		const int index = get_child_index(szText[i]);
		while (ptrNode != &trieRoot && ptrNode->vChild[index] == NULL)
		{
			ptrNode = ptrNode->pFail;
		}
		if (ptrNode->vChild[index] != NULL)
		{
			ptrNode = ptrNode->vChild[index];
		}
		ptrNode->numWordsEndingHere++;
	}
	
	ComputeFreq(wordFreq);
}

char text[MAX_TEXT_SIZE];
char word[MAX_WORD_SIZE];

uint32 wordFreq[MAX_WORDS];

int main()
{
	uint32 dictSize;
	fstream fin("ahocorasick.in", fstream::in);
	fstream fout("ahocorasick.out", fstream::out);
	
	fin >> text;
	//cout << text << endl;
	
	fin >> dictSize;
	//cout << dictSize << endl;
	
	for (uint32 i=0; i<dictSize; ++i)
	{
		fin >> word;
		//cout << word << endl;
		insert(word, i);
	}
	
	AhoCorasick(text, wordFreq);
	
	for (uint32 i=0; i<dictSize; ++i)
	{
		fout << wordFreq[i] << "\n";
	}
	
	fin.close();
	fout.close();
	return 0;
}