Cod sursa(job #1744273)

Utilizator mouse_wirelessMouse Wireless mouse_wireless Data 19 august 2016 15:53:30
Problema Aho-Corasick Scor 100
Compilator cpp Status done
Runda Arhiva educationala Marime 5.18 kb
#include <cstdio>
#include <cstring>
#include <cstdlib>

typedef unsigned char BYTE;
typedef unsigned int DWORD;
typedef bool BOOL;
#define FALSE false
#define TRUE true
#define CHECK(expr) if (!expr) { return FALSE; }
#define CHECKRET(expr, retval) if (!expr) return retval;

#define AmmountToMalloc 10384000
BYTE bigBuffer[AmmountToMalloc];
BYTE *fakeMallocBuffer = bigBuffer;

void *fakeMalloc(int sizeToAllocate) {
	void *returnValue = (void*)fakeMallocBuffer;
	fakeMallocBuffer += sizeToAllocate;
	return returnValue;
}

#define AhoCorasick_AlphabetSize 26

struct AhoCorasick_Node {
	DWORD finishedWord;
	DWORD numberOfMatches;
	AhoCorasick_Node *next[AhoCorasick_AlphabetSize], *fail;
};

inline BYTE AhoCorasick_ByteConversion(BYTE b) {
	return b - 'a';
}

AhoCorasick_Node *AhoCorasick_NodeConstructor() {
	AhoCorasick_Node *nod = (AhoCorasick_Node*)fakeMalloc(sizeof(AhoCorasick_Node));
	CHECKRET(nod, NULL);
	nod->finishedWord = (DWORD)(-1);
	nod->numberOfMatches = 0;
	memset(nod->next, 0, sizeof(nod->next));
	nod->fail = NULL;
	return nod;
}

BYTE AhoCorasick_Insert(AhoCorasick_Node *nod, BYTE *word, DWORD sizeRemaining, DWORD wordID) {
	BYTE next = AhoCorasick_ByteConversion(*word);
	if (nod->next[next] == NULL) {
		nod->next[next] = AhoCorasick_NodeConstructor();
		CHECKRET(nod->next[next] != NULL, (BYTE)(-1));
	}
	return next;
}

void AhoCorasick_SetFailsBFS(AhoCorasick_Node *nod, AhoCorasick_Node **nodeQueue, int *queueSize) {
	nod->fail = nod;
	int qindex = 0; *queueSize = 1;
	for (nodeQueue[0] = nod; qindex < *queueSize; ++qindex) {
		AhoCorasick_Node *front = nodeQueue[qindex];
		for (int i = 0; i < AhoCorasick_AlphabetSize; ++i) {
			AhoCorasick_Node *curNode = front->next[i];
			if (curNode == NULL) continue;
			AhoCorasick_Node *dolar = front->fail;
			for (; dolar != nod && dolar->next[i] == NULL; dolar = dolar->fail)
				;
			if (dolar->next[i] != NULL && dolar->next[i] != front->next[i]) dolar = dolar->next[i];
			curNode->fail = dolar;
			nodeQueue[(*queueSize)++] = curNode;
		}
	}

	nod->fail = NULL;
}

#define MaxBigwordLen 1000010
#define SmallWordLen 10010
#define MaxNumberOfWords 110

BYTE buf[MaxBigwordLen];
BYTE words[MaxNumberOfWords][SmallWordLen];
DWORD similars[MaxNumberOfWords];

#define MAX(i, j) (((i) > (j)) ? (i) : (j))

BOOL AhoCorasick_Precompute(
	BYTE words[][SmallWordLen],
	DWORD *wordSizes,
	DWORD numWords,
	//OUT parameters:
	AhoCorasick_Node **root,
	AhoCorasick_Node ***nodeQueue,
	int *queueSize) {

	DWORD totalLen = 0;
	for (DWORD i = 0; i < numWords; i++)
		totalLen += wordSizes[i];
	*nodeQueue = (AhoCorasick_Node**)fakeMalloc(sizeof(AhoCorasick_Node*)* (totalLen + 2));
	CHECK(*nodeQueue);

	*root = AhoCorasick_NodeConstructor();

	CHECK(*root);
	for (DWORD i = 0; i < numWords; ++i) {
		AhoCorasick_Node *curNode = *root;
		BYTE next = 0;
		BYTE *positionInWord = words[i];
		DWORD sizeRemaining = wordSizes[i];
		while (sizeRemaining) {
			next = AhoCorasick_Insert(curNode, positionInWord, wordSizes[i], i);
			curNode = curNode->next[next];
			++positionInWord;
			--sizeRemaining;
		}
		if (curNode->finishedWord != (DWORD)(-1))
			similars[i] = curNode->finishedWord;
		else
			curNode->finishedWord = i;
	}
	AhoCorasick_SetFailsBFS(*root, *nodeQueue, queueSize);

	return TRUE;
}

BOOL AhoCorasick_Matching(
	BYTE *buffer,
	DWORD buffSz,
	AhoCorasick_Node *root,
	AhoCorasick_Node **nodeQueue,
	int queueSize,
	DWORD *numberOfMatches) {

	AhoCorasick_Node *curNode = root;

	for (DWORD i = 0; i < buffSz; i++) {
		BYTE curByte = AhoCorasick_ByteConversion(buffer[i]);
		for (; curNode->next[curByte] == NULL && curNode != root; curNode = curNode->fail)
			;
		if (curNode->next[curByte] != NULL) curNode = curNode->next[curByte];
		++(curNode->numberOfMatches);
	}

	for (int qindex = queueSize - 1; qindex >= 0; --qindex) {
		AhoCorasick_Node *front = nodeQueue[qindex];
		CHECK(front);
		if (front->fail != NULL)
			front->fail->numberOfMatches += front->numberOfMatches;

		DWORD finWord = front->finishedWord;
		if (finWord != (DWORD)(-1))
			numberOfMatches[finWord] = front->numberOfMatches;
	}
	return TRUE;
}

BOOL DoEverything() {
	scanf("%s", buf);
	DWORD numberOfWords;
	scanf("%u", &numberOfWords);
	DWORD wordSizes[MaxNumberOfWords];
	for (DWORD i = 0; i < numberOfWords; ++i) {
		scanf("%s", words[i]);
		wordSizes[i] = strlen((char*)words[i]);
	}

	AhoCorasick_Node *root;
	AhoCorasick_Node **nodeQueue;
	int queueSize;
	DWORD numberOfMatches[MaxNumberOfWords] = { 0 };
	memset(similars, 0xFF, sizeof(similars));
	AhoCorasick_Precompute(words, wordSizes, numberOfWords, &root, &nodeQueue, &queueSize);

	if (!AhoCorasick_Matching(buf, strlen((char*)buf), root, nodeQueue, queueSize, numberOfMatches))
		return FALSE;

	for (DWORD i = 0; i < numberOfWords; i++) {
		DWORD j = similars[i];
		if (j != (DWORD)(-1))
			printf("%u\n", numberOfMatches[j]);
		else
			printf("%u\n", numberOfMatches[i]);
	}

	return TRUE;
}

int main() {
#ifdef INFOARENA
	freopen("ahocorasick.in", "r", stdin);
	freopen("ahocorasick.out", "w", stdout);
#else
	freopen("fis.in", "r", stdin);
#endif
	DoEverything();
	return 0;
}