Cod sursa(job #1744113)

Utilizator mouse_wirelessMouse Wireless mouse_wireless Data 19 august 2016 12:38:57
Problema Aho-Corasick Scor 80
Compilator cpp Status done
Runda Arhiva educationala Marime 5.53 kb
#include <cstdio>
#include <cstring>
#include <cstdlib>

typedef unsigned char BYTE;
typedef unsigned int DWORD;
typedef bool BOOL;
#define FALSE false
#define TRUE true
#define CHECK(expr) if (!expr) return FALSE;
#define CHECKRET(expr, retval) if (!expr) return retval;

void *fakeMalloc(int sizeToAllocate, BYTE **fakeMallocBuffer, int *fakeMallocSizeRemaining) {
	CHECKRET((*fakeMallocSizeRemaining) - sizeToAllocate >= 0, NULL)
		*fakeMallocSizeRemaining -= sizeToAllocate;
	void *returnValue = (void*)(*fakeMallocBuffer);
	*fakeMallocBuffer += sizeToAllocate;
	return returnValue;
}

#define AhoCorasick_AlphabetSize 27

struct AhoCorasick_Node {
	DWORD finishedWord;
	DWORD numberOfMatches;
	AhoCorasick_Node *next[AhoCorasick_AlphabetSize], *fail;
};

BYTE AhoCorasick_ByteConversion(BYTE b) {
	if (b >= 'A' && b <= 'Z') return b - 'A' + 1;
	if (b >= 'a' && b <= 'z') return (b ^ 0x20) - 'A' + 1;
	return 0;
}

AhoCorasick_Node *AhoCorasick_NodeConstructor(BYTE **fakeMallocBuffer, int *fakeMallocSizeRemaining) {
	AhoCorasick_Node *nod = (AhoCorasick_Node*)fakeMalloc(sizeof(AhoCorasick_Node), fakeMallocBuffer, fakeMallocSizeRemaining);
	CHECKRET(nod, NULL)
		nod->finishedWord = (DWORD)(-1);
	nod->numberOfMatches = 0;
	memset(nod->next, 0, sizeof(nod->next));
	nod->fail = NULL;
	return nod;
}

BOOL AhoCorasick_Insert(AhoCorasick_Node *nod, BYTE *word, DWORD sizeRemaining, DWORD wordID, BYTE **fakeMallocBuffer, int *fakeMallocSizeRemaining) {
	if (!sizeRemaining) {
		nod->finishedWord = wordID;
		return TRUE;
	}
	BYTE next = AhoCorasick_ByteConversion(*word);
	if (nod->next[next] == NULL) {
		nod->next[next] = AhoCorasick_NodeConstructor(fakeMallocBuffer, fakeMallocSizeRemaining);
		CHECK(nod->next[next] != NULL)
	}
	return AhoCorasick_Insert(nod->next[next], word + 1, sizeRemaining - 1, wordID, fakeMallocBuffer, fakeMallocSizeRemaining);
}

void AhoCorasick_SetFailsBFS(AhoCorasick_Node *nod, AhoCorasick_Node **nodeQueue, int *queueSize) {
	nod->fail = nod;
	int qindex = 0; *queueSize = 1;
	for (nodeQueue[0] = nod; qindex < *queueSize; ++qindex) {
		AhoCorasick_Node *front = nodeQueue[qindex];
		for (int i = 0; i < AhoCorasick_AlphabetSize; ++i) {
			AhoCorasick_Node *curNode = front->next[i];
			if (curNode == NULL) continue;
			//ne ducem in fail pana gasim un nod care are ca fiu ultimul byte al nodului sau pana ajungem in radacina
			AhoCorasick_Node *dolar = front->fail;
			for (; dolar != nod && dolar->next[i] == NULL; dolar = dolar->fail)
				;
			if (dolar->next[i] != NULL && dolar->next[i] != front->next[i]) dolar = dolar->next[i];
			curNode->fail = dolar;
			nodeQueue[(*queueSize)++] = curNode;
		}
	}

	nod->fail = NULL;
}

#define AmmountToMalloc 10384000
#define MaxBigwordLen 1000010
#define SmallWordLen 10010
#define MaxNumberOfWords 110

BYTE buf[MaxBigwordLen];
BYTE words[MaxNumberOfWords][SmallWordLen];

BOOL AhoCorasick_Precompute(
	BYTE words[][SmallWordLen],
	DWORD *wordSizes,
	DWORD numWords,
	BYTE *fakeMallocBuffer,
	int fakeMallocSizeRemaining,
	//OUT parameters:
	AhoCorasick_Node **root,
	AhoCorasick_Node ***nodeQueue,
	int *queueSize) {

	DWORD totalLen = 0;
	for (DWORD i = 0; i < numWords; i++)
		totalLen += wordSizes[i];
	*nodeQueue = (AhoCorasick_Node**)fakeMalloc(sizeof(AhoCorasick_Node*)* (totalLen + 2), &fakeMallocBuffer, &fakeMallocSizeRemaining);
	CHECK(*nodeQueue)

		*root = AhoCorasick_NodeConstructor(&fakeMallocBuffer, &fakeMallocSizeRemaining);

	CHECK(*root)
	for (DWORD i = 0; i < numWords; ++i)
		CHECK(AhoCorasick_Insert(*root, words[i], wordSizes[i], i, &fakeMallocBuffer, &fakeMallocSizeRemaining));
	AhoCorasick_SetFailsBFS(*root, *nodeQueue, queueSize);

	return TRUE;
}

BOOL AhoCorasick_Matching(
	BYTE *buffer,
	DWORD buffSz,
	AhoCorasick_Node *root,
	AhoCorasick_Node **nodeQueue,
	int queueSize,
	AhoCorasick_Node **curNode,
	DWORD *numberOfMatches) {

	for (DWORD i = 0; i < buffSz; i++) {
		BYTE curByte = AhoCorasick_ByteConversion(buffer[i]);
		for (; (*curNode)->next[curByte] == NULL && *curNode != root; *curNode = (*curNode)->fail)
			;
		if ((*curNode)->next[curByte] != NULL) (*curNode) = (*curNode)->next[curByte];
		++((*curNode)->numberOfMatches);
	}

	for (int qindex = queueSize - 1; qindex >= 0; --qindex) {
		AhoCorasick_Node *front = nodeQueue[qindex];
		CHECK(front)
		if (front->fail != NULL)
			front->fail->numberOfMatches += front->numberOfMatches;

		DWORD finWord = front->finishedWord;
		if (finWord != (DWORD)(-1))
			numberOfMatches[finWord] = front->numberOfMatches;
	}
	return TRUE;
}

BOOL DoEverything() {
	scanf("%s", buf);
	DWORD numberOfWords;
	scanf("%u", &numberOfWords);
	DWORD wordSizes[MaxNumberOfWords];
	for (DWORD i = 0; i < numberOfWords; ++i) {
		scanf("%s", words[i]);
		wordSizes[i] = strlen((char*)words[i]);
	}

	AhoCorasick_Node *root;
	AhoCorasick_Node **nodeQueue;
	int queueSize;
	BYTE *fakeMallocBuffer = (BYTE*)malloc(AmmountToMalloc);
	DWORD numberOfMatches[MaxNumberOfWords] = { 0 };
	CHECK(AhoCorasick_Precompute(words, wordSizes, numberOfWords, fakeMallocBuffer, AmmountToMalloc, &root, &nodeQueue, &queueSize));

	AhoCorasick_Node *lastNode = root;
	if (!AhoCorasick_Matching(buf, strlen((char*)buf), root, nodeQueue, queueSize, &lastNode, numberOfMatches))
		return FALSE;

	for (DWORD i = 0; i < numberOfWords; i++)
		printf("%u\n", numberOfMatches[i]);

	return TRUE;
}

int main() {
#ifdef INFOARENA
	freopen("ahocorasick.in", "r", stdin);
	freopen("ahocorasick.out", "w", stdout);
#endif
	DoEverything();
	return 0;
}