Cod sursa(job #2924532)

Utilizator radu.z5Zamfirescu Radu Ioan radu.z5 Data 4 octombrie 2022 16:40:40
Problema Aho-Corasick Scor 100
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 3.51 kb
#include <fstream>
#include <vector>
#include <list>

using namespace std;


/**
 * Pe langa goto, se initializeaza si fail si out
 */
void build_goto(ifstream &fin, vector<vector<int> *> &go_to,
				vector<int> &fail, vector<vector<int> *> &out,
				int &no_states, int n)
{
	int q;
	string pattern;

	for (int i = 0; i < n; i++) {
		q = 0;
		fin >> pattern;

		for (char c : pattern) {
			int a = c - 'a';

			if ((*go_to[q])[a] <= 0) {
				// adaug o stare noua in automat
				go_to.emplace_back(new vector<int>(26, -1));
				fail.emplace_back(0);
				out.push_back(nullptr);

				// adaug muchia catre noua stare
				(*go_to[q])[a] = no_states;
				q = no_states;

				no_states++;
			} else {
				q = (*go_to[q])[a];
			}
		}

		if (out[q] == nullptr)
			out[q] = new vector<int>();
		out[q]->push_back(i);
	}
}


void build_failure(vector<vector<int> *> &go_to, vector<int> &fail,
					list<int> &reverse)
{
	list<int> queue;

	for (char c = 'a'; c <= 'z'; c++) {
		int a = c - 'a';
		int next = (*go_to[0])[a];

		if (next > 0) {
			queue.emplace_back(next);
			reverse.push_front(next);
		}
	}

	while (!queue.empty()) {
		int r = queue.front();
		queue.pop_front();

		for (char c = 'a'; c <= 'z'; c++) {
			int a = c - 'a';
			int u = (*go_to[r])[a];

			if (u > 0) {
				queue.emplace_back(u);
				reverse.push_front(u);
				int v = fail[r];

				while ((*go_to[v])[a] == -1)
					v = fail[v];

				fail[u] = (*go_to[v])[a];
				// reuninuea se bazeaza pe linkul de fail
			}
		}
	}
}


vector<int> *search(string &text, vector<vector<int> *> &go_to,
			vector<int> &fail, vector<vector<int> *> &out)
{
	int q = 0;
	vector<int> *states_count = new vector<int>(out.size(), 0);
	vector<int> &count = *states_count;

	count[q] = 1;

	for (char c : text) {
		int a = c - 'a';

		while ((*go_to[q])[a] == -1)
			q = fail[q];

		q = (*go_to[q])[a];
		count[q]++;
	}

	return states_count;
}


void write_apps(vector<vector<int> *> &go_to, vector<int> &fail,
				vector<vector<int> *> &out, list<int> &reverse,
				vector<int> &states_count, ofstream &fout, int n)
{
	vector<int> words_count(n, 0);

	// motivul pentru care nodurile se parcurg in ordine inversa este sa
	// vad intai cand apar cuvintele mai lungi (sau cuvintele
	// corespunzatoare unor stari de adancime mai mare), care eventual pot
	// contine sufixe corespunzatoare altor noduri
	while (!reverse.empty()) {
		int q = reverse.front();
		reverse.pop_front();

		if (out[q])
			for (int w : *out[q])
				words_count[w] += states_count[q];

		// pentru starea care e sufix pentru cuvantul starii q, i.e fail[q],
		// adun de cate ori se trece prin q; de asta se face parcurgere
		// bottom up
		states_count[fail[q]] += states_count[q];
	}

	for (int i = 0; i < n; i++)
		fout << words_count[i] << '\n';
}


int main(void)
{
	ifstream fin("ahocorasick.in");
	ofstream fout("ahocorasick.out");

	string text;
	int n;

	fin >> text;
	fin >> n;

	// initializez automatul cu radacina, i.e
	// go_to[0][_] = 0, fail[0] = 0, out[0] = null
	vector<vector<int> *> go_to = {new vector<int>(26, 0)};
	vector<int> fail(1, 0);
	vector<vector<int> *> out = {nullptr};
	int no_states = 1;
	list<int> reverse;

	build_goto(fin, go_to, fail, out, no_states, n);
	build_failure(go_to, fail, reverse);
	vector<int> *states_count = search(text, go_to, fail, out);

	write_apps(go_to, fail, out, reverse, *states_count, fout, n);

	fin.close();
	fout.close();
	return 0;
}