Cod sursa(job #2924508)

Utilizator radu.z5Zamfirescu Radu Ioan radu.z5 Data 3 octombrie 2022 21:44:27
Problema Aho-Corasick Scor 100
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 3.8 kb
#include <fstream>
#include <vector>
#include <list>
#include <unordered_map>


struct word_list_node {
	// pastreaza indecsii cuvintelor din lista data pentru care starea e
	// marcata ca finalul unui cuvant (deoarece in input sunt cuvinte duplicate)
	std::vector<int> words_ids;
	struct word_list_node *next;

	word_list_node(int q) : next(nullptr) {
		words_ids = {q};
	}
};

struct word_list {
	word_list_node *head;
	word_list_node *back;

	bool empty() const {
		return head == nullptr;
	}

	void push_front(word_list_node *node) {
		node->next = head;
		head = node;

		if (back == nullptr)
			back = node;
	}

	void push_back(word_list_node *node) {
		if (back != nullptr) {
			back->next = node;
			back = node;
		} else {
			head = back = node;
		}
	}
};


using namespace std;


/**
 * Pe langa goto, se initializeaza si fail si out
 */
void build_goto(ifstream &fin, vector<vector<int> *> &go_to,
				vector<int> &fail, vector<word_list> &out,
				int &no_states, int n)
{
	int q;
	string pattern;

	for (int i = 0; i < n; i++) {
		q = 0;
		fin >> pattern;

		for (char c : pattern) {
			int a = c - 'a';

			if ((*go_to[q])[a] <= 0) {
				// adaug o stare noua in automat
				go_to.emplace_back(new vector<int>(26, -1));
				fail.emplace_back(0);
				out.push_back({nullptr, nullptr});

				// adaug muchia catre noua stare
				(*go_to[q])[a] = no_states;
				q = no_states;

				no_states++;
			} else {
				q = (*go_to[q])[a];
			}
		}

		if (out[q].empty())
			out[q].push_back(new word_list_node(i));
		else
			out[q].head->words_ids.emplace_back(i);
	}
}


void build_failure(vector<vector<int> *> &go_to, vector<int> &fail,
				vector<word_list> &out)
{
	list<int> queue;

	for (char c = 'a'; c <= 'z'; c++) {
		int a = c - 'a';
		int next = (*go_to[0])[a];

		if (next > 0)
			queue.emplace_back(next);
	}

	while (!queue.empty()) {
		int r = queue.front();
		queue.pop_front();

		for (char c = 'a'; c <= 'z'; c++) {
			int a = c - 'a';
			int u = (*go_to[r])[a];

			if (u > 0) {
				queue.emplace_back(u);
				int v = fail[r];

				while ((*go_to[v])[a] == -1)
					v = fail[v];

				fail[u] = (*go_to[v])[a];

				if (out[u].empty()) {
					out[u] = out[fail[u]];
				} else {
					out[u].back->next = out[fail[u]].head;
					out[u].back = out[fail[u]].back;
				}
			}
		}
	}
}


void search(string &text, vector<vector<int> *> &go_to, vector<int> &fail,
			vector<word_list> &out, ofstream &fout, int n)
{
	int q = 0;
	vector<int> words_count(n, 0);

	// numara de cate ori s-a trecut printr-o stare care are nevida multimea
	// out asociata ei
	unordered_map<int, int> unempty_states;

	for (char c : text) {
		int a = c - 'a';

		while ((*go_to[q])[a] == -1)
			q = fail[q];

		q = (*go_to[q])[a];

		if (!out[q].empty()) {
			if (unempty_states.find(q) == unempty_states.end())
				unempty_states[q] = 1;
			else
				unempty_states[q]++;
		}
	}

	for (const auto &[q, state_traces] : unempty_states) {
		for (auto it = out[q].head; it != nullptr; it = it->next)
			for (auto idx : it->words_ids)
				words_count[idx] += state_traces;
	}

	for (int i = 0; i < words_count.size(); i++)
		fout << words_count[i] << '\n';
}


int main(void)
{
	ifstream fin("ahocorasick.in");
	ofstream fout("ahocorasick.out");

	string text;
	int n;

	fin >> text;
	fin >> n;

	// initializez automatul cu radacina, i.e
	// go_to[0][_] = 0, fail[0] = 0, out[0] = null
	vector<vector<int> *> go_to = {new vector<int>(26, 0)};
	vector<int> fail(1, 0);
	vector<word_list> out = {{nullptr, nullptr}};
	int no_states = 1;

	build_goto(fin, go_to, fail, out, no_states, n);
	build_failure(go_to, fail, out);
	search(text, go_to, fail, out, fout, n);

	fin.close();
	fout.close();
	return 0;
}