Pagini recente » Cod sursa (job #3330454) | Cod sursa (job #3311437) | Cod sursa (job #3319394) | Cod sursa (job #3354247) | Cod sursa (job #3354277)
#include <stdio.h>
#include <string>
#include <queue>
using namespace std;
#define TEXT_LEN 1000005
#define WORD_LEN 10005
#define ALPHABET_SIZE 26
#define NUM_WORDS 105
char text[TEXT_LEN];
char word[WORD_LEN];
int word_freq[NUM_WORDS];
unordered_map<int, string> indexToWord;
struct TrieNode {
TrieNode() : count(0), children{}, failure(nullptr), output(nullptr) {}
TrieNode* children[ALPHABET_SIZE];
/**
* Pointer to another node that represents the `longest suffix for current node`,
* equivalent to `the largest prefix in trie that is suffix for current node`.
* This is the node where we can restart the matching from.
* This link (blue link) is used for fast prefix matching when advancing the main text.
*/
TrieNode* failure;
/**
* Pointer to the next node in the dictionary that can be reached by following failure arcs.
* This link is used to reconstruct the solution: the prefixes that match and that are a word in the dictionary.
* This is an optimization to avoid iterating failure links that are not a complete word when reconstructing the solution.
*/
TrieNode *output;
/**
* Number of words ending in this node.
*/
int count;
/**
* Cache for all words that end in this node.
*/
vector<int> word_index;
};
void trie_insert(TrieNode *node, char *suffix, const int i) {
const char c = *suffix;
if (c < 'a' || 'z' < c) {
++node->count;
node->word_index.push_back(i);
return;
}
if (!node->children[c - 'a']) {
node->children[c - 'a'] = new TrieNode();
}
trie_insert(node->children[c - 'a'], suffix + 1, i);
}
/**
* For every node sets a failure link to a node representing the longest possible strict suffix of it in the trie.
* For every node sets an output link to the first node in the dictionary that can be reached by following failure links.
*/
void bfs_build_links(TrieNode *root) {
queue<TrieNode*> q;
q.push(root);
while (!q.empty()) {
TrieNode *parent = q.front();
q.pop();
for (int i = 0; i < ALPHABET_SIZE; ++i) {
TrieNode* child = parent->children[i];
if (!child) {
continue;
}
q.push(child);
// Compute failure node for child
if (parent == root) {
// Nodes on the first level don't have suffixes. Assigning the root as failure link.
child->failure = root;
} else {
// Search for the largest suffix
TrieNode *parentfailure = parent->failure;
TrieNode *parentfailureChild = parentfailure->children[i];
while (!parentfailureChild && parentfailure != root) {
parentfailure = parentfailure->failure;
parentfailureChild = parentfailure->children[i];
}
child->failure = parentfailureChild ? parentfailureChild : root;
}
// Compute the output node for child
TrieNode* output = child->failure;
while (output != root) {
if (output->count > 0) {
child->output = output;
break;
}
// Use memoization to avoid double traversal of nodes
if (output->output) {
child->output = output->output;
break;
}
output = output->failure;
}
}
}
}
void aho_corasick_collect_solution(TrieNode *root, const char *suffix) {
int pos = 0;
TrieNode *node = root;
while (*suffix != '\0') {
TrieNode* next = node->children[(*suffix) - 'a'];
while (!next) {
node = node->failure;
if (!node) {
break;
}
next = node->children[(*suffix) - 'a'];
}
node = next ? next : root;
suffix++;
pos++;
if (node->count > 0 || node->output) {
for (int i = 0; i < node->count; ++i) {
// printf("%s:%d, ", indexToWord[node->word_index[i]].c_str(), pos);
word_freq[node->word_index[i]]++;
}
TrieNode *print = node->output;
while (print && print != root) {
for (int i = 0; i < print->count; ++i) {
// printf("%s:%d, ", indexToWord[print->word_index[i]].c_str(), pos);
word_freq[print->word_index[i]]++;
}
print = print->output;
}
// printf("\n");
}
}
}
int main() {
freopen("ahocorasick.in", "r", stdin);
freopen("ahocorasick.out", "w", stdout);
int n; // n <= 100
auto *root = new TrieNode();
scanf("%s", text);
scanf("%d", &n);
for (int i = 0; i < n; ++i) {
scanf("%s", word);
trie_insert(root, word, i);
indexToWord.insert({i, string(word)});
}
bfs_build_links(root);
aho_corasick_collect_solution(root, text);
for (int i = 0; i < n; ++i) {
printf("%d\n", word_freq[i]);
}
return 0;
}