Pagini recente » Cod sursa (job #3322727) | Cod sursa (job #3355966) | Cod sursa (job #3324557) | Cod sursa (job #3319353) | Cod sursa (job #3354240)
#include <stdio.h>
#include <string>
#include <queue>
using namespace std;
#define TEXT_LEN 1000005
#define WORD_LEN 10005
#define ALPHABET_SIZE 26
#define NUM_WORDS 105
char text[TEXT_LEN];
char word[WORD_LEN];
int word_freq[NUM_WORDS];
// unordered_map<int, string> indexToWord;
struct TrieNode {
TrieNode() : count(0), children{}, blue(nullptr), green(nullptr) {}
/**
* Number of words ending in this node.
*/
int count;
TrieNode* children[ALPHABET_SIZE];
/**
* Pointer to another node that represents the `longest suffix for current node`,
* equivalent to `the largest prefix in trie that is suffix for current node`.
*/
TrieNode* blue;
/**
* Pointer to the next node in the dictionary that can be reached by following blue arcs.
*/
TrieNode *green;
/**
* Cache for all words that end in this node.
*/
vector<int> word_index;
};
void trie_insert(TrieNode *node, char *suffix, const int i) {
const char c = *suffix;
if (c < 'a' || 'z' < c) {
++node->count;
node->word_index.push_back(i);
return;
}
if (!node->children[c - 'a']) {
node->children[c - 'a'] = new TrieNode();
}
trie_insert(node->children[c - 'a'], suffix + 1, i);
}
/**
* For every node sets a blue link to a node representing the longest possible strict suffix of it in the trie.
* For every node sets a green link to the first node in the dictionary that can be reached by following blue links.
*/
void bfs_build_links(TrieNode *root) {
queue<TrieNode*> q;
q.push(root);
while (!q.empty()) {
TrieNode *parent = q.front();
q.pop();
for (int i = 0; i < ALPHABET_SIZE; ++i) {
TrieNode* child = parent->children[i];
if (!child) {
continue;
}
q.push(child);
// Compute blue node for child
if (parent == root) {
// Nodes on the first level don't have suffixes. Assigning the root as blue link.
child->blue = root;
} else {
// Search for the largest suffix
TrieNode *parentBlue = parent->blue;
TrieNode *parentBlueChild = parentBlue->children[i];
while (!parentBlueChild && parentBlue != root) {
parentBlue = parentBlue->blue;
parentBlueChild = parentBlue->children[i];
}
child->blue = parentBlueChild ? parentBlueChild : root;
}
// Compute the green node for child
TrieNode* green = child->blue;
while (green != root) {
if (green->count > 0) {
child->green = green;
break;
}
// Use memoization to avoid double traversal of nodes
if (green->green) {
child->green = green->green;
break;
}
green = green->blue;
}
}
}
}
void aho_corasick_collect_solution(TrieNode *root, TrieNode *node, const char *suffix, const int pos) {
return;
if (node->count > 0 || node->green) {
for (int i = 0; i < node->count; ++i) {
// printf("%s:%d, ", indexToWord[node->word_index[i]].c_str(), pos);
word_freq[node->word_index[i]]++;
}
TrieNode *print = node->green;
while (print && print != root) {
for (int i = 0; i < print->count; ++i) {
// printf("%s:%d, ", indexToWord[print->word_index[i]].c_str(), pos);
word_freq[print->word_index[i]]++;
}
print = print->green;
}
// printf("\n");
}
const char c = *suffix;
if (c < 'a' || 'z' < c) {
return;
}
// Find a child or a suffix's child
TrieNode* parent = node;
TrieNode* next = parent->children[(*suffix) - 'a'];
while (!next) {
parent = parent->blue;
next = parent->children[(*suffix) - 'a'];
}
if (next) {
aho_corasick_collect_solution(root, next, suffix + 1, pos + 1);
}
}
int main() {
freopen("../ahocorasick.in", "r", stdin);
freopen("../ahocorasick.out", "w", stdout);
int n; // n <= 100
auto *root = new TrieNode();
scanf("%s", text);
scanf("%d", &n);
for (int i = 0; i < n; ++i) {
scanf("%s", word);
trie_insert(root, word, i);
// indexToWord.insert({i, string(word)});
}
bfs_build_links(root);
aho_corasick_collect_solution(root, root, text, 0);
for (int i = 0; i < n; ++i) {
printf("%d\n", word_freq[i]);
}
return 0;
}