Cod sursa(job #3354277)

Utilizator batistaUPB-Oprea-Cosmin-Dumitru batista Data 17 mai 2026 11:56:03
Problema Aho-Corasick Scor 0
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 5.09 kb
#include <stdio.h>
#include <string>
#include <queue>

using namespace std;

#define TEXT_LEN 1000005
#define WORD_LEN 10005
#define ALPHABET_SIZE 26
#define NUM_WORDS 105

char text[TEXT_LEN];
char word[WORD_LEN];
int word_freq[NUM_WORDS];
unordered_map<int, string> indexToWord;

struct TrieNode {
    TrieNode() : count(0), children{}, failure(nullptr), output(nullptr) {}

    TrieNode* children[ALPHABET_SIZE];

    /**
     * Pointer to another node that represents the `longest suffix for current node`,
     * equivalent to `the largest prefix in trie that is suffix for current node`.
     * This is the node where we can restart the matching from.
     * This link (blue link) is used for fast prefix matching when advancing the main text.
     */
    TrieNode* failure;

    /**
     * Pointer to the next node in the dictionary that can be reached by following failure arcs.
     * This link is used to reconstruct the solution: the prefixes that match and that are a word in the dictionary.
     * This is an optimization to avoid iterating failure links that are not a complete word when reconstructing the solution.
     */
    TrieNode *output;

    /**
     * Number of words ending in this node.
     */
    int count;

    /**
     * Cache for all words that end in this node.
     */
    vector<int> word_index;
};

void trie_insert(TrieNode *node, char *suffix, const int i) {
    const char c = *suffix;
    if (c < 'a' || 'z' < c) {
        ++node->count;
        node->word_index.push_back(i);
        return;
    }

    if (!node->children[c - 'a']) {
        node->children[c - 'a'] = new TrieNode();
    }

    trie_insert(node->children[c - 'a'], suffix + 1, i);
}

/**
 * For every node sets a failure link to a node representing the longest possible strict suffix of it in the trie.
 * For every node sets an output link to the first node in the dictionary that can be reached  by following failure links.
 */
void bfs_build_links(TrieNode *root) {
    queue<TrieNode*> q;
    q.push(root);

    while (!q.empty()) {
        TrieNode *parent = q.front();
        q.pop();

        for (int i = 0; i < ALPHABET_SIZE; ++i) {
            TrieNode* child = parent->children[i];
            if (!child) {
                continue;
            }
            q.push(child);

            // Compute failure node for child
            if (parent == root) {
                // Nodes on the first level don't have suffixes. Assigning the root as failure link.
                child->failure = root;
            } else {
                // Search for the largest suffix
                TrieNode *parentfailure = parent->failure;
                TrieNode *parentfailureChild = parentfailure->children[i];
                while (!parentfailureChild && parentfailure != root) {
                    parentfailure = parentfailure->failure;
                    parentfailureChild = parentfailure->children[i];
                }
                child->failure = parentfailureChild ? parentfailureChild : root;
            }

            // Compute the output node for child
            TrieNode* output = child->failure;
            while (output != root) {
                if (output->count > 0) {
                    child->output = output;
                    break;
                }
                // Use memoization to avoid double traversal of nodes
                if (output->output) {
                    child->output = output->output;
                    break;
                }
                output = output->failure;
            }
        }
    }
}

void aho_corasick_collect_solution(TrieNode *root, const char *suffix) {
    int pos = 0;
    TrieNode *node = root;

    while (*suffix != '\0') {
        TrieNode* next = node->children[(*suffix) - 'a'];
        while (!next) {
            node = node->failure;
            if (!node) {
                break;
            }
            next = node->children[(*suffix) - 'a'];
        }

        node = next ? next : root;
        suffix++;
        pos++;

        if (node->count > 0 || node->output) {
            for (int i = 0; i < node->count; ++i) {
                // printf("%s:%d, ", indexToWord[node->word_index[i]].c_str(), pos);
                word_freq[node->word_index[i]]++;
            }
            TrieNode *print = node->output;
            while (print && print != root) {
                for (int i = 0; i < print->count; ++i) {
                    // printf("%s:%d, ", indexToWord[print->word_index[i]].c_str(), pos);
                    word_freq[print->word_index[i]]++;
                }
                print = print->output;
            }
            // printf("\n");
        }
    }
}

int main() {
    freopen("ahocorasick.in", "r", stdin);
    freopen("ahocorasick.out", "w", stdout);

    int n; // n <= 100
    auto *root = new TrieNode();

    scanf("%s", text);
    scanf("%d", &n);

    for (int i = 0; i < n; ++i) {
        scanf("%s", word);
        trie_insert(root, word, i);
        indexToWord.insert({i, string(word)});
    }
    bfs_build_links(root);
    aho_corasick_collect_solution(root, text);
    for (int i = 0; i < n; ++i) {
        printf("%d\n", word_freq[i]);
    }

    return 0;
}