Cod sursa(job #3354304)

Utilizator batistaUPB-Oprea-Cosmin-Dumitru batista Data 17 mai 2026 13:31:14
Problema Aho-Corasick Scor 100
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 5.79 kb
#include <stdio.h>
#include <string>
#include <queue>
#include <vector>
#include <unordered_map>

using namespace std;

#define TEXT_LEN 1000005
#define WORD_LEN 10005
#define ALPHABET_SIZE 26
#define NUM_WORDS 105

char text[TEXT_LEN];
char word[WORD_LEN];
int word_freq[NUM_WORDS];

struct TrieNode {
    TrieNode() : children{}, failure(nullptr), output(nullptr), visit_count(0) {}

    TrieNode* children[ALPHABET_SIZE];

    /**
     * Pointer to another node that represents the `longest suffix for current node`,
     * equivalent to `the largest prefix in trie that is suffix for current node`.
     * This is the node where we can restart the matching from.
     * This link (blue link) is used for fast prefix matching when advancing the main text.
     */
    TrieNode* failure;

    /**
     * Pointer to the next node in the dictionary that can be reached by following failure arcs.
     * This link is used to reconstruct the solution: the prefixes that match and that are a word in the dictionary.
     * This is an optimization to avoid iterating failure links that are not a complete word when reconstructing the solution.
     */
    TrieNode *output;

    /**
     * Ids of all words that end in this node.
     */
    vector<int> pattern_ids;

    /**
     * Number of times this automaton state was reached while scanning the text.
     */
    int visit_count;
};
vector<TrieNode*> bfs_order;

void trie_insert(TrieNode *node, const char *str, const int i) {
    while (*str) {
        const int c = *str - 'a';
        if (!node->children[c]) {
            node->children[c] = new TrieNode();
        }

        node = node->children[c];
        ++str;
    }

    node->pattern_ids.push_back(i);
}

/**
 * For every node sets a failure link to a node representing the longest possible strict suffix of it in the trie.
 * For every node sets an output link to the first node in the dictionary that can be reached  by following failure links.
 */
void bfs_build_links(TrieNode *root) {
    queue<TrieNode*> q;
    q.push(root);
    root->failure = root;

    while (!q.empty()) {
        TrieNode *parent = q.front();
        q.pop();
        bfs_order.push_back(parent);

        for (int i = 0; i < ALPHABET_SIZE; ++i) {
            TrieNode* child = parent->children[i];
            if (!child) {
                continue;
            }
            q.push(child);

            // Compute failure node for child
            if (parent == root) {
                // Nodes on the first level don't have suffixes. Assigning the root as failure link.
                child->failure = root;
            } else {
                const TrieNode *fail = parent->failure;
                while (fail != root && !fail->children[i]) {
                    fail = fail->failure;
                }
                if (fail->children[i] && fail->children[i] != child) {
                    child->failure = fail->children[i];
                } else {
                    child->failure = root;
                }
            }

            // Compute the output node for child
            if (child->failure->pattern_ids.size() > 0) {
                child->output = child->failure;
            } else {
                child->output = child->failure->output;
            }
        }
    }
}

void aho_corasick_print_matches(TrieNode *root, const char *suffix, unordered_map<int, string>& index_to_word) {
    TrieNode *node = root;
    int pos = 0;

    while (*suffix != '\0') {
        const int c = *suffix - 'a';

        // Follow failure links and advance the matching.
        while (node != root && !node->children[c]) {
            node = node->failure;
        }
        if (node->children[c]) {
            node = node->children[c];
        }

        // Print exact matches
        for (int id : node->pattern_ids) {
            printf("%s:%d, ", index_to_word[id].c_str(), pos);
            word_freq[id]++;
        }
        // Print suffix matches
        TrieNode *out = node->output;
        while (out) {
            for (int id : out->pattern_ids) {
                printf("%s:%d, ", index_to_word[id].c_str(), pos);
                word_freq[id]++;
            }
            out = out->output;
        }

        suffix++;
        pos++;
    }
}

/**
 * Optimally counts the number of occurrences for each word in dictionary.
 */
void aho_corasick_count_occurrences(TrieNode *root, const char *suffix) {
    TrieNode *node = root;

    // Scan text and count state visits
    while (*suffix) {
        int c = *suffix - 'a';
        while (node != root && !node->children[c]) {
            node = node->failure;
        }
        if (node->children[c]) {
            node = node->children[c];
        }

        node->visit_count++;
        ++suffix;
    }

    // Propagate counts upward through failure links in reverse BFS order
    for (int i = bfs_order.size() - 1; i >= 0; --i) {
        node = bfs_order[i];

        if (node->failure) {
            node->failure->visit_count += node->visit_count;
        }

        for (int id : node->pattern_ids) {
            word_freq[id] = node->visit_count;
        }
    }
}

int main() {
    freopen("ahocorasick.in", "r", stdin);
    freopen("ahocorasick.out", "w", stdout);

    int n; // n <= 100
    auto *root = new TrieNode();
    unordered_map<int, string> index_to_word;

    const bool print_matches = false;

    scanf("%s", text);
    scanf("%d", &n);

    for (int i = 0; i < n; ++i) {
        scanf("%s", word);
        trie_insert(root, word, i);

        if (print_matches) {
            index_to_word.insert({i, string(word)});
        }
    }

    bfs_build_links(root);
    if (print_matches) {
        aho_corasick_print_matches(root, text,  index_to_word);
    } else {
        aho_corasick_count_occurrences(root, text);
        for (int i = 0; i < n; ++i) {
            printf("%d\n", word_freq[i]);
        }
    }

    return 0;
}