Cod sursa(job #3298084)

Utilizator ArdeleanOficialAlexandru ArdeleanOficial Data 26 mai 2025 20:47:51
Problema Potrivirea sirurilor Scor 18
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 7.48 kb
#include <iostream>
#include <fstream>
#include <vector>

using namespace std;

class KMP_Automaton {
public:
    KMP_Automaton(const string& pattern) : pattern(pattern) {
        // Initialize the failure function for the KMP automaton
        // and build the automaton based on the provided pattern.
        build();
    }

    string pattern; // The pattern string

    struct AutomatonState {
        AutomatonState *next[256]; // Transition table for each character
        AutomatonState *fail; // Failure link
        int length; // Length of the matched prefix
        AutomatonState() : fail(nullptr) {
            fill(begin(next), end(next), nullptr);
            length = 0; // Initialize length to 0
        }

        AutomatonState(int length) : fail(nullptr), length(length) {
            fill(begin(next), end(next), nullptr);
        }
    };

    AutomatonState* root; // Root state of the automaton

    void build() {
        int m = pattern.size();

        // We create the root state
        root = new AutomatonState();

        // Progressively build the automaton states, without using a vector
        AutomatonState* current = root; // root->fail is nullptr
        for (int i = 0; i < m; ++i) {
            // Create a new state for each character in the pattern
            current->next[pattern[i]] = new AutomatonState(current->length + 1);

            // Set the failure link for the new state
            AutomatonState* failState = AddChar(current->fail, pattern[i]); // Guranteed to not be nullptr
            current->next[pattern[i]]->fail = failState;

            // Move to the next state
            current = current->next[pattern[i]];
        }
    }

    AutomatonState* AddChar(AutomatonState* state, char c) {
        // Transition to the next state based on the character
        while (state && !state->next[c]) {
            state = state->fail; // Follow failure links if no transition exists
        }
        return state ? state->next[c] : root; // Return the next state or root if no transition
    }
};

// Pattern matching using KMP automaton
vector<int> kmpSearch(const string& text, const string& pattern) {
    KMP_Automaton automaton(pattern); // Create the KMP automaton for the pattern
    vector<int> result; // To store the starting indices of matches
    KMP_Automaton::AutomatonState* current = automaton.root; // Start at the root state

    for (int i = 0; i < text.size(); ++i) {
        char c = text[i];
        current = automaton.AddChar(current, c); // Transition to the next state based on the character
        if (current->length == pattern.size()) { // If we reached a state that matches the entire pattern
            result.push_back(i - pattern.size() + 1); // Store the starting index of the match
        }
    }
    return result; // Return all found indices
}

// Pattern matching using KMP automaton by concatenating the pattern with the text
vector<int> kmpSearchConcat(const string& text, const string& pattern) {
    string concat = pattern + "#" + text; // Concatenate pattern and text with a separator
    vector<int> result, matchedLength;
    int m = pattern.size();
    matchedLength.resize(concat.size());
    int j = -1; // Index for pattern
    for (int i = 1; i < concat.size(); ++i) {
        while (j > 0 && (j == m || concat[i] != pattern[j])) {
            j = matchedLength[j]; // Backtrack in the failure function
        }
        ++j;
        matchedLength[i] = j; // Store the length of the matched prefix
        if (j == m) {
            result.push_back(i - m + 1); // Match found, store the starting index
        }
    }
    // Adjust indices to point to the original text
    for (int& index : result) {
        index -= m + 1; // Subtract the length of the pattern and the separator
    }
    return result;
}

// namespace ZP {
//     // integers modulo a prime number

//     const int MOD = 1e9 + 7;

//     // logpow

//     long long logpow(long long base, long long exp) {
//         long long result = 1;
//         while (exp > 0) {
//             if (exp % 2 == 1) {
//                 result = (result * base) % MOD;
//             }
//             base = (base * base) % MOD;
//             exp /= 2;
//         }
//         return result;
//     }

//     // Inverse modulo using Fermat's little theorem
//     long long inv(long long a) {
//         return logpow(a, MOD - 2);
//     }
// }

class STR_Hash {
    // Compute hash values over substrings of a string using polynomial rolling hash
public:

    // Find integer value of a character knowing it's a lowercase or uppercase letter of the english alphabet or a number
    static int charToInt(char c) {
        if (c >= 'a' && c <= 'z') return c - 'a'; // Lowercase letters
        if (c >= 'A' && c <= 'Z') return c - 'A' + 'z' - 'a' + 1; // Uppercase letters
        if (c >= '0' && c <= '9') return c - '0' + 2 * ('z' - 'a' + 1); // Digits
        return -1; // Invalid character
    }

    STR_Hash(const string& s, int base = 2 * ('z' - 'a' + 1) + 10, int mod = 1e9 + 7) : base(base), mod(mod) {
        n = s.size();
        hash.resize(n + 1);
        power.resize(n + 1);
        // invPower.resize(n + 1);

        power[0] = 1;
        // invPower[0] = 1;
        // invPower[1] = ZP::inv(base);

        for (int i = 0; i < n; ++i) {
            hash[i + 1] = (hash[i] * base + charToInt(s[i])) % mod;
            power[i + 1] = (power[i] * base) % mod;
            // invPower[i + 1] = invPower[i] * invPower[1] % mod;
        }
    }

    long long getHash(int l, int r) const {
        // Get the hash of the substring s[l:r]
        return (hash[r + 1] - hash[l] * power[r - l + 1] % mod + mod) % mod;
    }

    int n; // Length of the string
    vector<long long> hash; // Hash values for prefixes
    vector<long long> power; // Powers of the base for rolling hash
    // vector<long long> invPower; // Inverse powers of the base for rolling hash
    int base; // Base for polynomial rolling hash
    int mod; // Modulus for hash values
};

// pattern matching using rolling hash

vector<int> rollingHashSearch(const string& text, const string& pattern) {
    STR_Hash textHash(text);
    STR_Hash patternHash(pattern);
    vector<int> result;

    long long patternHashValue = patternHash.getHash(0, pattern.size() - 1);
    int m = pattern.size();
    for (int i = 0; i <= text.size() - m; ++i) {
        if (textHash.getHash(i, i + m - 1) == patternHashValue) {
            result.push_back(i); // Match found, store the starting index
        }
    }
    return result;
}

int main() {
    #define METHOD 2 // 1 for KMP automaton, 2 for KMP concatenation, 3 for rolling hash

    ifstream fin("strmatch.in");
    ofstream fout("strmatch.out");

    string pattern, text;
    fin >> pattern >> text; // Read the pattern and text from the input file

    vector<int> result;
    if (METHOD == 1) {
        result = kmpSearch(text, pattern); // Use KMP automaton for pattern matching
    } else if (METHOD == 2) {
        result = kmpSearchConcat(text, pattern); // Use KMP concatenation for pattern matching
    } else if (METHOD == 3) {
        result = rollingHashSearch(text, pattern); // Use rolling hash for pattern matching
    }

    fout << result.size() << endl; // Output the number of matches found
    for (int i = 0; i < min(1000, (int)result.size()); ++i) {
        fout << result[i] << " "; // Output the first 1000 matches
    }
}