Pagini recente » Winter Challenge 2008 | Profil ionutzm05 | Cod sursa (job #1284925) | autumn-warmup-2007/runda-3 | Cod sursa (job #3298090)
#include <iostream>
#include <fstream>
#include <vector>
#include <unordered_map>
using namespace std;
class KMP_Automaton {
public:
KMP_Automaton(const string& pattern) : pattern(pattern) {
// Initialize the failure function for the KMP automaton
// and build the automaton based on the provided pattern.
build();
}
string pattern; // The pattern string
struct AutomatonState {
unordered_map < char, AutomatonState* > next; // Transition table for each character
AutomatonState *fail; // Failure link
int length; // Length of the matched prefix
AutomatonState() : fail(nullptr) {
length = 0; // Initialize length to 0
}
AutomatonState(int length) : fail(nullptr), length(length) {}
};
AutomatonState* root; // Root state of the automaton
void build() {
int m = pattern.size();
// We create the root state
root = new AutomatonState();
// Progressively build the automaton states, without using a vector
AutomatonState* current = root; // root->fail is nullptr
for (int i = 0; i < m; ++i) {
// Create a new state for each character in the pattern
current->next[pattern[i]] = new AutomatonState(current->length + 1);
// Set the failure link for the new state
AutomatonState* failState = AddChar(current->fail, pattern[i]); // Guranteed to not be nullptr
current->next[pattern[i]]->fail = failState;
// Move to the next state
current = current->next[pattern[i]];
}
}
AutomatonState* AddChar(AutomatonState* state, char c) {
// Transition to the next state based on the character
while (state && state->next.find(c) == state->next.end()) {
state = state->fail; // Follow failure links if no transition exists
}
return state ? state->next[c] : root; // Return the next state or root if no transition
}
};
// Pattern matching using KMP automaton
vector<int> kmpSearch(const string& text, const string& pattern) {
KMP_Automaton automaton(pattern); // Create the KMP automaton for the pattern
vector<int> result; // To store the starting indices of matches
KMP_Automaton::AutomatonState* current = automaton.root; // Start at the root state
for (int i = 0; i < text.size(); ++i) {
char c = text[i];
current = automaton.AddChar(current, c); // Transition to the next state based on the character
if (current->length == pattern.size()) { // If we reached a state that matches the entire pattern
result.push_back(i - pattern.size() + 1); // Store the starting index of the match
}
}
return result; // Return all found indices
}
// Pattern matching using KMP automaton by concatenating the pattern with the text
vector<int> kmpSearchConcat(const string& text, const string& pattern) {
string concat = pattern + "#" + text; // Concatenate pattern and text with a separator
vector<int> result, matchedLength;
int m = pattern.size();
matchedLength.resize(concat.size());
int j = 0; // Index for pattern
for (int i = 1; i < concat.size(); ++i) {
while (j > 0 && (j == m || concat[i] != concat[j])) {
j = matchedLength[j - 1]; // Backtrack in the failure function
}
if (concat[i] == concat[j]) {
++j; // If characters match, move to the next character in the pattern
} else {
j = 0; // If no match, reset j to 0
}
matchedLength[i] = j; // Store the length of the matched prefix
// debug output
// cout << "i: " << i << ", j: " << j << ", matchedLength[i]: " << matchedLength[i] << endl;
if (j == m) {
result.push_back(i - m + 1); // Match found, store the starting index
}
}
// Adjust indices to point to the original text
for (int& index : result) {
index -= m + 1; // Subtract the length of the pattern and the separator
}
return result;
}
// namespace ZP {
// // integers modulo a prime number
// const int MOD = 1e9 + 7;
// // logpow
// long long logpow(long long base, long long exp) {
// long long result = 1;
// while (exp > 0) {
// if (exp % 2 == 1) {
// result = (result * base) % MOD;
// }
// base = (base * base) % MOD;
// exp /= 2;
// }
// return result;
// }
// // Inverse modulo using Fermat's little theorem
// long long inv(long long a) {
// return logpow(a, MOD - 2);
// }
// }
class STR_Hash {
// Compute hash values over substrings of a string using polynomial rolling hash
public:
// Find integer value of a character knowing it's a lowercase or uppercase letter of the english alphabet or a number
static int charToInt(char c) {
if (c >= 'a' && c <= 'z') return c - 'a'; // Lowercase letters
if (c >= 'A' && c <= 'Z') return c - 'A' + 'z' - 'a' + 1; // Uppercase letters
if (c >= '0' && c <= '9') return c - '0' + 2 * ('z' - 'a' + 1); // Digits
return -1; // Invalid character
}
STR_Hash(const string& s, int base = 2 * ('z' - 'a' + 1) + 10, int mod = 1e9 + 7) : base(base), mod(mod) {
n = s.size();
hash.resize(n + 1);
power.resize(n + 1);
// invPower.resize(n + 1);
power[0] = 1;
// invPower[0] = 1;
// invPower[1] = ZP::inv(base);
for (int i = 0; i < n; ++i) {
hash[i + 1] = (hash[i] * base + charToInt(s[i])) % mod;
power[i + 1] = (power[i] * base) % mod;
// invPower[i + 1] = invPower[i] * invPower[1] % mod;
}
}
long long getHash(int l, int r) const {
// Get the hash of the substring s[l:r]
return (hash[r + 1] - hash[l] * power[r - l + 1] % mod + mod) % mod;
}
int n; // Length of the string
vector<long long> hash; // Hash values for prefixes
vector<long long> power; // Powers of the base for rolling hash
// vector<long long> invPower; // Inverse powers of the base for rolling hash
int base; // Base for polynomial rolling hash
int mod; // Modulus for hash values
};
// pattern matching using rolling hash
vector<int> rollingHashSearch(const string& text, const string& pattern) {
STR_Hash textHash(text);
STR_Hash patternHash(pattern);
vector<int> result;
long long patternHashValue = patternHash.getHash(0, pattern.size() - 1);
int m = pattern.size();
for (int i = 0; i <= text.size() - m; ++i) {
if (textHash.getHash(i, i + m - 1) == patternHashValue) {
result.push_back(i); // Match found, store the starting index
}
}
return result;
}
int main() {
#define METHOD 2 // 1 for KMP automaton, 2 for KMP concatenation, 3 for rolling hash
ifstream fin("strmatch.in");
ofstream fout("strmatch.out");
string pattern, text;
fin >> pattern >> text; // Read the pattern and text from the input file
vector<int> result;
if (METHOD == 1) {
result = kmpSearch(text, pattern); // Use KMP automaton for pattern matching
} else if (METHOD == 2) {
result = kmpSearchConcat(text, pattern); // Use KMP concatenation for pattern matching
} else if (METHOD == 3) {
result = rollingHashSearch(text, pattern); // Use rolling hash for pattern matching
}
fout << result.size() << endl; // Output the number of matches found
for (int i = 0; i < min(1000, (int)result.size()); ++i) {
fout << result[i] << " "; // Output the first 1000 matches
}
}