Pagini recente » Cod sursa (job #827965) | Cod sursa (job #1424431) | Statistici Ghinoiu Dragos (suntunprost) | Cod sursa (job #417321) | Cod sursa (job #2443405)
#include <iostream>
#include <fstream>
#include <cstring>
#include <cassert>
#include <cctype>
#include <vector>
#include <algorithm>
#include <unordered_map>
#include <set>
#define MAX_N 1000000
#define MAX_M 10000
using namespace std;
unsigned n, m, size;
char str[MAX_N + 5], pattern[MAX_N + 5];
//vector<unsigned> match;
// unsigned match[MAX_COUNT]; // print only the first 1000 matches
unsigned pi[MAX_M + 2];
unsigned psi[MAX_M + 2];
//unsigned degree[MAX_N + 2];
//unsigned *edges[MAX_N + 2];
//unsigned omega[MAX_N + 2];
#if 0
//-------------------------------------------------------------
void dfs(unsigned currentState, unordered_map<char, unsigned>& stackTable, set<unsigned>& setOfStates)
// at every currentState we explore a branch, having stored the active nodes in a hash table
{
unsigned replacer;
bool replaced = false;
unsigned minimumHalt = 0;
// We don't use the state 0, because in the search of KMP we always stop before reaching 0
if (currentState != 0) {
auto iter = stackTable.find(pattern[currentState]);
// Not yet in table? Add it.
if (iter == stackTable.end()) {
stackTable[pattern[currentState]] = currentState;
setOfStates.insert(currentState);
} else {
// Than, see which index should be replaced
// With this, update now the overall minimum, to use it for the next steps
replaced = true;
replacer = iter->second;
// TODO: use a fibonaci heap / priority_queue with lazy-update / or google set, but please, no STL-set!
// Why fibonacci-heap? Because we increase for every char the last seen position, so an operation of increaseKey.
// Update the map
stackTable[pattern[currentState]] = currentState;
// Update the set
setOfStates.erase(replacer);
setOfStates.insert(currentState);
}
minimumHalt = psi[*setOfStates.begin()];
}
omega[currentState] = minimumHalt;
// Explore the neighbours
for (unsigned index = 0; index < degree[currentState]; ++index)
dfs(edges[currentState][index], stackTable, setOfStates);
// Reset the table
if (currentState != 0) {
// Did we change something?
if (!replaced) {
// That's the first time the char has been inserted
// Simply delete it.
stackTable.erase(pattern[currentState]);
setOfStates.erase(currentState);
} else {
// We've changed the last index for the char
// Update the last index
stackTable[pattern[currentState]] = replacer;
// Update the state
setOfStates.erase(currentState);
setOfStates.insert(replacer);
}
}
}
#endif
//-------------------------------------------------------------
void compressPi()
// compress pi[] and create two new arrays: psi[] and omega[]
// description in README
{
// Initialize the first values
// 0 is the first state. At this step it receives 2 sons
psi[0] = 0;
psi[1] = 0;
//degree[0] = 2;
// Compute pi and psi
unsigned k = 0;
for (unsigned q = 1; q < m; ++q) {
// Compute the normal pi
// If I'm not wrong, it could be also done with psi!
while ((k > 0) && (pattern[q] != pattern[k])) {
k = pi[k - 1];
}
if (pattern[q] == pattern[k]) {
k++;
}
pi[q] = k;
// Compress possbile path
// If no jump, connect directly to 0
if (pi[q] == 0) {
psi[q + 1] = 0;
} else if (pattern[q + 1] == pattern[pi[q]]) {
// Try to hang the edge (q, pi[q]) at the root of pi[q]
psi[q + 1] = psi[pi[q]];
} else {
// Put the edge back where it was
psi[q + 1] = pi[q];
}
// build the graph of psi, by counting the degree of each node
//degree[psi[q + 1]]++;
}
#if 0
// Alloc the graph and reset the degrees to 0
for (unsigned q = 0; q < m; ++q) {
if (degree[q])
edges[q] = (unsigned*)calloc(degree[q], sizeof(unsigned));
degree[q] = 0;
}
// Compute the graph
for (unsigned q = 1; q <= m; ++q) {
edges[psi[q]][degree[psi[q]]++] = q;
}
#if 0
cerr << "Debug" << endl;
for (unsigned index = 0; index < m; ++index) {
cerr << "Main node : " << index << " ";
for (unsigned ptr = 0; ptr < degree[index]; ++ptr) {
cerr << edges[index][ptr] << " ";
}
cerr << endl;
}
cerr << endl;
#endif
// Compute omega[] with dfs
unordered_map<char, unsigned> stackTable;
set<unsigned> setOfStates;
dfs(0, stackTable, setOfStates);
assert(stackTable.empty());
assert(setOfStates.empty());
#if 0
cerr << "Debug" << endl;
for (unsigned index = 0; index <= m; ++index) {
cerr << index << " with " << pattern[index] << " psi = " << psi[index] << " and omega = " << omega[index] << endl;
}
cerr << endl;
#endif
// delete the graph
for (unsigned index = 0; index < m; ++index)
if (degree[index])
free(edges[index]);
#endif
}
//-------------------------------------------------------------
static unsigned hyperKmp() {
if (m > n) {
return 0;
}
// Create pi[], psi[] and omega[]
compressPi();
unsigned q = 0; // current state
//unsigned maximum = 0; // to test how many loop steps are maximal done
//unsigned sum = 0;
unsigned countMatches = 0;
for (unsigned index = 0; index < n; ++index) {
// Search only on psi now
#if 0
unsigned loopsCtr = 0;
#if 1
// lastHalt represents the last state which should be discovered (starting from q)
unsigned lastHalt = omega[q];
while ((q > lastHalt) && (str[index] != pattern[q])) {
q = psi[q];
loopsCtr++;
}
#else
while ((q > 0) && (str[index] != pattern[q])) {
q = pi[q - 1];
loopsCtr++;
}
#endif
#if 0
//cerr << q << " vs " << after << endl;
if (q != after) {
cerr << "assert : at " << index << " " << q << " vs " << after << endl;
assert(0);
}
#endif
// Compute the average and the maximum loop steps
sum += loopsCtr;
if (loopsCtr > maximum)
maximum = loopsCtr;
#else
// lastHalt represents the last state which should be discovered (starting from q)
//unsigned lastHalt = omega[q];
while ((q > 0) && (str[index] != pattern[q])) {
q = psi[q];
}
#endif
if (str[index] == pattern[q]) {
++q;
}
// Found?
countMatches += (q == m);
}
return countMatches;
//cerr << "sum = " << sum << " and max loopsCtr = " << maximum << endl;
}
//-------------------------------------------------------------
int main(int argc, char** argv) {
ifstream cin;
#if 0
if (argc < 2) {
cerr << "Usage: " << argv[0] << " file " << endl;
return 0;
}
in.open(argv[1]);
#else
cin.open("ahocorasick.in");
ofstream out("ahocorasick.out");
// cin.open(1 ? "multest.in" : "test/grader_test30.in");
#endif
cin >> str >> size;
n = strlen(str);
while (size--) {
cin >> pattern;
m = strlen(pattern);
pattern[m] = '#';
out << hyperKmp() << endl;
}
return 0;
}