Cod sursa(job #2354241)

Utilizator benjamin2205Zeic Beniamin benjamin2205 Data 25 februarie 2019 06:56:53
Problema Potrivirea sirurilor Scor 40
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 6 kb
#include <iostream>
#include <fstream>
#include <string>

std::ifstream f("strmatch.in");
std::ofstream g("strmatch.out");

using namespace std;

string word;
string phrase;
int t[2000005];

/// Must search the string for patterns.
/// pos_t is the current position in T
/// i is the index of the next character of the current
/// candidate substring
void search_pattern() {
    /// We'll start searching with the second character of W,
    /// because we only consider proper substrings (substrings that
    /// are not the string itself). If we would start searching
    /// with the first character, that would give us a full match
    /// of the substring with the word.
    int i = 0;
    int pos_t = 1;
    /// We initialize the first position of T with the value
    /// of one because if there is any mismatch between the
    /// word and the phrase the value in the "t" vector will
    /// be substracted from the current position in "phrase"
    /// So, if we substract one, that will mean that we will go
    /// one position further.
    t[0] = -1;

    /// As long as we have chracters that must be processed
    while (pos_t < word.size()) {
//        cerr << word.size () << " " << pos_t << " " << i << "\n";
        /// If the character from W that we are currently processing
        /// continues (or starts) a substring that reproduces a proper
        /// prefix of W:
        if (word[pos_t] == word[i]) {
            /// Mark the currently processed positon in T with the value
            /// (from T) of the last character from the prefix
            /// discovered so far (value which will obviously be -1 for
            /// the first match and 0 for the next ones).
            t[pos_t] = t[i];
//            cerr << t[pos_t] << "\n";
        }
        /// Else, if the character from W doesn't continue the prefix
        /// discovered so far:
        else {
            /// The position in T, corresponding to the character in W
            /// receives the value of "i", which is the number of
            /// characters the proper prefix has matched so far has.
            t[pos_t] = i;

            /// The prefix iterator ("i") receives the value found in the
            /// T array, at the "i" position. T[i] represents the distance
            /// from the current position to the character where a new match
            /// can start.
            i = t[i];

//            cerr << " IDX: i, pos_t and W[pos_t]:   " << i << " " << pos_t;
//            cerr << " " << word[pos_t] << "\n";
            /// If i is greater or equal to zero and the character at the
            /// current position in W is different from the character
            /// indicated by the iterator "i":
            while(i >= 0 && word[pos_t] != word[i]) {
                /// I don't understand what's the purpose of this while loop.
//                cerr << "  BEFORE: i and t[i]:  " << i << " " << t[i] << "\n";
                i = t[i];
//                cerr << "     AFT: i and t[i]:  " << i << " " << t[i] << "\n";
            }
        }

        /// Increase the counter of both the iterators
        pos_t = pos_t + 1;
        i = i + 1;
    }

    /// The postion after the last one in "t" receives a corresponding
    /// number
    t[pos_t] = i;
}

void kmp_table();
void string_match();

int main() {
    getline(f,word);
    getline(f,phrase);

    kmp_table();
    string_match();

//    for (int i = 0; i < word.size(); ++i) {
//        g << t[i] << ' ';
//    }
//
//    g << t[word.size()];
//
//    g << "\n";
//
//    for (int i = 0; i < word.size(); ++i) {
//        g << word[i] << ' ';
//    }
}

void string_match() {
    int num_strings = 0;
    int solutions[1005];

    int i = 0, m = 0;
    int k = 0;

    /// Take each letter from the phrase and process it
    while (m < phrase.size()) {
        /// If the current character from S matches the current character
        /// from W, increase both the iterators for W and for S.
        if (phrase[m] == word[i]) {
            ++m;
            ++i;
            /// If we have reached the end of the word, we have found a
            /// soultion.
            if (i == word.size()) {
            /// Keep the position where the match has begun
                if (k <= 1000) {
                    solutions[k] = m - i;
                }
                ++k;
            /// Restart the position of searching
                i = t[i];
            }
        }
        /// If the current character didn't match the character in the
        /// word:
        else {
            i = t[i];
        ///If the value at the current position in T is -1, then we must increase both
        /// the value of i and m
            if (i < 0)  {
                ++i;
                ++m;
            }
        }
    }

    g << k << "\n";

    for (int i = 0; i < k && i <= 1000; ++i) {
        g << solutions[i] << " ";
    }
}

void kmp_table() {
    /// Initializations
    int post, i;

    t[0] = -1;

    post = 1;
    i = 0;

    while (post < word.size()) {
        /// Check if the substring continues (or begins) or it doesn't
        /// match anymore
        if (word[post] == word[i]) {
        /// The currently processed letter receives a value equal to its match.
        /// (So, if the mismatch occurs at any of the two positions, the
        /// ammount of backtracking will be the same.) (Logically obvious,
        /// because if the substring matches so far, that means it is also the
        /// same with the string starting at 0 and continuing up to i.)
            t[post] = t[i];
        }
        /// Else, if the substring doesn't match anymore
        else {
        /// Give the last matching letter the number of characters matched so far.
            t[post] = i;

            i = t[i];

            while (i >= 0 && word[post] != word[i]) {
                i = t[i];
            }
        }
        ++post;
        ++i;
    }

    t[post] = i;
}