Cod sursa(job #2962217)

Utilizator AndreiAncutaAndrei Ancuta AndreiAncuta Data 7 ianuarie 2023 23:52:50
Problema ADN Scor 100
Compilator cpp-64 Status done
Runda Arhiva de probleme Marime 7.09 kb
#include <algorithm>
#include <fstream>
#include <string>
#include <vector>

using std::string;
using std::vector;

class Kmp {
public:
    string pattern;
    vector<int> prefixFunc;

    Kmp(const string &pattern)
        : pattern(pattern), prefixFunc(pattern.length(), 0) {
        prefixFunc[0] = -1;
        for (int pos = 1; pos < pattern.length(); ++pos) {
            prefixFunc[pos] = prefixFunc[pos - 1];

            while (prefixFunc[pos] >= 0 && pattern[pos] != pattern[prefixFunc[pos] + 1]) {
                prefixFunc[pos] = prefixFunc[prefixFunc[pos]];
            }

            if (pattern[pos] == pattern[prefixFunc[pos] + 1]) {
                ++prefixFunc[pos];
            }
        }
    }

    bool appearsIn(const string &haystack) const {
        int i = -1;
        for (auto c : haystack) {
            while (i >= 0 && pattern[i + 1] != c) {
                i = prefixFunc[i];
            }

            if (c == pattern[i + 1]) {
                ++i;
            }

            if (i + 1 == pattern.length()) {
                return true;
            }
        }

        return false;
    }

    // Concatenation: left ++ right.
    // A suffix of left may be the same as a prefix of right.
    // What is the length of the overlap?
    // Pattern is right, left is a parameter.
    int getConcatOverlapLength(const string &left) const {
        const auto &right = pattern;

        int i = -1;
        for (auto c : left) {
            while (i >= 0 && right[i + 1] != c) {
                i = prefixFunc[i];
            }

            if (c == pattern[i + 1]) {
                ++i;
            }
        }

        return i + 1;
    }
};

// Eliminate DNAs that are contained in other DNAs
// dnas must be sorted.
void filterRedundantDnas(vector<string> &dnas, vector<Kmp> &kmps) {
    vector<bool> isDnaEliminated(dnas.size(), false);

    for (size_t i = 0; i < dnas.size(); ++i) {
        for (size_t j = i + 1; j < dnas.size(); ++j) {
            if (isDnaEliminated[j]) {
                // Already eliminated
                continue;
            }

            if (kmps[j].appearsIn(dnas[i])) {
                isDnaEliminated[j] = true;
            }
        }
    }

    vector<string> filteredDnas;
    vector<Kmp> filteredKmps;
    for (size_t i = 0; i < dnas.size(); ++i) {
        if (!isDnaEliminated[i]) {
            filteredDnas.emplace_back(dnas[i]);
            filteredKmps.emplace_back(kmps[i]);
        }
    }

    dnas = std::move(filteredDnas);
    kmps = std::move(filteredKmps);
}

int main() {
    std::ifstream in("adn.in");

    int dnaCount;
    in >> dnaCount;

    vector<string> dnas;
    for (int i = 0; i < dnaCount; ++i) {
        string dna;
        in >> dna;
        dnas.push_back(std::move(dna));
    }

    in.close();

    std::sort(dnas.begin(), dnas.end(), [](const string &s1, const string &s2) {
        return s1.length() > s2.length();
    });

    // Compute prefix functions for each dna
    vector<Kmp> kmps;
    for (const auto &dna : dnas) {
        kmps.emplace_back(Kmp(dna));
    }

    // Eliminate DNAs that are contained in other DNAs
    filterRedundantDnas(dnas, kmps);

    // Compute overlap lengths for each pair of DNAs.
    // We want to do dna1 ++ dna2.
    // Store the max length of a suffix of dna1 that is also a prefix of dna2.
    vector<vector<int>> concatOverlapLength(
        dnas.size(),
        vector<int>(dnas.size()));

    for (size_t i = 0; i < dnas.size(); ++i) {
        for (size_t j = 0; j < dnas.size(); ++j) {
            if (i == j) continue;
            concatOverlapLength[i][j] = kmps[j].getConcatOverlapLength(dnas[i]);
        }
    }

    /// minLen[mask][finalDna] = min length of the string containing all the
    /// DNAs in the subset represented by the bitmask, with the final chosen DNA
    /// being finalDna
    vector<vector<int>> minLen(
        1 << dnas.size(),
        vector<int>(dnas.size(), 1e9));

    auto setDnaBit = [](int mask, int dnaId) -> int {
        return mask | (1 << dnaId);
    };

    auto maskContainsDna = [](int mask, int dnaId) -> bool {
        auto idBit = 1 << dnaId;
        return (mask & idBit) == idBit;
    };

    /// What would be the string length if we added nextDna to the
    /// current mask?
    auto costWithNextDna = [&](int mask, int finalId, int nextId) -> int {
        return minLen[mask][finalId] - concatOverlapLength[finalId][nextId] +
               dnas[nextId].length();
    };

    int allDnasMask = (1 << dnas.size()) - 1;

    // Initialize subsets with one element
    for (int id = 0; id < dnas.size(); ++id) {
        minLen[1 << id][id] = dnas[id].length();
    }

    for (int mask = 1; mask <= allDnasMask; ++mask) {
        for (int finalDnaId = 0; finalDnaId < dnas.size(); ++finalDnaId) {
            if (!maskContainsDna(mask, finalDnaId)) {
                // Final DNA must be in the mask
                continue;
            }

            for (int nextDnaId = 0; nextDnaId < dnas.size(); ++nextDnaId) {
                if (maskContainsDna(mask, nextDnaId)) {
                    // Next chosen DNA can't already be in the mask
                    continue;
                }

                auto nextMask = setDnaBit(mask, nextDnaId);
                minLen[nextMask][nextDnaId] =
                    std::min(minLen[nextMask][nextDnaId],
                             costWithNextDna(mask, finalDnaId, nextDnaId));
            }
        }
    }

    // Rebuild the concatenated string

    // Find the final DNA in the string
    int resultLen = 1e9;
    int finalId = 0;

    for (int id = 0; id < dnas.size(); ++id) {
        if (minLen[allDnasMask][id] < resultLen) {
            resultLen = minLen[allDnasMask][id];
            finalId = id;
        }
    }

    // Each DNA in the concatenation must be reversed, because we are building
    // the result from end to beginning.
    string result(dnas[finalId].rbegin(), dnas[finalId].rend());

    auto unsetDnaBit = [](int mask, int dnaId) -> int {
        return mask & ~(1 << dnaId);
    };

    int beforeMask = unsetDnaBit(allDnasMask, finalId);
    while (beforeMask > 0) {
        // We're looking for the DNA before the current finalDna
        // Search using the cost to the current finalDna
        int currentMask = setDnaBit(beforeMask, finalId);
        auto targetCost = minLen[currentMask][finalId];
        int beforeFinalId = 0;
        for (int candidateId = 0; candidateId < dnas.size(); ++candidateId) {
            if (!maskContainsDna(beforeMask, candidateId)) continue;

            auto cost = costWithNextDna(beforeMask, candidateId, finalId);
            if (cost == targetCost) {
                beforeFinalId = candidateId;
            }
        }

        const auto &foundDna = dnas[beforeFinalId];
        // Skip overlapping chars in the suffix of the beforeDna
        auto skipChars = concatOverlapLength[beforeFinalId][finalId];
        result.append(foundDna.rbegin() + skipChars, foundDna.rend());

        beforeMask = unsetDnaBit(beforeMask, beforeFinalId);
        finalId = beforeFinalId;
    }

    std::reverse(result.begin(), result.end());

    std::ofstream out("adn.out");
    out << result << '\n';
    out.close();
    return 0;
}