简体   繁体   中英

Optimizing algorithm for constructing a string given costs to operations

I'm doing the following problem (not homework): I'm doing an exercise (not homework) and I decided to go with backtracking, The problem says as follows:

You are given as input a target string. Starting with an empty string, you add characters to it, until your new string is same as the target. You have two options to add characters to a string: You can append an arbitrary character to your new string, with cost x You can clone any substring of your new string so far, and append it to the end of your new string, with cost y For a given target, append cost x, and clone cost y, we want to know what the cheapest cost is of building the target string

And some examples:

Target "aa", append cost 1, clone cost 2: the cheapest cost is 2:

Start with an empty string, ""
Append 'a' (cost 1), giving the string "a"
Append 'a' (cost 1), giving the string "aa"

Target "aaaa", append cost 2, clone cost 3: the cheapest cost is 7:

Start with an empty string, ""
Append 'a' (cost 2), giving the string "a"
Append 'a' (cost 2), giving the string "aa"
Clone "aa" (cost 3), giving the string "aaaa"

Target "xzxpzxzxpq", append cost 10, clone cost 11: the cheapest cost is 71:

Start with an empty string, ""
Append 'x' (cost 10): "x"
Append 'z' (cost 10): "xz"
Append 'x' (cost 10): "xzx"
Append 'p' (cost 10): "xzxp"
Append 'z' (cost 10): "xzxpz"
Clone "xzxp" (cost 11): "xzxpzxzxp"
Append 'q' (cost 10) : "xzxpzxzxpq"

So far so good. I first tried to do it with backtracking, but then the following test case came:

string bigString = "abcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcqaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjoirmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcqaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaip";
string doubleIt = bigString + bigString;

Now that's big. Given costs of 1234 , 1235 to append and clone respectivly, the total cost of building it is 59249 . So no more backtracking for this one because of the stack overflow. I tried a more efficient approach:

#include <iostream>
#include <vector>
#include <string>
#include <set>

int isWorthClone(const int size, const std::string& target) {
    int worth = 0;
    for (int j = size; j < target.size() and worth < size; j++) {
        if (target[j] == target[worth]) {
            worth++;
        }
        else break;
    }
    return worth;
}

int buildSolution(const std::string& target, int cpyCst, int apndCst) {
    int index = 0;
    int cost = 0;
    while (int(target.size()) != (index)) {
        int hasta = isWorthClone(index, target);
        if (cpyCst < hasta * apndCst) {
            cost += cpyCst;
            index += hasta ;
        }
        else {
            cost += apndCst;
            index++;
        }
    }
    return cost;
}


int main() {

    std::string bigString = "abcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcqaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjoirmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcqaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaip";
    std::string doubleIt = bigString + bigString;
    std::string target = bigString;
    int copyCost = 1235;
    int appendCost = 1234;
    std::cout << buildSolution(target, copyCost, appendCost) << std::endl;
}

but the output is 3588498 , and from the test case, the correct output should be 59249 . I can't find why this approach is giving me that result. I tried debugging it, and it seems like isWorthClone is not finding the right position to clone in some cases. Also it seems a little strange, because it works for the other cases, but as this is somewhat "clone expensive" I think is propagating the error.

Any clues on why is this happening? This is O(n^2), so I think this should be the optimal solution.

Edit:

My code now looks like the following, trying to follow the dp approach:

int canCopy(const int i, const string& target, int posCopied) {
    int iStartArray = 0;
    bool canCopy = true;
    int aux = i;
    while (canCopy) {
        if (aux - 1 + posCopied > target.size() or target[iStartArray] != target[aux - 1]) {
            canCopy = false;
        }
        else {
            posCopied += 1;
            iStartArray++;
            aux++;
        }
    }
    return posCopied;
}

int stringConstruction(string target, int copyCost, int appendCost) {
    vector<int> dp(target.size() + 1, std::numeric_limits<int>::max());

    dp[1] = appendCost;
    for (int i = 2; i < dp.size(); i++) {
        dp[i] = std::min(dp[i], dp[i - 1] + appendCost);
        int posCopied = canCopy(i, target, 0);
        if (posCopied != 0 and (posCopied + i) < dp.size()) {
            dp[posCopied + i] = dp[i] + copyCost;
        }
    }
    return dp[dp.size()-1];
}

This still doesn't work for the test case presented here.

Edit2: Finally I implemented the solution provided by @David Eisenstat (thanks!), with a really naive approach:

int best_clone(const string& s) {
    int j = s.size() - 1;
    while (s.substr(0, j).find(s.substr(j, s.size() - j)) != std::string::npos) {
        j--;
    }
    return j + 1;
}

int stringConstruction(string target, int copyCost, int appendCost) {
    vector<int> v = vector<int> (1, 0);
    for (int i = 0; i < target.size(); i++) {
        int cost = v[i] + appendCost;
        int j = best_clone(target.substr(0, i+1));
        if (j <= i) {
            cost = std::min(cost, v[j] + copyCost);
        }
        v.push_back(cost);
    }
    return v[v.size() - 1];

}

It seems like I missunderstood the problem. This is giving the solution for the test cases, but it takes too long. best_clone needs to be optimized.

Edit 3: (Hope this is the last one)

I added the following class SA for storing the suffix array:

#pragma once
#include <vector>
#include <string>
#include <algorithm>
#include <iostream>
#include <chrono>
using namespace std;

typedef struct {
    int index;
    string s;
} suffix;

struct comp
{
    inline bool operator() (const suffix& s1, const suffix& s2)
    {
        return (s1.s < s2.s);
    }
};

class SA
{
private:
    vector<suffix> values;
public:
    SA(const string& s) : values(s.size()) {
        string aux = s;
        for (int i = 0; i < s.length(); i++) {
            values[i].index = i;
            values[i].s = s.substr(i, s.size() - i);;
        }
        sort(values.begin(), values.end(), comp());
    }

    friend ostream& operator<<(ostream& os, const SA& dt)
    {
        for (int i = 0; i < dt.values.size(); i++) {
            os << dt.values[i].index << ": " << dt.values[i].s << "\n";
        }
        return os;
    }

    int search(const string& subst, int i, int j) {
        while (j >= i) {
            int mid = (i + j) / 2;
            if (this->values[mid].s > subst) {
                j = mid-1;
            }
            else if (this->values[mid].s < subst) {
                i = mid+1;
            }
            else return mid;
        }
        return -1;
    }

};

But know I don't know how to search here for the best clone in this array. (I know this is slow, n*2log(n) I would say, but I think is going to be good enough for this one. So now I need to put together these parts.

The problem is that you're making the decision to clone greedily. Let's look at a case where the append cost is 2 and the clone cost is 3. If you process the string aabaaaba , you'll append aab , clone aa , and clone aba , whereas the best solution is to append aaba and clone it.

The fix is dynamic programming, specifically, to build an array of the cost to make each prefix of the target string. To fill each entry, take the min of (append cost plus previous entry, clone cost plus cost for the shortest prefix that can be completed with one clone). Since the clone cost is constant, the array is nondecreasing, and therefore we don't need to check all of the possible prefixes.

Depending on the constraints you may need to construct a suffix array/longest common prefix array (using eg, SA-IS) to identify all of the best clones quickly. This will run in time o(n²) for sure (quite possibly O(n), but there are enough moving parts that I don't want to claim that).

This Python is too slow but gets the right answer on the large test case:

def best_clone(s):
    j = len(s) - 1
    while s[j:] in s[:j]:
        j -= 1
    return j + 1


def construction_cost(s, append_cost, clone_cost):
    table = [0]
    for i in range(len(s)):
        cost = table[i] + append_cost
        j = best_clone(s[: i + 1])
        if j <= i:
            cost = min(cost, table[j] + clone_cost)
        table.append(cost)
    return table[len(s)]

If the limit of your ambitions is quadratic, then we can put the Z function for string matching to good use.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

using Cost = unsigned long long;

// Adapted from https://cp-algorithms.com/string/z-function.html
std::vector<std::size_t> ZFunction(std::string_view s) {
  std::size_t n = s.length();
  std::vector<std::size_t> z(n);
  for (std::size_t i = 1, l = 0, r = 0; i < n; i++) {
    if (i <= r) {
      z[i] = std::min(r - i + 1, z[i - l]);
    }
    while (i + z[i] < n && s[z[i]] == s[i + z[i]]) {
      z[i]++;
    }
    if (i + z[i] - 1 > r) {
      l = i;
      r = i + z[i] - 1;
    }
  }
  return z;
}

std::size_t BestClone(std::string_view s) {
  std::string r{s};
  std::reverse(r.begin(), r.end());
  auto z = ZFunction(r);
  std::size_t best = 0;
  for (std::size_t i = 0; i < z.size(); i++) {
    best = std::max(best, std::min(z[i], i));
  }
  return s.length() - best;
}

Cost ConstructionCost(std::string_view s, Cost append_cost, Cost clone_cost) {
  std::vector<Cost> costs = {0};
  for (std::size_t j = 0; j < s.length(); j++) {
    std::size_t i = BestClone(s.substr(0, j + 1));
    if (i <= j) {
      costs.push_back(
          std::min(costs.back() + append_cost, costs[i] + clone_cost));
    } else {
      costs.push_back(costs.back() + append_cost);
    }
  }
  return costs.back();
}

int main() {
  std::string s;
  while (std::cin >> s) {
    std::cout << ConstructionCost(s, 1234, 1235) << '\n';
  }
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM