簡體   English   中英

給定操作成本的用於構造字符串的優化算法

[英]Optimizing algorithm for constructing a string given costs to operations

我正在做以下練習題(不是作業),並決定用回溯法來解決,問題如下:

您將獲得一個目標字符串作為輸入。從一個空字符串開始,向其中添加字符,直到新字符串與目標相同。您有兩種向字符串添加字符的選項:(1) 將任意一個字符附加到新字符串的末尾,成本為 x;(2) 克隆新字符串目前已有的任意子字符串,並將其附加到新字符串的末尾,成本為 y。對於給定的目標、附加成本 x 和克隆成本 y,我們想知道構建目標字符串的最低成本是多少。

還有一些例子:

目標“aa”,追加成本1,克隆成本2:最便宜的成本是2:

Start with an empty string, ""
Append 'a' (cost 1), giving the string "a"
Append 'a' (cost 1), giving the string "aa"

目標“aaaa”,追加成本2,克隆成本3:最便宜的成本是7:

Start with an empty string, ""
Append 'a' (cost 2), giving the string "a"
Append 'a' (cost 2), giving the string "aa"
Clone "aa" (cost 3), giving the string "aaaa"

目標“xzxpzxzxpq”,追加成本10,克隆成本11:最便宜的成本是71:

Start with an empty string, ""
Append 'x' (cost 10): "x"
Append 'z' (cost 10): "xz"
Append 'x' (cost 10): "xzx"
Append 'p' (cost 10): "xzxp"
Append 'z' (cost 10): "xzxpz"
Clone "xzxp" (cost 11): "xzxpzxzxp"
Append 'q' (cost 10) : "xzxpzxzxpq"

到現在為止還挺好。 我首先嘗試通過回溯來做到這一點,但隨后出現了以下測試用例:

string bigString = "abcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblg
mbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcqaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjoirmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblg
blgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcqaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaip";
string doubleIt = bigString + bigString;

現在目標字符串變得很大。給定附加和克隆的成本分別為 1234 和 1235,構建它的總成本是 59249。回溯法在這裡會導致堆棧溢出,因此不再可行。我嘗試了一種更有效的方法:

#include <iostream>
#include <vector>
#include <string>
#include <set>

// Returns how many characters of `target`, starting at position `size`,
// repeat the very beginning of `target`.
//
// The comparison is always made against the prefix that starts at index 0,
// and the result is capped at `size` (the length of the part built so far)
// and by the end of `target`. A non-positive `size` yields 0.
int isWorthClone(const int size, const std::string& target) {
    int matched = 0;
    while (matched < size
           && static_cast<std::string::size_type>(size + matched) < target.size()
           && target[size + matched] == target[matched]) {
        ++matched;
    }
    return matched;
}

// Greedy estimate of the cost of building `target` from an empty string.
//
// At every position it asks isWorthClone() how many upcoming characters
// repeat the start of `target`; if one clone (cost `cpyCst`) is strictly
// cheaper than appending those characters one by one (`apndCst` each), it
// clones, otherwise it appends a single character.
//
// NOTE: the choice is made locally at each step and only clones that start
// at index 0 of `target` are considered, so the returned value is not
// guaranteed to be the minimum possible cost.
int buildSolution(const std::string& target, int cpyCst, int apndCst) {
    const int length = int(target.size());
    int total = 0;
    for (int built = 0; built != length;) {
        const int cloneLen = isWorthClone(built, target);
        const bool cloneIsCheaper = cpyCst < cloneLen * apndCst;
        total += cloneIsCheaper ? cpyCst : apndCst;
        built += cloneIsCheaper ? cloneLen : 1;
    }
    return total;
}


int main() {

    std::string bigString = "abcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobac
jodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcqaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjoirmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfd
mbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipiblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifpblgmbtmblgmbaipfdmbcntdblgblgmbaipmbcntdblgblgmbaipcbcntdblgblgmbaipobacjodblgblgmbaiabcblgmbcntdblgblgmbaipmbcntdblgblgmbaipfdmbcqaipfdmbcntdblgblgmbaipmbcntdblgblgmbaiprtifbcntdblgblgmbaipmbcntdblgblgmbaip";
    std::string doubleIt = bigString + bigString;
    std::string target = bigString;
    int copyCost = 1235;
    int appendCost = 1234;
    std::cout << buildSolution(target, copyCost, appendCost) << std::endl;
}

但是輸出是 3588498,而根據測試用例,正確的輸出應該是 59249。我不明白為什么這種方法會給我這樣的結果。我嘗試調試它,似乎 isWorthClone 在某些情況下沒有找到正確的克隆位置。這也有點奇怪,因為它適用於其他情況;但由於這個用例中克隆相對“昂貴”,我認為是錯誤在不斷傳播。

關於為什么會發生這種情況的任何線索? 這是 O(n^2),所以我認為這應該是最佳解決方案。

編輯:

我的代碼現在如下所示,嘗試遵循dp方法:

// Walks two cursors through `target` in lock-step: one from index 0 and one
// from index i - 1. For every pair of equal characters `posCopied` is
// incremented; the walk stops at the first mismatch or as soon as
// (second cursor + posCopied) exceeds target.size().
//
// Returns the initial `posCopied` plus the number of matching pairs found.
// The bound check mixes int with the unsigned size, so a negative sum is
// treated as "out of range" and ends the walk immediately.
int canCopy(const int i, const string& target, int posCopied) {
    int prefixPos = 0;
    int cursor = i - 1;
    for (;;) {
        if (cursor + posCopied > target.size()) {
            break;
        }
        if (target[prefixPos] != target[cursor]) {
            break;
        }
        ++posCopied;
        ++prefixPos;
        ++cursor;
    }
    return posCopied;
}

// Forward-propagating table attempt: dp[k] holds the cost recorded so far for
// the first k characters of `target`.
//
// For each length k >= 2 the entry is relaxed with "append one character",
// then canCopy() is asked how far a clone starting there would reach and the
// entry at that landing position is written.
//
// NOTE(review): the landing entry is overwritten without comparing against a
// cheaper value that may already be stored there, and dp[1] is written
// unconditionally, so `target` is expected to be non-empty.
int stringConstruction(string target, int copyCost, int appendCost) {
    const int notComputed = std::numeric_limits<int>::max();
    vector<int> dp(target.size() + 1, notComputed);

    dp[1] = appendCost;
    for (int len = 2; len < dp.size(); ++len) {
        const int viaAppend = dp[len - 1] + appendCost;
        if (viaAppend < dp[len]) {
            dp[len] = viaAppend;
        }
        const int cloned = canCopy(len, target, 0);
        const int landing = cloned + len;
        if (cloned != 0 and landing < dp.size()) {
            dp[landing] = dp[len] + copyCost;
        }
    }
    return dp.back();
}

這仍然不適用於此處提供的測試用例。

Edit2:最后我用一種非常天真的方法實現了@David Eisenstat 提供的解決方案(謝謝!):

// Returns the start index of the longest suffix of `s` that also occurs,
// without overlapping itself, somewhere in the part of `s` before that
// suffix. Returns s.size() when no non-empty suffix qualifies (this includes
// the empty string, for which the result is 0).
//
// Example: for "xzxpzxzxp" the suffix "xzxp" (start index 5) is the longest
// one that appears earlier, so the result is 5.
int best_clone(const string& s) {
    const string::size_type n = s.size();
    string::size_type start = n;  // empty suffix: nothing can be cloned yet
    while (start > 0) {
        const string::size_type candidate = start - 1;
        const string::size_type len = n - candidate;
        if (len > candidate) {
            break;  // the suffix is longer than everything in front of it
        }
        // The suffix always occurs at `candidate` itself, so find() never
        // returns npos; the suffix is clonable only if its first occurrence
        // ends at or before `candidate`. Searching in place avoids building
        // two temporary substrings per step.
        const string::size_type first = s.find(s.data() + candidate, 0, len);
        if (first + len > candidate) {
            break;
        }
        start = candidate;
    }
    return static_cast<int>(start);
}

// Minimum cost of building `target` from an empty string, where appending one
// character costs `appendCost` and cloning any already-built substring costs
// `copyCost`.
//
// prefixCost[k] is the cost of the first k characters. Each new entry is the
// cheaper of "previous prefix + one append" and "shortest prefix from which a
// single clone reaches here + one clone"; best_clone() supplies the length of
// that shortest prefix.
int stringConstruction(string target, int copyCost, int appendCost) {
    vector<int> prefixCost(1, 0);
    int built = 0;
    while (built < target.size()) {
        const int viaAppend = prefixCost[built] + appendCost;
        const int cloneStart = best_clone(target.substr(0, built + 1));
        int cheapest = viaAppend;
        if (cloneStart <= built) {
            cheapest = std::min(viaAppend, prefixCost[cloneStart] + copyCost);
        }
        prefixCost.push_back(cheapest);
        ++built;
    }
    return prefixCost.back();

}

好像我誤解了這個問題。 這為測試用例提供了解決方案,但需要的時間太長。 best_clone需要優化。

編輯3:(希望這是最后一個)

我添加了以下類SA來存儲后綴數組:

#pragma once
#include <vector>
#include <string>
#include <algorithm>
#include <iostream>
#include <chrono>
using namespace std;

// One suffix of the indexed text: `index` is the position in the original
// string where the suffix starts, `s` is the suffix text itself.
struct suffix {
    int index;
    string s;
};

struct comp
{
    inline bool operator() (const suffix& s1, const suffix& s2)
    {
        return (s1.s < s2.s);
    }
};

class SA
{
private:
    vector<suffix> values;
public:
    SA(const string& s) : values(s.size()) {
        string aux = s;
        for (int i = 0; i < s.length(); i++) {
            values[i].index = i;
            values[i].s = s.substr(i, s.size() - i);;
        }
        sort(values.begin(), values.end(), comp());
    }

    friend ostream& operator<<(ostream& os, const SA& dt)
    {
        for (int i = 0; i < dt.values.size(); i++) {
            os << dt.values[i].index << ": " << dt.values[i].s << "\n";
        }
        return os;
    }

    int search(const string& subst, int i, int j) {
        while (j >= i) {
            int mid = (i + j) / 2;
            if (this->values[mid].s > subst) {
                j = mid-1;
            }
            else if (this->values[mid].s < subst) {
                i = mid+1;
            }
            else return mid;
        }
        return -1;
    }

};

但是現在我不知道如何在這個數組中搜索最佳的 clone(我知道這很慢,我會說是 n*2log(n),但我認為對於這個問題已經足夠了)。所以現在我需要把這些部分放在一起。

問題是您正在做出貪婪的克隆決定。 讓我們看一個附加成本為 2 且克隆成本為 3 的情況。如果您處理字符串aabaaaba ,您將附加aab 、克隆aa和克隆aba ,而最好的解決方案是附加aaba並克隆它。

解決方法是動態編程,具體來說,就是構建一個代價數組來制作目標字符串的每個前綴。 要填充每個條目,取最小值(附加成本加上前一個條目,克隆成本加上可以用一個克隆完成的最短前綴的成本)。 由於克隆成本是恆定的,數組是非遞減的,因此我們不需要檢查所有可能的前綴。

根據限制,您可能需要構建一個后綴數組/最長公共前綴數組(使用例如 SA-IS)以快速識別所有最佳克隆。 這肯定會在時間 o(n²) 中運行(很可能是 O(n),但有足夠多的移動部件,我不想聲稱)。

這個 Python 太慢了,但在大型測試用例上得到了正確的答案:

def best_clone(s):
    """Return the start index of the longest clonable suffix of ``s``.

    A suffix ``s[j:]`` is clonable when it also occurs inside ``s[:j]``, i.e.
    entirely before its own start. The result is the smallest such ``j``, or
    ``len(s)`` when no non-empty suffix qualifies (so ``0`` for ``""``).
    """
    j = len(s)
    # Grow the suffix one character to the left while it still occurs in the
    # text in front of it. The ``j > 0`` guard stops the walk at the start of
    # the string; without it an empty ``s`` would loop forever, because the
    # empty string is contained in the empty string.
    while j > 0 and s[j - 1:] in s[: j - 1]:
        j -= 1
    return j


def construction_cost(s, append_cost, clone_cost):
    """Return the cheapest cost of building ``s`` from the empty string.

    Appending one character costs ``append_cost``; cloning any substring of
    the text built so far costs ``clone_cost``. ``costs[k]`` is the cost of
    the first ``k`` characters, and each entry is the cheaper of one append
    on top of the previous prefix or one clone on top of the shortest prefix
    that a single clone can extend to length ``k``.
    """
    costs = [0]
    for end in range(1, len(s) + 1):
        cheapest = costs[end - 1] + append_cost
        start = best_clone(s[:end])
        if start < end:
            cheapest = min(cheapest, costs[start] + clone_cost)
        costs.append(cheapest)
    return costs[-1]

如果您的野心極限是二次的,那么我們可以充分利用用於字符串匹配的 Z 函數。

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

using Cost = unsigned long long;

// Adapted from https://cp-algorithms.com/string/z-function.html
// Computes the Z-array of `s`: for every position p >= 1, z[p] is the length
// of the longest common prefix of `s` and the part of `s` starting at p.
// z[0] is left at 0.
//
// [box_left, box_right] is the right-most matched segment found so far;
// positions inside it start from the value already known for the mirrored
// position instead of comparing from scratch.
std::vector<std::size_t> ZFunction(std::string_view s) {
  const std::size_t len = s.length();
  std::vector<std::size_t> z(len, 0);
  std::size_t box_left = 0;
  std::size_t box_right = 0;
  for (std::size_t pos = 1; pos < len; ++pos) {
    std::size_t match = 0;
    if (pos <= box_right) {
      match = std::min(box_right - pos + 1, z[pos - box_left]);
    }
    while (pos + match < len && s[match] == s[pos + match]) {
      ++match;
    }
    z[pos] = match;
    if (pos + match - 1 > box_right) {
      box_left = pos;
      box_right = pos + match - 1;
    }
  }
  return z;
}

// Returns the start index of the longest suffix of `s` that also occurs
// entirely before its own start, or s.length() when there is none.
//
// On the reversed string a suffix of `s` becomes a prefix, so z[shift] is the
// length of the match between that prefix and the text `shift` characters
// further on. Capping the match at `shift` keeps the earlier occurrence from
// overlapping the suffix itself.
std::size_t BestClone(std::string_view s) {
  const std::string reversed(s.rbegin(), s.rend());
  const std::vector<std::size_t> z = ZFunction(reversed);
  std::size_t longest = 0;
  for (std::size_t shift = 0; shift < z.size(); ++shift) {
    const std::size_t usable = std::min(z[shift], shift);
    if (usable > longest) {
      longest = usable;
    }
  }
  return s.length() - longest;
}

// Cheapest cost of building `s` from the empty string when appending one
// character costs `append_cost` and cloning any already-built substring costs
// `clone_cost`.
//
// costs[k] is the cost of the first k characters. Each step extends the table
// by one entry: one append on top of the previous prefix, or, when
// BestClone() reports that some suffix of the current prefix can be cloned,
// one clone on top of the shortest prefix that reaches here, whichever is
// cheaper.
Cost ConstructionCost(std::string_view s, Cost append_cost, Cost clone_cost) {
  std::vector<Cost> costs = {0};
  for (std::size_t len = 1; len <= s.length(); ++len) {
    Cost cheapest = costs.back() + append_cost;
    const std::size_t clone_start = BestClone(s.substr(0, len));
    if (clone_start < len) {
      cheapest = std::min(cheapest, costs[clone_start] + clone_cost);
    }
    costs.push_back(cheapest);
  }
  return costs.back();
}

// Reads whitespace-separated target strings from standard input and prints,
// one per line, the construction cost of each with an append cost of 1234 and
// a clone cost of 1235.
int main() {
  for (std::string target; std::cin >> target;) {
    std::cout << ConstructionCost(target, 1234, 1235) << '\n';
  }
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM