簡體   English   中英

在 c# 中使用 span 替換小字符串中出現的最快方法

[英]Fastest way to replace occurences in small string using span in c#

我正在嘗試優化 C# 中 String.Replace 方法的 CPU 性能和內存(memory)使用。目標是減少內存分配和 CPU 時間,因為該項目運行在 ASP.NET Core 上,負載約為 10000 rps(每秒請求數)。

我發現了兩個提高性能的技巧:1)使用 Span Struct 2)使用 String.Create

   /// <summary>
   /// Bundles the three strings of one replace operation so they can be handed
   /// to a callback as a single state value.
   /// </summary>
   internal struct ContextData
    {
        /// <summary>The text that is searched.</summary>
        public string Origin { get; set; }
        /// <summary>The substring to look for in <see cref="Origin"/>.</summary>
        public string Replace { get; set; }
        /// <summary>The text written in place of each <see cref="Replace"/> occurrence.</summary>
        public string With { get; set; }
    }




    /// <summary>
    /// Replaces every occurrence of <c>context.Replace</c> in <c>context.Origin</c> with
    /// <c>context.With</c> (ordinal comparison), writing the result directly into a
    /// string of the exact final length via <c>string.Create</c>.
    /// </summary>
    /// <param name="context">The source text, the substring to find and its replacement.</param>
    /// <returns>
    /// The replaced string. <c>context.Origin</c> itself is returned when it is null or empty,
    /// when the pattern is null or empty, or when the pattern does not occur.
    /// </returns>
    internal string SpanReplaceWithCreate(ContextData context)
    {
        // An empty pattern "matches" at index 0 forever, so there is nothing sensible to replace.
        if (string.IsNullOrEmpty(context.Origin) || string.IsNullOrEmpty(context.Replace))
        {
            return context.Origin;
        }

        ReadOnlySpan<char> origin_span = context.Origin.AsSpan();
        ReadOnlySpan<char> replace_span = context.Replace.AsSpan();
        // AsSpan() maps a null string to an empty span, so a null With behaves like "".
        ReadOnlySpan<char> with_span = context.With.AsSpan();

        // Pass 1: count the matches so the result can be allocated at its exact size.
        int count = 0;
        int index;
        ReadOnlySpan<char> remaining = origin_span;

        // ">= 0": IndexOf returns 0 for a match at the very start and -1 for "not found".
        while ((index = remaining.IndexOf(replace_span)) >= 0)
        {
            count++;
            remaining = remaining.Slice(index + replace_span.Length);
        }

        if (count == 0)
        {
            return context.Origin;
        }

        // Every match removes Replace.Length characters and adds With.Length characters.
        int length = origin_span.Length + (with_span.Length - replace_span.Length) * count;

        // Pass 2: fill the string. The lambda only touches its own parameters and locals,
        // so no closure object is allocated.
        return string.Create(length, context, (chars, state) =>
           {
               ReadOnlySpan<char> source = state.Origin.AsSpan();
               ReadOnlySpan<char> pattern = state.Replace.AsSpan();
               ReadOnlySpan<char> replacement = state.With.AsSpan();

               // Write position inside the string being populated.
               int position = 0;
               int match;

               while ((match = source.IndexOf(pattern)) >= 0)
               {
                   // Text in front of the match, then the replacement.
                   source.Slice(0, match).CopyTo(chars.Slice(position));
                   position += match;
                   replacement.CopyTo(chars.Slice(position));
                   position += replacement.Length;

                   source = source.Slice(match + pattern.Length);
               }

               // Whatever follows the last match.
               source.CopyTo(chars.Slice(position));
           });
    }

但與 string.Replace 相比,我的實現性能仍然更差:

方法 URL 尋找 代替 平均值 誤差 標准偏差 中位數 排名 第 0 代 第 1 代 第 2 代 已分配
StringReplace HTTP(...)凝視 [196] 谷歌 AFD 370.4 納秒 9.37 納秒 27.33 納秒 360.7 納秒 1 0.0319 - - 336 B
StringReplaceWithCreate HTTP(...)凝視 [196] 谷歌 AFD 492.8 納秒 9.60 納秒 12.15 納秒 490.4 納秒 2 0.0563 - - 592 B

有什么建議嗎?

這里的參數用於測試

https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}



{camp} : "campiagn_it_banner_size_360"
{publisher_id} : "78983"
{transaction} : "c1032072-f815-413b-a57c-4a027f681e60"
{aff_sub1} : "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"
{aff_site} : "vbvsdgdavhdgdvjs_46_789-p90"
{creative_id} : "360x360"
{ua} : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
{ip} : "192.168.1.1"
{lang} : "en"

更新 1

/// <summary>
/// Benchmark: expands every <c>{placeholder}</c> token of the redirect URL in a single
/// left-to-right pass, looking each token up through <c>Placeholders.getVal</c>.
/// </summary>
/// <returns>
/// The expanded URL at its exact length. A '{' without a closing '}' is copied verbatim
/// together with everything after it.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown by the span copy when the expanded URL does not fit into the scratch buffer.
/// </exception>
[Benchmark]
    public string FastTokenReplace()
    {

        string request = "http://wwww.example.com?a=campiagn_it_banner_size_360&b=78983&h=c1032072-f815-413b-a57c-4a027f681e6&y=78bea32a-6ead-4ea0-b9f2-9489ebc43d6a&ty=vbvsdgdavhdgdvjs_46_789-p90&yhhh=360x360&sua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F90.0.4430.93%20Safari%2F537.36&ppp=192.168.1.1";
        string redirecturl = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}&ieruiero{343454";

        // Assumed upper bound for the expanded URL; it is a guess, not a computed size.
        int max_allocation = Math.Max(request.Length, redirecturl.Length) * 3;

        // The final length is only known after expansion, so build into a pooled scratch
        // buffer and create the string at its exact size. Writing straight into a
        // max_allocation-sized string would leave it padded with '\0' characters.
        char[] buffer = System.Buffers.ArrayPool<char>.Shared.Rent(max_allocation);
        try
        {
            Span<char> chars = buffer;
            ReadOnlySpan<char> tmp = redirecturl.AsSpan();
            int position = 0;
            int placeholder_start;

            // ">= 0": a placeholder at the very start of the remaining text is a valid match.
            while ((placeholder_start = tmp.IndexOf('{')) >= 0)
            {
                // Relative to placeholder_start, so +1 yields the token length including both braces.
                int placeholder_end = tmp.Slice(placeholder_start).IndexOf('}');
                if (placeholder_end < 0)
                {
                    // Unterminated '{': the rest is plain text and is copied after the loop.
                    break;
                }

                // Text in front of the placeholder.
                tmp.Slice(0, placeholder_start).CopyTo(chars.Slice(position));
                position += placeholder_start;

                ReadOnlySpan<char> replace = tmp.Slice(placeholder_start, placeholder_end + 1);

                //OPTIMIZE HERE? (ToString allocates one small string per placeholder)
                ReadOnlySpan<char> with = Placeholders.getVal(replace.ToString()).AsSpan();

                with.CopyTo(chars.Slice(position));
                position += with.Length;

                tmp = tmp.Slice(placeholder_start + replace.Length);
            }

            // Copy the tail: either everything after the last placeholder or the
            // unterminated remainder.
            tmp.CopyTo(chars.Slice(position));
            position += tmp.Length;

            return new string(buffer, 0, position);
        }
        finally
        {
            System.Buffers.ArrayPool<char>.Shared.Return(buffer);
        }
    }

 /// <summary>
 /// Placeholder tokens (including their braces) that may appear in a redirect URL
 /// template, together with the fixed sample values used by the benchmarks.
 /// </summary>
 class Placeholders
{
    public const string camp = "{camp}";
    public const string publisher_id = "{publisher_id}";
    public const string creative_id = "{creative_id}";
    public const string ua = "{ua}";
    public const string lang = "{lang}";
    public const string ip = "{ip}";
    public const string Transaction = "{transaction}";
    public const string AffSite = "{aff_site}";
    // NOTE(review): none of the benchmark templates contains "{adset}"; it resolves to "" — confirm the intended token.
    public const string AdsetId = "{adset}";
    // Spelled like the token in the benchmark templates ("{aff_sub1}", with underscore).
    public const string AffSub1 = "{aff_sub1}";

    // Sample values are declared once so the string and span lookups cannot drift apart.
    private const string CampValue = "campiagn_it_banner_size_360";
    private const string PublisherIdValue = "78983";
    private const string TransactionValue = "c1032072-f815-413b-a57c-4a027f681e60";
    private const string AffSub1Value = "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a";
    private const string AffSiteValue = "vbvsdgdavhdgdvjs_46_789-p90";
    private const string CreativeIdValue = "360x360";
    private const string UaValue = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36";
    private const string IpValue = "192.168.1.1";
    private const string LangValue = "en";

    /// <summary>Returns the sample value for a placeholder token.</summary>
    /// <param name="key">The token including braces, e.g. <c>"{camp}"</c>.</param>
    /// <returns>The sample value, or an empty string for an unknown (or null) token.</returns>
    public static string getVal(string key)
    {
        switch (key)
        {
            case camp:
                return CampValue;
            case publisher_id:
                return PublisherIdValue;
            case Transaction:
                return TransactionValue;
            case AffSub1:
                return AffSub1Value;
            case AffSite:
                return AffSiteValue;
            case creative_id:
                return CreativeIdValue;
            case ua:
                return UaValue;
            case ip:
                return IpValue;
            case lang:
                return LangValue;
            default:
                return "";
        }
    }

    /// <summary>
    /// Span-based variant of <see cref="getVal(string)"/>: same mapping, but the token
    /// does not have to be materialized as a string first.
    /// </summary>
    /// <param name="key">The token including braces.</param>
    /// <returns>The sample value, or an empty span for an unknown token.</returns>
    public static ReadOnlySpan<char> getVal(ReadOnlySpan<char> key)
    {
        if (Matches(key, camp))
            return CampValue.AsSpan();
        else if (Matches(key, publisher_id))
            return PublisherIdValue.AsSpan();
        else if (Matches(key, Transaction))
            return TransactionValue.AsSpan();
        else if (Matches(key, AffSub1))
            return AffSub1Value.AsSpan();
        else if (Matches(key, AffSite))
            return AffSiteValue.AsSpan();
        else if (Matches(key, creative_id))
            return CreativeIdValue.AsSpan();
        else if (Matches(key, ua))
            return UaValue.AsSpan();
        else if (Matches(key, ip))
            return IpValue.AsSpan();
        else if (Matches(key, lang))
            return LangValue.AsSpan();
        else
            return "".AsSpan();
    }

    // Ordinal, case-sensitive comparison of a span against one of the token constants.
    private static bool Matches(ReadOnlySpan<char> key, string token)
    {
        return MemoryExtensions.Equals(key, token.AsSpan(), StringComparison.Ordinal);
    }
}
   /// <summary>
   /// Baseline benchmark: expands the redirect URL with one chained
   /// <c>string.Replace</c> call per known placeholder.
   /// </summary>
   /// <returns>The URL after all replacements; unknown tokens are left untouched.</returns>
   [Benchmark]
    public string StandardTokenReplace()
    {
        string redirecturl = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}&ieruiero{343454";

        // Each Replace call scans the whole string again and may allocate a new
        // intermediate string; that cost is what this baseline measures.
        return redirecturl.Replace(Placeholders.camp, Placeholders.getVal(Placeholders.camp))
            .Replace(Placeholders.publisher_id, Placeholders.getVal(Placeholders.publisher_id))
            .Replace(Placeholders.creative_id, Placeholders.getVal(Placeholders.creative_id))
            .Replace(Placeholders.ua, Placeholders.getVal(Placeholders.ua))
            .Replace(Placeholders.lang, Placeholders.getVal(Placeholders.lang))
            .Replace(Placeholders.ip, Placeholders.getVal(Placeholders.ip))
            .Replace(Placeholders.Transaction, Placeholders.getVal(Placeholders.Transaction))
            .Replace(Placeholders.AffSite, Placeholders.getVal(Placeholders.AffSite))
            .Replace(Placeholders.AdsetId, Placeholders.getVal(Placeholders.AdsetId))
            .Replace(Placeholders.AffSub1, Placeholders.getVal(Placeholders.AffSub1));

    }

1 最大分配

    int max_allocation = Math.Max(request.Length, redirecturl.Length) * 3;

我們可以計算出字符串的正確大小,但它的性能會更差。 對於這種情況,我們可以假設一個最大長度。

www.example.com?camp=1234567890123456789023456789012345678902345678 www.replace.com?{camp}{camp}{camp}{camp}{camp}{camp}{camp}

不會工作。

2 獲得價值

   ReadOnlySpan<char> with = Placeholders.getVal(replace.ToString()).AsSpan();

如果占位符重復,我們可以緩存該值或在移動到下一個占位符之前搜索所有出現的事件。

public static string getVal(string key) vs public static ReadOnlySpan<char> getVal(ReadOnlySpan<char> key)

使用字符串版本我們仍然有更好的性能。 有什么改進建議嗎?

// * 概括 *

BenchmarkDotNet=v0.12.1,OS=Windows 10.0.19041.928 (2004/?/20H1) Intel Core i9-10900 CPU 2.80GHz,1 個 CPU,20 個邏輯內核和 10 個物理內核 .NET Core SDK=5.0.202 [Host]:.NET Core 3.1.14(CoreCLR 4.700.21.16201,CoreFX 4.700.21.16208),X64 RyuJIT .NET Core 3.1:.NET Core 3.1.14(CoreCLR 4.700.21.16201,CoreFX20JIT 4.716)

Job=.NET Core 3.1 工具鏈=.NET Core 3.1

方法 URL 尋找 代替 平均值 誤差 標准偏差 排名 第 0 代 第 1 代 第 2 代 已分配
FastTokenReplace [196] 518.8 納秒 4.63 納秒 3.61 納秒 1 0.2470 0.0038 - 2.52 KB
FastTokenReplaceImproveMem [196] 584.4 納秒 6.84 納秒 5.71 納秒 2 0.2050 0.0010 - 2.09 KB
StandardTokenReplace [196] 4,242.7 納秒 84.82 納秒 94.27 納秒 3 0.6866 - - 7.06 KB

內置方法應該盡可能優化,因此嘗試制作比它更快的通用 Replace function 是不現實的。 也許使用不安全的代碼和/或用 C/C++ 之類的東西編寫的 dll 可能會很接近。 也就是說,如果您創建一個適用於您的特定約束的 function,您可能能夠獲得更多性能。 從您的示例中,有 3 種可能的方法可以更快地完成工作:

  • 與其一次替換一個令牌,不如批量替換它們。 當然,這假設您不依賴於遞歸替換,否則,這對您不起作用。
  • 假設需要替換的每個標記具有相同的分隔符,您可以大大減少對IndexOf的調用。
  • 如果你能猜到結果長度的上限,你可以跳過計算長度而只預分配 memory。

以下是我的基准測試結果(AMD Ryzen 9 5950X):

方法 平均值 比率 已分配 分配比例
普通的 875.4 納秒 1.00 8.02 KB 1.00
BatchNaive 5,512.5 納秒 6.32 1.14 KB 0.14
BatchNaiveCached 2,838.7 納秒 3.24 1.47 KB 0.18
BatchCached 964.8 納秒 1.10 1.47 KB 0.18
BatchDelimiterCached 312.8 納秒 0.36 1.47 KB 0.18
BatchDelimiterPreallocated 266.0 納秒 0.30 2.5 KB 0.31
BatchDelimiterPreallocatedSafe 255.6 納秒 0.29 2.5 KB 0.31

如您所見,對於您的示例,后三個速度大約快 3 倍。 不僅如此,它們還使用更少的(堆)memory。

這是代碼:

  • BatchDelimiterCached
// BatchDelimiterCached: replaces all "{token}" occurrences in one pass.
// Pass 1 locates every known token and computes the exact result length;
// pass 2 fills a string of that length using the recorded positions.
const string origin = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}";

// Token names are stored WITHOUT delimiters. The tuple elements must be named,
// because the code below reads them as .toReplace / .replaceWith.
var replaceTasks = new (string toReplace, string replaceWith)[]
{
    ("camp", "campiagn_it_banner_size_360"),
    ("publisher_id", "78983"),
    ("transaction", "c1032072-f815-413b-a57c-4a027f681e60"),
    ("aff_sub1", "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"),
    ("aff_site", "vbvsdgdavhdgdvjs_46_789-p90"),
    ("creative_id", "360x360"),
    ("ua", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"),
    ("ip", "192.168.1.1"),
    ("lang", "en")
};
char prefix = '{';
char postfix = '}';
// the above are arguments to the function

ReadOnlySpan<char> tmp = origin;

// we store the occurrences in the queue while calculating the length of the final string
// so we don't have to search for them a 2nd time later
// (at = absolute index of the prefix in origin, task = index into replaceTasks)
var occurrences = new Queue<(int at, int task)>();
int offset = 0; // number of characters of origin already consumed from the front of tmp
int resultLength = tmp.Length;

int prefixIndex;
while ((prefixIndex = tmp.IndexOf(prefix)) != -1)
{
    (int at, int task) next = (prefixIndex, -1);
    for (int i = 0; i < replaceTasks.Length; i++)
    {
        // we expect the postfix to be at this place
        int postfixIndex = prefixIndex + replaceTasks[i].toReplace.Length + 1;
        if (tmp.Length > postfixIndex // check that we don't cross the bounds
            && tmp[postfixIndex] == postfix // check that the postfix IS where we expect it to be
            && tmp.Slice(prefixIndex + 1, postfixIndex - prefixIndex - 1).SequenceEqual(replaceTasks[i].toReplace.AsSpan())) // compare all the characters in between the delimiters
        {
            next.task = i;
            break;
        }
    }

    if (next.task == -1)
    {
        // this delimiter character is just part of the string, so skip it
        tmp = tmp.Slice(prefixIndex + 1);
        offset += prefixIndex + 1;
        continue;
    }

    // continue searching right behind the postfix (+2 accounts for both delimiters)
    int newStart = next.at + replaceTasks[next.task].toReplace.Length + 2;
    tmp = tmp.Slice(newStart);

    occurrences.Enqueue((next.at + offset, next.task));
    offset += newStart;

    resultLength += replaceTasks[next.task].replaceWith.Length - replaceTasks[next.task].toReplace.Length - 2;
}

// Everything the callback needs is passed through the state tuple; origin is a
// constant, so the lambda captures nothing. The callback locals use names that
// differ from the outer locals because C# forbids shadowing them inside a lambda.
string result = string.Create(resultLength, (replaceTasks, occurrences), (chars, state) =>
{
    var tasks = state.replaceTasks;
    var pending = state.occurrences;

    int position = 0;  // write position in chars
    int lastStart = 0; // read position in origin

    ReadOnlySpan<char> source = origin;

    while (pending.Count != 0)
    {
        (int at, int task) hit = pending.Dequeue();
        ReadOnlySpan<char> replacement = tasks[hit.task].replaceWith.AsSpan();

        // text between the previous token and this one, then the replacement
        int gap = hit.at - lastStart;
        source.Slice(lastStart, gap).CopyTo(chars.Slice(position));
        replacement.CopyTo(chars.Slice(position + gap));

        position += gap + replacement.Length;
        lastStart = hit.at + tasks[hit.task].toReplace.Length + 2;
    }

    // text after the last token
    source.Slice(lastStart).CopyTo(chars.Slice(position));
});

return result;
  • BatchDelimiterPreallocatedSafe
// BatchDelimiterPreallocatedSafe: replaces all "{token}" occurrences in one pass into
// a string preallocated at twice the input length, then trims it. If the guess turns
// out too small, it falls back to one string.Replace call per token.
const string origin = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}";

// Token names are stored WITHOUT delimiters. The tuple elements must be named,
// because the code below reads them as .toReplace / .replaceWith.
var replaceTasks = new (string toReplace, string replaceWith)[]
{
    ("camp", "campiagn_it_banner_size_360"),
    ("publisher_id", "78983"),
    ("transaction", "c1032072-f815-413b-a57c-4a027f681e60"),
    ("aff_sub1", "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"),
    ("aff_site", "vbvsdgdavhdgdvjs_46_789-p90"),
    ("creative_id", "360x360"),
    ("ua", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"),
    ("ip", "192.168.1.1"),
    ("lang", "en")
};
char prefix = '{';
char postfix = '}';
// the above are arguments to the function

// Stays -1 when the preallocated buffer was too small; otherwise the callback stores
// the number of characters it wrote. The callback captures this local on purpose:
// it is the only way to get the final length out of string.Create.
int resultLength = -1;

// The callback locals use names that differ from the outer locals because C# forbids
// shadowing them inside a lambda.
string result = string.Create(origin.Length * 2, (replaceTasks, prefix, postfix), (chars, state) =>
{
    var tasks = state.replaceTasks;
    char open = state.prefix;
    char close = state.postfix;

    int position = 0; // write position in chars

    ReadOnlySpan<char> tmp = origin;

    int prefixIndex;
    while ((prefixIndex = tmp.IndexOf(open)) != -1)
    {
        bool replaced = false;
        for (int i = 0; i < tasks.Length; i++)
        {
            // we expect the postfix to be at this place
            int postfixIndex = prefixIndex + tasks[i].toReplace.Length + 1;
            if (tmp.Length > postfixIndex // check that we don't cross the bounds
                && tmp[postfixIndex] == close // check that the postfix IS where we expect it to be
                && tmp.Slice(prefixIndex + 1, postfixIndex - prefixIndex - 1).SequenceEqual(tasks[i].toReplace.AsSpan())) // compare all the characters in between the delimiters
            {
                ReadOnlySpan<char> replacement = tasks[i].replaceWith.AsSpan();

                // check if the following copy operations would exceed our preallocated memory bounds
                if (position + prefixIndex + replacement.Length > chars.Length)
                {
                    return; // resultLength stays -1, which triggers the fallback
                }

                tmp.Slice(0, prefixIndex).CopyTo(chars.Slice(position));
                replacement.CopyTo(chars.Slice(position + prefixIndex));
                position += prefixIndex + replacement.Length;
                tmp = tmp.Slice(postfixIndex + 1);
                replaced = true;
                break;
            }
        }

        if (!replaced)
        {
            // this delimiter is just part of the string, so copy it through and skip it
            if (position + prefixIndex + 1 > chars.Length)
            {
                return; // buffer too small, use the fallback
            }

            tmp.Slice(0, prefixIndex + 1).CopyTo(chars.Slice(position));
            position += prefixIndex + 1;
            tmp = tmp.Slice(prefixIndex + 1);
        }
    }

    if (position + tmp.Length <= chars.Length) // check if the following copy operation would exceed our preallocated memory bounds
    {
        // copy the remaining string
        tmp.CopyTo(chars.Slice(position));
        resultLength = position + tmp.Length;
    }
});

if (resultLength != -1)
{
    return result.Substring(0, resultLength); // there are many extra null characters ('\0') at the end of our string, so we trim the string down

    // Note: the above reallocates the string. If you can find a way to trim the string in place, it should reduce the memory usage and speed a little.
}
else
{
    // the resulting string exceeded our preallocated memory. fallback to another method
    result = origin;

    for (int i = 0; i < replaceTasks.Length; i++)
    {
        (string toReplace, string replaceWith) = replaceTasks[i];

        // The tasks hold bare token names, so the delimiters have to be added back;
        // replacing "camp" alone would also hit "campaign" and leave the braces behind.
        result = result.Replace(prefix + toReplace + postfix, replaceWith);
    }

    return result;
}

當然,當僅替換 1 個令牌時,這會比內置方法更慢並且使用更多 memory。 另一方面,批次大小越大,速度和 memory 改進越大(即使批次中的標記不在原始字符串中):

方法 批量大小 平均值 比率 已分配 分配比例
普通的 1 103.4 納秒 1.00 736 B 1.00
BatchDelimiterPreallocatedSafe 1 154.4 納秒 1.49 2192 B 2.98
普通的 50 1,870.1 納秒 1.00 8208 B 1.00
BatchDelimiterPreallocatedSafe 50 254.8 納秒 0.14 2560 B 0.31

使用示例數據運行一些測試後,似乎 String.Replace 是高度優化的;StringBuilder.Replace 以及我基於 IndexOfAny 的變體(返回最先找到的匹配項,並根據 CodeReview 的意見做過改進)都比它慢。 在我的測試中,使用一組元組逐個進行替換是最快的:

// Sample template: a redirect URL containing "{token}" placeholders.
var s = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}";

// (match, replacement) pairs; each match includes its surrounding braces.
var replacementsa = new[] {
        ("{camp}", "campiagn_it_banner_size_360"),
        ("{publisher_id}", "78983"),
        ("{transaction}", "c1032072-f815-413b-a57c-4a027f681e60"),
        ("{aff_sub1}", "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"),
        ("{aff_site}", "vbvsdgdavhdgdvjs_46_789-p90"),
        ("{creative_id}", "360x360"),
        ("{ua}", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"),
        ("{ip}", "192.168.1.1"),
        ("{lang}", "en")
    };

/// <summary>
/// Applies every (match, replace) pair to <paramref name="s"/> in array order,
/// using one <c>string.Replace</c> call per pair.
/// </summary>
/// <param name="s">The text to transform.</param>
/// <param name="replacements">Pairs of the text to find and the text to insert instead.</param>
/// <returns>The text after all pairs have been applied.</returns>
public static string MultiReplace(this string s, (string match,string replace)[] replacements) {
    string current = s;

    // Later pairs see the output of earlier ones, so the order of the array matters.
    foreach (var (pattern, substitute) in replacements)
    {
        current = current.Replace(pattern, substitute);
    }

    return current;
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM