[英]Fastest way to replace occurences in small string using span in c#
我正在嘗試最大限度地提高 cpu 性能和 memory 字符串優化。C# 中的替換方法。目標是減少 memory 分配和 cpu 時間,因為該項目位於 asp.net 核心,轉速為 10000。
我發現了兩個提高性能的技巧:1)使用 Span Struct 2)使用 String.Create
internal struct ContextData
{
public string Origin { get; set; }
public string Replace { get; set; }
public string With { get; set; }
}
internal string SpanReplaceWithCreate(ContextData context)
{
int count = 0;
ReadOnlySpan<char> origin_span = context.Origin.AsSpan();
ReadOnlySpan<char> replace_span = context.Replace.AsSpan();
ReadOnlySpan<char> replace_with = context.With.AsSpan();
int index;
ReadOnlySpan<char> tmp = origin_span;
while ((index = tmp.IndexOf(replace_span)) > 0)
{
count++;
tmp = tmp.Slice(index + replace_span.Length, tmp.Length - replace_span.Length - index);
}
string a = string.Create(context.Origin.Length + (context.Replace.Length - context.With.Length) * count, context, (chars, state) =>
{
// NOTE: We don't access the context variable in this delegate since
// it would cause a closure and allocation.
// Instead we access the state parameter.
// will track our position within the string data we are populating
var position = 0;
ReadOnlySpan<char> origin = state.Origin.AsSpan();
ReadOnlySpan<char> replace = state.Replace.AsSpan();
ReadOnlySpan<char> with = state.With.AsSpan();
ReadOnlySpan<char> tmp_context = origin;
while ((index = tmp_context.IndexOf(replace)) > 0)
{
tmp_context.Slice(0, index).CopyTo(chars.Slice(position));
with.CopyTo(chars.Slice(position + index));
position += (index + with.Length);
tmp_context = tmp_context.Slice(index + replace.Length, tmp_context.Length - replace.Length - index);
}
if (position < chars.Length) {
tmp_context.CopyTo(chars.Slice(position));
}
});
return a;
}
但與 string.Replace 相比,我的表現仍然最差
方法 | URL | 尋找 | 代替 | 意思 | 錯誤 | 標准偏差 | 中位數 | 秩 | 0代 | 第一代 | 第 2 代 | 已分配 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
字符串替換 | HTTP(...)凝視 [196] | 谷歌 | AFD | 370.4 納秒 | 9.37 納秒 | 27.33 納秒 | 360.7 納秒 | 1個 | 0.0319 | - | - | 336乙 |
StringReplaceWithCreate | HTTP(...)凝視 [196] | 谷歌 | AFD | 492.8 納秒 | 9.60 納秒 | 12.15 納秒 | 490.4 納秒 | 2個 | 0.0563 | - | - | 592乙 |
有什么建議嗎?
這里的參數用於測試
https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}
{camp} : "campiagn_it_banner_size_360"
{publisher_id} : "78983"
{transaction} : "c1032072-f815-413b-a57c-4a027f681e60"
{aff_sub1} : "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"
{aff_site} : "vbvsdgdavhdgdvjs_46_789-p90"
{creative_id} : "360x360"
{ua} : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
{ip} : "192.168.1.1"
{lang} : "en"
更新 1
[Benchmark]
public string FastTokenReplace()
{
string request = "http://wwww.example.com?a=campiagn_it_banner_size_360&b=78983&h=c1032072-f815-413b-a57c-4a027f681e6&y=78bea32a-6ead-4ea0-b9f2-9489ebc43d6a&ty=vbvsdgdavhdgdvjs_46_789-p90&yhhh=360x360&sua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F90.0.4430.93%20Safari%2F537.36&ppp=192.168.1.1";
string redirecturl = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}&ieruiero{343454";
int max_allocation = Math.Max(request.Length, redirecturl.Length) * 3;
return string.Create(max_allocation, redirecturl, (chars, state) =>
{
ReadOnlySpan<char> tmp = state.AsSpan();
int position = 0;
int placeholder_start;
while ((placeholder_start = tmp.IndexOf('{')) > 0)
{
int placeholder_end = tmp.Slice(placeholder_start).IndexOf('}');
if (placeholder_end < 0)
{
//copy the last part
tmp.CopyTo(chars.Slice(position));
break;
}
else
{
tmp.Slice(0, placeholder_start).CopyTo(chars.Slice(position));
ReadOnlySpan<char> replace = tmp.Slice(placeholder_start, placeholder_end + 1);
//OPTIMIZE HERE?
ReadOnlySpan<char> with = Placeholders.getVal(replace.ToString()).AsSpan();
with.CopyTo(chars.Slice(position + placeholder_start));
position += (placeholder_start + with.Length);
tmp = tmp.Slice(placeholder_start + replace.Length, tmp.Length - replace.Length - placeholder_start);
}
}
});
}
class Placeholders
{
public const string camp = "{camp}";
public const string publisher_id = "{publisher_id}";
public const string creative_id = "{creative_id}";
public const string ua = "{ua}";
public const string lang = "{lang}";
public const string ip = "{ip}";
public const string Transaction = "{transaction}";
public const string AffSite = "{aff_site}";
public const string AdsetId = "{adset}";
public const string AffSub1 = "{affsub1}";
public static string getVal(string key)
{
switch (key)
{
case camp:
return "campiagn_it_banner_size_360";
case publisher_id:
return "78983";
case Transaction:
return "c1032072-f815-413b-a57c-4a027f681e60";
case AffSub1:
return "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a";
case AffSite:
return "vbvsdgdavhdgdvjs_46_789-p90";
case creative_id:
return "360x360";
case ua:
return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36";
case ip:
return "192.168.1.1";
default:
return "";
}
}
public static ReadOnlySpan<char> getVal(ReadOnlySpan<char> key)
{
if (MemoryExtensions.Equals(key, camp, StringComparison.Ordinal))
return "campiagn_it_banner_size_360".AsSpan();
else if (MemoryExtensions.Equals(key, publisher_id, StringComparison.Ordinal))
return "78983".AsSpan();
else if (MemoryExtensions.Equals(key, Transaction, StringComparison.Ordinal))
return "c1032072-f815-413b-a57c-4a027f681e6".AsSpan();
else if (MemoryExtensions.Equals(key, AffSub1, StringComparison.Ordinal))
return "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a".AsSpan();
else if (MemoryExtensions.Equals(key, AffSite, StringComparison.Ordinal))
return "vbvsdgdavhdgdvjs_46_789-p90".AsSpan();
else if (MemoryExtensions.Equals(key, creative_id, StringComparison.Ordinal))
return "360x360".AsSpan();
else if (MemoryExtensions.Equals(key, ua, StringComparison.Ordinal))
return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36".AsSpan();
else if (MemoryExtensions.Equals(key, ip, StringComparison.Ordinal))
return "192.168.1.1".AsSpan();
else
return "".AsSpan();
}
}
[Benchmark]
public string StandardTokenReplace()
{
string request = "http://wwww.example.com?a=campiagn_it_banner_size_360&b=78983&h=c1032072-f815-413b-a57c-4a027f681e6&y=78bea32a-6ead-4ea0-b9f2-9489ebc43d6a&ty=vbvsdgdavhdgdvjs_46_789-p90&yhhh=360x360&sua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F90.0.4430.93%20Safari%2F537.36&ppp=192.168.1.1";
string redirecturl = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}&ieruiero{343454";
int max_allocation = Math.Max(request.Length, redirecturl.Length) + Math.Abs(request.Length - redirecturl.Length);
//get original url and take the longest one + domain
return redirecturl.Replace(Placeholders.camp, Placeholders.getVal(Placeholders.camp))
.Replace(Placeholders.publisher_id, Placeholders.getVal(Placeholders.publisher_id))
.Replace(Placeholders.creative_id, Placeholders.getVal(Placeholders.creative_id))
.Replace(Placeholders.ua, Placeholders.getVal(Placeholders.ua))
.Replace(Placeholders.lang, Placeholders.getVal(Placeholders.lang))
.Replace(Placeholders.ip, Placeholders.getVal(Placeholders.ip))
.Replace(Placeholders.Transaction, Placeholders.getVal(Placeholders.Transaction))
.Replace(Placeholders.AffSite, Placeholders.getVal(Placeholders.AffSite))
.Replace(Placeholders.AdsetId, Placeholders.getVal(Placeholders.AdsetId))
.Replace(Placeholders.AffSub1, Placeholders.getVal(Placeholders.AffSub1));
}
1 最大分配
int max_allocation = Math.Max(request.Length, redirecturl.Length) * 3;
我們可以計算出字符串的正確大小,但它的性能會更差。 對於這種情況,我們可以假設一個最大長度。
www.example.com?camp=1234567890123456789023456789012345678902345678 www.replace.com?{camp}{camp}{camp}{camp}{camp}{camp}{camp}
不會工作。
2 獲得價值
ReadOnlySpan<char> with = Placeholders.getVal(replace.ToString()).AsSpan();
如果占位符重復,我們可以緩存該值或在移動到下一個占位符之前搜索所有出現的事件。
public static string getVal(string key) vs public static string getVal(ReadOnlySpan key)
使用字符串版本我們仍然有更好的性能。 有什么改進建議嗎?
// * 概括 *
BenchmarkDotNet=v0.12.1,OS=Windows 10.0.19041.928 (2004/?/20H1) Intel Core i9-10900 CPU 2.80GHz,1 個 CPU,20 個邏輯內核和 10 個物理內核 .NET Core SDK=5.0.202 [Host]:.NET Core 3.1.14(CoreCLR 4.700.21.16201,CoreFX 4.700.21.16208),X64 RyuJIT .NET Core 3.1:.NET Core 3.1.14(CoreCLR 4.700.21.16201,CoreFX20JIT 4.716)
Job=.NET Core 3.1 工具鏈=.NET Core 3.1
方法 | URL | 尋找 | 代替 | 意思 | 錯誤 | 標准偏差 | 秩 | 0代 | 第一代 | 第 2 代 | 已分配 |
---|---|---|---|---|---|---|---|---|---|---|---|
快速代幣替換 | [196] | 518.8 納秒 | 4.63 納秒 | 3.61 納秒 | 1個 | 0.2470 | 0.0038 | - | 2.52 KB | ||
FastTokenReplaceImproveMem | [196] | 584.4 納秒 | 6.84 納秒 | 5.71 納秒 | 2個 | 0.2050 | 0.0010 | - | 2.09 KB | ||
標准令牌替換 | [196] | 4,242.7 納秒 | 84.82 納秒 | 94.27 納秒 | 3個 | 0.6866 | - | - | 7.06 KB |
內置方法應該盡可能優化,因此嘗試制作比它更快的通用 Replace function 是不現實的。 也許使用不安全的代碼和/或用 C/C++ 之類的東西編寫的 dll 可能會很接近。 也就是說,如果您創建一個適用於您的特定約束的 function,您可能能夠獲得更多性能。 從您的示例中,有 3 種可能的方法可以更快地完成工作:
IndexOf
的調用。以下是我的基准測試結果(AMD Ryzen 9 5950X):
方法 | 意思 | 比率 | 已分配 | 分配比例 |
---|---|---|---|---|
普通的 | 875.4 納秒 | 1.00 | 8.02 KB | 1.00 |
BatchNaive | 5,512.5 納秒 | 6.32 | 1.14 KB | 0.14 |
BatchNaiveCached | 2,838.7 納秒 | 3.24 | 1.47 KB | 0.18 |
批緩存 | 964.8 納秒 | 1.10 | 1.47 KB | 0.18 |
BatchDelimiter緩存 | 312.8 納秒 | 0.36 | 1.47 KB | 0.18 |
BatchDelimiter預分配 | 266.0 納秒 | 0.30 | 2.5 KB | 0.31 |
BatchDelimiterPreallocatedSafe | 255.6 納秒 | 0.29 | 2.5 KB | 0.31 |
如您所見,對於您的示例,后三個速度大約快 3 倍。 不僅如此,它們還使用更少的(堆)memory。
這是代碼:
const string origin = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}";
var replaceTasks = new (string, string)[]
{
("camp", "campiagn_it_banner_size_360"),
("publisher_id", "78983"),
("transaction", "c1032072-f815-413b-a57c-4a027f681e60"),
("aff_sub1", "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"),
("aff_site", "vbvsdgdavhdgdvjs_46_789-p90"),
("creative_id", "360x360"),
("ua", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"),
("ip", "192.168.1.1"),
("lang", "en")
};
char prefix = '{';
char postfix = '}';
// the above are arguments to the funtion
ReadOnlySpan<char> tmp = origin;
// we store the occurences in the queue while calculating the length of the final string
// so we don't have to search for them the 2nd time later
var occurrences = new Queue<(int at, int task)>();
int offset = 0;
int resultLength = tmp.Length;
int prefixIndex;
while ((prefixIndex = tmp.IndexOf(prefix)) != -1)
{
(int at, int task) next = (prefixIndex, -1);
for (int i = 0; i < replaceTasks.Length; i++)
{
// we expect the postfix to be at this place
int postfixIndex = prefixIndex + replaceTasks[i].toReplace.Length + 1;
if (tmp.Length > postfixIndex // check that we don't cross the bounds
&& tmp[postfixIndex] == postfix // check that the postfix IS were we expect it to be
&& tmp.Slice(prefixIndex + 1, postfixIndex - prefixIndex - 1).SequenceEqual(replaceTasks[i].toReplace)) // compare all the characters in between the delimiters
{
next.task = i;
break;
}
}
if (next.task == -1)
{
// this delimiter character is just part of the string, so skip it
tmp = tmp.Slice(prefixIndex + 1);
offset += prefixIndex + 1;
continue;
}
int newStart = next.at + replaceTasks[next.task].toReplace.Length + 2;
tmp = tmp.Slice(newStart, tmp.Length - newStart);
occurrences.Enqueue((next.at + offset, next.task));
offset += newStart;
resultLength += replaceTasks[next.task].replaceWith.Length - replaceTasks[next.task].toReplace.Length - 2;
}
string result = string.Create(resultLength, (replaceTasks, occurrences), (chars, state) =>
{
var replaceTasks = state.replaceTasks;
var occurrences = state.occurrences;
var position = 0;
ReadOnlySpan<char> origin = origin;
int lastStart = 0;
while (occurrences.Count != 0)
{
(int at, int task) next = occurrences.Dequeue();
origin.Slice(lastStart, next.at - lastStart).CopyTo(chars.Slice(position));
replaceTasks[next.task].replaceWith.CopyTo(chars.Slice(position + next.at - lastStart));
position += next.at - lastStart + replaceTasks[next.task].replaceWith.Length;
lastStart = next.at + replaceTasks[next.task].toReplace.Length + 2;
}
origin.Slice(lastStart).CopyTo(chars.Slice(position));
});
return result;
const string origin = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}";
var replaceTasks = new (string, string)[]
{
("camp", "campiagn_it_banner_size_360"),
("publisher_id", "78983"),
("transaction", "c1032072-f815-413b-a57c-4a027f681e60"),
("aff_sub1", "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"),
("aff_site", "vbvsdgdavhdgdvjs_46_789-p90"),
("creative_id", "360x360"),
("ua", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"),
("ip", "192.168.1.1"),
("lang", "en")
};
char prefix = '{';
char postfix = '}';
// the above are arguments to the funtion
int resultLength = -1;
string result = string.Create(origin.Length * 2, (replaceTasks, prefix, postfix), (chars, state) =>
{
var replaceTasks = state.ReplaceTasksWithoutDelimiters;
char prefix = state.prefix;
char postfix = state.postfix;
var position = 0;
ReadOnlySpan<char> tmp = origin;
int prefixIndex;
while ((prefixIndex = tmp.IndexOf(prefix)) != -1)
{
bool replaced = false;
for (int i = 0; i < replaceTasks.Length; i++)
{
// we expect the postfix to be at this place
int postfixIndex = prefixIndex + replaceTasks[i].toReplace.Length + 1;
if (tmp.Length > postfixIndex // check that we don't cross the bounds
&& tmp[postfixIndex] == postfix // check that the postfix IS were we expect it to be
&& tmp.Slice(prefixIndex + 1, postfixIndex - prefixIndex - 1).SequenceEqual(replaceTasks[i].toReplace)) // compare all the characters in between the delimiters
{
if (position + prefixIndex + replaceTasks[i].replaceWith.Length <= chars.Length) // check if the following copy operations would exceed our preallocated memory bounds
{
tmp.Slice(0, prefixIndex).CopyTo(chars.Slice(position));
replaceTasks[i].replaceWith.CopyTo(chars.Slice(position + prefixIndex));
position += prefixIndex + replaceTasks[i].replaceWith.Length;
tmp = tmp.Slice(postfixIndex + 1);
replaced = true;
break;
}
else
{
return;
}
}
}
if (!replaced) { // this delimiter is just part of the string, so skip it
tmp.Slice(0, prefixIndex + 1).CopyTo(chars.Slice(position));
position += prefixIndex + 1;
tmp = tmp.Slice(prefixIndex+ 1);
}
}
if (position + tmp.Length <= chars.Length) // check if the following copy operation would exceed our preallocated memory bounds
{
// copy the remaining string
tmp.CopyTo(chars.Slice(position));
resultLength = position + tmp.Length;
}
});
if (resultLength != -1)
{
return result.Substring(0, resultLength); // there are many extrea null characters ('\0') at the end of our string, so we trim the string down
// Note: the above realocates the string. if you can find a way to trim the string inplace, it should reduce the memory usage and speed a little.
}
else
{
// the resulting string exceeded our preallocated memory. fallback to another method
result = origin;
for (int i = 0; i < ReplaceTasks.Length; i++)
{
(string toReplace, string replaceWith) = ReplaceTasks[i];
result = result.Replace(toReplace, replaceWith);
}
return result;
}
當然,當僅替換 1 個令牌時,這會比構建方法更慢並且使用更多 memory。 另一方面,批次大小越大,速度和 memory 改進越大(即使批次中的標記不在原始字符串中):
方法 | 批量大小 | 意思 | 比率 | 已分配 | 分配比例 |
---|---|---|---|---|---|
普通的 | 1個 | 103.4 納秒 | 1.00 | 736乙 | 1.00 |
BatchDelimiterPreallocatedSafe | 1個 | 154.4 納秒 | 1.49 | 2192乙 | 2.98 |
普通的 | 50 | 1,870.1 納秒 | 1.00 | 8208乙 | 1.00 |
BatchDelimiterPreallocatedSafe | 50 | 254.8 納秒 | 0.14 | 2560乙 | 0.31 |
使用示例數據運行一些測試,似乎String.Replace
是高度優化的,並且StringBuilder.Replace
和我的IndexOfAny
的變體都返回首先找到的匹配項(基於 CodeReview 的改進)都較慢。 在我的測試中,使用一組元組進行替換是最快的:
var s = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}";
var replacementsa = new[] {
("{camp}", "campiagn_it_banner_size_360"),
("{publisher_id}", "78983"),
("{transaction}", "c1032072-f815-413b-a57c-4a027f681e60"),
("{aff_sub1}", "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"),
("{aff_site}", "vbvsdgdavhdgdvjs_46_789-p90"),
("{creative_id}", "360x360"),
("{ua}", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"),
("{ip}", "192.168.1.1"),
("{lang}", "en")
};
public static string MultiReplace(this string s, (string match,string replace)[] replacements) {
for (int replacementNum = 0; replacementNum < replacements.Length; ++replacementNum)
s = s.Replace(replacements[replacementNum].match, replacements[replacementNum].replace);
return s;
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.