[英]Extract First and Last word from a string and remove them if matches the third one using C#
我的字符串是这样的:
// Sample input: its first word ("Psppsp") and its last word ("psppsp") differ only in letter case.
string str = "Psppsp palm springs airport, 3400 e tahquitz canyon way, Palm springs, CA, US, 92262-6966 psppsp";
另外,我会单独得到一个字符串“psppsp”,需要将它与 str 中的第一个和最后一个单词进行比较;如果其中任何一个与之匹配,就需要把匹配的那个单词从 str 中删除。
我需要知道最好和最快的方法。
最快的方式是 O(n)。下面是一段代码示例,它还可以进一步改进。
string str = "Psppsp palm springs airport, 3400 e tahquitz canyon way, Palm springs, CA, US, 92262-6966 psppsp";
string word = "psppsp";
// Strip `word` from str when it is the first and/or the last space-delimited word.
// The comparison is ordinal and case-sensitive, so the leading "Psppsp" of the sample
// is not treated as a match for "psppsp".
// An empty word cannot be a word of str, so there is nothing to strip in that case.
if (word.Length > 0)
{
    // First word: str must start with `word`, and the match must stop at a word boundary,
    // i.e. be followed by a space or make up the whole string.
    if (str.Length >= word.Length
        && string.CompareOrdinal(str, 0, word, 0, word.Length) == 0
        && (str.Length == word.Length || str[word.Length] == ' '))
    {
        // Drop the word together with the separator space(s) that followed it.
        str = str.Substring(word.Length).TrimStart(' ');
    }
    // Last word: str must end with `word`, and the match must begin at a word boundary,
    // i.e. be preceded by a space or make up the whole string. Without this boundary
    // test a mere suffix such as the tail of "xpsppsp" would be cut off.
    if (str.Length >= word.Length
        && string.CompareOrdinal(str, str.Length - word.Length, word, 0, word.Length) == 0
        && (str.Length == word.Length || str[str.Length - word.Length - 1] == ' '))
    {
        // Drop the word together with the separator space(s) that preceded it.
        str = str.Substring(0, str.Length - word.Length).TrimEnd(' ');
    }
}
有几种方法可以做到这一点。 这是使用正则表达式的一种方法。 您可以预编译正则表达式,如果您在许多字符串上执行此操作,则可以加快处理速度:
string str = "Psppsp palm springs airport, 3400 e tahquitz canyon way, Palm springs, CA, US, 92262-6966 psppsp";
string match = "psppsp";
// Build 2 re-usable regexes.
// Regex.Escape keeps regex metacharacters inside `match` (".", "+", "(" ...) from being
// interpreted as pattern syntax.
string escaped = Regex.Escape(match);
// Leading word: anchored at the start; (?!\S) demands whitespace or end of input right
// after the word, so a longer first word that merely begins with `match` is left alone.
string pattern1 = "^" + escaped + "(?!\\S)\\s*";
// Trailing word: anchored at the end; (?<!\S) demands whitespace or start of input right
// before the word, so a longer last word that merely ends with `match` is left alone.
string pattern2 = "\\s*(?<!\\S)" + escaped + "$";
Regex rgx1 = new Regex(pattern1, RegexOptions.Compiled | RegexOptions.IgnoreCase);
Regex rgx2 = new Regex(pattern2, RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Apply the 2 regexes: the trailing word is removed first, then the leading one.
str = rgx1.Replace(rgx2.Replace(str, ""), "");
如果匹配项不可能出现在字符串的其他位置,则可以使用 linq。这需要先把 Split 返回的数组转换为列表:
// Walk the whitespace-separated tokens and keep those that differ from match
// (compared ordinally, ignoring case).
List<string> kept = new List<string>();
foreach (string token in str.Split())
{
    bool isMatch = string.Equals(token, match, StringComparison.OrdinalIgnoreCase);
    if (!isMatch)
    {
        kept.Add(token);
    }
}
// Re-assemble the surviving tokens, separated by single spaces.
str = string.Join(" ", kept.ToArray());
或者,更简单的方法
// OrdinalIgnoreCase compares char by char, so a successful match always spans exactly
// match.Length characters, which the Substring calls below rely on. A culture-sensitive
// comparison may skip ignorable characters, so the matched span could have another length.
// The extra condition makes sure a whole word was matched: the neighbouring character must
// be whitespace, unless match makes up the entire string.
if (str.StartsWith(match, StringComparison.OrdinalIgnoreCase)
    && (str.Length == match.Length || char.IsWhiteSpace(str[match.Length]))) {
    str = str.Substring(match.Length);
}
if (str.EndsWith(match, StringComparison.OrdinalIgnoreCase)
    && (str.Length == match.Length || char.IsWhiteSpace(str[str.Length - match.Length - 1]))) {
    str = str.Substring(0, str.Length - match.Length);
}
// Remove the separator whitespace left behind at either end.
str = str.Trim();
不知道其中哪一个(如果有)是“最佳”的。 我喜欢最后一个。
您可以使用str.StartsWith(x),str.EndsWith(x),str.Contains(x),str.IndexOf(x)查找和定位搜索字符串,并使用str.Substring(start,len)更改字符串。 您可以通过多种方法来实现此字符串操作,但您要求...
最佳和最快:让我们使用一些完全安全的“不安全”代码,以便我们可以使用指针。
// note this is an extension method so you need to include it in a static class
/// <summary>
/// Returns a copy of <paramref name="source"/> with every non-overlapping occurrence of
/// <paramref name="remove"/> deleted. Occurrences are found left to right and compared
/// case-insensitively; all remaining characters keep their original case.
/// </summary>
/// <param name="source">The string to remove text from.</param>
/// <param name="remove">The text to delete. An empty string removes nothing.</param>
/// <returns>
/// The resulting string; <paramref name="source"/> itself when <paramref name="remove"/>
/// is empty or longer than <paramref name="source"/>.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="source"/> or <paramref name="remove"/> is null.
/// </exception>
public unsafe static string RemoveCaseInsensitive(this string source, string remove)
{
    if (source == null)
    {
        throw new System.ArgumentNullException("source");
    }
    if (remove == null)
    {
        throw new System.ArgumentNullException("remove");
    }
    int srcLen = source.Length;
    int rmvLen = remove.Length;
    // Nothing can match: return early. This also guarantees the buffers pinned below are non-empty.
    if (rmvLen == 0 || rmvLen > srcLen)
    {
        return source;
    }
    // Lower-case BOTH operands (culture-independently) so the comparison is really
    // case-insensitive; lower-casing only the source would make "Psppsp" unmatchable.
    string sourceLower = source.ToLowerInvariant();
    string removeLower = remove.ToLowerInvariant();
    // The result can never be longer than the source.
    char[] destChar = new char[srcLen];
    int dstPos = 0;
    fixed (char* srcPtr = source, srcLwrPtr = sourceLower, rmvPtr = removeLower, dstPtr = destChar)
    {
        int srcPos = 0;
        // Last source index at which a full occurrence of remove still fits.
        int lastStart = srcLen - rmvLen;
        while (srcPos < srcLen)
        {
            // Test for an occurrence starting exactly at srcPos. Every position is checked
            // from scratch, so an abandoned partial match cannot hide a real one
            // (for example "ab" inside "aab").
            int rmvPos = 0;
            if (srcPos <= lastStart)
            {
                while (rmvPos < rmvLen && *(srcLwrPtr + srcPos + rmvPos) == *(rmvPtr + rmvPos))
                {
                    rmvPos++;
                }
            }
            if (rmvPos == rmvLen)
            {
                // Full occurrence: skip it without copying anything.
                srcPos += rmvLen;
            }
            else
            {
                // Compare against the lower-cased copy, but copy from the original so the
                // kept text preserves its case.
                *(dstPtr + dstPos) = *(srcPtr + srcPos);
                dstPos++;
                srcPos++;
            }
        }
    }
    // return the string
    return new string(destChar, 0, dstPos);
}
用法:
// RemoveCaseInsensitive returns a new string and does not change str, so the result has to be captured.
// No surrounding space: meant to remove all instances throughout the string.
string withoutAny = str.RemoveCaseInsensitive("Psppsp");
// Space included at the end: in the example this targets the first instance and its trailing space.
string withoutFirst = str.RemoveCaseInsensitive("Psppsp ");
// Space included at the start: in the example this targets the final instance and its leading space.
string withoutLast = str.RemoveCaseInsensitive(" psppsp");
您可能会问:为什么要使用不安全代码?处理数组时,每次访问数组中的元素都会进行边界检查,因此 str[1]、str[2]、str[3] 等每次访问都有开销。当您要对成千上万个字符做这种检查时,这些开销就会累积起来。使用不安全代码可以通过指针直接访问内存,没有边界检查,也就避免了这部分拖慢操作的开销。性能上的差异可能很大。
作为性能差异的一个示例,我创建了两个版本:一个是使用标准字符串索引器的安全版本,另一个是使用指针的不安全版本。我把要保留的字符串和要删除的字符串重复追加成千上万次,构造出一个很长的字符串。结果很明显:不安全版本的完成时间约为安全版本的一半。除了安全与不安全的区别之外,这两个方法完全相同。
/// <summary>
/// Case-insensitive substring removal implemented twice with the same algorithm:
/// once through raw pointers and once through ordinary indexers, so that the two
/// can be benchmarked against each other.
/// </summary>
public static class StringExtensions
{
    /// <summary>
    /// Pointer-based version. Returns a copy of <paramref name="source"/> with every
    /// non-overlapping occurrence of <paramref name="remove"/> deleted. Occurrences are found
    /// left to right and compared case-insensitively; kept characters retain their case.
    /// </summary>
    /// <param name="source">The string to remove text from.</param>
    /// <param name="remove">The text to delete. An empty string removes nothing.</param>
    /// <returns>
    /// The resulting string; <paramref name="source"/> itself when <paramref name="remove"/>
    /// is empty or longer than <paramref name="source"/>.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">An argument is null.</exception>
    public unsafe static string RemoveUnsafe(this string source, string remove)
    {
        if (source == null)
        {
            throw new System.ArgumentNullException("source");
        }
        if (remove == null)
        {
            throw new System.ArgumentNullException("remove");
        }
        int srcLen = source.Length;
        int rmvLen = remove.Length;
        // Nothing can match: return early. This also guarantees the pinned buffers are non-empty.
        if (rmvLen == 0 || rmvLen > srcLen)
        {
            return source;
        }
        // Lower-case BOTH operands (culture-independently), exactly as RemoveSafe does;
        // lower-casing only the source would make a pattern such as "goodBYE" unmatchable.
        string sourceLower = source.ToLowerInvariant();
        string removeLower = remove.ToLowerInvariant();
        // The result can never be longer than the source.
        char[] destChar = new char[srcLen];
        int dstPos = 0;
        fixed (char* srcPtr = source, srcLwrPtr = sourceLower, rmvPtr = removeLower, dstPtr = destChar)
        {
            int srcPos = 0;
            // Last source index at which a full occurrence of remove still fits.
            int lastStart = srcLen - rmvLen;
            while (srcPos < srcLen)
            {
                // Test for an occurrence starting exactly at srcPos. Every position is checked
                // from scratch, so an abandoned partial match cannot hide a real one
                // (for example "ab" inside "aab").
                int rmvPos = 0;
                if (srcPos <= lastStart)
                {
                    while (rmvPos < rmvLen && *(srcLwrPtr + srcPos + rmvPos) == *(rmvPtr + rmvPos))
                    {
                        rmvPos++;
                    }
                }
                if (rmvPos == rmvLen)
                {
                    // Full occurrence: skip it without copying anything.
                    srcPos += rmvLen;
                }
                else
                {
                    // Compare against the lower-cased copy, but copy from the original so the
                    // kept text preserves its case.
                    *(dstPtr + dstPos) = *(srcPtr + srcPos);
                    dstPos++;
                    srcPos++;
                }
            }
        }
        // return the string
        return new string(destChar, 0, dstPos);
    }

    /// <summary>
    /// Indexer-based version of <see cref="RemoveUnsafe"/>; same contract and same algorithm,
    /// but every character access goes through the string/array indexers.
    /// </summary>
    /// <param name="source">The string to remove text from.</param>
    /// <param name="remove">The text to delete. An empty string removes nothing.</param>
    /// <returns>
    /// The resulting string; <paramref name="source"/> itself when <paramref name="remove"/>
    /// is empty or longer than <paramref name="source"/>.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">An argument is null.</exception>
    public static string RemoveSafe(this string source, string remove)
    {
        if (source == null)
        {
            throw new System.ArgumentNullException("source");
        }
        if (remove == null)
        {
            throw new System.ArgumentNullException("remove");
        }
        int srcLen = source.Length;
        int rmvLen = remove.Length;
        // Nothing can match: return early (previously an empty remove string caused an
        // out-of-range index on the pattern).
        if (rmvLen == 0 || rmvLen > srcLen)
        {
            return source;
        }
        // Lower-case both operands (culture-independently) for the case-insensitive comparison.
        string sourceLower = source.ToLowerInvariant();
        string removeLower = remove.ToLowerInvariant();
        // Output buffer; the result can never be longer than the source.
        char[] destChar = new char[srcLen];
        int dstPos = 0;
        int srcPos = 0;
        // Last source index at which a full occurrence of remove still fits.
        int lastStart = srcLen - rmvLen;
        while (srcPos < srcLen)
        {
            // Test for an occurrence starting exactly at srcPos; see RemoveUnsafe for why
            // each position is checked from scratch.
            int rmvPos = 0;
            if (srcPos <= lastStart)
            {
                while (rmvPos < rmvLen && sourceLower[srcPos + rmvPos] == removeLower[rmvPos])
                {
                    rmvPos++;
                }
            }
            if (rmvPos == rmvLen)
            {
                // Full occurrence: skip it without copying anything.
                srcPos += rmvLen;
            }
            else
            {
                // Compare against the lower-cased copy, but copy from the original so the
                // kept text preserves its case.
                destChar[dstPos] = source[srcPos];
                dstPos++;
                srcPos++;
            }
        }
        // return the string
        return new string(destChar, 0, dstPos);
    }
}
这是基准测试:
/// <summary>
/// Micro-benchmark that times RemoveSafe and RemoveUnsafe on one large generated string.
/// </summary>
internal static class StringRemoveTests
{
    /// <summary>
    /// Builds the benchmark input: 1,000,000 appended chunks, where every third chunk
    /// (index divisible by 3) is "GoodBye" and the others are twenty 'x' characters.
    /// </summary>
    private static string CreateString()
    {
        string x = "xxxxxxxxxxxxxxxxxxxx";
        string y = "GoodBye";
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < 1000000; i++)
        {
            sb.Append(i % 3 == 0 ? y : x);
        }
        return sb.ToString();
    }

    /// <summary>
    /// Times a single invocation of <paramref name="removal"/> on a freshly built input.
    /// Building the input is not part of the measured interval.
    /// </summary>
    /// <param name="removal">The removal routine under test.</param>
    /// <returns>Elapsed time in whole milliseconds.</returns>
    private static int Measure(System.Func<string, string> removal)
    {
        string str = CreateString();
        // Stopwatch is the timer meant for measuring elapsed intervals; DateTime.Now reads
        // the wall clock, which is coarser and can be adjusted while the test runs.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        removal(str);
        timer.Stop();
        return (int)timer.ElapsedMilliseconds;
    }

    /// <summary>Times the pointer-based implementation; returns milliseconds.</summary>
    private static int RunBenchMarkUnsafe()
    {
        return Measure(s => s.RemoveUnsafe("goodBYE"));
    }

    /// <summary>Times the indexer-based implementation; returns milliseconds.</summary>
    private static int RunBenchMarkSafe()
    {
        return Measure(s => s.RemoveSafe("goodBYE"));
    }

    /// <summary>
    /// Runs the safe benchmark, then the unsafe one, and writes each result
    /// (in milliseconds) to the console.
    /// </summary>
    public static void RunBenchmarks()
    {
        Console.WriteLine("Safe version: " + RunBenchMarkSafe());
        Console.WriteLine("Unsafe version: " + RunBenchMarkUnsafe());
    }
}
/// <summary>Console entry point that launches the string-removal benchmarks.</summary>
class Program
{
    /// <summary>
    /// Runs the benchmarks and then waits for a line of console input before returning.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    private static void Main(string[] args)
    {
        StringRemoveTests.RunBenchmarks();

        // Block until a line has been read from standard input.
        Console.ReadLine();
    }
}
输出:(结果以毫秒为单位)
// 1st run
Safe version: 569
Unsafe version: 260
// 2nd run
Safe version: 709
Unsafe version: 329
// 3rd run
Safe version: 486
Unsafe version: 279
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.