[英]How do I split up a search string to allow for quoted text?
我想从搜索字段的文本中列出字符串。 我想将双引号中的所有内容都分开。
例如:
sample' "string's are, more "text" making" 12.34,hello"pineapple sundays
产生
sample'
string's are, more_ //underscore shown to display space
text
making
12.34
hello
pineapple
sundays
编辑:这是我的(有点)优雅的解决方案,感谢大家的帮助!
Private Function GetSearchTerms(ByVal searchText As String) As String()
    ' Breaks a raw search string into individual search terms.
    ' Text enclosed in a pair of double quotes is kept together as one term;
    ' everything else is split on spaces and commas.

    ' Strip every character that is not a letter, digit, double quote, apostrophe,
    ' period, comma, equals sign or space.
    Dim cleaned As String = System.Text.RegularExpressions.Regex.Replace(searchText, "[^a-zA-Z0-9""'.,= ]", "")

    ' Surround each double quote with spaces, then cut the text at the quotes.
    ' Segments at even positions lie outside quotes, segments at odd positions lie inside.
    Dim segments As String() = cleaned.Replace("""", " "" ").Split(""""c)

    Dim wordSeparators As String() = New String() {" ", ","}
    Dim terms As New System.Collections.Generic.List(Of String)()
    Dim lastPosition As Integer = segments.Length - 1

    ' An even number of segments means an odd number of quotes, i.e. the last quote was never closed.
    Dim lastQuoteIsOpen As Boolean = (segments.Length Mod 2 = 0)

    For position As Integer = 0 To lastPosition
        Dim segment As String = segments(position)

        ' A segment is a quoted phrase only when it sits at an odd position and is not
        ' the tail that follows an unclosed quote.
        Dim isQuotedPhrase As Boolean = (position Mod 2 <> 0) AndAlso Not (lastQuoteIsOpen AndAlso position = lastPosition)

        If isQuotedPhrase Then
            ' Drop the single space that was inserted after the opening quote and before the closing quote.
            Dim phrase As String = segment.Substring(1, segment.Length - 2)
            If phrase.Length > 0 Then
                terms.Add(phrase)
            End If
        Else
            ' Plain text (or the remainder after an unclosed quote): one term per word.
            terms.AddRange(segment.Split(wordSeparators, StringSplitOptions.RemoveEmptyEntries))
        End If
    Next

    Return terms.ToArray()
End Function
哎,VB 的注释很难看,有人知道如何清理吗?
这个解析问题比您想象的要复杂。建议您查看 TextFieldParser 类和 FileHelpers 库:http://www.filehelpers.com/
这不是完整的解决方案,因为它缺少一些验证检查,但它具有您需要的一切。
我的 CharOccurs() 会查找所有出现的 '"' 字符,
并将它们的位置按顺序存储到列表中。
/// <summary>
/// Collects the zero-based index of every occurrence of <paramref name="charToFind"/>
/// in <paramref name="stringToSearch"/>, in ascending order.
/// </summary>
/// <param name="stringToSearch">The text to scan.</param>
/// <param name="charToFind">The character to look for.</param>
/// <returns>The list of matching indices; empty when the character does not occur.</returns>
public static List<int> CharOccurs(string stringToSearch, char charToFind)
{
    List<int> positions = new List<int>();

    // Each search resumes one character past the previous hit; IndexOf yields -1 once nothing is left.
    for (int hit = stringToSearch.IndexOf(charToFind, 0);
         hit != -1;
         hit = stringToSearch.IndexOf(charToFind, hit + 1))
    {
        positions.Add(hit);
    }

    return positions;
}
下面的代码主要用于说明思路。我把引号内的字符串单独取出,并只按 '"' 字符
进行拆分;然后用 Substring 取出引号外的字符串,并按 ","、空格和 '"'
字符进行拆分。请添加验证检查,使其更通用。
// Illustrative driver: splits `input` into search terms and collects them in `output`.
// Text outside the quoted region is split on spaces, commas and stray quotes; the region between
// the first quote and the last quote that closes a pair is split on the quote character only.
string input = "sample' \"string's are, more \"text\" making\" 12.34,hello\"pineapple sundays";
List<int> positions = CharOccurs(input, '\"');
string within_quotes, outside_quotes;
string[] arr_within_quotes;
List<string> output = new List<string>();
char[] separators = new char[] { ' ', ',', '"' };

if (positions.Count < 2)
{
    // No complete pair of quotes (zero or one quote): there is no quoted region, so the whole
    // input is plain text. Previously this case threw because positions[0] / positions[Count - 2]
    // did not exist.
    within_quotes = string.Empty;
    arr_within_quotes = new string[0];
    output.AddRange(input.Split(separators, System.StringSplitOptions.RemoveEmptyEntries));
}
else
{
    // With an odd number of quotes the final quote is unclosed, so the quoted region ends at
    // the quote before it.
    int lastPairedQuote = positions.Count % 2 == 0
        ? positions[positions.Count - 1]
        : positions[positions.Count - 2];

    // Everything before the first quote. The length is positions[0] (not positions[0] - 1) so the
    // character right before the quote is kept and a quote at index 0 does not throw.
    output.AddRange(input.Substring(0, positions[0]).Split(separators, System.StringSplitOptions.RemoveEmptyEntries));

    // The quoted region, split on the quote character only so spaces and commas inside it survive.
    within_quotes = input.Substring(positions[0] + 1, lastPairedQuote - positions[0] - 1);
    arr_within_quotes = within_quotes.Split('"');
    output.AddRange(arr_within_quotes);

    // Everything after the quoted region; an unclosed trailing quote is treated as a separator.
    output.AddRange(input.Substring(lastPairedQuote + 1).Split(separators, System.StringSplitOptions.RemoveEmptyEntries));
}
我几个月前为 VB.NET 编写了这个 ParseLine 函数,可能对您有用。它会判断是否存在文本限定符(Text Qualifier),并据此进行拆分。如果您需要,我可以在接下来的几分钟内尝试把它转换为 C#。
您将有一行文本:
sample' "string's are, more "text" making" 12.34,hello"pineapple sundays
然后将其作为 strLine 传入,并设置 strDataDelimiter = "," 和 strTextQualifier = """"
希望这可以帮助你。
''' <summary>
''' Splits one line of delimited text into its fields. When a text qualifier is supplied,
''' fields that start with the qualifier are read up to their closing qualifier, so the
''' delimiter may appear inside them; doubled qualifiers inside a field are collapsed to one.
''' </summary>
''' <param name="strLine">The line to split.</param>
''' <param name="strDataDelimiter">
''' Separator between fields. NOTE(review): the qualifier branch skips a fixed number of
''' characters past the delimiter (Substring(lngChrPos + 1) / Substring(lngChrPos + 2)) and
''' compares a single character against it, so it appears to assume a one-character
''' delimiter - confirm against callers.
''' </param>
''' <param name="strTextQualifier">Text qualifier; when empty the line is simply split on the delimiter.</param>
''' <param name="strQualifierSplitter">
''' Character used to join the parsed fields into an intermediate string that is split again
''' at the end (defaults to vbTab). A field containing this character would be split further.
''' </param>
''' <returns>The fields of the line as a string array.</returns>
''' <exception cref="System.Exception">
''' Any exception raised while parsing is caught and rethrown as a new Exception whose message
''' is "Error Splitting Special String - " followed by the original message.
''' </exception>
Public Function ParseLine(ByVal strLine As String, Optional ByVal strDataDelimiter As String = "", Optional ByVal strTextQualifier As String = "", Optional ByVal strQualifierSplitter As Char = vbTab) As String()
Try
' Field currently being extracted.
Dim strField As String = Nothing
' Parsed fields joined with strQualifierSplitter; split into the result at the end.
Dim strNewLine As String = Nothing
' Position of the closing qualifier or of the next delimiter inside strLine.
Dim lngChrPos As Integer = 0
Dim bUseQualifier As Boolean = False
' NOTE(review): declared but never read or written in this function.
Dim bRemobedLastDel As Boolean = False
Dim bEmptyLast As Boolean = False ' Take into account where the line ends in a field delimiter, the ParseLine function should keep that empty field as well.
Dim strList As String()
' Sample inputs:
'TEST,23479234,Just Right 950g,02/04/2006,1234,5678,9999,0000
'TEST,23479234,Just Right 950g,02/04/2006,1234,5678,9999,0000,
'TEST,23479234,Just Right 950g,02/04/2006,1234,,,0000,
'TEST,23479234,Just Right 950g,02/04/2006,1234,5678,9999,,
'TEST,23479234,"Just Right 950g, BO",02/04/2006,,5678,9999,,
'TEST,23479234,"Just Right"" 950g, BO",02/04/2006,,5678,9999,1111,
'TEST23479234 'Kellogg''s Just Right 950g' 02/04/2006 1234 5678 0000 9999
'TEST23479234 '' 02/04/2006 1234 5678 0000 9999
' A non-zero qualifier length becomes True through the implicit Integer-to-Boolean
' conversion (this assignment does not compile under Option Strict On).
bUseQualifier = strTextQualifier.Length()
'split data based on options..
If bUseQualifier Then
'replace double qualifiers for ease of parsing..
'strLine = strLine.Replace(New String(strTextQualifier, 2), vbTab)
'loop and find each field..
' Each pass removes one field from the front of strLine; the loop ends once strLine is
' empty or Nothing (VB's "=" treats both the same for strings).
Do Until strLine = Nothing
If strLine.Substring(0, 1) = strTextQualifier Then
'find closing qualifier
lngChrPos = strLine.IndexOf(strTextQualifier, 1)
'check for missing double qualifiers, unclosed qualifiers
' Keep moving to the next qualifier until it is the last character of the line, no
' qualifier is left (-1), or the character right after it equals the delimiter.
Do Until (strLine.Length() - 1) = lngChrPos OrElse lngChrPos = -1 OrElse _
strLine.Substring(lngChrPos + 1, 1) = strDataDelimiter
lngChrPos = strLine.IndexOf(strTextQualifier, lngChrPos + 1)
Loop
'get field from line..
If lngChrPos = -1 Then
' Unclosed qualifier: the rest of the line (minus the opening qualifier) is the field.
strField = strLine.Substring(1)
strLine = vbNullString
Else
' Text between the opening and the closing qualifier.
strField = strLine.Substring(1, lngChrPos - 1)
If (strLine.Length() - 1) = lngChrPos Then
' Closing qualifier was the last character: nothing left to parse.
strLine = vbNullString
Else
' Skip the closing qualifier and the one character that follows it.
strLine = strLine.Substring(lngChrPos + 2)
If strLine = "" Then
' The line ended right after that character: remember the trailing empty field.
bEmptyLast = True
End If
End If
'strField = String.Format("{0}{1}{2}", strTextQualifier, strField, strTextQualifier)
End If
Else
'find next delimiter..
'lngChrPos = InStr(1, strLine, strDataDelimiter)
lngChrPos = strLine.IndexOf(strDataDelimiter)
'get field from line..
If lngChrPos = -1 Then
' No further delimiter: the remainder is the last field.
strField = strLine
strLine = vbNullString
Else
strField = strLine.Substring(0, lngChrPos)
' Skip one character for the delimiter.
strLine = strLine.Substring(lngChrPos + 1)
If strLine = "" Then
' The line ended with a delimiter: remember the trailing empty field.
bEmptyLast = True
End If
End If
End If
' Now replace double qualifiers with a single qualifier in the "corrected" string
strField = strField.Replace(New String(strTextQualifier, 2), strTextQualifier)
'restore double qualifiers..
'strField = IIf(strField = vbNullChar, vbNullString, strField)
'strField = Replace$(strField, vbTab, strTextQualifier)
'strField = IIf(strField = vbTab, vbNullString, strField)
'strField = strField.Replace(vbTab, strTextQualifier)
'save field to array..
' Append the splitter followed by the field, so the accumulated string always starts
' with a splitter character.
strNewLine = String.Format("{0}{1}{2}", strNewLine, strQualifierSplitter, strField)
Loop
If bEmptyLast = True Then
' Append one more splitter so the final Split yields the trailing empty field.
strNewLine = String.Format("{0}{1}", strNewLine, strQualifierSplitter)
End If
'trim off first nullchar..
' Removes the leading splitter added by the first String.Format above.
' NOTE(review): if strLine was empty on entry the loop never ran and strNewLine is still
' Nothing, so this call would throw and be rethrown by the Catch below - confirm intent.
strNewLine = strNewLine.Substring(1)
'split new line..
strList = strNewLine.Split(strQualifierSplitter)
Else
' NOTE(review): Substring(0) returns the whole string, so this branch leaves strLine
' unchanged even when the line ends with the delimiter - confirm whether stripping the
' trailing delimiter was intended.
If strLine.Substring(strLine.Length - 1, 1) = strDataDelimiter Then
strLine = strLine.Substring(0)
End If
'no qualifier.. do a simply split..
strList = strLine.Split(strDataDelimiter)
End If
'return result..
Return strList
Catch ex As Exception
' Rethrown as a new Exception carrying only the original message; the original
' exception is not passed along as an inner exception.
Throw New Exception(String.Format("Error Splitting Special String - {0}", ex.Message.ToString()))
End Try
End Function
如果您想在引号 " 之前显示下划线以表示空格,可以使用以下代码:
// Replace the space in front of each double quote with an underscore, then split on the quotes.
string underscored = t.Replace(" \"", "_\"");
string[] splitString = underscored.Split('"');
一旦开始加入各种例外情况,这类问题的正则表达式很快就会变得复杂。
尽管如此,主要是出于兴趣和完整性的考虑,这里给出一个:
(?<term>[a-zA-Z0-9'.=]+)|("(?<term>[^"]+)")
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.