[英]Javascript - string.split(regex) keep separators
我想使用正則表達式拆分字符串,並在結果數組中包含分隔符/匹配信息。
在java中我用過:
theString.split("(?<=[!><=}{])|(?=[!><=}{])|(?<= AND )|(?= AND )|(?<= OR )|(?= OR )")
但是,javascript不支持lookbehind ?<=
例如,我想要字符串:
"Reason={Existing problem or fault}{Bestaande probleem of vout}{Other}{Ander} and Required!=No and Results >=10 and Results <=25 and Tst>5 and Tst<80 and Info=test this or that and those and Success!=Yes"
分開:
Reason,=,{,Existing problem, or ,fault,},{,Bestaande probleem of vout,},{,Other,},{,Ander,}, and ,Required,!,=,No, and ,Results,>,=,10, and ,Results,<,=,25, and ,Tst,>,5, and ,Tst,<,80, and ,Info,=,test this, or ,that, and ,those, and ,Success,!,=,Yes
我得到的例子:
// Sample query string used by the three attempts below.
var thestr = "Reason={Existing problem or fault}{Bestaande probleem of vout}{Other}{Ander} and Required!=No and Results >=10 and Results <=25 and Tst>5 and Tst<80 and Info=test this or that and those and Success!=Yes";
// Attempt 1: split the string with a pattern whose alternatives are capture groups.
document.write("::SPLIT::<br>");
// NOTE(review): the literal has a space after the "( OR )" group, so that
// alternative only matches " OR " followed by one more space.
var patt1=new RegExp(/([!><=}{])|( AND )|( OR ) /gi);
var x = thestr.split(patt1);
// Asker's observation: this splits correctly, but the result does not include
// the separators / matched characters.
document.write("length="+x.length+"<br>");
// NOTE(review): the loop counter c is assigned without a declaration.
for (c=0;c<x.length;c++) {
document.write(c+" - "+ x[c]+" |");
}
// Attempt 2: collect the separators themselves with match() and the same pattern.
document.write("<br><br>::MATCH::<br>");
var y = thestr.match(patt1);
// Asker's observation: this shows the matched characters, but it is unclear how
// to combine the results of split and match.
document.write("length="+y.length+"<br>");
// NOTE(review): the loop counter d is assigned without a declaration.
for (d=0;d<y.length;d++) {
document.write(d+" - "+ y[d]+" |");
}
// Attempt 3: split on lookahead assertions (negative and positive) only.
document.write("<br><br>::INCLUDE SEPERATORS::<br>");
// NOTE(review): as in patt1, the last alternative is followed by a literal space.
var patt2=new RegExp(/(?![!><=}{])|(?=[!><=}{])|(?! AND )|(?= AND )|(?! OR )|(?= OR ) /gi);
// Asker's observation: this puts everything in the array, but each character
// becomes a separate array element, which is not the desired result.
var bits = thestr.split(patt2);
document.write("length="+bits.length+"<br>");
// NOTE(review): the loop counter r is assigned without a declaration.
for (r=0;r<bits.length;r++) {
document.write(r+" - "+ bits[r]+" |");
}
如果將整個模式放在一個組中,您還將獲得分隔符:
thestr.split(/([!><=}{]| (?:AND|OR) )/)
這將返回一個數組,如:
["Reason", "=", "", "{", "Existing problem or fault", "}", "", "{", "Bestaande probleem of vout", "}", "", "{", "Other", "}", "", "{", "Ander", "}", " and Required", "!", "", "=", "No and Results ", ">", "", "=", "10 and Results ", "<", "", "=", "25 and Tst", ">", "5 and Tst", "<", "80 and Info", "=", "test this or that and those and Success", "!", "", "=", "Yes"]
然后你只需要過濾空字符串就可以了:
thestr.split(/([!><=}{]| (?:AND|OR) )/).filter(Boolean)
編輯:由於Internet Explorer以及可能的其他瀏覽器不會將捕獲組匹配到的分隔符放入結果數組中,您可以這樣做:
// Split with a non-capturing pattern to get the pieces between separators,
// and collect the separators separately with a global match of the same pattern.
var matches = thestr.split(/(?:[!><=}{]| (?:AND|OR) )/),
separators = thestr.match(/(?:[!><=}{]| (?:AND|OR) )/g);
// NOTE(review): this assignment replaces the existing entry matches[i+1] with
// separators[i]; it does not insert the separator between two pieces.
// NOTE(review): match() returns null when nothing matches, in which case
// separators.length would throw.
for (var i=0; i<separators.length; ++i) {
matches[i+1] = separators[i];
}
這基本上是先將分隔符與其他部分分別取出,然後再將兩者組合起來。
沒有深入到您的查詢結構,我建議您使用replace
方法與函數作為替換,將術語收集到數組中:
/**
 * Collects the terms of a query string into an array by running a regular
 * expression over it with String.prototype.replace and a callback.
 *
 * Every non-empty capture group of every match is appended to the result,
 * in order of appearance. The string produced by replace() is discarded.
 *
 * @param {string} sQuery - The query string to tokenize.
 * @param {RegExp} [oReTerms] - Pattern whose capture groups describe the
 *     terms. It should carry the "g" flag, otherwise replace() only visits
 *     the first match. When omitted, a default pattern is used: group 1 is a
 *     separator (one of ! > < = } { or " AND " / " OR ", case-insensitive),
 *     group 2 is a run of any other characters.
 * @returns {string[]} The captured terms.
 */
function parse(sQuery, oReTerms) {
    var aParsed = [];
    var oRe = oReTerms ||
        /([!><=}{]| (?:AND|OR) )|((?:(?! (?:AND|OR) )[^!><=}{])+)/gi;
    sQuery.replace(oRe, function ($0) {
        // replace() invokes the callback with
        // (match, group1, ..., groupN, offset, string, ...). Captures are
        // strings or undefined, so the first numeric argument is the offset
        // and marks the end of the capture groups.
        var i;
        for (i = 1; i < arguments.length && typeof arguments[i] !== 'number'; i++) {
            if (arguments[i]) {
                // Arrays have push(), not append().
                aParsed.push(arguments[i]);
            }
        }
        return $0; // return what was matched (the result string is unused)
    });
    return aParsed;
}
我之前做過這個來解析HTML標簽和屬性。 我希望這個想法很清楚。 您只需要定義正則表達式,以便它匹配查詢中的所有術語。
對於特定情況,您可以在替換功能中進行另一次替換。
Gumbo上面的拆分功能是一個好主意,但它不起作用。 它應該是:
/**
 * Splits a string on a regular expression and keeps the separators.
 *
 * The result alternates between pieces and separators:
 * [piece0, sep0, piece1, sep1, ..., pieceN]. A piece is an empty string when
 * two separators are adjacent or when a separator sits at either end.
 *
 * The string is scanned once with a global copy of the pattern, so:
 * - a pattern without the "g" flag still finds every separator,
 * - capture groups in the pattern do not add extra entries,
 * - the caller's regex object (including its lastIndex) is left untouched.
 * Zero-length matches are ignored.
 *
 * @param {string} str - The string to split.
 * @param {RegExp} regex - Pattern describing one separator.
 * @returns {string[]} Pieces and separators, interleaved.
 */
function split(str, regex) {
    // Build the flags for the scanning copy; fall back to the individual
    // flag properties where RegExp.prototype.flags is unavailable.
    var flags = typeof regex.flags === 'string'
        ? regex.flags
        : (regex.ignoreCase ? 'i' : '') + (regex.multiline ? 'm' : '');
    // A sticky scan would stop at the first position that is not a separator.
    flags = flags.replace('y', '');
    if (flags.indexOf('g') === -1) {
        flags += 'g';
    }
    var scanner = new RegExp(regex.source, flags);
    var ret = [];
    var lastEnd = 0;
    var match;
    while ((match = scanner.exec(str)) !== null) {
        if (match[0] === '') {
            // exec() does not advance on an empty match; step manually so
            // the loop terminates.
            scanner.lastIndex += 1;
            continue;
        }
        ret.push(str.substring(lastEnd, match.index), match[0]);
        lastEnd = match.index + match[0].length;
    }
    ret.push(str.substring(lastEnd));
    return ret;
}
split('a,b,c', /,/g); // returns ["a", ",", "b", ",", "c"]
如果用於拆分的正則表達式包含捕獲組,我不確定JavaScript會如何處理。 我知道在Python中,如果把分隔符放在捕獲括號中,分隔符也會出現在拆分結果中。
嘗試
result = subject.split(/( or )|( and )|([^\w\s])\b|(?=[^\w\s])/i);
看看會發生什么。
/**
 * Tokenizes a string into separators and the text between them.
 *
 * Every match of the pattern is emitted as its own element. The text between
 * two matches is emitted only when it is non-empty. The text after the last
 * match is always emitted, even when empty.
 *
 * The scan runs on a global copy of the pattern, so a pattern without the
 * "g" flag no longer loops forever and the caller's regex object is not
 * modified. Zero-length matches are skipped.
 *
 * @param {string} str - The string to tokenize.
 * @param {RegExp} re - Pattern describing one separator.
 * @returns {string[]} Tokens in order of appearance.
 */
function split2(str, re) {
    // Build the flags for the scanning copy; fall back to the individual
    // flag properties where RegExp.prototype.flags is unavailable.
    var flags = typeof re.flags === 'string'
        ? re.flags
        : (re.ignoreCase ? 'i' : '') + (re.multiline ? 'm' : '');
    // A sticky scan would stop at the first position that is not a separator.
    flags = flags.replace('y', '');
    if (flags.indexOf('g') === -1) {
        // Without "g", exec() restarts at index 0 on every call.
        flags += 'g';
    }
    var scanner = new RegExp(re.source, flags);
    var result = [];
    var lastEnd = 0;
    var match;
    while ((match = scanner.exec(str)) !== null) {
        if (match[0] === '') {
            // exec() does not advance on an empty match; step manually so
            // the loop terminates.
            scanner.lastIndex += 1;
            continue;
        }
        if (match.index > lastEnd) {
            result.push(str.substring(lastEnd, match.index));
        }
        result.push(match[0]);
        lastEnd = match.index + match[0].length;
    }
    result.push(str.substring(lastEnd));
    return result;
}
// Example: tokenize the sample query with split2.
var thestr = "Reason={Existing problem or fault}{Bestaande probleem of vout}{Other}{Ander} and Required!=No and Results >=10 and Results <=25 and Tst>5 and Tst<80 and Info=test this or that and those and Success!=Yes";
// Separators: a single comparison/brace character, or " AND " / " OR " in any case.
var patt = /[!><=}{]| AND | OR /gi;
split2(thestr, patt);
輸出:
["Reason", "=", "{", "Existing problem", " or ", "fault", "}", "{",
"Bestaande probleem of vout", "}", "{", "Other", "}", "{", "Ander", "}", " and ",
"Required", "!", "=", "No", " and ", "Results ", ">", "=", "10", " and ",
"Results ", "<", "=", "25", " and ", "Tst", ">", "5", " and ", "Tst", "<", "80",
" and ", "Info", "=", "test this", " or ", "that", " and ", "those", " and ",
"Success", "!", "=", "Yes"]
要支持大多數正在使用的瀏覽器,您可以改為對字符串進行匹配(match)。
此模式匹配任意數量的非分隔符字符(分隔符為 <>!{}= 之一),或者單個分隔符。
// Sample query, assembled from three fragments to keep the lines short.
var str = [
    'Reason={Existing problem or fault}{Bestaande probleem of vout}',
    '{Other}{Ander} and Required!=No and Results >=10 and Results <=25 ',
    'and Tst>5 and Tst<80 and Info=test this or that and those and Success!=Yes'
].join('');
// Each match is either a run of non-separator characters or one separator.
var rx = /([^<>!{}=]+|[<>!{}=])/g;
// Show one token per line.
str.match(rx).join('\n');
//returned value:
Reason
=
{
Existing problem or fault
}
{
Bestaande probleem of vout
}
{
Other
}
{
Ander
}
and Required
!
=
No and Results
>
=
10 and Results
<
=
25 and Tst
>
5 and Tst
<
80 and Info
=
test this or that and those and Success
!
=
Yes
//我連接了字符串並加入了結果以便於閱讀
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.