简体   繁体   English

Javascript - string.split(正则表达式)保留分隔符

[英]Javascript - string.split(regex) keep separators

I would like to split a string using a regular expression and have the separators / matched info included in the resulting array. 我想使用正则表达式拆分字符串,并在结果数组中包含分隔符/匹配信息。

In java I used: 在java中我用过:

theString.split("(?<=[!><=}{])|(?=[!><=}{])|(?<= AND )|(?= AND )|(?<= OR )|(?= OR )")

But, javascript doesn't support lookbehind ?<= 但是,javascript不支持lookbehind ?<=

For example I want string: 例如,我想要字符串:

"Reason={Existing problem or fault}{Bestaande probleem of vout}{Other}{Ander} and Required!=No and Results >=10 and Results <=25 and Tst>5 and Tst<80 and Info=test this or that and those and Success!=Yes"

To split: 分开:

Reason,=,{,Existing problem, or ,fault,},{,Bestaande probleem of vout,},{,Other,},{,Ander,}, and ,Required,!,=,No, and ,Results,>,=,10, and ,Results,<,=,25, and ,Tst,>,5, and ,Tst,<,80, and ,Info,=,test this, or ,that, and ,those, and ,Success,!,=,Yes

Example of what I've got: 我得到的例子:

// Demo script from the question: three attempts at splitting `thestr` on the
// comparison/brace characters and on the words AND / OR, writing each result
// to the page with document.write().
var thestr = "Reason={Existing problem or fault}{Bestaande probleem of vout}{Other}{Ander} and Required!=No and Results >=10 and Results <=25 and Tst>5 and Tst<80 and Info=test this or that and those and Success!=Yes";

// Attempt 1: split() on the separator pattern.
document.write("::SPLIT::<br>");
// Each alternative sits in its own capturing group. Note that the space after
// `( OR )` is part of the pattern, so that alternative only matches " OR "
// when it is followed by one more space.
var patt1=new RegExp(/([!><=}{])|( AND )|( OR ) /gi);

var x = thestr.split(patt1);
// This splits correctly but doesn't include the separators / matched characters
document.write("length="+x.length+"<br>");
// NOTE(review): the loop counters c, d and r below are assigned without `var`,
// so they become implicit globals.
for (c=0;c<x.length;c++) {
    document.write(c+" - "+ x[c]+" |");
}

// Attempt 2: match() with the same global pattern returns only the separators.
document.write("<br><br>::MATCH::<br>");

var y = thestr.match(patt1);

// This shows the matched characters, but how do I combine the info from split and match?
document.write("length="+y.length+"<br>");
for (d=0;d<y.length;d++) {
    document.write(d+" - "+ y[d]+" |");
}

// Attempt 3: lookaheads instead of the (unsupported) lookbehinds.
document.write("<br><br>::INCLUDE SEPERATORS::<br>");
// (?!...) is a negative lookahead, not a lookbehind: it is zero-width and
// succeeds wherever the following text is NOT the given separator. Combined
// with the positive lookaheads, some alternative matches at practically every
// position in the string.
var patt2=new RegExp(/(?![!><=}{])|(?=[!><=}{])|(?! AND )|(?= AND )|(?! OR )|(?= OR ) /gi);
// This puts everything in the array, but each character is a separate array element.
// Not what I wanted to achieve.
var bits = thestr.split(patt2);
document.write("length="+bits.length+"<br>");
for (r=0;r<bits.length;r++) {
    document.write(r+" - "+ bits[r]+" |");
}

If you put the whole pattern in a group, you will also get the separators: 如果将整个模式放在一个组中,您还将获得分隔符:

thestr.split(/([!><=}{]| (?:AND|OR) )/)

This returns an array like: 这将返回一个数组,如:

["Reason", "=", "", "{", "Existing problem or fault", "}", "", "{", "Bestaande probleem of vout", "}", "", "{", "Other", "}", "", "{", "Ander", "}", " and Required", "!", "", "=", "No and Results ", ">", "", "=", "10 and Results ", "<", "", "=", "25 and Tst", ">", "5 and Tst", "<", "80 and Info", "=", "test this or that and those and Success", "!", "", "=", "Yes"]

Then you just need to filter the empty strings and you're done: 然后你只需要过滤空字符串就可以了:

thestr.split(/([!><=}{]| (?:AND|OR) )/).filter(Boolean)

Edit Since Internet Explorer and possibly other browsers do not take a grouped separator into the result array, you could do this instead: 编辑由于Internet Explorer和可能的其他浏览器不将分组分隔符放入结果数组中,您可以这样做:

// Fallback for browsers whose split() does not put a grouped separator into
// the result array: split with a non-capturing pattern, collect the separators
// separately with a global match(), then write the separators into `matches`.
// match() with the g flag returns null when nothing matches, in which case the
// loop condition below throws.
var matches = thestr.split(/(?:[!><=}{]| (?:AND|OR) )/),
    separators = thestr.match(/(?:[!><=}{]| (?:AND|OR) )/g);
for (var i=0; i<separators.length; ++i) {
    // NOTE(review): this assigns over the element already stored at index i+1
    // rather than inserting between elements, so the split pieces at those
    // indexes are replaced by separators instead of being interleaved with them.
    matches[i+1] = separators[i];
}

This basically separates the separators from the other parts and then combines both. 这基本上是先将分隔符与其他部分分开,然后再将两者合并。

Not getting too deep into your query structure, I would suggest you to use replace method with a function as replacement which would collect the terms into an array: 没有深入到您的查询结构,我建议您使用replace方法与函数作为替换,将术语收集到数组中:

/**
 * Template for collecting the terms of a query string into an array.
 *
 * String.prototype.replace is used purely as an iterator: the callback is
 * invoked once per match of `oReTerms`, and every non-empty capture group is
 * appended to the result. The string returned by replace() is discarded.
 *
 * @param {string} sQuery - The query text to scan.
 * @returns {Array} The collected terms, in match order.
 */
function parse(sQuery) {
    var aParsed = [];
    // Placeholder pattern: substitute an expression whose capture groups
    // match the individual terms of the query.
    var oReTerms = /.../gim;
    // $0 is the whole match; $1, $2, ... are the capture groups. Extend the
    // parameter list to cover as many groups as the real pattern defines.
    sQuery.replace(oReTerms, function($0, $1, $2 /*, ... */) {
        //...
        if ($1) {
            // Arrays have no append(); push() adds to the end.
            aParsed.push($1);
        }
        if ($2) {
            aParsed.push($2);
        }
        //...
        return $0; // return what was matched (or any string)
    });
    return aParsed;
}

I did this previously to parse HTML tags and attributes. 我之前做过这个来解析HTML标签和属性。 I hope the idea is clear. 我希望这个想法很清楚。 You just need to define your regular expression so that it matches all terms in the query. 您只需要定义正则表达式,以便它匹配查询中的所有术语。

And you can perform another replacement inside the replacement function for specific cases. 对于特定情况,您可以在替换函数中进行另一次替换。

Gumbo's split function above is a good idea but it doesn't work. Gumbo上面的拆分功能是一个好主意,但它不起作用。 It should be: 它应该是:

/**
 * Splits a string on a regular expression and keeps the separators.
 *
 * The result alternates piece, separator, piece, ..., piece: it always starts
 * and ends with a (possibly empty) piece, and an empty string is kept between
 * two adjacent separators.
 *
 * Unlike a split()/match() combination, this scans with exec() on a private
 * global copy of the pattern, so it also works when `regex` lacks the g flag
 * or contains capturing groups, and it never touches the caller's lastIndex.
 * Zero-length matches are ignored.
 *
 * @param {string} str - The text to split.
 * @param {RegExp} regex - The separator pattern.
 * @returns {string[]} Pieces and separators, interleaved in source order.
 */
function split(str, regex) {
    var flags = regex.flags;
    if (flags === undefined) {
        // Engines without RegExp.prototype.flags: rebuild from the boolean properties.
        flags = (regex.ignoreCase ? 'i' : '') + (regex.multiline ? 'm' : '');
    }
    if (flags.indexOf('g') === -1) {
        // exec() only advances through the string when the pattern is global.
        flags += 'g';
    }
    var scanner = new RegExp(regex.source, flags);
    var ret = [];
    var lastEnd = 0;
    var match;
    while ((match = scanner.exec(str)) !== null) {
        if (match[0] === '') {
            // A zero-length match would never advance on its own.
            scanner.lastIndex++;
            continue;
        }
        ret.push(str.substring(lastEnd, match.index), match[0]);
        lastEnd = match.index + match[0].length;
    }
    ret.push(str.substring(lastEnd));
    return ret;
}

split('a,b,c', /,/g); // returns ["a", ",", "b", ",", "c"]

I'm not sure about how JavaScript behaves if a regex split contains a capturing group. 如果正则表达式拆分包含捕获组,我不确定JavaScript的行为方式。 I know that in Python, a splitting delimiter becomes part of the match if it is enclosed in capturing parentheses. 我知道在Python中,如果将分裂分隔符包含在捕获括号中,则分割分隔符将成为匹配的一部分。

Try 尝试

result = subject.split(/( or )|( and )|([^\w\s])\b|(?=[^\w\s])/i);

and see what happens. 看看会发生什么。

/**
 * Splits a string on a regular expression, returning the pieces and the
 * matched separators in source order.
 *
 * Empty pieces before a separator (at the start of the string or between two
 * adjacent separators) are omitted; the remainder after the last separator is
 * always appended, even when it is empty.
 *
 * A global `re` is used directly and its lastIndex is reset to 0 before the
 * scan. A non-global `re` is copied with the g flag added, because exec() on a
 * non-global pattern returns the same first match on every call and the loop
 * would never end. Zero-length matches are skipped for the same reason.
 *
 * @param {string} str - The text to split.
 * @param {RegExp} re - The separator pattern.
 * @returns {string[]} Pieces and separators in the order they occur.
 */
function split2(str, re) {
    var scanner = re;
    if (!re.global) {
        var flags = re.flags;
        if (flags === undefined) {
            // Engines without RegExp.prototype.flags: rebuild from the boolean properties.
            flags = (re.ignoreCase ? 'i' : '') + (re.multiline ? 'm' : '');
        }
        scanner = new RegExp(re.source, flags + 'g');
    }
    // Reset to start of string
    scanner.lastIndex = 0;

    var result = [];
    var lastEnd = 0;
    var match;
    while ((match = scanner.exec(str)) !== null) {
        if (match[0] === '') {
            // A zero-length match leaves lastIndex where it was; step past it.
            scanner.lastIndex++;
            continue;
        }
        if (match.index > lastEnd) {
            result.push(str.substring(lastEnd, match.index));
        }
        result.push(match[0]);
        lastEnd = match.index + match[0].length;
    }
    result.push(str.substring(lastEnd));
    return result;
}

var thestr = "Reason={Existing problem or fault}{Bestaande probleem of vout}{Other}{Ander} and Required!=No and Results >=10 and Results <=25 and Tst>5 and Tst<80 and Info=test this or that and those and Success!=Yes";

var patt = /[!><=}{]| AND | OR /gi;

split2(thestr,patt):

Output: 输出:

["Reason", "=", "{", "Existing problem", " or ", "fault", "}", "{",
"Bestaande probleem of vout", "}", "{", "Other", "}", "{", "Ander", "}", " and ",
"Required", "!", "=", "No", " and ", "Results ", ">", "=", "10", " and ",
"Results ", "<", "=", "25", " and ", "Tst", ">", "5", " and ", "Tst", "<", "80",
" and ", "Info", "=", "test this", " or ", "that", " and ", "those", " and ",
"Success", "!", "=", "Yes"]

To support most of the browsers in use, you can match your strings 要支持大多数正在使用的浏览器,您可以匹配字符串

this pattern matches any number of characters except the separators, <>!{}=, or one of the separators. 此模式匹配除分隔符,<>!{} =或其中一个分隔符之外的任意数量的字符。

// Tokenizer pattern: each match is either a run of one or more characters that
// are not separators, or exactly one separator character (< > ! { } =).
// The g flag makes match() return every token instead of only the first.
var rx=/([^<>!{}=]+|[<>!{}=])/g

// Sample input, split across concatenated literals for readability.
var str='Reason={Existing problem or fault}{Bestaande probleem of vout}'+
'{Other}{Ander} and Required!=No and Results >=10 and Results <=25 '+
'and Tst>5 and Tst<80 and Info=test this or that and those and Success!=Yes';


// Collect all tokens and join them with newlines for display.
str.match(rx).join('\n')

//returned value:
Reason
=
{
Existing problem or fault
}
{
Bestaande probleem of vout
}
{
Other
}
{
Ander
}
 and Required
!
=
No and Results 
>
=
10 and Results 
<
=
25 and Tst
>
5 and Tst
<
80 and Info
=
test this or that and those and Success
!
=
Yes

// I concatenated the string and joined the result for readability //我连接了字符串并加入了结果以便于阅读

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM