繁体   English   中英

Javascript:用逗号分隔字符串,括号内除外

[英]Javascript: Split a string by comma, except inside parentheses

给定字符串形式:

'"abc",ab(),c(d(),e()),f(g(),zyx),h(123)'

我如何拆分它以获得以下数组格式:

abc
ab()
c(d(),e())
f(g(),zyx)
h(123)

我已经尝试过正常的 javascript 拆分,但是它没有按预期工作。 尝试正则表达式但尚未成功。

您可以跟踪括号,并在左右括号相等时添加这些表达式。

例如-

/**
 * Split a string at its top-level commas, leaving commas that appear inside
 * parentheses untouched.
 *
 * Empty items (for example the one between two adjacent commas) are skipped,
 * so an empty input string yields an empty array.
 *
 * @param {string} s - Text to split.
 * @returns {string[]} The non-empty top-level items, in order.
 */
function splitNoParen(s){
    const pieces = [];
    let depth = 0;      // number of '(' that are currently open
    let current = '';   // item being accumulated

    for (let i = 0; i < s.length; i++) {
        const ch = s[i];

        // A comma only separates items when no parenthesis is open.
        if (ch === ',' && depth === 0) {
            if (current !== '') pieces.push(current);
            current = '';
            continue;
        }

        if (ch === '(') {
            depth++;
        } else if (ch === ')' && depth > 0) {
            // A ')' with nothing open is kept as plain text instead of
            // driving the depth negative.
            depth--;
        }
        current += ch;
    }

    // Flush the last item; this also keeps an unclosed trailing group.
    if (current !== '') pieces.push(current);
    return pieces;
}

var s1= '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';
splitNoParen(s1).join('\n');

/*  returned value: (String)
"abc"
ab()
c(d(),e())
f(g(),zyx)
h(123)
*/

这可能不是最好或更精致的解决方案,也可能不适合每一种可能性,但根据您的示例,它可以工作:

/**
 * Re-join comma-separated fragments that were cut in the middle of a
 * parenthesised group.
 *
 * Fragments are accumulated until the number of '(' seen is matched by the
 * number of ')' seen; the accumulated fragments are then joined back with
 * the comma that the split removed.
 *
 * @param {string[]} parts - Fragments produced by splitting on ','.
 * @returns {string[]} Fragments merged so that parentheses are balanced
 *   wherever the input allows it.
 */
function mergeUnbalancedParts(parts) {
    const merged = [];
    let pending = [];   // fragments of the item currently being rebuilt
    let depth = 0;      // '(' minus ')' over the pending fragments

    for (const part of parts) {
        const opening = (part.match(/\(/g) || []).length;
        const closing = (part.match(/\)/g) || []).length;

        pending.push(part);
        depth += opening - closing;

        if (depth <= 0) {
            // Balanced (or a stray ')'): the item is complete.
            merged.push(pending.join(','));
            pending = [];
            depth = 0;
        }
    }

    // An unclosed group at the end is emitted as-is rather than dropped.
    if (pending.length > 0) {
        merged.push(pending.join(','));
    }
    return merged;
}

var data = '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';
// Create a preResult by removing the double quotes and splitting on every comma.
var preResult = data.replace(/"/g, '').split(',');
// Merge the fragments that belong to the same parenthesised group.
var result = mergeUnbalancedParts(preResult);

演示

为了将来参考,这是另一种顶级拆分方法,使用string.replace作为控制流运算符:

/**
 * Split a string on the commas that sit at parenthesis balance zero.
 *
 * The balance is the number of '(' minus the number of ')' seen so far.
 * It is not clamped, so after an unmatched ')' it stays negative and later
 * commas are not treated as separators until it returns to zero.
 * Empty segments are kept.
 *
 * @param {string} s - Text to split.
 * @returns {string[]} The segments between top-level commas.
 */
function psplit(s) {
  const pieces = [];
  let balance = 0;
  let start = 0;

  for (let pos = 0; pos < s.length; pos += 1) {
    const ch = s[pos];
    if (ch === '(') {
      balance += 1;
    } else if (ch === ')') {
      balance -= 1;
    } else if (ch === ',' && balance === 0) {
      pieces.push(s.substring(start, pos));
      start = pos + 1;
    }
  }

  // Whatever follows the last separator is the final segment.
  pieces.push(s.substring(start));
  return pieces;
}

console.log(psplit('abc,ab(),c(d(),e()),f(g(),zyx),h(123)'))

["abc", "ab()", "c(d(),e())", "f(g(),zyx)", "h(123)"]

让它同时处理引号也不会太复杂,但到了某个阶段,你就需要决定改用真正的解析器(比如 jison),而我猜这里已经到了那个阶段。 无论如何,问题中没有提供足够的细节来判断双引号应当如何处理。

您不能为此使用.split ,而是必须编写一个像这样的小型解析器:

 /**
  * Split a string at its top-level commas with a small hand-written scanner.
  *
  * Commas inside parentheses are kept as part of the current item. Empty
  * items are kept, so an empty input yields [''].
  *
  * @param {string} s - Text to split.
  * @returns {string[]} The top-level items, in order.
  */
 function splitNoParen(s) {
   const results = [];
   let depth = 0; // number of '(' that are currently open
   let str = '';  // item being accumulated

   for (const ch of s) {
     if (ch === ',' && depth === 0) {
       results.push(str);
       str = '';
       continue;
     }

     if (ch === '(') {
       depth++;
     } else if (ch === ')' && depth > 0) {
       // An unmatched ')' is plain text; it must not cancel out a later '('.
       depth--;
     }
     str += ch;
   }

   results.push(str);
   return results;
 }

 var s1 = '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';
 console.log(splitNoParen(s1).join('\n'));

 var s2 = 'cats,(my-foo)-bar,baz';
 console.log(splitNoParen(s2).join('\n'));

我遇到过类似的问题,而现有的解决方案很难推广。 因此,这里给出另一个可读性更好、也更容易按个人需求扩展的解析器。 它同样适用于花括号、方括号、圆括号以及任何类型的字符串。 许可证为 MIT。

/**
 * Split `input` at every top-level occurrence of `split_by`.
 *
 * A separator is top-level when it is outside every bracket pair
 * ({}, [], ()) and outside every quoted string ("...", '...', `...`).
 * A backslash escapes the character that follows it, so an escaped
 * separator, bracket or quote is treated as plain text.
 *
 * @param input    Text to split.
 * @param split_by Single-character separator; defaults to ",".
 * @returns The segments between top-level separators. There is always at
 *          least one segment, and a trailing separator yields a final "".
 */
export function parse_arguments(input:string, split_by = ",")
{
    // Some symbols can be nested, like braces, and must be counted
    const state = {"{":0,"[":0,"(":0};

    // Nestable symbols come in pairs: each one names the counter it
    // affects and the amount to add to it.
    const pairs : Record<string,[keyof typeof state,number]> = {
        "{":["{",1],
        "}":["{",-1],
        "[":["[",1],
        "]":["[",-1],
        "(":["(",1],
        ")":["(",-1]
    }

    // Quotes cannot nest. While one is open, only the same quote character
    // closes it; brackets, separators and other quote characters inside the
    // string are plain text.
    const quote_chars = "\"'`";
    let open_quote = "";

    let start = 0;
    const results : string[] = [];
    for(let i = 0; i < input.length; ++i)
    {
        const char = input[i];

        // Backslash escapes the next character. We directly skip 2 characters by incrementing i one extra time.
        if(char === "\\")
        {
            i++;
            continue;
        }

        // Inside a string literal: look only for the closing quote.
        if(open_quote !== "")
        {
            if(char === open_quote)
                open_quote = "";
            continue;
        }

        if(quote_chars.includes(char))
        {
            open_quote = char;
            continue;
        }

        // If it's a paired symbol, increase or decrease the state based on our "pairs" constant.
        if(char in pairs)
        {
            const [key,delta] = pairs[char];
            // Clamp at zero so a stray closing bracket does not disable
            // every later split.
            state[key] = Math.max(0, state[key] + delta);
            continue;
        }

        // If it's our split symbol and nothing is open, push the segment
        // collected so far and start the next one after the separator.
        if(char === split_by && Object.values(state).every(count => count === 0))
        {
            results.push(input.substring(start,i));
            start = i+1;
        }
    }

    // Add the last segment; it is "" when the input is empty or ends with
    // the separator.
    results.push(input.substring(start));

    return results;
}

使用此正则表达式,它可以完成工作:

 // Split on every comma that is NOT followed by a run of non-'(' characters
 // ending in ')'. This keeps a comma that precedes the last, call-free
 // argument of a group (the one in "f(g(),zyx)").
 // Limitation: the lookahead stops at the next '(', so a comma followed by
 // another call inside the same group (the one in "c(d(),e())") is still
 // treated as a separator.
 const regex = /,(?![^(]*\))/g;
 const str = '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';
 const result = str.split(regex);
 console.log(result);

Javascript

/**
 * Split text on the commas that are outside every pair of parentheses.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} All top-level items, including empty ones.
 */
function splitTopLevelCommas(text) {
  const items = [];
  let depth = 0;    // number of '(' that are currently open
  let current = '';

  for (const ch of text) {
    if (ch === ',' && depth === 0) {
      items.push(current);
      current = '';
      continue;
    }
    if (ch === '(') {
      depth++;
    } else if (ch === ')' && depth > 0) {
      depth--;
    }
    current += ch;
  }

  items.push(current);
  return items;
}

var str='"abc",ab(),c(d(),e()),f(g(),zyx),h(123)'
// Turn every double quote into a comma (split on '"', then join with ','),
// split on the top-level commas only, and drop the empty items that the
// quote replacement leaves behind.
var parts = splitTopLevelCommas(str.split('"').toString()).filter(Boolean);

这应该工作

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM