[英]Javascript: Split a string by comma, except inside parentheses
给定字符串形式:
'"abc",ab(),c(d(),e()),f(g(),zyx),h(123)'
我如何拆分它以获得以下数组格式:
abc
ab()
c(d(),e())
f(g(),zyx)
h(123)
我已经尝试过正常的 javascript 拆分,但是它没有按预期工作。 尝试正则表达式但尚未成功。
您可以跟踪括号的数量,并在左括号与右括号的数量相等时,把累积的表达式加入结果。
例如-
/**
 * Splits a string on top-level commas, i.e. commas that are not enclosed in
 * parentheses. Quote characters are not interpreted and stay in the output.
 *
 * Empty segments (produced by leading, trailing or doubled commas) are
 * dropped, so an empty input yields an empty array. A ")" without a matching
 * "(" is kept as ordinary text, and an unclosed "(" turns the remainder of
 * the string into a single segment.
 *
 * @param {string} s - Text to split.
 * @returns {string[]} The non-empty top-level segments, in input order.
 */
function splitNoParen(s) {
  const segments = [];
  let depth = 0;
  let start = 0;

  // Stores s[start, end) unless that slice is empty.
  const pushSegment = (end) => {
    if (end > start) {
      segments.push(s.slice(start, end));
    }
  };

  for (let i = 0; i < s.length; i++) {
    const ch = s[i];
    if (ch === '(') {
      depth++;
    } else if (ch === ')') {
      // Never go negative, so a stray ")" cannot cancel a later "(".
      if (depth > 0) {
        depth--;
      }
    } else if (ch === ',' && depth === 0) {
      pushSegment(i);
      start = i + 1;
    }
  }
  pushSegment(s.length);
  return segments;
}

const s1 = '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';
splitNoParen(s1).join('\n');
/* returned value: (String)
"abc"
ab()
c(d(),e())
f(g(),zyx)
h(123)
*/
这可能不是最好或更精致的解决方案,也可能不适合每一种可能性,但根据您的示例,它可以工作:
/**
 * Re-joins comma-separated pieces that were cut apart inside parentheses.
 *
 * The pieces are walked in order while a running count of unclosed "(" is
 * kept. Pieces are glued back together (with the comma that separated them)
 * until that count returns to zero, so groups spanning any number of pieces
 * and any nesting depth are restored.
 *
 * @param {string[]} pieces - Result of splitting the text on every comma.
 * @returns {string[]} Pieces with parenthesised groups merged back together.
 */
function mergeSplitParentheses(pieces) {
  const merged = [];
  let pending = null; // Text collected while parentheses are still open.
  let depth = 0;

  for (const piece of pieces) {
    const opened = (piece.match(/\(/g) || []).length;
    const closed = (piece.match(/\)/g) || []).length;

    pending = pending === null ? piece : `${pending},${piece}`;
    depth += opened - closed;

    if (depth <= 0) {
      merged.push(pending);
      pending = null;
      depth = 0; // A surplus ")" must not offset a later "(".
    }
  }

  // Unclosed parentheses: keep whatever was collected instead of dropping it.
  if (pending !== null) {
    merged.push(pending);
  }
  return merged;
}

const data = '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';
// Strip the double quotes, then split on every comma regardless of nesting.
const preResult = data.replace(/"/g, '').split(',');
// Merge the pieces that belong to the same parenthesised group.
const result = mergeSplitParentheses(preResult);
供将来参考,这是另一种在顶层进行拆分的方法,它把 string.replace 当作控制流运算符来使用:
/**
 * Splits a string on commas that sit at parenthesis depth zero.
 *
 * The input is consumed token by token. Each token is a run of plain
 * characters, then any closing parentheses, then any opening parentheses,
 * then an optional comma. A comma ends the current segment only when the
 * running depth is exactly zero.
 *
 * @param {string} s - Text to split.
 * @returns {string[]} Top-level segments; empty segments are kept.
 */
function psplit(s) {
  const tokenPattern = /[^(),]*([)]*)([(]*)(,)?/g;
  const pieces = [];
  let nesting = 0;
  let segmentStart = 0;

  for (const match of s.matchAll(tokenPattern)) {
    const [text, closers, openers, comma] = match;
    nesting += openers.length - closers.length;

    const nextStart = match.index + text.length;
    if (nesting === 0 && comma) {
      // The token ends with the comma, so the segment stops one character earlier.
      pieces.push(s.substring(segmentStart, nextStart - 1));
      segmentStart = nextStart;
    }
  }

  // Whatever follows the last top-level comma is the final segment.
  pieces.push(s.substring(segmentStart));
  return pieces;
}

console.log(psplit('abc,ab(),c(d(),e()),f(g(),zyx),h(123)'));
// => ["abc", "ab()", "c(d(),e())", "f(g(),zyx)", "h(123)"]
让它同时处理引号也不会太复杂,但到了某个程度,你就需要决定改用真正的解析器(比如 jison),而我猜处理引号正是那个临界点。无论如何,问题中没有给出足够的细节,无法知道对双引号的期望处理方式是什么。
您不能用 .split 来解决这个问题,而是必须编写一个像下面这样的小型解析器:
/**
 * Splits a string at every comma that is not inside parentheses.
 *
 * Empty segments are kept: "" yields [""] and "a,,b" yields ["a", "", "b"].
 * Nesting is tracked as a depth that never drops below zero, so a stray ")"
 * cannot cancel out a "(" that appears later in the input.
 *
 * @param {string} s - Text to split.
 * @returns {string[]} The top-level segments, in input order.
 */
function splitNoParen(s) {
  const results = [];
  let current = '';
  let depth = 0;

  for (let i = 0; i < s.length; i++) {
    const ch = s[i];
    if (ch === ',' && depth === 0) {
      // Top-level comma: close the current segment and drop the comma.
      results.push(current);
      current = '';
      continue;
    }
    if (ch === '(') {
      depth++;
    } else if (ch === ')' && depth > 0) {
      depth--;
    }
    current += ch;
  }

  // The text after the last top-level comma (possibly empty) is a segment too.
  results.push(current);
  return results;
}

const s1 = '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';
console.log(splitNoParen(s1).join('\n'));

const s2 = 'cats,(my-foo)-bar,baz';
console.log(splitNoParen(s2).join('\n'));
我遇到过类似的问题,而现有的解决方案很难推广。因此,这里给出另一个解析器,它更具可读性,也更容易按个人需求扩展。它同样适用于花括号、方括号、圆括号以及任何类型的字符串。许可证为 MIT。
/**
 * Splits `input` at every occurrence of `split_by` that is outside all
 * brackets and outside all string literals.
 *
 * - Brackets (`{}`, `[]`, `()`) may nest and are counted per bracket type.
 * - String literals start at `"`, `'` or a backtick and end at the next
 *   unescaped occurrence of the same quote character. Everything in between,
 *   including brackets and the other quote characters, is plain text.
 * - A backslash escapes the character that follows it, inside or outside a
 *   string literal.
 *
 * @param input    Text to split.
 * @param split_by Single-character separator; defaults to ",".
 * @returns The segments in input order. There is always at least one
 *          segment, and the last one is "" when the input ends with the
 *          separator or is empty.
 */
export function parse_arguments(input:string, split_by = ",")
{
    // Some symbols can be nested, like braces, and must be counted.
    const state = {"{":0,"[":0,"(":0};
    // Quote characters cannot be nested; each one opens a string literal.
    const quotes = new Set(["\"","'","`"]);
    // Nestable symbols come in pairs: the opener increases the counter of
    // its bracket type and the closer decreases it.
    const pairs : Record<string,[keyof typeof state,number]> = {
        "{":["{",1],
        "}":["{",-1],
        "[":["[",1],
        "]":["[",-1],
        "(":["(",1],
        ")":["(",-1]
    };
    // Quote character of the literal being scanned, or "" when outside one.
    let open_quote = "";
    let start = 0;
    const results : string[] = [];
    for(let i = 0; i < input.length; ++i)
    {
        const char = input[i];
        // Backslash escapes the next character, so skip both characters.
        if(char === "\\")
        {
            i++;
            continue;
        }
        // Inside a string literal only the matching quote is significant;
        // brackets, separators and other quote characters are ignored.
        if(open_quote !== "")
        {
            if(char === open_quote)
                open_quote = "";
            continue;
        }
        // Paired symbol: adjust the counter of its bracket type.
        if(char in pairs)
        {
            const [key,value] = pairs[char];
            state[key] += value;
        }
        // Quote character: a string literal starts here.
        else if(quotes.has(char))
        {
            open_quote = char;
        }
        // Separator: split only when every bracket type is balanced.
        else if(char === split_by)
        {
            if(Object.values(state).every((count)=>count === 0))
            {
                results.push(input.substring(start,i));
                start = i+1;
            }
        }
    }
    // Add the last segment; it is "" when the input ended with the separator.
    results.push(input.substring(start));
    return results;
}
使用此正则表达式,它可以完成工作:
// Split on every comma that is NOT followed by a ")" with no "(" in between,
// i.e. on commas that do not sit directly inside a parenthesised group.
// Limitation: the lookahead only sees one level of parentheses, so a comma
// that is followed by a nested call (as in "c(d(),e())") is still split on.
const regex = /,(?![^(]*\))/g;
const str = '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';
const result = str.split(regex);
console.log(result);
Javascript
const str = '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';

// Collects the non-empty, comma-separated parts of `str`. Double quotes are
// removed first; commas inside parentheses do not separate parts.
const parts = [];
let depth = 0;
let current = '';
for (const ch of str.replace(/"/g, '')) {
  if (ch === ',' && depth === 0) {
    // Top-level comma: finish the current part, skipping empty ones.
    if (current !== '') {
      parts.push(current);
    }
    current = '';
    continue;
  }
  if (ch === '(') {
    depth++;
  } else if (ch === ')' && depth > 0) {
    depth--;
  }
  current += ch;
}
if (current !== '') {
  parts.push(current);
}
// parts is now ["abc", "ab()", "c(d(),e())", "f(g(),zyx)", "h(123)"]
这应该工作
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.