关于正则表达式:如何在javascript中拆分带有多个分隔符的字符串?

How do I split a string with multiple separators in javascript?

如何在JavaScript中使用多个分隔符拆分字符串?我想同时按逗号和空格拆分,但据我所知(AFAIK),JS的split函数只支持一个分隔符。


传入regexp作为参数:

1
2
js>"Hello awesome, world!".split(/[\s,]+/)
Hello,awesome,world!

编辑添加:

您可以通过选择数组的长度减去1来获取最后一个元素:

1
2
3
4
>>> bits ="Hello awesome, world!".split(/[\s,]+/)
["Hello","awesome","world!"]
>>> bit = bits[bits.length - 1]
"world!"

…如果模式不匹配:

1
2
3
4
>>> bits ="Hello awesome, world!".split(/foo/)
["Hello awesome, world!"]
>>> bits[bits.length - 1]
"Hello awesome, world!"


您可以将regex传递到javascript的split操作符中。例如:

1
2
"1,2 3".split(/,| /)
["1","2","3"]

或者,如果要允许多个分隔符一起仅作为一个分隔符:

1
2
"1, 2, , 3".split(/(?:,| )+/)
["1","2","3"]

(您必须使用非捕获括号(?:),否则匹配到的分隔符会被重新拼接到结果中。或者你可以像Aaron一样聪明,使用字符类。)

(在Safari+FF中测试的示例)


另一个简单但有效的方法是重复使用split+join。

1
"a=b,c:d".split('=').join(',').split(':').join(',').split(',')

本质上,在连接后进行拆分就像是全局替换,因此这将用逗号替换每个分隔符,然后一旦全部替换,它将对逗号进行最后一次拆分。

上述表达式的结果是:

1
['a', 'b', 'c', 'd']

在此基础上展开,您还可以将其置于函数中:

1
2
3
4
5
6
7
8
/**
 * Split a string on several literal separators.
 *
 * Every separator after the first is rewritten to the first one
 * (split + join acts as a global replace), then the string is split once
 * on that first separator.
 *
 * @param {string} str    Text to split.
 * @param {Array}  tokens Separators; tokens[0] doubles as the join character.
 * @returns {Array} The pieces of str.
 */
function splitMulti(str, tokens){
        var glue = tokens[0];
        var unified = Array.prototype.slice.call(tokens, 1).reduce(function (text, token) {
            return text.split(token).join(glue);
        }, str);
        return unified.split(glue);
}

用途:

1
splitMulti('a=b,c:d', ['=', ',', ':']) // ["a","b","c","d"]

如果您经常使用这个功能,为了方便起见,考虑包装String.prototype.split甚至是值得的(我认为我的函数是相当安全的-唯一考虑的是条件(次要)的额外开销,以及如果传递数组,它缺少限制参数的实现这一事实)。

如果使用下面的方法简单地包装它,请确保包含splitMulti函数:)。同样值得注意的是,有些人不喜欢扩展内置组件(因为很多人做得不对,可能会发生冲突),所以如果有疑问,在使用此插件之前请与更高级别的人联系,或者这样问:)

1
2
3
4
5
6
7
8
9
    var splitOrig = String.prototype.split; // Maintain a reference to inbuilt fn
    String.prototype.split = function (){
        if(arguments[0].length > 0){
            if(Object.prototype.toString.call(arguments[0]) =="[object Array]" ) { // Check if our separator is an array
                return splitMulti(this, arguments[0]);  // Call splitMulti
            }
        }
        return splitOrig.apply(this, arguments); // Call original split maintaining context
    };

用途:

1
2
3
4
5
var a ="a=b,c:d";
    a.split(['=', ',', ':']); // ["a","b","c","d"]

// Test to check that the built-in split still works (although our wrapper wouldn't work if it didn't as it depends on it :P)
        a.split('='); // ["a","b,c:d"]

享受!


让我们保持简单:(在regex中添加"[]+"表示"1或更多")。

这意味着"+"和"{1,}"是相同的。

1
2
3
// Split on runs of whitespace and punctuation. The line-break characters are
// written as the escapes \r\n because a regex literal cannot span lines.
var words = text.split(/[ .:;?!~,`"&|()<>{}\[\]\r\n/\\]+/); // note ' and - are kept


棘手的方法:

1
2
3
var s ="dasdnk asd, (naks) :d skldma";
// A string pattern in replace() only replaces the first occurrence, so use a
// global regex to blank out every parenthesis and comma. Splitting on runs of
// whitespace then avoids empty entries where several delimiters were adjacent.
var a = s.replace(/[(),]/g, ' ').trim().split(/\s+/);
console.log(a);//["dasdnk","asd","naks",":d","skldma"]


对于那些希望在拆分函数中进行更多自定义的人,我编写了一个递归算法,用要拆分的字符列表拆分给定的字符串。我在看到上面的帖子之前就写了这个。我希望它能帮助一些受挫的程序员。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/**
 * Split a string on every separator in a list.
 *
 * The string is wrapped in a one-element list, traverseList is run once per
 * separator to split the string leaves in place, and the resulting nested
 * list is flattened.
 *
 * @param {string} string    Text to split.
 * @param {Array}  splitters Separators, applied in order.
 * @returns {Array} Flat array of pieces.
 */
// Declared with var: assigning to an undeclared name throws in strict mode.
var splitString = function(string, splitters) {
    var list = [string];
    for(var i=0, len=splitters.length; i<len; i++) {
        traverseList(list, splitters[i], 0);
    }
    return flatten(list);
};

/**
 * Walk a (possibly nested) list in place, starting at index, and replace
 * every string element that contains the splitter by the array of its pieces.
 * Nested arrays already present in the list are traversed recursively.
 *
 * @param {Array}         list     List to process; modified in place.
 * @param {string|RegExp} splitter Separator handed to String.prototype.split.
 * @param {number}        index    First position to process.
 */
var traverseList = function(list, splitter, index) {
    if (!Array.isArray(list)) {
        return;
    }
    // Iterate by position instead of testing the element's truthiness:
    // an empty-string piece must not end the traversal.
    for (var i = index; i < list.length; i++) {
        var item = list[i];
        if (typeof item === 'string') {
            var pieces = item.split(splitter);
            // Decide by piece count rather than a loose string/array
            // comparison, which wrongly treats "a,b" as equal to ["a","b"].
            if (pieces.length > 1) {
                list[i] = pieces;
            }
        } else if (Array.isArray(item)) {
            traverseList(item, splitter, 0);
        }
    }
};

/**
 * Flatten an arbitrarily nested array into a new one-level array.
 *
 * @param {Array} arr Possibly nested array; not modified.
 * @returns {Array} Flat copy of arr.
 */
var flatten = function(arr) {
    return arr.reduce(function(acc, val) {
        // Array.isArray is safe for null/undefined elements, where reading
        // val.constructor would throw.
        return acc.concat(Array.isArray(val) ? flatten(val) : val);
    },[]);
};

var stringToSplit ="people and_other/things";
// The first splitter is a single space; an empty string would split the
// input into individual characters instead of words.
var splitList = [" ","_","/"];
splitString(stringToSplit, splitList);

上面的示例返回:["people","and","other","things"]

注:flatten函数取自Rosetta Code。


您可以将所有要用作分隔符的字符单独地或共同地合并到正则表达式中,并将它们传递给split函数。例如,你可以写:

1
console.log("dasdnk asd, (naks) :d skldma".split(/[ \(,\)]+/) );

输出为:

1
["dasdnk","asd","naks",":d","skldma"]


例如,对字符串 07:05:45PM 先替换再拆分:

1
var hour = time.replace("PM","").split(":");

结果

1
[ '07', '05', '45' ]

也许您应该进行某种类型的字符串替换,将一个分隔符转换为另一个分隔符,这样在拆分中就只有一个分隔符需要处理。


An easy way to do this is to process each character of the string with each delimiter and build an array of the splits:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/**
 * Split a string on any of several literal delimiters.
 *
 * Usage: splix(string, delimiter1, delimiter2, ...)
 *
 * The string is scanned left to right; at each position the delimiters are
 * tried in the order given and the first one that matches ends the current
 * piece. Empty delimiters are ignored.
 *
 * @returns {Array} The pieces between delimiters (always at least one entry).
 */
var splix = function ()
{
  var args = [].slice.call(arguments);
  var str = args[0];
  var delimiters = args.slice(1);
  var parts = [];
  var start = 0; // offset where the piece currently being collected begins
  var i = 0;

  while (i < str.length)
  {
    var matched = '';
    for (var j = 0; j < delimiters.length; ++j)
    {
      var d = delimiters[j];
      if (d.length > 0 && str.slice(i, i + d.length) === d)
      {
        matched = d;
        break;
      }
    }

    if (matched)
    {
      // Cut by offset so a delimiter that occurs several times never
      // drops the text after its second occurrence.
      parts.push(str.slice(start, i));
      i += matched.length;
      start = i;
    }
    else
    {
      ++i;
    }
  }

  parts.push(str.slice(start));
  return parts;
};

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/**
 * Page logger for the demo: appends all arguments, joined with commas,
 * to the HTML of the document body.
 */
console.logg = function ()
{
  var joined = Array.prototype.slice.call(arguments).join();
  document.body.innerHTML += "" + joined;
};

/**
 * Logging variant of splix(string, delimiters...): splits the string on any
 * of the given literal delimiters and reports each step through
 * console.logg.
 *
 * @returns {Array} The pieces between delimiters.
 */
var splix = function() {
  var args = [].slice.call(arguments);
  var u = args[0];       // text being split
  var v = args.slice(1); // delimiters, tried in the order given
  var w = [u];           // pieces so far; the last entry is the unprocessed tail
  var x = 0;             // index of that tail entry in w
  var start = 0;         // offset in u where the tail begins
  var i = 0;
  console.logg("Processing: <wyn>" + JSON.stringify(w) +"</wyn>");

  while (i < u.length) {
    var matched = "";
    for (var j = 0; j < v.length; ++j) {
      var candidate = u.slice(i, i + v[j].length);
      console.logg("Processing: <wyn>[\x22" + candidate +"\x22, \x22" + v[j] +"\x22]</wyn>");
      if (v[j].length > 0 && candidate === v[j]) {
        matched = v[j];
        break;
      }
    }

    if (matched) {
      // Cut by offset so repeated delimiters never drop text.
      w[x] = u.slice(start, i);
      i += matched.length;
      start = i;
      w[++x] = u.slice(start);
      // The line break is written as an escape: a quoted string literal
      // cannot contain a raw newline.
      console.logg("Currently processed:" + JSON.stringify(w) +"\n");
    } else {
      ++i;
    }
  }

  console.logg("Return: <wyn>" + JSON.stringify(w) +"</wyn>");
  return w;
};

// Run the demo a quarter of a second after load: clear the console, then
// split the sample string on "." and "--".
setTimeout(function runSplixDemo() {
  console.clear();
  splix("1.23--4",".","--");
}, 250);

1
2
3
@import url("http://fonts.googleapis.com/css?family=Roboto");

body {font: 20px Roboto;}

Usage: splix(string, delimiters...)

Example: splix("1.23--4",".","--")

Returns: ["1","23","4"]


从@stephen sweriduk解决方案开始(我更感兴趣!),我稍微修改了一下,使其更通用和可重用:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
/**
 * Adapted from: http://stackoverflow.com/questions/650022/how-do-i-split-a-string-with-multiple-separators-in-javascript
*/

var StringUtils = {

  /**
   * Flatten an arbitrarily nested array into a new one-level array.
   * http://rosettacode.org/wiki/Flatten_a_list
   * @param arr possibly nested array; not modified
   * @returns flat copy of arr
   */

  flatten : function(arr) {
    var self=this;
    return arr.reduce(function(acc, val) {
        // Array.isArray is safe for null/undefined elements, where reading
        // val.constructor would throw.
        return acc.concat(Array.isArray(val) ? self.flatten(val) : val);
    },[]);
  },

  /**
   * Recursively traverse a list and apply a function to each string item,
   * replacing the item in place when the function changes it.
   * @param list array, modified in place
   * @param expression Expression to use in func
   * @param index first position of list to process
   * @param func function of (item,expression) to apply expression to item
   *
   */

  traverseListFunc : function(list, expression, index, func) {
    var self=this;
    if (!Array.isArray(list)) {
      return;
    }
    // Iterate by position instead of testing the element's truthiness:
    // an empty-string item must not end the traversal.
    for (var i = index; i < list.length; i++) {
      var item = list[i];
      if (typeof item === 'string') {
        var result = func(item, expression);
        // An array result counts as "unchanged" only when it is exactly
        // [item]. A loose string/array comparison is wrong here because
        // "a,b" == ["a","b"] after coercion.
        var unchanged = Array.isArray(result)
          ? (result.length === 1 && result[0] === item)
          : result === item;
        if (!unchanged) {
          list[i] = result;
        }
      }
      if (Array.isArray(list[i])) {
        self.traverseListFunc(list[i], expression, 0, func);
      }
    }
  },

  /**
   * Recursively map function to string
   * @param string text to process
   * @param expressions list of expressions, applied in order
   * @param func function of (item, expressions[i])
   * @returns flat array of the resulting items
   */

  mapFuncToString : function(string, expressions, func) {
    var self=this;
    var list = [string];
    for(var i=0, len=expressions.length; i<len; i++) {
        self.traverseListFunc(list, expressions[i], 0, func);
    }
    return self.flatten(list);
  },

  /**
   * Split a string
   * @param string text to split
   * @param splitters Array of characters to apply the split
   * @returns flat array of pieces
   */

  splitString : function(string, splitters) {
    return this.mapFuncToString(string, splitters, function(item, expression) {
      return item.split(expression);
    });
  },

};

然后

1
2
3
4
var stringToSplit ="people and_other/things";
// The first splitter is a single space; an empty string would split the
// input into individual characters instead of words.
var splitList = [" ","_","/"];
var splittedString=StringUtils.splitString(stringToSplit, splitList);
console.log(splitList, stringToSplit, splittedString);

输出与原版相同:

1
[ ' ', '_', '/' ] 'people and_other/things' [ 'people', 'and', 'other', 'things' ]


1
2
3
4
5
// Declared with var: assigning to undeclared names throws in strict mode.
var a ="a=b,c:d";

var array = ['=',',',':'];

// join() without an argument joins with ",", so every delimiter is replaced
// by a comma and a ends up as the string "a,b,c,d".
for(var i=0; i< array.length; i++){ a= a.split(array[i]).join(); }

这会把每个分隔符都替换成逗号,得到字符串"a,b,c,d";再调用split(',')即可得到数组。


我认为,如果你指定想要保留的内容,而不是想要删除的内容,会更容易些。

就像你只想用英语单词一样,你可以这样使用:

1
text.match(/[a-z'\-]+/gi);

示例(运行代码段):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// Candidate regular expressions; each gets an <option> whose value is its
// index in this array.
var R=[/[a-z'\-]+/gi,/[a-z'\-\s]+/gi];
var s=document.getElementById('s');
for(var i=0;i<R.length;i++)
 {
  var o=document.createElement('option');
  o.innerText=R[i]+'';
  o.value=i;
  s.appendChild(o);
 }
var t=document.getElementById('t');
var r=document.getElementById('r');

// On selection, list every match of the chosen expression in the textarea.
s.onchange=function()
 {
  r.innerHTML='';
  var regex=R[s.value];
  // The placeholder option's value is its label text, not an index, so it
  // selects no regex; show nothing rather than iterating over that text.
  if(!(regex instanceof RegExp))
   return;
  // match() returns null when nothing matches.
  var found=t.value.match(regex)||[];
  for(var k=0;k<found.length;k++)
   {
    var li=document.createElement('li');
    li.innerText=found[k];
    r.appendChild(li);
   }
 };

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
<textarea id="t" style="width:70%;height:12em">even, test; spider-man

But saying o'er what I have said before:
My child is yet a stranger in the world;
She hath not seen the change of fourteen years,
Let two more summers wither in their pride,
Ere we may think her ripe to be a bride.

—Shakespeare, William. The Tragedy of Romeo and Juliet</textarea>

<p>
<select id="s">
 <option selected>Select a regular expression</option>
 <!-- option value="1">/[a-z'\-]+/gi</option>
 <option value="2">/[a-z'\-\s]+/gi</option -->
</select>
</p>


我不知道regex的性能如何,但这里有另一种替代regex的方法,它利用原生的哈希集合(Set),时间复杂度为O(max(str.length, delimiter.length)):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/**
 * Split a string on one delimiter or on any of an array of literal
 * delimiters, using a Set for delimiter lookup.
 *
 * @param {string} str       Text to split.
 * @param {*}      delimiter A single separator (passed straight to
 *                           String.prototype.split) or an array of string
 *                           delimiters of any length.
 * @returns {Array} The pieces of str.
 */
var multiSplit = function(str,delimiter){
    if (!Array.isArray(delimiter))
        return str.split(delimiter);
    if (delimiter.length === 0)
        return [str];
    var hashSet = new Set(delimiter);
    if (hashSet.has(""))
        return str.split("");
    // Distinct delimiter lengths, longest first, so multi-character
    // delimiters are recognised and the longest match wins at each position.
    // With single-character delimiters this is one Set lookup per character.
    var lengths = [];
    hashSet.forEach(function (d) {
        if (typeof d === "string" && lengths.indexOf(d.length) === -1)
            lengths.push(d.length);
    });
    lengths.sort(function (a, b) { return b - a; });
    var lastIndex = 0;
    var result = [];
    var i = 0;
    while (i < str.length){
        var matchedLength = 0;
        for (var k = 0; k < lengths.length; k++){
            var len = lengths[k];
            // Skip lengths that would run past the end of the string.
            if (i + len <= str.length && hashSet.has(str.substring(i, i + len))){
                matchedLength = len;
                break;
            }
        }
        if (matchedLength > 0){
            result.push(str.substring(lastIndex,i));
            i += matchedLength;
            lastIndex = i;
        } else {
            i++;
        }
    }
    result.push(str.substring(lastIndex));
    return result;
};

multiSplit('1,2,3.4.5.6 7 8 9',[',','.',' ']);
// Output: ["1","2","3","4","5","6","7","8","9"]

multiSplit('1,2,3.4.5.6 7 8 9',' ');
// Output: ["1,2,3.4.5.6","7","8","9"]


我发现需要这样做的一个主要场景是按 / 和 \ 两种分隔符拆分文件路径。这个regex有点棘手,所以我把它贴在这里以供参考:

1
var splitFilePath = filePath.split(/[\/\\]/);


不是最好的方法,但可以使用多个不同的分隔符/分隔符进行拆分

HTML

1
2
3
<button onclick="myFunction()">Split with Multiple and Different seperators/delimiters</button>
<p id="demo">
</p>

javascript

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
 * Split a sample sentence on two different separators (' | ' and ' : ')
 * and show the pieces in the element with id "demo".
 */
function myFunction() {

var str ="How : are | you doing : today?";

// Rewrite every ' | ' to ' : ' (split + join acts as a global replace) so a
// single split on ' : ' separates on both delimiters.
var res2 = str.split(' | ').join(' : ').split(' : ');

//you can add countless options (with or without space)

document.getElementById("demo").innerHTML = res2;
}


我使用regexp:

1
2
3
4
5
// Declared with var: assigning to an undeclared name throws in strict mode.
var str =  'Write a program that extracts from a given text all palindromes, e.g."ABBA","lamal","exe".';

// Collect every run of word characters instead of naming the delimiters.
var strNew = str.match(/\w+/g);

// Output: ["Write","a","program","that","extracts","from","a","given","text","all","palindromes","e","g","ABBA","lamal","exe"]