关于c ++：使用C ++ 11拆分字符串

Split a string using C++11

用C++ 11分割字符串最容易的方法是什么？

我看过这篇文章使用的方法，但我觉得应该有一种不那么冗长的方法来使用新标准。

编辑：我希望得到一个 vector<string> 作为结果，并且能够以单个字符作为分隔符。

std::regex_token_iterator 可以基于正则表达式进行通用的分词。如果只是按单个字符做简单拆分，它也许有些大材小用（也许并不算），但它确实可行，而且不算冗长：

1
2
3
4
5
6
7
8

/// Splits `input` into the pieces that lie between matches of `regex`.
///
/// @param input  Text to split.
/// @param regex  Regular expression (default ECMAScript grammar of
///               std::regex) describing the separators.
/// @return The substrings between successive separator matches, in order.
///         A separator at the very start of `input` yields an empty first
///         element.
/// @throws std::regex_error if `regex` is not a valid pattern.
std::vector<std::string> split(const std::string& input, const std::string& regex) {
    const std::regex re(regex);
    // A submatch index of -1 makes the token iterator yield the text
    // *between* matches instead of the matches themselves, i.e. it splits.
    std::sregex_token_iterator first{input.begin(), input.end(), re, -1};
    // A default-constructed token iterator is the end-of-sequence marker.
    std::sregex_token_iterator last;
    return std::vector<std::string>(first, last);
}

相关讨论

这里有一种（也许不那么冗长的）拆分字符串的方法（基于您提到的那篇文章）。

1
2
3
4
5
6
7
8
9
10
11
12
13

#include <string>
#include <sstream>
#include <vector>
/// Splits `s` at every occurrence of the single character `delim`.
///
/// Fields are extracted with std::getline, so empty fields between two
/// adjacent delimiters are kept, while a delimiter at the very end of `s`
/// does not produce a trailing empty field. An empty `s` yields an empty
/// vector.
///
/// @param s      Text to split.
/// @param delim  Separator character.
/// @return The extracted fields in order of appearance.
std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> elems;
    std::stringstream ss(s);
    // getline returns the stream, which converts to false once no further
    // field could be extracted.
    for (std::string item; std::getline(ss, item, delim); ) {
        elems.push_back(item);
    }
    return elems;
}

相关讨论

下面是一个使用boost拆分字符串并用提取的元素填充向量的示例。

1
2
3
4
5
6
7
8
9
10

#include <boost/algorithm/string.hpp>

// Comma-separated input to be split.
std::string my_input("A,B,EE");
// Receives one element per extracted field.
std::vector<std::string> results;

// is_any_of must be namespace-qualified: its argument is a plain character
// array, so argument-dependent lookup cannot find it inside namespace boost.
boost::algorithm::split(results, my_input, boost::is_any_of(","));

// Each field ends up in its own element, in input order.
assert(results[0] =="A");
assert(results[1] =="B");
assert(results[2] =="EE");

另一个regex解决方案受到其他答案的启发，但希望更短更容易阅读：

1
2
3
4

// Text to be tokenized.
std::string s = "String to split here, and here, and here,...";
// Separator pattern: one or more whitespace characters and/or commas.
std::regex regex(R"([\s,]+)");
// Submatch index -1 selects the text between separator matches.
std::sregex_token_iterator it(s.begin(), s.end(), regex, -1);
// A default-constructed token iterator marks the end of the sequence.
std::vector<std::string> words(it, std::sregex_token_iterator());

相关讨论

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

#include <iostream>
#include <algorithm>
#include <vector>
#include <string>

using namespace std;

/// Splits `str` into maximal runs of non-delimiter characters.
///
/// @param str        Text to split.
/// @param delimiter  Character classifier with the <cctype> signature; a
///                   non-zero result marks the character as a separator.
///                   Defaults to ::isspace. It is always called with the
///                   character converted to `unsigned char`, as the <cctype>
///                   functions require.
/// @return The tokens in order of appearance. Runs of consecutive
///         delimiters never produce empty tokens; an empty or
///         all-delimiter input yields an empty vector.
std::vector<std::string> split(const std::string& str, int delimiter(int) = ::isspace){
    // Passing a plain (possibly negative) char to a <cctype> function is
    // undefined behaviour, so go through unsigned char first.
    const auto is_delimiter = [delimiter](char ch) {
        return delimiter(static_cast<unsigned char>(ch)) != 0;
    };

    std::vector<std::string> result;
    const auto last = str.end();
    auto token_begin = std::find_if_not(str.begin(), last, is_delimiter);
    while (token_begin != last) {
        const auto token_end = std::find_if(token_begin, last, is_delimiter);
        result.emplace_back(token_begin, token_end);
        // Skip the separators that follow the token just stored.
        token_begin = std::find_if_not(token_end, last, is_delimiter);
    }
    return result;
}

/// Reads one line from standard input, splits it with split() using its
/// default delimiter, and prints every token on its own line.
int main(){
    std::string line;
    std::getline(std::cin, line);
    const std::vector<std::string> result = split(line);
    // Bind by const reference: iterating by value would copy every token.
    for (const auto& token : result) {
        std::cout << token << std::endl;
    }
}

相关讨论

我的选择是boost::tokenizer，但我没有任何繁重的任务，也没有使用大量数据进行测试。带lambda修改的Boost Doc示例：

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
#include <vector>

/// Tokenizes a sample sentence with boost::tokenizer (default tokenizer
/// function) and collects the tokens into a vector.
int main()
{
    using namespace std;
    using namespace boost;

    string s ="This is, a test";
    vector<string> v;
    tokenizer<> tok(s);
    // A range-for over the tokenizer needs no <algorithm> header and avoids
    // a lambda parameter that shadows `s`.
    for (const auto& token : tok) {
        v.push_back(token);
    }
    // result 4 items: 1)This 2)is 3)a 4)test
    return 0;
}

相关讨论

我不知道这是否不那么冗长，但对于那些更熟悉动态语言（如 JavaScript）的人来说，它可能更容易理解。它使用的唯一 C++11 特性是 lambda。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

#include <algorithm>
#include <string>
#include <cctype>
#include <iostream>
#include <vector>

/// Splits a sample sentence on whitespace by feeding it character by
/// character to a lambda that accumulates the current token.
int main()
{
    using namespace std;
    string s ="hello how are you won't you tell me your name";
    vector<string> tokens;
    string token;

    for_each(s.begin(), s.end(), [&](char c) {
        // <cctype> functions require a value representable as unsigned char;
        // a raw, possibly negative char would be undefined behaviour.
        if (!isspace(static_cast<unsigned char>(c))) {
            token += c;
            return;
        }
        // Whitespace ends the current token; consecutive whitespace leaves
        // `token` empty, so nothing is stored for it.
        if (!token.empty()) tokens.push_back(token);
        token.clear();
    });
    // The last token is not followed by whitespace, so store it here.
    if (!token.empty()) tokens.push_back(token);

    return 0;
}

相关讨论

这是我的答案。冗长、易读、高效。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

/// Breaks `s` into the maximal runs of characters that differ from `c`.
///
/// @param s  Text to tokenize.
/// @param c  Separator character.
/// @return The non-empty tokens in order of appearance; leading, trailing
///         and repeated separators never produce empty tokens.
std::vector<std::string> tokenize(const std::string& s, char c) {
    std::vector<std::string> tokens;

    // `from` is the index of the first character of the next token, or npos
    // once only separators (or nothing) remain.
    std::string::size_type from = s.find_first_not_of(c);
    while (from != std::string::npos) {
        const std::string::size_type to = s.find(c, from);
        if (to == std::string::npos) {
            // No further separator: the token runs to the end of the input.
            tokens.emplace_back(s, from, std::string::npos);
            break;
        }
        tokens.emplace_back(s, from, to - from);
        from = s.find_first_not_of(c, to);
    }
    return tokens;
}

相关讨论

1
2
3
4
5
6
7
8
9
10
11

#include <string>
#include <vector>
#include <sstream>

/// Splits `s` into whitespace-separated words using formatted stream
/// extraction.
///
/// @param s  Text to split.
/// @return The words in order of appearance; leading, trailing and repeated
///         whitespace is skipped, so no empty words are produced.
inline std::vector<std::string> split(const std::string& s) {
    std::vector<std::string> result;
    std::istringstream iss(s);
    // operator>> skips leading whitespace and fails once the input is
    // exhausted, which ends the loop.
    for (std::string w; iss >> w; )
        result.push_back(w);
    return result;
}

相关讨论

这里是一个 C++11 解决方案，它只使用了 std::string::find()。分隔符可以由任意数量的字符组成。解析出的标记通过输出迭代器输出，在我的代码中它通常是 std::back_inserter。

我还没有用UTF-8测试过它，但是我希望它可以工作，只要输入和分隔符都是有效的UTF-8字符串。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

#include <string>

/// Splits `s` at every occurrence of the (possibly multi-character)
/// delimiter `delim` and writes each piece to `out`.
///
/// Empty pieces are kept: adjacent delimiters, or a delimiter at the start
/// or end of `s`, each produce an empty string. At least one piece is always
/// written, so an empty `s` yields a single empty string. An empty `delim`
/// writes `s` unchanged as the only piece.
///
/// @tparam Iter  Output iterator accepting std::string values.
/// @param s      Text to split.
/// @param delim  Separator string.
/// @param out    Destination for the pieces.
/// @return The output iterator after the last written piece.
template<class Iter>
Iter splitStrings(const std::string &s, const std::string &delim, Iter out)
{
    if (delim.empty()) {
        *out++ = s;
        return out;
    }
    // `begin` is the start of the piece currently being extracted.
    std::string::size_type begin = 0;
    for (std::string::size_type end = s.find(delim);
         end != std::string::npos;
         end = s.find(delim, begin))
    {
        // substr() already returns a temporary; wrapping it in std::move
        // would add nothing.
        *out++ = s.substr(begin, end - begin);
        begin = end + delim.length();
    }
    // Whatever follows the last delimiter (possibly nothing) is the final piece.
    *out++ = s.substr(begin);
    return out;
}

一些测试用例：

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

/// Runs splitStrings() on a fixed set of inputs and prints, for each case, a
/// title line followed by one "index: token" line per produced token.
void test()
{
    // Prints the title, splits `input` on `delim` and lists the tokens.
    // The ": " separator matches the documented expected output ("0: ab").
    const auto runCase = [](const char *title,
                            const std::string &input,
                            const std::string &delim) {
        std::cout << title << std::endl;
        std::vector<std::string> out;
        splitStrings(input, delim, std::back_inserter(out));
        size_t counter = 0;
        for (const auto &token : out) {
            std::cout << counter <<": " << token << std::endl;
            ++counter;
        }
    };

    runCase("Empty input:",
            "", ",");

    runCase("Non-empty input, empty delimiter:",
            "Hello, world!", "");

    runCase("Non-empty input, non-empty delimiter"
            ", no delimiter in string:",
            "abxycdxyxydefxya", "xyz");

    runCase("Non-empty input, non-empty delimiter"
            ", delimiter exists string:",
            "abxycdxy!!xydefxya", "xy");

    runCase("Non-empty input, non-empty delimiter"
            ", delimiter exists string"
            ", input contains blank token:",
            "abxycdxyxydefxya", "xy");

    runCase("Non-empty input, non-empty delimiter"
            ", delimiter exists string"
            ", nothing after last delimiter:",
            "abxycdxyxydefxy", "xy");

    runCase("Non-empty input, non-empty delimiter"
            ", only delimiter exists string:",
            "xy", "xy");
}

预期输出：

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27

Empty input:
0:
Non-empty input, empty delimiter:
0: Hello, world!
Non-empty input, non-empty delimiter, no delimiter in string:
0: abxycdxyxydefxya
Non-empty input, non-empty delimiter, delimiter exists string:
0: ab
1: cd
2: !!
3: def
4: a
Non-empty input, non-empty delimiter, delimiter exists string, input contains blank token:
0: ab
1: cd
2:
3: def
4: a
Non-empty input, non-empty delimiter, delimiter exists string, nothing after last delimiter:
0: ab
1: cd
2:
3: def
4:
Non-empty input, non-empty delimiter, only delimiter exists string:
0:
1: