C++ methods for splitting a string
- 2020-11-18 06:23:24
- OfStack
In C++, strings are often split on a delimiter to obtain a sequence of substrings, kept in the same order in which they appear in the original string. Generally, there are two requirement scenarios:
(1) Given one delimiter (a single character or a substring), split the string;
(2) Given one or more delimiters (single characters), split the string.
When the given delimiter does not occur in the original string, the string is not split, and a vector whose only element is the original string is returned.
Note that in the implementations in this article, an empty substring produced by the split is not included in the final substring sequence. For example, if the original string is "a,,b" and the delimiter is ",", then the resulting substring sequence is ["a","b"] rather than ["a","","b"].
1. A single delimiter (single character or substring) splits a string
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
//@brief: Cuts a string into pieces around one delimiter, which is matched as a whole
//        (it may be a single character or a longer substring)
//@param: src The string to cut; delimiter The separator text to look for
//@return: The non-empty pieces of src, in the order they occur; empty pieces are dropped
vector<string> splitStr(const string& src, const string& delimiter) {
    vector<string> pieces;

    // Nothing to produce when the input is empty or consists solely of the delimiter
    if (src.empty() || src == delimiter) {
        return pieces;
    }

    // With no delimiter to search for, the whole input is the only piece
    if (delimiter.empty()) {
        pieces.push_back(src);
        return pieces;
    }

    const string::size_type step = delimiter.length();
    string::size_type begin = 0;
    for (string::size_type hit = src.find(delimiter);
         hit != string::npos;
         hit = src.find(delimiter, begin)) {
        // hit == begin means two delimiters are adjacent (or src starts with one): skip the empty piece
        if (hit > begin) {
            pieces.push_back(src.substr(begin, hit - begin));
        }
        begin = hit + step;
    }

    // Whatever follows the last delimiter is the final piece, unless it is empty
    if (begin < src.length()) {
        pieces.push_back(src.substr(begin));
    }
    return pieces;
}
The tests are as follows:
// Demo driver: splits one sample sentence with three different delimiters and prints,
// for each run, the number of pieces followed by the pieces separated by spaces.
int main(int argc, char* argv[]) {
    const string sample = "I,love,China";

    // Typical case: the delimiter occurs between the words
    vector<string> pieces = splitStr(sample, ",");
    cout << "vetStr.size() = " << pieces.size() << endl;
    for (vector<string>::size_type i = 0; i < pieces.size(); ++i) {
        cout << pieces[i] << " ";
    }

    // Boundary case: the delimiter sits at the very start of the string
    pieces = splitStr(sample, "I,");
    cout << endl << "vetStr.size() = " << pieces.size() << endl;
    for (vector<string>::size_type i = 0; i < pieces.size(); ++i) {
        cout << pieces[i] << " ";
    }

    // The delimiter does not occur in the string at all
    pieces = splitStr(sample, "what");
    cout << endl << "vetStr.size() = " << pieces.size() << endl;
    for (vector<string>::size_type i = 0; i < pieces.size(); ++i) {
        cout << pieces[i] << " ";
    }

    return 0;
}
Output results:
vetStr.size() = 3
I love China
vetStr.size() = 1
love,China
vetStr.size() = 1
I,love,China
2. Single or more delimiters (single characters) that split a string
The implementation is the same as splitting the string with a single delimiter (single character or substring). The key change is that the function used to obtain the delimiter's subscript, std::string::find(...), is replaced by std::string::find_first_of(...). The differences between the two are as follows:
std::string::find(...)
Look for the delimiter as a whole in the original string and return matching subscripts, such as string("I love China").find("love") returns 2.
std::string::find_first_of(...)
Searches the string for the first position at which any one of the characters in the delimiter occurs. The difference from std::string::find(...) is that the delimiter does not need to match as a whole; a match on any single character of the delimiter is enough.
Specific implementation is as follows:
//@brief: Cuts a string at every character that belongs to a set of single-character delimiters
//@param: src The string to cut; delimiter The set of separator characters (each one splits on its own)
//@return: The non-empty pieces of src, in the order they occur; empty pieces are dropped
vector<string> splitStr(const string& src, const string& delimiter) {
    vector<string> tokens;

    // An empty input yields no tokens
    if (src.empty()) {
        return tokens;
    }

    // An empty delimiter set cannot split anything: the input is the single token
    if (delimiter.empty()) {
        tokens.push_back(src);
        return tokens;
    }

    string::size_type cursor = 0;
    while (true) {
        const string::size_type cut = src.find_first_of(delimiter, cursor);
        if (cut == string::npos) {
            break;
        }
        // cut == cursor means the token between two separators is empty: skip it
        if (cut != cursor) {
            tokens.push_back(src.substr(cursor, cut - cursor));
        }
        cursor = cut + 1;  // every separator is exactly one character wide
    }

    // The text after the last separator is the final token, unless it is empty
    if (cursor != src.length()) {
        tokens.push_back(src.substr(cursor));
    }
    return tokens;
}
The tests are as follows:
// Demo driver: splits one sample sentence with three different delimiter sets and prints,
// for each run, the number of pieces followed by the pieces separated by spaces.
int main(int argc, char* argv[]) {
    const string sample = "I,love,China";

    // Typical case: split at every 'h' and at every comma
    vector<string> pieces = splitStr(sample, "h,");
    cout << "vetStr.size() = " << pieces.size() << endl;
    for (vector<string>::size_type i = 0; i < pieces.size(); ++i) {
        cout << pieces[i] << " ";
    }

    // Boundary case: the separators are the first and the last character of the string
    pieces = splitStr(sample, "Ia");
    cout << endl << "vetStr.size() = " << pieces.size() << endl;
    for (vector<string>::size_type i = 0; i < pieces.size(); ++i) {
        cout << pieces[i] << " ";
    }

    // None of the separator characters occurs in the string
    pieces = splitStr(sample, "_:");
    cout << endl << "vetStr.size() = " << pieces.size() << endl;
    for (vector<string>::size_type i = 0; i < pieces.size(); ++i) {
        cout << pieces[i] << " ";
    }

    return 0;
}
Output results:
vetStr.size() = 4
I love C ina
vetStr.size() = 1
,love,Chin
vetStr.size() = 1
I,love,China
3. Negative examples
The following are implementations of the function that splits a string by one or more delimiters (single characters). They are a bit "messy" and are included as negative teaching material: I hope they help you always remember how valuable concise and elegant code is, and you can compare them with the versions above to feel the difference. In addition, appropriate code comments can go a long way in improving the readability of your code.
Messy version 1:
// Comparison callback with the signature qsort expects; orders int values ascending.
// a and b point to the two int elements being compared.
// Returns a negative value, zero, or a positive value when *a is less than,
// equal to, or greater than *b.
int comp(const void*a,const void*b) {
    const int lhs = *static_cast<const int*>(a);
    const int rhs = *static_cast<const int*>(b);
    // Compare instead of subtracting: lhs - rhs overflows (undefined behaviour)
    // when the operands are far apart, e.g. INT_MIN and 1.
    return (lhs > rhs) - (lhs < rhs);
}
//@brief: Specifies a single or more delimiters (single characters) to split a string
//@param: src The original string; delimiter Set of separators
//@return: The non-empty substrings of src in order of appearance. If no separator
//         occurs in src (including when delimiter is empty), the result is the
//         single element src; an empty src yields an empty vector.
vector<string> splitStr(const string& src,const string& delimiter) {
    vector<string> strRes;
    if (src.empty()) {
        return strRes;
    }

    // Record every position in src that holds one of the separator characters.
    // A vector grows as needed, so a separator character that is listed twice
    // in delimiter cannot overrun the storage.
    vector<string::size_type> pos;
    for (size_t i = 0; i < delimiter.size(); ++i) {
        string::size_type index = src.find(delimiter[i]);
        while (index != string::npos) {
            pos.push_back(index);
            index = src.find(delimiter[i], index + 1);
        }
    }

    // Positions were collected per separator character, so they must be sorted
    // before the substrings can be taken in order. Duplicates (from a repeated
    // separator character) are dropped so each cut is made only once.
    std::sort(pos.begin(), pos.end());
    pos.erase(std::unique(pos.begin(), pos.end()), pos.end());

    // Take the substring in front of each separator, skipping empty ones
    string::size_type start = 0;
    for (size_t i = 0; i < pos.size(); ++i) {
        if (pos[i] > start) {
            strRes.push_back(src.substr(start, pos[i] - start));
        }
        start = pos[i] + 1;
    }

    // Take the substring after the last separator; when no separator was
    // found at all this is the whole original string
    if (start < src.size()) {
        strRes.push_back(src.substr(start));
    }
    return strRes;
}
Code description:
(1) Use find() and substr() to achieve the segmentation function;
(2) In the code, the subscripts at which the separators appear must be sorted, so that the substrings can be taken out in order.
Messy Version 2:
//@brief: Specifies a single or more delimiters (single characters) to split a string
//@param: sStr The original string; sSep Set of separators
//@return: The non-empty substrings of sStr in order of appearance. If no separator
//         occurs in sStr (including when sSep is empty), the result is the single
//         element sStr; an empty sStr yields an empty vector.
std::vector<std::string> splitStr(const std::string &sStr, const std::string &sSep) {
    std::vector<std::string> vt;
    std::string::size_type pos = 0;  // start of the substring currently being extracted
    while (pos < sStr.length()) {
        const std::string::size_type pos1 = sStr.find_first_of(sSep, pos);
        if (pos1 == std::string::npos) {
            // No further separator: the rest of the string is the last substring
            // (non-empty because pos < sStr.length())
            vt.push_back(sStr.substr(pos));
            break;
        }
        // pos1 == pos means an empty substring between separators: skip it
        if (pos1 > pos) {
            vt.push_back(sStr.substr(pos, pos1 - pos));
        }
        pos = pos1 + 1;  // continue right after the one-character separator
    }
    return vt;
}
That concludes this detailed look at C++ string splitting methods. For more information about splitting strings in C++, please see the other related articles on this site!