C++ methods for splitting strings

  • 2020-11-18 06:23:24
  • OfStack

In C++, a string often needs to be split on a delimiter to obtain a sequence of substrings, in the same order in which they appear in the original string. Generally, there are two scenarios:
(1) Given a single delimiter (a single character or a substring), split the string;
(2) Given one or more delimiters (single characters), split the string.

When the given delimiter does not occur in the original string, the string is not split, and a vector whose only element is the original string is returned.

Note that in the implementations in this article, an empty substring produced by splitting is not included in the final substring sequence. For example, if the original string is "a,,b" and the delimiter is ",", then the resulting substring sequence is ["a", "b"] instead of ["a", "", "b"].

1. Splitting a string on a single delimiter (single character or substring)


#include <iostream>
#include <vector>
#include <string>
using namespace std;

//@brief:  Splits a string on one delimiter, matched as a whole (a single character or a substring); empty pieces are discarded
//@param: src  String to split; delimiter  Separator, a single character or a substring
//@return: The non-empty pieces of src, in their original order
vector<string> splitStr(const string& src, const string& delimiter) {
	vector<string> pieces;

	// Nothing to extract from an empty input, or from an input that is exactly the delimiter
	if (src.empty() || src == delimiter) {
		return pieces;
	}
	// With an empty delimiter the input is handed back whole
	if (delimiter.empty()) {
		pieces.push_back(src);
		return pieces;
	}

	string::size_type begin = 0;
	for (string::size_type hit = src.find(delimiter);
	     hit != string::npos;
	     hit = src.find(delimiter, begin)) {
		// hit == begin means the piece between two delimiters is empty, so it is skipped
		if (hit > begin) {
			pieces.push_back(src.substr(begin, hit - begin));
		}
		begin = hit + delimiter.length();
	}
	// Whatever follows the last delimiter (the whole input when none was found)
	if (begin < src.length()) {
		pieces.push_back(src.substr(begin));
	}

	return pieces;
}

The tests are as follows:


// Demo driver: splits one sample string with three different delimiters and prints each result.
int main(int argc, char* argv[]) {
	const string sample = "I,love,China";

	// Prints the piece count on its own line, then every piece followed by a space
	const auto show = [](const vector<string>& pieces) {
		cout << "vetStr.size() = " << pieces.size() << endl;
		for (const auto& piece : pieces) {
			cout << piece << " ";
		}
	};

	//  Normal segmentation
	auto pieces = splitStr(sample, ",");
	show(pieces);

	//  Boundary test: a multi-character delimiter located at the very start of the string
	pieces = splitStr(sample, "I,");
	cout << endl;
	show(pieces);

	//  The delimiter does not occur in the string
	pieces = splitStr(sample, "what");
	cout << endl;
	show(pieces);
	return 0;
}

Output results:

[

vetStr.size() = 3
I love China
vetStr.size() = 1
love,China
vetStr.size() = 1
I,love,China

]

2. Splitting a string on one or more delimiters (single characters)

The implementation is almost the same as the one that splits a string on a single delimiter (single character or substring). The key difference is that the function used to obtain the delimiter's index changes from std::string::find(...) to std::string::find_first_of(...). The differences between the two are as follows:

[

std::string::find(...)
Look for the delimiter as a whole in the original string and return matching subscripts, such as string("I love China").find("love") returns 2.
std::string::find_first_of(...)
Searches the string for the first position at which any one of the characters in the delimiter occurs. The difference from std::string::find(...) is that the delimiter does not need to match as a whole; a match on any single character of the delimiter is enough.

]

Specific implementation is as follows:


//@brief:  Splits a string at every character that belongs to a delimiter set; empty pieces are discarded
//@param: src  String to split; delimiter  One or more single-character separators
//@return: The non-empty pieces of src, in their original order
vector<string> splitStr(const string& src, const string& delimiter) {
	vector<string> tokens;

	// An empty input has no pieces
	if (src.empty()) {
		return tokens;
	}
	// With no separators the input is handed back whole
	if (delimiter.empty()) {
		tokens.push_back(src);
		return tokens;
	}

	string::size_type cursor = 0;
	while (true) {
		const string::size_type cut = src.find_first_of(delimiter, cursor);
		if (cut == string::npos) {
			break;
		}
		// cut == cursor means the piece in front of this separator is empty, so it is skipped
		if (cut > cursor) {
			tokens.push_back(src.substr(cursor, cut - cursor));
		}
		cursor = cut + 1;
	}
	// Whatever follows the last separator (the whole input when none was found)
	if (cursor < src.length()) {
		tokens.push_back(src.substr(cursor));
	}

	return tokens;
}

The tests are as follows:


// Demo driver: splits one sample string with three different delimiter sets and prints each result.
int main(int argc, char* argv[]) {
	const string sample = "I,love,China";

	// Prints the piece count on its own line, then every piece followed by a space
	const auto show = [](const vector<string>& pieces) {
		cout << "vetStr.size() = " << pieces.size() << endl;
		for (const auto& piece : pieces) {
			cout << piece << " ";
		}
	};

	//  Normal segmentation: split on either 'h' or a comma
	auto pieces = splitStr(sample, "h,");
	show(pieces);

	//  Boundary test: the separators are the first and the last character of the string
	pieces = splitStr(sample, "Ia");
	cout << endl;
	show(pieces);

	//  None of the separators occurs in the string
	pieces = splitStr(sample, "_:");
	cout << endl;
	show(pieces);
	return 0;
}

Output results:

[

vetStr.size() = 4
I love C ina
vetStr.size() = 1
,love,Chin
vetStr.size() = 1
I,love,China

]

3. Negative examples

The following are two implementations of a function that splits a string on one or more delimiters (single characters). They are somewhat "messy" and are presented as negative examples; I hope they help you remember how valuable concise, elegant code is, which you can appreciate by comparing them with the version above. In addition, appropriate code comments go a long way toward improving the readability of your code.

Messy version 1:


//qsort comparison callback: orders two int values ascending.
//The values are compared rather than subtracted, because a subtraction can overflow
//(undefined behaviour) when the operands have opposite signs.
int comp(const void*a,const void*b) {
	const int lhs=*static_cast<const int*>(a);
	const int rhs=*static_cast<const int*>(b);
	return (lhs>rhs)-(lhs<rhs);
}

//@brief:  Specifies a single or more delimiters (single characters) to split a string; empty pieces are discarded
//@param: src  The original string; delimiter  Set of separators
//@return: The non-empty pieces of src in their original order. An empty src yields an empty vector;
//         when no separator occurs in src (including an empty delimiter) the only element is src itself
vector<string> splitStr(const string& src,const string& delimiter) {
	vector<string> strRes;
	if(src.empty()) {
		return strRes;
	}

	// Collect the index of every separator occurrence, one separator character at a time.
	// Indices are stored as int because comp() compares int values.
	vector<int> pos;
	for(size_t i=0;i<delimiter.size();++i) {
		// A character repeated inside delimiter would record the same indices twice
		if(delimiter.find(delimiter[i])!=i) {
			continue;
		}
		string::size_type index=src.find(delimiter[i]);
		while(index!=string::npos) {
			pos.push_back(static_cast<int>(index));
			index=src.find(delimiter[i],index+1);
		}
	}
	// No separator present: the string is not split
	if(pos.empty()) {
		strRes.push_back(src);
		return strRes;
	}
	// The indices were gathered per separator character, so they must be sorted
	// before the substrings can be taken out in order
	qsort(pos.data(),pos.size(),sizeof(int),comp);

	// Take the substring in front of each separator
	string::size_type start=0;
	for(size_t i=0;i<pos.size();++i) {
		const string::size_type cut=static_cast<string::size_type>(pos[i]);
		if(cut>start) {
			strRes.push_back(src.substr(start,cut-start));
		}
		start=cut+1;
	}
	// Take out the last substring
	if(start<src.size()) {
		strRes.push_back(src.substr(start));
	}
	return strRes;
}

Code description:
(1) find() and substr() are used to implement the splitting;
(2) The indices at which the separators occur must be sorted so that the substrings can be taken out in order.

Messy Version 2:


//@brief:  Specifies a single or more delimiters (single characters) to split a string; empty pieces are discarded
//@param: sStr  The original string; sSep  Set of separators
//@return: The non-empty pieces of sStr in their original order. An empty sStr yields an empty vector;
//         when no separator occurs in sStr (including an empty sSep) the only element is sStr itself
std::vector<std::string> splitStr(const std::string &sStr, const std::string &sSep) {
  std::vector<std::string> vt;

  // pos is the start of the piece currently being extracted
  std::string::size_type pos = 0;
  while (pos < sStr.length()) {
    const std::string::size_type pos1 = sStr.find_first_of(sSep, pos);
    if (pos1 == std::string::npos) {
      // No further separator: the rest of the string is the last piece
      vt.push_back(sStr.substr(pos));
      break;
    }
    // pos1 == pos means the piece in front of this separator is empty, so it is skipped
    if (pos1 > pos) {
      vt.push_back(sStr.substr(pos, pos1 - pos));
    }
    pos = pos1 + 1;
  }

  return vt;
}

That concludes this detailed look at C++ string splitting methods. For more information about splitting strings in C++, please see the other related articles on this site!


Related articles: