C++ Utilities  4.6.1
Common C++ classes and routines used by my applications such as argument parser, IO and conversion utilities
stringconversion.h
Go to the documentation of this file.
1 #ifndef CONVERSION_UTILITIES_STRINGCONVERSION_H
2 #define CONVERSION_UTILITIES_STRINGCONVERSION_H
3 
5 #include "./binaryconversion.h"
6 
7 #include "../misc/traits.h"
8 
9 #include <string>
10 #include <cstring>
11 #include <sstream>
12 #include <iomanip>
13 #include <initializer_list>
14 #include <list>
15 #include <vector>
16 #include <memory>
17 
18 namespace ConversionUtilities
19 {
20 
/// Releases the buffer \a stringData by passing it to std::free().
/// NOTE(review): the buffer was presumably allocated with std::malloc() or a
/// compatible C allocation function - the generated description in this listing
/// is truncated ("std::m..."), so confirm against the implementation.
29  void operator()(char *stringData)
30  {
31  std::free(stringData);
32  }
33 };
34 
/// Type used to return the result of a string encoding conversion.
/// The first member owns the character buffer and releases it through
/// StringDataDeleter; the second member is a std::size_t.
/// NOTE(review): the std::size_t is presumably the size of the buffer in bytes - confirm.
38 typedef std::pair<std::unique_ptr<char[], StringDataDeleter>, std::size_t> StringData;
39 //typedef std::pair<std::unique_ptr<char>, std::size_t> StringData; // might work too
40 
/// Converts the specified string from one character set to another.
/// NOTE(review): outputBufferSizeFactor presumably scales the size of the output
/// buffer relative to inputBufferSize - confirm against the implementation.
41 CPP_UTILITIES_EXPORT StringData convertString(const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor = 1.0f);
/// Converts the specified UTF-8 string to UTF-16 (little-endian).
42 CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize);
/// Converts the specified UTF-16 (little-endian) string to UTF-8.
43 CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
/// Converts the specified UTF-8 string to UTF-16 (big-endian).
44 CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize);
/// Converts the specified UTF-16 (big-endian) string to UTF-8.
45 CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
/// Converts the specified Latin-1 string to UTF-8.
46 CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
/// Converts the specified UTF-8 string to Latin-1.
47 CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize);
48 
/// Truncates \a str at the first occurrence of \a terminationChar.
/// NOTE(review): the generated description in this listing is cut off; presumably
/// the termination character itself is removed as well - confirm against the implementation.
49 CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar = '\0');
50 
/*!
 * \brief Joins the given \a strings using the specified \a delimiter.
 *
 * Every joined entry is wrapped as \a leftClosure + entry + \a rightClosure and
 * consecutive entries are separated by \a delimiter.
 *
 * \param strings      The strings to be joined.
 * \param delimiter    Inserted between two consecutive joined entries.
 * \param omitEmpty    If true, empty entries are skipped entirely (no closures, no delimiter).
 * \param leftClosure  Put in front of every joined entry.
 * \param rightClosure Put after every joined entry.
 * \returns The joined string; an empty string if there is nothing to join.
 *
 * \remarks Empty entries which are kept (\a omitEmpty is false) still take part in the
 *          delimiting, so joining {"", "a"} with "," yields ",a".
 */
template <class Container = std::initializer_list<std::string> >
typename Container::value_type joinStrings(const Container &strings, const typename Container::value_type &delimiter = typename Container::value_type(), bool omitEmpty = false, const typename Container::value_type &leftClosure = typename Container::value_type(), const typename Container::value_type &rightClosure = typename Container::value_type())
{
    typename Container::value_type res;
    if(!strings.size()) {
        return res;
    }
    // first pass: count the entries to be joined and their accumulated length
    std::size_t entries = 0, size = 0;
    for(const auto &str : strings) {
        if(!omitEmpty || !str.empty()) {
            size += str.size();
            ++entries;
        }
    }
    if(!entries) {
        return res;
    }
    // allocate the final size at once to avoid reallocations while appending
    size += (entries * leftClosure.size()) + (entries * rightClosure.size()) + ((entries - 1) * delimiter.size());
    res.reserve(size);
    // second pass: do the actual joining
    // note: whether a delimiter is required is tracked explicitly; checking res.empty()
    //       instead would swallow the delimiter after leading empty entries
    bool firstEntry = true;
    for(const auto &str : strings) {
        if(omitEmpty && str.empty()) {
            continue;
        }
        if(firstEntry) {
            firstEntry = false;
        } else {
            res.append(delimiter);
        }
        res.append(leftClosure);
        res.append(str);
        res.append(rightClosure);
    }
    return res;
}
93 
/*!
 * \brief Specifies the role of empty parts when splitting strings.
 */
enum class EmptyPartsTreat
{
    Keep, /**< empty parts are added to the result like any other part */
    Omit, /**< empty parts are dropped */
    Merge /**< an empty part following an existing part causes the next non-empty part to be appended
               to that existing part (joined by one delimiter); leading empty parts are dropped */
};

/*!
 * \brief Splits the given \a string at the specified \a delimiter.
 *
 * \param string         The string to be split.
 * \param delimiter      The delimiter the string is split at.
 * \param emptyPartsRole Specifies how empty parts are treated.
 * \param maxParts       The maximal number of parts; the remainder of the string (including any
 *                       further delimiters) becomes the last part. Values less than or equal to
 *                       zero mean there is no limit.
 * \returns The parts as \a Container. An empty \a string yields an empty container.
 *
 * \remarks
 * - A trailing delimiter does not produce a trailing empty part.
 * - An empty \a delimiter can not split anything: a non-empty \a string is returned as a single
 *   part. (Without this check the search position would never advance.)
 */
template <class Container = std::list<std::string> >
Container splitString(const typename Container::value_type &string, const typename Container::value_type &delimiter, EmptyPartsTreat emptyPartsRole = EmptyPartsTreat::Keep, int maxParts = -1)
{
    typedef typename Container::value_type StringType;
    typedef typename StringType::size_type SizeType;

    Container res;
    // an empty delimiter matches at every position without consuming anything
    if(delimiter.empty()) {
        if(!string.empty()) {
            res.emplace_back(string);
        }
        return res;
    }

    // from now on maxParts is the number of parts after which the remainder is taken as a whole
    --maxParts;
    bool merge = false;
    for(SizeType i = 0, end = string.size(), delimPos; i < end; i = delimPos + delimiter.size()) {
        delimPos = string.find(delimiter, i);
        if(!merge && maxParts >= 0 && res.size() == static_cast<SizeType>(maxParts)) {
            if(delimPos == i && emptyPartsRole == EmptyPartsTreat::Merge) {
                if(!res.empty()) {
                    // the empty part is merged with the previous part; the limit is applied afterwards
                    merge = true;
                    continue;
                }
            }
            // limit reached: ignore further delimiters so the remainder becomes the last part
            delimPos = StringType::npos;
        }
        if(delimPos == StringType::npos) {
            delimPos = string.size();
        }
        if(emptyPartsRole == EmptyPartsTreat::Keep || i != delimPos) {
            if(merge) {
                res.back().append(delimiter);
                res.back().append(string.substr(i, delimPos - i));
                merge = false;
            } else {
                res.emplace_back(string.substr(i, delimPos - i));
            }
        } else if(emptyPartsRole == EmptyPartsTreat::Merge) {
            // an empty part can only be merged if there is a previous part
            if(!res.empty()) {
                merge = true;
            }
        }
    }
    return res;
}
149 
/*!
 * \brief Returns whether \a str starts with \a phrase.
 *
 * \param str    The string to be checked.
 * \param phrase The expected beginning of \a str.
 * \returns True if the first phrase.size() characters of \a str equal \a phrase. This includes
 *          the cases that \a phrase is empty and that both strings are equal.
 */
template <typename StringType>
bool startsWith(const StringType &str, const StringType &phrase)
{
    if(str.size() < phrase.size()) {
        return false;
    }
    // str is at least as long as phrase, so iterating over phrase never runs past the end of str
    auto stri = str.cbegin();
    for(auto phrasei = phrase.cbegin(), phraseend = phrase.cend(); phrasei != phraseend; ++phrasei, ++stri) {
        if(*stri != *phrasei) {
            return false;
        }
    }
    return true;
}
168 
/*!
 * \brief Returns whether \a str starts with \a phrase.
 *
 * \param str    The string to be checked.
 * \param phrase The expected beginning of \a str as null-terminated string.
 * \returns True if \a str begins with the characters of \a phrase. This includes the cases
 *          that \a phrase is empty and that both strings are equal.
 */
template <typename StringType>
bool startsWith(const StringType &str, const typename StringType::value_type *phrase)
{
    for(auto stri = str.cbegin(), strend = str.cend(); stri != strend; ++stri, ++phrase) {
        if(!*phrase) {
            // phrase consumed without a mismatch
            return true;
        }
        if(*stri != *phrase) {
            return false;
        }
    }
    // str is exhausted: it starts with phrase only if phrase has been consumed completely as well
    return !*phrase;
}
184 
/*!
 * \brief Returns whether \a str contains the specified \a substrings.
 * \remarks The substrings must occur in the specified order and must not overlap: the search
 *          for each substring starts right behind the match of the previous one.
 */
template <typename StringType>
bool containsSubstrings(const StringType &str, std::initializer_list<StringType> substrings)
{
    typename StringType::size_type searchStart = 0;
    for(auto substr = substrings.begin(), last = substrings.end(); substr != last; ++substr) {
        const typename StringType::size_type matchPos = str.find(*substr, searchStart);
        if(matchPos == StringType::npos) {
            return false;
        }
        // continue searching behind the current match
        searchStart = matchPos + substr->size();
    }
    return true;
}
201 
/*!
 * \brief Returns whether \a str contains the specified \a substrings (null-terminated strings).
 * \remarks
 * - The substrings must occur in the specified order and must not overlap: the search
 *   for each substring starts right behind the match of the previous one.
 * - The length of a substring is determined via std::char_traits so the function works
 *   for any character type (e.g. std::wstring), not only for char.
 */
template <typename StringType>
bool containsSubstrings(const StringType &str, std::initializer_list<const typename StringType::value_type *> substrings)
{
    typedef std::char_traits<typename StringType::value_type> CharTraits;
    typename StringType::size_type currentPos = 0;
    for(const auto *substr : substrings) {
        if((currentPos = str.find(substr, currentPos)) == StringType::npos) {
            return false;
        }
        // continue searching behind the current match
        currentPos += CharTraits::length(substr);
    }
    return true;
}
218 
/*!
 * \brief Replaces all occurrences of \a find with \a replace in the specified \a str.
 *
 * \param str     The string to be modified in place.
 * \param find    The phrase to search for.
 * \param replace The replacement for each occurrence of \a find.
 *
 * \remarks
 * - The search continues behind the inserted replacement so occurrences of \a find
 *   within \a replace are not replaced again.
 * - If \a find is empty, \a str is left unchanged. (An empty phrase matches at every
 *   position, so the replacement loop would never terminate.)
 */
template <typename StringType>
void findAndReplace(StringType &str, const StringType &find, const StringType &replace)
{
    if(find.empty()) {
        return;
    }
    for(typename StringType::size_type i = 0; (i = str.find(find, i)) != StringType::npos; i += replace.size()) {
        str.replace(i, find.size(), replace);
    }
}
229 
/*!
 * \brief Returns the character representation of the specified \a digit.
 * \remarks Digits up to 9 are mapped to '0' to '9'; greater digits are mapped to
 *          upper-case letters starting with 'A' for 10. The digit is not validated.
 */
template <typename CharType>
CharType digitToChar(CharType digit)
{
    if(digit > 9) {
        // 10 -> 'A', 11 -> 'B', ...
        return digit + 'A' - 10;
    }
    return digit + '0';
}
247 
254 template <typename IntegralType, class StringType = std::string, Traits::EnableIf<std::is_integral<IntegralType>, Traits::Not<std::is_signed<IntegralType> > >...>
255 StringType numberToString(IntegralType number, typename StringType::value_type base = 10)
256 {
257  std::size_t resSize = 0;
258  for(auto n = number; n; n /= base, ++resSize);
259  StringType res;
260  res.reserve(resSize);
261  do {
262  res.insert(res.begin(), digitToChar<typename StringType::value_type>(number % base));
263  number /= base;
264  } while(number);
265  return res;
266 }
267 
274 template <typename IntegralType, class StringType = std::string, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType> >...>
275 StringType numberToString(IntegralType number, typename StringType::value_type base = 10)
276 {
277  const bool negative = number < 0;
278  std::size_t resSize;
279  if(negative) {
280  number = -number, resSize = 1;
281  } else {
282  resSize = 0;
283  }
284  for(auto n = number; n; n /= base, ++resSize);
285  StringType res;
286  res.reserve(resSize);
287  do {
288  res.insert(res.begin(), digitToChar<typename StringType::value_type>(number % base));
289  number /= base;
290  } while(number);
291  if(negative) {
292  res.insert(res.begin(), '-');
293  }
294  return res;
295 }
296 
305 template <typename FloatingType, class StringType = std::string, Traits::EnableIf<std::is_floating_point<FloatingType> >...>
306 StringType numberToString(FloatingType number, typename StringType::value_type base = 10)
307 {
308  std::basic_stringstream<typename StringType::value_type> ss;
309  ss << std::setbase(base) << number;
310  return ss.str();
311 }
312 
317 template <typename CharType>
318 CharType charToDigit(CharType character, CharType base)
319 {
320  CharType res;
321  if(character >= '0' && character <= '9') {
322  res = character - '0';
323  } else if(character >= 'a' && character <= 'z') {
324  res = character - 'a' + 10;
325  } else if(character >= 'A' && character <= 'Z') {
326  res = character - 'A' + 10;
327  } else {
328  throw ConversionException("The string is no valid number");
329  }
330  if(res >= base) {
331  throw ConversionException("The string is no valid number");
332  }
333  return res;
334 }
335 
343 template <typename IntegralType, class StringType, Traits::EnableIf<std::is_integral<IntegralType>, Traits::Not<std::is_signed<IntegralType> > >...>
344 IntegralType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
345 {
346  IntegralType result = 0;
347  for(const auto &c : string) {
348  if(c == ' ') {
349  continue;
350  }
351  result *= base;
352  result += charToDigit<typename StringType::value_type>(c, base);
353  }
354  return result;
355 }
356 
364 template <typename IntegralType, class StringType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType> >...>
365 IntegralType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
366 {
367  auto i = string.begin();
368  auto end = string.end();
369  if(i == end) {
370  return 0;
371  }
372  const bool negative = (*i == '-');
373  if(negative) {
374  ++i;
375  }
376  IntegralType result = 0;
377  for(; i != end; ++i) {
378  if(*i == ' ') {
379  continue;
380  }
381  result *= base;
382  result += charToDigit<typename StringType::value_type>(*i, base);
383  }
384  return negative ? -result : result;
385 }
386 
396 template <typename FloatingType, class StringType, Traits::EnableIf<std::is_floating_point<FloatingType> >...>
397 FloatingType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
398 {
399  std::basic_stringstream<typename StringType::value_type> ss;
400  ss << std::setbase(base) << string;
401  FloatingType result;
402  if((ss >> result) && ss.eof()) {
403  return result;
404  } else {
405  throw ConversionException("The string is no valid number.");
406  }
407 }
408 
416 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, Traits::Not<std::is_signed<IntegralType> > >...>
417 IntegralType stringToNumber(const CharType *string, unsigned char base = 10)
418 {
419  IntegralType result = 0;
420  for(; *string; ++string) {
421  if(*string == ' ') {
422  continue;
423  }
424  result *= base;
425  result += charToDigit<CharType>(*string, base);
426  }
427  return result;
428 }
429 
437 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType> >...>
438 IntegralType stringToNumber(const CharType *string, unsigned char base = 10)
439 {
440  if(!*string) {
441  return 0;
442  }
443  const bool negative = (*string == '-');
444  if(negative) {
445  ++string;
446  }
447  IntegralType result = 0;
448  for(; *string; ++string) {
449  if(*string == ' ') {
450  continue;
451  }
452  result *= base;
453  result += charToDigit<CharType>(*string, base);
454  }
455  return negative ? -result : result;
456 }
457 
467 template <typename T>
468 std::string interpretIntegerAsString(T integer, int startOffset = 0)
469 {
470  char buffer[sizeof(T)];
471  ConversionUtilities::BE::getBytes(integer, buffer);
472  return std::string(buffer + startOffset, sizeof(T) - startOffset);
473 }
474 
/// Converts the specified data size in byte to its equivalent std::string representation.
/// NOTE(review): includeByte presumably appends the exact size in bytes to the result - confirm against the implementation.
475 CPP_UTILITIES_EXPORT std::string dataSizeToString(uint64 sizeInByte, bool includeByte = false);
/// Converts the specified bitrate in kbit/s to its equivalent std::string representation.
/// NOTE(review): useByteInsteadOfBits presumably switches the unit of the result from bits to bytes - confirm against the implementation.
476 CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits = false);
/// Encodes the specified data (dataSize bytes) to Base64.
477 CPP_UTILITIES_EXPORT std::string encodeBase64(const byte *data, uint32 dataSize);
/// Decodes the specified Base64 encoded string.
/// NOTE(review): the returned pair presumably consists of the decoded buffer and its size in bytes - confirm against the implementation.
478 CPP_UTILITIES_EXPORT std::pair<std::unique_ptr<byte[]>, uint32> decodeBase64(const char *encodedStr, const uint32 strSize);
479 
480 }
481 
482 #endif // CONVERSION_UTILITIES_STRINGCONVERSION_H
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (little-endian).
bool startsWith(const StringType &str, const StringType &phrase)
Returns whether str starts with phrase.
void operator()(char *stringData)
Deletes the specified stringData with std::free(), because the memory has been allocated using std::m...
CPP_UTILITIES_EXPORT std::string encodeBase64(const byte *data, uint32 dataSize)
Encodes the specified data to Base64.
IntegralType stringToNumber(const StringType &string, typename StringType::value_type base=10)
Converts the given string to a number assuming string uses the specified base.
bool containsSubstrings(const StringType &str, std::initializer_list< StringType > substrings)
Returns whether str contains the specified substrings.
The ConversionException class is thrown by the various conversion functions of this library when a co...
CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (big-endian) string to UTF-8.
CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar='\0')
Truncates all characters after the first occurrence of the specified terminationChar and the terminat...
StringType numberToString(IntegralType number, typename StringType::value_type base=10)
Converts the given number to its equivalent string representation using the specified base...
std::uint64_t uint64
unsigned 64-bit integer
Definition: types.h:49
The StringDataDeleter struct deletes the data of a StringData instance.
std::string interpretIntegerAsString(T integer, int startOffset=0)
Interprets the given integer at the specified position as std::string using the specified byte order...
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (big-endian).
EmptyPartsTreat
Specifies the role of empty parts when splitting strings.
CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to Latin-1.
void findAndReplace(StringType &str, const StringType &find, const StringType &replace)
Replaces all occurrences of find with replace in the specified str.
Contains several functions providing conversions between different data types.
std::uint32_t uint32
unsigned 32-bit integer
Definition: types.h:44
std::pair< std::unique_ptr< char[], StringDataDeleter >, std::size_t > StringData
Type used to return string encoding conversion result.
CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (little-endian) string to UTF-8.
CharType charToDigit(CharType character, CharType base)
Returns number/digit of the specified character representation using the specified base...
Container::value_type joinStrings(const Container &strings, const typename Container::value_type &delimiter=typename Container::value_type(), bool omitEmpty=false, const typename Container::value_type &leftClosure=typename Container::value_type(), const typename Container::value_type &rightClosure=typename Container::value_type())
Joins the given strings using the specified delimiter.
CPP_UTILITIES_EXPORT StringData convertString(const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor=1.0f)
Converts the specified string from one character set to another.
CharType digitToChar(CharType digit)
Returns the character representation of the specified digit.
std::uint8_t byte
unsigned byte
Definition: types.h:14
CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified Latin-1 string to UTF-8.
Container splitString(const typename Container::value_type &string, const typename Container::value_type &delimiter, EmptyPartsTreat emptyPartsRole=EmptyPartsTreat::Keep, int maxParts=-1)
Splits the given string at the specified delimiter.
CPP_UTILITIES_EXPORT std::string dataSizeToString(uint64 sizeInByte, bool includeByte=false)
Converts the specified data size in byte to its equivalent std::string representation.
#define CPP_UTILITIES_EXPORT
Marks the symbol to be exported by the c++utilities library.
CPP_UTILITIES_EXPORT std::pair< std::unique_ptr< byte[]>, uint32 > decodeBase64(const char *encodedStr, const uint32 strSize)
Decodes the specified Base64 encoded string.
CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits=false)
Converts the specified bitrate in kbit/s to its equivalent std::string representation.