C++ Utilities  4.14.2
Useful C++ classes and routines such as argument parser, IO and conversion utilities
stringconversion.h
Go to the documentation of this file.
1 #ifndef CONVERSION_UTILITIES_STRINGCONVERSION_H
2 #define CONVERSION_UTILITIES_STRINGCONVERSION_H
3 
4 #include "./binaryconversion.h"
6 
7 #include "../misc/traits.h"
8 
9 #include <cstring>
10 #include <initializer_list>
11 #include <iomanip>
12 #include <list>
13 #include <memory>
14 #include <sstream>
15 #include <string>
16 #include <vector>
17 
18 namespace ConversionUtilities {
19 
28  void operator()(char *stringData)
29  {
30  std::free(stringData);
31  }
32 };
33 
37 typedef std::pair<std::unique_ptr<char[], StringDataDeleter>, std::size_t> StringData;
38 //typedef std::pair<std::unique_ptr<char>, std::size_t> StringData; // might work too
39 
41  const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor = 1.0f);
42 CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize);
43 CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
44 CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize);
45 CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
46 CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
47 CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize);
48 
49 CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar = '\0');
50 
/*!
 * \brief Joins the given \a strings using the specified \a delimiter.
 *
 * Each joined entry is enclosed by \a leftClosure and \a rightClosure; the \a delimiter
 * is put between two consecutive joined entries.
 *
 * \param strings      The container of strings to be joined.
 * \param delimiter    The string placed between two joined entries (empty by default).
 * \param omitEmpty    Whether empty entries of \a strings are skipped entirely.
 * \param leftClosure  The string put before every joined entry (empty by default).
 * \param rightClosure The string put after every joined entry (empty by default).
 * \returns Returns the joined string; it is empty if no entry has been joined.
 */
template <class Container = std::initializer_list<std::string>>
typename Container::value_type joinStrings(const Container &strings,
    const typename Container::value_type &delimiter = typename Container::value_type(), bool omitEmpty = false,
    const typename Container::value_type &leftClosure = typename Container::value_type(),
    const typename Container::value_type &rightClosure = typename Container::value_type())
{
    typename Container::value_type res;

    // first pass: determine the number of joined entries and their accumulated size
    std::size_t entries = 0, size = 0;
    for (const auto &str : strings) {
        if (omitEmpty && str.empty()) {
            continue;
        }
        size += str.size();
        ++entries;
    }
    if (!entries) {
        return res;
    }

    // reserve the exact result size to avoid reallocations while appending
    size += entries * (leftClosure.size() + rightClosure.size()) + (entries - 1) * delimiter.size();
    res.reserve(size);

    // second pass: append the entries; track the first entry explicitly because the
    // result might still be empty after joining an empty entry without closures
    bool first = true;
    for (const auto &str : strings) {
        if (omitEmpty && str.empty()) {
            continue;
        }
        if (first) {
            first = false;
        } else {
            res.append(delimiter);
        }
        res.append(leftClosure);
        res.append(str);
        res.append(rightClosure);
    }
    return res;
}
96 
/*!
 * \brief Converts the specified \a arrayOfLines to a multiline string.
 *
 * The lines are joined via joinStrings() using a single line feed as delimiter;
 * empty lines are not omitted.
 *
 * \returns Returns the joined string. Its type is the element type of \a Container
 *          because that is what joinStrings() returns.
 */
template <class Container = std::initializer_list<std::string>>
inline typename Container::value_type toMultiline(const Container &arrayOfLines)
{
    return joinStrings(arrayOfLines, "\n", false);
}
104 
/*!
 * \brief Specifies the role of empty parts when splitting strings.
 *
 * - Keep:  empty parts are kept in the result
 * - Omit:  empty parts are dropped from the result
 * - Merge: an empty part causes the following part to be appended to the previous one
 */
enum class EmptyPartsTreat { Keep, Omit, Merge };
113 
123 template <class Container = std::list<std::string>>
124 Container splitString(const typename Container::value_type &string, const typename Container::value_type &delimiter,
125  EmptyPartsTreat emptyPartsRole = EmptyPartsTreat::Keep, int maxParts = -1)
126 {
127  --maxParts;
128  Container res;
129  bool merge = false;
130  for (typename Container::value_type::size_type i = 0, end = string.size(), delimPos; i < end; i = delimPos + delimiter.size()) {
131  delimPos = string.find(delimiter, i);
132  if (!merge && maxParts >= 0 && res.size() == static_cast<typename Container::value_type::size_type>(maxParts)) {
133  if (delimPos == i && emptyPartsRole == EmptyPartsTreat::Merge) {
134  if (!res.empty()) {
135  merge = true;
136  continue;
137  }
138  }
139  delimPos = Container::value_type::npos;
140  }
141  if (delimPos == Container::value_type::npos) {
142  delimPos = string.size();
143  }
144  if (emptyPartsRole == EmptyPartsTreat::Keep || i != delimPos) {
145  if (merge) {
146  res.back().append(delimiter);
147  res.back().append(string.substr(i, delimPos - i));
148  merge = false;
149  } else {
150  res.emplace_back(string.substr(i, delimPos - i));
151  }
152  } else if (emptyPartsRole == EmptyPartsTreat::Merge) {
153  if (!res.empty()) {
154  merge = true;
155  }
156  }
157  }
158  return res;
159 }
160 
/*!
 * \brief Splits the given \a string (which might also be a string view) at the specified \a delimiter.
 *
 * \param string    The string to be split.
 * \param delimiter The delimiter the string is split at.
 * \param maxParts  The maximal number of parts. Values less than or equal to zero
 *                  disable the limit. Once the limit is reached, the last part
 *                  contains the remaining string.
 * \returns Returns the parts in a container of type \a Container; empty parts are kept.
 * \remarks
 * - Nothing is emitted for the (empty) remainder after a trailing delimiter.
 * - An empty \a delimiter never matches in a useful way; in this case the whole
 *   \a string is returned as single part (or no part at all if \a string is empty).
 */
template <class Container = std::list<std::string>>
Container splitStringSimple(const typename Container::value_type &string, const typename Container::value_type &delimiter, int maxParts = -1)
{
    using String = typename Container::value_type;
    using Size = typename String::size_type;

    Container parts;

    // an empty delimiter is found at every position without advancing the search
    // position; bail out early to prevent an endless loop
    if (delimiter.empty()) {
        if (!string.empty()) {
            parts.emplace_back(string);
        }
        return parts;
    }

    // compare against the number of parts allowed *before* the last one
    --maxParts;

    const Size length = string.size();
    Size sepPos;
    for (Size start = 0; start < length; start = sepPos + delimiter.size()) {
        sepPos = string.find(delimiter, start);
        // the last allowed part takes the whole rest of the string
        if (maxParts >= 0 && parts.size() == static_cast<Size>(maxParts)) {
            sepPos = String::npos;
        }
        if (sepPos == String::npos) {
            sepPos = string.size();
        }
        parts.emplace_back(string.substr(start, sepPos - start));
    }
    return parts;
}
187 
191 template <class Container = std::vector<std::string>> inline std::vector<std::string> toArrayOfLines(const std::string &multilineString)
192 {
193  return splitString<Container>(multilineString, "\n", EmptyPartsTreat::Keep);
194 }
195 
/*!
 * \brief Returns whether \a str starts with \a phrase.
 * \remarks An empty \a phrase is considered a prefix of every string; a string
 *          is also considered to start with itself.
 */
template <typename StringType> bool startsWith(const StringType &str, const StringType &phrase)
{
    if (str.size() < phrase.size()) {
        return false;
    }
    // iterate over the phrase (and not over str) so a match is also detected when
    // both have the same length; the size check above keeps stri within bounds
    auto stri = str.cbegin();
    for (auto phrasei = phrase.cbegin(), phraseend = phrase.cend(); phrasei != phraseend; ++phrasei, ++stri) {
        if (*stri != *phrasei) {
            return false;
        }
    }
    return true;
}
213 
/*!
 * \brief Returns whether \a str starts with \a phrase.
 * \param str    The string to be checked.
 * \param phrase The null-terminated phrase \a str is supposed to start with.
 * \remarks An empty \a phrase is considered a prefix of every string; a string
 *          is also considered to start with itself.
 */
template <typename StringType> bool startsWith(const StringType &str, const typename StringType::value_type *phrase)
{
    // iterate over the phrase (and not over str) so a match is also detected when
    // both have the same length
    for (auto stri = str.cbegin(), strend = str.cend(); *phrase; ++stri, ++phrase) {
        if (stri == strend || *stri != *phrase) {
            return false;
        }
    }
    return true;
}
228 
/*!
 * \brief Returns whether \a str contains the specified \a substrings.
 * \remarks The \a substrings must occur in the specified order and must not
 *          overlap: the search for each substring starts right after the
 *          previous match.
 */
template <typename StringType> bool containsSubstrings(const StringType &str, std::initializer_list<StringType> substrings)
{
    typename StringType::size_type searchFrom = 0;
    for (const auto &needle : substrings) {
        const auto hit = str.find(needle, searchFrom);
        if (hit == StringType::npos) {
            return false;
        }
        // continue searching right behind the current match
        searchFrom = hit + needle.size();
    }
    return true;
}
244 
/*!
 * \brief Returns whether \a str contains the specified (null-terminated) \a substrings.
 * \remarks The \a substrings must occur in the specified order and must not
 *          overlap: the search for each substring starts right after the
 *          previous match.
 */
template <typename StringType>
bool containsSubstrings(const StringType &str, std::initializer_list<const typename StringType::value_type *> substrings)
{
    typename StringType::size_type searchFrom = 0;
    for (const auto *needle : substrings) {
        const auto hit = str.find(needle, searchFrom);
        if (hit == StringType::npos) {
            return false;
        }
        // continue searching right behind the current match
        searchFrom = hit + std::strlen(needle);
    }
    return true;
}
261 
/*!
 * \brief Replaces all occurrences of \a find with \a replace in the specified \a str.
 * \remarks
 * - The search continues behind the inserted replacement, so occurrences of
 *   \a find within \a replace are not replaced again.
 * - Nothing is replaced if \a find is empty.
 */
template <typename StringType> void findAndReplace(StringType &str, const StringType &find, const StringType &replace)
{
    // an empty search string matches at every position and would never terminate
    if (find.empty()) {
        return;
    }
    for (typename StringType::size_type i = 0; (i = str.find(find, i)) != StringType::npos; i += replace.size()) {
        str.replace(i, find.size(), replace);
    }
}
271 
/*!
 * \brief Returns the character representation of the specified \a digit.
 * \remarks Digits up to 9 are mapped to '0'..'9'; greater digits are mapped to
 *          upper-case letters starting with 'A' for 10. The \a digit is not validated.
 */
template <typename CharType> constexpr CharType digitToChar(CharType digit)
{
    return static_cast<CharType>(digit <= 9 ? '0' + digit : 'A' + (digit - 10));
}
282 
289 template <typename IntegralType, class StringType = std::string,
290  Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
291 StringType numberToString(IntegralType number, typename StringType::value_type base = 10)
292 {
293  std::size_t resSize = 0;
294  for (auto n = number; n; n /= base, ++resSize)
295  ;
296  StringType res;
297  res.reserve(resSize);
298  do {
299  res.insert(res.begin(), digitToChar<typename StringType::value_type>(number % base));
300  number /= base;
301  } while (number);
302  return res;
303 }
304 
311 template <typename IntegralType, class StringType = std::string,
312  Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
313 StringType numberToString(IntegralType number, typename StringType::value_type base = 10)
314 {
315  const bool negative = number < 0;
316  std::size_t resSize;
317  if (negative) {
318  number = -number, resSize = 1;
319  } else {
320  resSize = 0;
321  }
322  for (auto n = number; n; n /= base, ++resSize)
323  ;
324  StringType res;
325  res.reserve(resSize);
326  do {
327  res.insert(res.begin(), digitToChar<typename StringType::value_type>(number % base));
328  number /= base;
329  } while (number);
330  if (negative) {
331  res.insert(res.begin(), '-');
332  }
333  return res;
334 }
335 
344 template <typename FloatingType, class StringType = std::string, Traits::EnableIf<std::is_floating_point<FloatingType>> * = nullptr>
345 StringType numberToString(FloatingType number, typename StringType::value_type base = 10)
346 {
347  std::basic_stringstream<typename StringType::value_type> ss;
348  ss << std::setbase(base) << number;
349  return ss.str();
350 }
351 
357 template <typename CharType> CharType charToDigit(CharType character, CharType base)
358 {
359  CharType res = base;
360  if (character >= '0' && character <= '9') {
361  res = character - '0';
362  } else if (character >= 'a' && character <= 'z') {
363  res = character - 'a' + 10;
364  } else if (character >= 'A' && character <= 'Z') {
365  res = character - 'A' + 10;
366  }
367  if (res < base) {
368  return res;
369  }
370  std::string errorMsg;
371  errorMsg.reserve(36);
372  errorMsg += "The character \"";
373  errorMsg += character;
374  errorMsg += "\" is no valid digit.";
375  throw ConversionException(std::move(errorMsg));
376 }
377 
386 template <typename IntegralType, typename StringType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
387 IntegralType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
388 {
389  IntegralType result = 0;
390  for (const auto &c : string) {
391  if (c == ' ') {
392  continue;
393  }
394  result *= base;
395  result += charToDigit<typename StringType::value_type>(c, base);
396  }
397  return result;
398 }
399 
408 template <typename IntegralType, class StringType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
409 IntegralType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
410 {
411  auto i = string.begin();
412  auto end = string.end();
413  for (; i != end && *i == ' '; ++i)
414  ;
415  if (i == end) {
416  return 0;
417  }
418  const bool negative = (*i == '-');
419  if (negative) {
420  ++i;
421  }
422  IntegralType result = 0;
423  for (; i != end; ++i) {
424  if (*i == ' ') {
425  continue;
426  }
427  result *= base;
428  result += charToDigit<typename StringType::value_type>(*i, base);
429  }
430  return negative ? -result : result;
431 }
432 
443 template <typename FloatingType, class StringType, Traits::EnableIf<std::is_floating_point<FloatingType>> * = nullptr>
444 FloatingType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
445 {
446  std::basic_stringstream<typename StringType::value_type> ss;
447  ss << std::setbase(base) << string;
448  FloatingType result;
449  if ((ss >> result) && ss.eof()) {
450  return result;
451  }
452  std::string errorMsg;
453  errorMsg.reserve(42 + string.size());
454  errorMsg += "The string \"";
455  errorMsg += string;
456  errorMsg += "\" is no valid floating number.";
457  throw ConversionException(errorMsg);
458 }
459 
468 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
469 IntegralType stringToNumber(const CharType *string, unsigned char base = 10)
470 {
471  IntegralType result = 0;
472  for (; *string; ++string) {
473  if (*string == ' ') {
474  continue;
475  }
476  result *= base;
477  result += charToDigit<CharType>(*string, base);
478  }
479  return result;
480 }
481 
489 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
490 IntegralType bufferToNumber(const CharType *string, std::size_t size, unsigned char base = 10)
491 {
492  IntegralType result = 0;
493  for (const CharType *end = string + size; string != end; ++string) {
494  if (*string == ' ') {
495  continue;
496  }
497  result *= base;
498  result += charToDigit<CharType>(*string, base);
499  }
500  return result;
501 }
502 
510 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
511 IntegralType stringToNumber(const CharType *string, unsigned char base = 10)
512 {
513  if (!*string) {
514  return 0;
515  }
516  for (; *string && *string == ' '; ++string)
517  ;
518  if (!*string) {
519  return 0;
520  }
521  const bool negative = (*string == '-');
522  if (negative) {
523  ++string;
524  }
525  IntegralType result = 0;
526  for (; *string; ++string) {
527  if (*string == ' ') {
528  continue;
529  }
530  result *= base;
531  result += charToDigit<CharType>(*string, base);
532  }
533  return negative ? -result : result;
534 }
535 
543 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
544 IntegralType bufferToNumber(const CharType *string, std::size_t size, unsigned char base = 10)
545 {
546  if (!size) {
547  return 0;
548  }
549  const CharType *end = string + size;
550  for (; string != end && *string == ' '; ++string)
551  ;
552  if (string == end) {
553  return 0;
554  }
555  const bool negative = (*string == '-');
556  if (negative) {
557  ++string;
558  }
559  IntegralType result = 0;
560  for (; string != end; ++string) {
561  if (*string == ' ') {
562  continue;
563  }
564  result *= base;
565  result += charToDigit<CharType>(*string, base);
566  }
567  return negative ? -result : result;
568 }
569 
579 template <typename T> std::string interpretIntegerAsString(T integer, int startOffset = 0)
580 {
581  char buffer[sizeof(T)];
582  ConversionUtilities::BE::getBytes(integer, buffer);
583  return std::string(buffer + startOffset, sizeof(T) - startOffset);
584 }
585 
586 CPP_UTILITIES_EXPORT std::string dataSizeToString(uint64 sizeInByte, bool includeByte = false);
587 CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits = false);
588 CPP_UTILITIES_EXPORT std::string encodeBase64(const byte *data, uint32 dataSize);
589 CPP_UTILITIES_EXPORT std::pair<std::unique_ptr<byte[]>, uint32> decodeBase64(const char *encodedStr, const uint32 strSize);
590 } // namespace ConversionUtilities
591 
592 #endif // CONVERSION_UTILITIES_STRINGCONVERSION_H
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (little-endian).
bool startsWith(const StringType &str, const StringType &phrase)
Returns whether str starts with phrase.
void operator()(char *stringData)
Deletes the specified stringData with std::free(), because the memory has been allocated using std::m...
CPP_UTILITIES_EXPORT std::string encodeBase64(const byte *data, uint32 dataSize)
Encodes the specified data to Base64.
IntegralType stringToNumber(const StringType &string, typename StringType::value_type base=10)
Converts the given string to an unsigned number assuming string uses the specified base...
Container splitStringSimple(const typename Container::value_type &string, const typename Container::value_type &delimiter, int maxParts=-1)
Splits the given string (which might also be a string view) at the specified delimiter.
bool containsSubstrings(const StringType &str, std::initializer_list< StringType > substrings)
Returns whether str contains the specified substrings.
The ConversionException class is thrown by the various conversion functions of this library when a co...
std::vector< std::string > toArrayOfLines(const std::string &multilineString)
Converts the specified multilineString to an array of lines.
CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (big-endian) string to UTF-8.
CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar='\0')
Truncates all characters after the first occurrence of the specified terminationChar and the terminat...
StringType numberToString(IntegralType number, typename StringType::value_type base=10)
Converts the given number to its equivalent string representation using the specified base...
std::uint64_t uint64
unsigned 64-bit integer
Definition: types.h:49
The StringDataDeleter struct deletes the data of a StringData instance.
typename std::enable_if< All< Condition... >::value, Detail::Enabler >::type EnableIf
Definition: traits.h:33
std::string interpretIntegerAsString(T integer, int startOffset=0)
Interprets the given integer at the specified position as std::string using the specified byte order...
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (big-endian).
EmptyPartsTreat
Specifies the role of empty parts when splitting strings.
CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to Latin-1.
void findAndReplace(StringType &str, const StringType &find, const StringType &replace)
Replaces all occurrences of find with replace in the specified str.
Contains several functions providing conversions between different data types.
std::uint32_t uint32
unsigned 32-bit integer
Definition: types.h:44
std::pair< std::unique_ptr< char[], StringDataDeleter >, std::size_t > StringData
Type used to return string encoding conversion result.
IntegralType bufferToNumber(const CharType *string, std::size_t size, unsigned char base=10)
Converts the given string of size characters to an unsigned numeric value using the specified base...
CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (little-endian) string to UTF-8.
CharType charToDigit(CharType character, CharType base)
Returns number/digit of the specified character representation using the specified base...
Container::value_type joinStrings(const Container &strings, const typename Container::value_type &delimiter=typename Container::value_type(), bool omitEmpty=false, const typename Container::value_type &leftClosure=typename Container::value_type(), const typename Container::value_type &rightClosure=typename Container::value_type())
Joins the given strings using the specified delimiter.
CPP_UTILITIES_EXPORT StringData convertString(const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor=1.0f)
Converts the specified string from one character set to another.
std::uint8_t byte
unsigned byte
Definition: types.h:14
CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified Latin-1 string to UTF-8.
std::vector< std::string > toMultiline(const Container &arrayOfLines)
Converts the specified arrayOfLines to a multiline string.
Container splitString(const typename Container::value_type &string, const typename Container::value_type &delimiter, EmptyPartsTreat emptyPartsRole=EmptyPartsTreat::Keep, int maxParts=-1)
Splits the given string at the specified delimiter.
CPP_UTILITIES_EXPORT std::string dataSizeToString(uint64 sizeInByte, bool includeByte=false)
Converts the specified data size in byte to its equivalent std::string representation.
#define CPP_UTILITIES_EXPORT
Marks the symbol to be exported by the c++utilities library.
CPP_UTILITIES_EXPORT std::pair< std::unique_ptr< byte[]>, uint32 > decodeBase64(const char *encodedStr, const uint32 strSize)
Decodes the specified Base64 encoded string.
constexpr CharType digitToChar(CharType digit)
Returns the character representation of the specified digit.
CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits=false)
Converts the specified bitrate in kbit/s to its equivalent std::string representation.