C++ Utilities  4.15.0
Useful C++ classes and routines such as argument parser, IO and conversion utilities
stringconversion.h
Go to the documentation of this file.
1 #ifndef CONVERSION_UTILITIES_STRINGCONVERSION_H
2 #define CONVERSION_UTILITIES_STRINGCONVERSION_H
3 
4 #include "./binaryconversion.h"
6 
7 #include "../misc/traits.h"
8 
9 #include <cstdlib>
10 #include <cstring>
11 #include <initializer_list>
12 #include <iomanip>
13 #include <list>
14 #include <memory>
15 #include <sstream>
16 #include <string>
17 #include <vector>
18 
19 namespace ConversionUtilities {
20 
29  void operator()(char *stringData)
30  {
31  std::free(stringData);
32  }
33 };
34 
/*!
 * \brief Type used to return string encoding conversion results.
 *
 * The first member owns the character buffer and releases it through StringDataDeleter;
 * the second member is a std::size_t (presumably the size of that buffer - confirm against
 * the implementation of the conversion functions).
 */
38 using StringData = std::pair<std::unique_ptr<char[], StringDataDeleter>, std::size_t>;
39 //using StringData = std::pair<std::unique_ptr<char>, std::size_t>; // might work too
40 
42  const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor = 1.0f);
/// Converts the specified UTF-8 string to UTF-16 (little-endian).
43 CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize);
/// Converts the specified UTF-16 (little-endian) string to UTF-8.
44 CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
/// Converts the specified UTF-8 string to UTF-16 (big-endian).
45 CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize);
/// Converts the specified UTF-16 (big-endian) string to UTF-8.
46 CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
/// Converts the specified Latin-1 string to UTF-8.
47 CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
/// Converts the specified UTF-8 string to Latin-1.
48 CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize);
49 
/// Truncates all characters after the first occurrence of the specified terminationChar
/// (presumably including the termination character itself - confirm against the implementation).
50 CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar = '\0');
51 
/*!
 * \brief Joins the given \a strings using the specified \a delimiter.
 * \param strings The container of strings to be joined.
 * \param delimiter Inserted before every appended string once the result is no longer empty.
 * \param omitEmpty Whether empty strings are skipped entirely.
 * \param leftClosure Put in front of every appended string.
 * \param rightClosure Put behind every appended string.
 * \returns The joined string; empty if there is nothing to join.
 * \remarks The delimiter is only inserted when the result built so far is not empty. Leading
 *          empty strings without closures therefore do not produce a leading delimiter.
 */
template <class Container = std::initializer_list<std::string>>
typename Container::value_type joinStrings(const Container &strings,
    const typename Container::value_type &delimiter = typename Container::value_type(), bool omitEmpty = false,
    const typename Container::value_type &leftClosure = typename Container::value_type(),
    const typename Container::value_type &rightClosure = typename Container::value_type())
{
    using String = typename Container::value_type;

    String joined;
    if (strings.size() == 0) {
        return joined;
    }

    // first pass: count the strings to be joined and sum up their sizes
    std::size_t partCount = 0;
    std::size_t totalSize = 0;
    for (const auto &part : strings) {
        if (!omitEmpty || !part.empty()) {
            totalSize += part.size();
            ++partCount;
        }
    }
    if (partCount == 0) {
        return joined;
    }

    // account for the closures of each part and the delimiters between the parts
    totalSize += (partCount * leftClosure.size()) + (partCount * rightClosure.size()) + ((partCount - 1) * delimiter.size());
    joined.reserve(totalSize);

    // second pass: assemble the result
    for (const auto &part : strings) {
        if (omitEmpty && part.empty()) {
            continue;
        }
        if (!joined.empty()) {
            joined.append(delimiter);
        }
        joined.append(leftClosure);
        joined.append(part);
        joined.append(rightClosure);
    }
    return joined;
}
101 
/*!
 * \brief Converts the specified \a arrayOfLines to a multiline string.
 * \param arrayOfLines The container holding the lines.
 * \returns The lines joined with a line feed ("\n") in between; empty lines are kept.
 * \remarks The result is a single string (the container's value type), not a container of strings.
 */
template <class Container = std::initializer_list<std::string>>
inline typename Container::value_type toMultiline(const Container &arrayOfLines)
{
    return joinStrings(arrayOfLines, "\n", false);
}
109 
/*!
 * \brief Specifies the role of empty parts when splitting strings.
 */
113 enum class EmptyPartsTreat {
114  Keep, ///< splitString() adds empty parts to the result like any other part
115  Omit, ///< splitString() drops empty parts
116  Merge ///< splitString() drops an empty part and appends the delimiter plus the following part to the preceding part (if any)
117 };
118 
/*!
 * \brief Splits the given \a string at the specified \a delimiter.
 * \param string The string to be split.
 * \param delimiter The delimiter separating the parts.
 * \param emptyPartsRole Specifies how empty parts (two delimiters in a row, leading delimiter) are treated.
 * \param maxParts Maximal number of parts; once only one more part is allowed, that part receives
 *                 the whole remainder of the string. Values less than one disable the limit.
 * \returns The parts in a container of the type specified via the template argument.
 * \remarks The loop only runs while the current offset is within the string. An empty \a string
 *          therefore yields an empty container, and a trailing delimiter yields no trailing empty part.
 * \remarks NOTE(review): with an empty \a delimiter the offset does not appear to advance - confirm
 *          that callers never pass one.
 */
128 template <class Container = std::list<std::string>>
129 Container splitString(const typename Container::value_type &string, const typename Container::value_type &delimiter,
130  EmptyPartsTreat emptyPartsRole = EmptyPartsTreat::Keep, int maxParts = -1)
131 {
// from here on maxParts is the number of parts after which the limit kicks in; negative means "no limit"
132  --maxParts;
133  Container res;
// set when an empty part has been skipped in merge mode: the next part is appended to the previous one
134  bool merge = false;
// i is the offset of the current part, delimPos the offset of the delimiter terminating it
135  for (typename Container::value_type::size_type i = 0, end = string.size(), delimPos; i < end; i = delimPos + delimiter.size()) {
136  delimPos = string.find(delimiter, i);
// only one more part is allowed: let it span the whole remainder by ignoring further delimiters
137  if (!merge && maxParts >= 0 && res.size() == static_cast<typename Container::value_type::size_type>(maxParts)) {
138  if (delimPos == i && emptyPartsRole == EmptyPartsTreat::Merge) {
139  if (!res.empty()) {
// the remainder starts with a delimiter: skip this empty part and merge what follows into the previous part
140  merge = true;
141  continue;
142  }
143  }
144  delimPos = Container::value_type::npos;
145  }
// no (further) delimiter: the current part reaches up to the end of the string
146  if (delimPos == Container::value_type::npos) {
147  delimPos = string.size();
148  }
// non-empty parts are always emitted; empty parts only in keep mode
149  if (emptyPartsRole == EmptyPartsTreat::Keep || i != delimPos) {
150  if (merge) {
// re-insert the delimiter which was swallowed together with the skipped empty part
151  res.back().append(delimiter);
152  res.back().append(string.substr(i, delimPos - i));
153  merge = false;
154  } else {
155  res.emplace_back(string.substr(i, delimPos - i));
156  }
157  } else if (emptyPartsRole == EmptyPartsTreat::Merge) {
// empty part in merge mode: merging requires a previous part to append to
158  if (!res.empty()) {
159  merge = true;
160  }
161  }
162  }
163  return res;
164 }
165 
/*!
 * \brief Splits the given \a string (which might also be a string view) at the specified \a delimiter.
 * \param string The string to be split.
 * \param delimiter The delimiter separating the parts.
 * \param maxParts Maximal number of parts; the last allowed part receives the whole remainder of
 *                 the string. Values less than one disable the limit.
 * \returns The parts in a container of the type specified via the template argument.
 * \remarks Empty parts are kept. An empty \a string yields an empty container, and a trailing
 *          delimiter yields no trailing empty part.
 */
template <class Container = std::list<std::string>>
Container splitStringSimple(const typename Container::value_type &string, const typename Container::value_type &delimiter, int maxParts = -1)
{
    using String = typename Container::value_type;
    using Size = typename String::size_type;

    // number of parts after which the remainder is taken as a whole; negative means "no limit"
    const int limit = maxParts - 1;
    const Size totalSize = string.size();

    Container parts;
    Size offset = 0;
    while (offset < totalSize) {
        Size partEnd = string.find(delimiter, offset);
        const bool limitReached = limit >= 0 && parts.size() == static_cast<Size>(limit);
        if (limitReached || partEnd == String::npos) {
            partEnd = totalSize;
        }
        parts.emplace_back(string.substr(offset, partEnd - offset));
        offset = partEnd + delimiter.size();
    }
    return parts;
}
192 
196 template <class Container = std::vector<std::string>> inline std::vector<std::string> toArrayOfLines(const std::string &multilineString)
197 {
198  return splitString<Container>(multilineString, "\n", EmptyPartsTreat::Keep);
199 }
200 
/*!
 * \brief Returns whether \a str starts with \a phrase.
 * \param str The string to be checked.
 * \param phrase The expected prefix.
 * \returns True if the first characters of \a str are equal to \a phrase. This includes the cases
 *          where both strings are equal and where \a phrase is empty.
 */
template <typename StringType> bool startsWith(const StringType &str, const StringType &phrase)
{
    if (str.size() < phrase.size()) {
        return false;
    }
    // str is at least as long as phrase, so walking along phrase never leaves str
    auto stri = str.cbegin();
    for (auto phrasei = phrase.cbegin(), phraseend = phrase.cend(); phrasei != phraseend; ++phrasei, ++stri) {
        if (*stri != *phrasei) {
            return false;
        }
    }
    return true;
}
218 
/*!
 * \brief Returns whether \a str starts with \a phrase.
 * \param str The string to be checked.
 * \param phrase The expected prefix as null-terminated string.
 * \returns True if the first characters of \a str are equal to \a phrase. This includes the cases
 *          where both strings are equal and where \a phrase is empty.
 */
template <typename StringType> bool startsWith(const StringType &str, const typename StringType::value_type *phrase)
{
    for (auto stri = str.cbegin(), strend = str.cend(); stri != strend; ++stri, ++phrase) {
        if (!*phrase) {
            return true; // phrase exhausted without a mismatch
        }
        if (*stri != *phrase) {
            return false;
        }
    }
    // str is exhausted: it only starts with phrase if phrase is exhausted as well
    return !*phrase;
}
233 
/*!
 * \brief Returns whether \a str contains the specified \a substrings.
 * \param str The string to be searched.
 * \param substrings The substrings to look for.
 * \returns True if all substrings occur in the specified order without overlapping: the search
 *          for each substring starts behind the match of the previous one.
 */
template <typename StringType> bool containsSubstrings(const StringType &str, std::initializer_list<StringType> substrings)
{
    auto searchFrom = typename StringType::size_type(0);
    for (auto current = substrings.begin(), last = substrings.end(); current != last; ++current) {
        const auto foundAt = str.find(*current, searchFrom);
        if (foundAt == StringType::npos) {
            return false;
        }
        searchFrom = foundAt + current->size();
    }
    return true;
}
249 
/*!
 * \brief Returns whether \a str contains the specified \a substrings.
 * \param str The string to be searched.
 * \param substrings The substrings to look for as null-terminated strings.
 * \returns True if all substrings occur in the specified order without overlapping: the search
 *          for each substring starts behind the match of the previous one.
 * \remarks Works with any character type of \a StringType because the substring length is
 *          determined via std::char_traits.
 */
template <typename StringType>
bool containsSubstrings(const StringType &str, std::initializer_list<const typename StringType::value_type *> substrings)
{
    typename StringType::size_type currentPos = 0;
    for (const auto *substr : substrings) {
        if ((currentPos = str.find(substr, currentPos)) == StringType::npos) {
            return false;
        }
        currentPos += std::char_traits<typename StringType::value_type>::length(substr);
    }
    return true;
}
266 
/*!
 * \brief Replaces all occurrences of \a find with \a replace in the specified \a str.
 * \param str The string to be modified in place.
 * \param find The phrase to search for; if empty, \a str is left unchanged.
 * \param replace The replacement; text inserted by a replacement is not searched again.
 */
template <typename StringType> void findAndReplace(StringType &str, const StringType &find, const StringType &replace)
{
    // an empty phrase matches at every position, so the loop below would never terminate
    if (find.empty()) {
        return;
    }
    for (typename StringType::size_type i = 0; (i = str.find(find, i)) != StringType::npos; i += replace.size()) {
        str.replace(i, find.size(), replace);
    }
}
276 
/*!
 * \brief Returns the character representation of the specified \a digit.
 * \param digit The numeric value of the digit.
 * \returns '0' to '9' for values up to 9; for greater values the upper-case letters starting
 *          with 'A' for 10.
 * \remarks The value is not range-checked.
 */
template <typename CharType> constexpr CharType digitToChar(CharType digit)
{
    return static_cast<CharType>(digit > 9 ? (digit + 'A' - 10) : (digit + '0'));
}
287 
294 template <typename IntegralType, class StringType = std::string,
295  Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
296 StringType numberToString(IntegralType number, typename StringType::value_type base = 10)
297 {
298  std::size_t resSize = 0;
299  for (auto n = number; n; n /= base, ++resSize)
300  ;
301  StringType res;
302  res.reserve(resSize);
303  do {
304  res.insert(res.begin(), digitToChar<typename StringType::value_type>(number % base));
305  number /= base;
306  } while (number);
307  return res;
308 }
309 
316 template <typename IntegralType, class StringType = std::string,
317  Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
318 StringType numberToString(IntegralType number, typename StringType::value_type base = 10)
319 {
320  const bool negative = number < 0;
321  std::size_t resSize;
322  if (negative) {
323  number = -number, resSize = 1;
324  } else {
325  resSize = 0;
326  }
327  for (auto n = number; n; n /= base, ++resSize)
328  ;
329  StringType res;
330  res.reserve(resSize);
331  do {
332  res.insert(res.begin(), digitToChar<typename StringType::value_type>(number % base));
333  number /= base;
334  } while (number);
335  if (negative) {
336  res.insert(res.begin(), '-');
337  }
338  return res;
339 }
340 
349 template <typename FloatingType, class StringType = std::string, Traits::EnableIf<std::is_floating_point<FloatingType>> * = nullptr>
350 StringType numberToString(FloatingType number, typename StringType::value_type base = 10)
351 {
352  std::basic_stringstream<typename StringType::value_type> ss;
353  ss << std::setbase(base) << number;
354  return ss.str();
355 }
356 
362 template <typename CharType> CharType charToDigit(CharType character, CharType base)
363 {
364  CharType res = base;
365  if (character >= '0' && character <= '9') {
366  res = character - '0';
367  } else if (character >= 'a' && character <= 'z') {
368  res = character - 'a' + 10;
369  } else if (character >= 'A' && character <= 'Z') {
370  res = character - 'A' + 10;
371  }
372  if (res < base) {
373  return res;
374  }
375  std::string errorMsg;
376  errorMsg.reserve(36);
377  errorMsg += "The character \"";
378  errorMsg += character;
379  errorMsg += "\" is no valid digit.";
380  throw ConversionException(std::move(errorMsg));
381 }
382 
391 template <typename IntegralType, typename StringType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
392 IntegralType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
393 {
394  IntegralType result = 0;
395  for (const auto &c : string) {
396  if (c == ' ') {
397  continue;
398  }
399  result *= base;
400  result += charToDigit<typename StringType::value_type>(c, base);
401  }
402  return result;
403 }
404 
413 template <typename IntegralType, class StringType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
414 IntegralType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
415 {
416  auto i = string.begin();
417  auto end = string.end();
418  for (; i != end && *i == ' '; ++i)
419  ;
420  if (i == end) {
421  return 0;
422  }
423  const bool negative = (*i == '-');
424  if (negative) {
425  ++i;
426  }
427  IntegralType result = 0;
428  for (; i != end; ++i) {
429  if (*i == ' ') {
430  continue;
431  }
432  result *= base;
433  result += charToDigit<typename StringType::value_type>(*i, base);
434  }
435  return negative ? -result : result;
436 }
437 
448 template <typename FloatingType, class StringType, Traits::EnableIf<std::is_floating_point<FloatingType>> * = nullptr>
449 FloatingType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
450 {
451  std::basic_stringstream<typename StringType::value_type> ss;
452  ss << std::setbase(base) << string;
453  FloatingType result;
454  if ((ss >> result) && ss.eof()) {
455  return result;
456  }
457  std::string errorMsg;
458  errorMsg.reserve(42 + string.size());
459  errorMsg += "The string \"";
460  errorMsg += string;
461  errorMsg += "\" is no valid floating number.";
462  throw ConversionException(errorMsg);
463 }
464 
473 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
474 IntegralType stringToNumber(const CharType *string, unsigned char base = 10)
475 {
476  IntegralType result = 0;
477  for (; *string; ++string) {
478  if (*string == ' ') {
479  continue;
480  }
481  result *= base;
482  result += charToDigit<CharType>(*string, base);
483  }
484  return result;
485 }
486 
497 template <typename FloatingType, class CharType, Traits::EnableIf<std::is_floating_point<FloatingType>> * = nullptr>
498 FloatingType stringToNumber(const CharType *string, unsigned char base = 10)
499 {
500  std::basic_stringstream<CharType> ss;
501  ss << std::setbase(base) << string;
502  FloatingType result;
503  if ((ss >> result) && ss.eof()) {
504  return result;
505  }
506  std::string errorMsg;
507  errorMsg.reserve(42 + std::char_traits<CharType>::length(string));
508  errorMsg += "The string \"";
509  errorMsg += string;
510  errorMsg += "\" is no valid floating number.";
511  throw ConversionException(errorMsg);
512 }
513 
521 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
522 IntegralType bufferToNumber(const CharType *string, std::size_t size, unsigned char base = 10)
523 {
524  IntegralType result = 0;
525  for (const CharType *end = string + size; string != end; ++string) {
526  if (*string == ' ') {
527  continue;
528  }
529  result *= base;
530  result += charToDigit<CharType>(*string, base);
531  }
532  return result;
533 }
534 
542 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
543 IntegralType stringToNumber(const CharType *string, unsigned char base = 10)
544 {
545  if (!*string) {
546  return 0;
547  }
548  for (; *string && *string == ' '; ++string)
549  ;
550  if (!*string) {
551  return 0;
552  }
553  const bool negative = (*string == '-');
554  if (negative) {
555  ++string;
556  }
557  IntegralType result = 0;
558  for (; *string; ++string) {
559  if (*string == ' ') {
560  continue;
561  }
562  result *= base;
563  result += charToDigit<CharType>(*string, base);
564  }
565  return negative ? -result : result;
566 }
567 
575 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
576 IntegralType bufferToNumber(const CharType *string, std::size_t size, unsigned char base = 10)
577 {
578  if (!size) {
579  return 0;
580  }
581  const CharType *end = string + size;
582  for (; string != end && *string == ' '; ++string)
583  ;
584  if (string == end) {
585  return 0;
586  }
587  const bool negative = (*string == '-');
588  if (negative) {
589  ++string;
590  }
591  IntegralType result = 0;
592  for (; string != end; ++string) {
593  if (*string == ' ') {
594  continue;
595  }
596  result *= base;
597  result += charToDigit<CharType>(*string, base);
598  }
599  return negative ? -result : result;
600 }
601 
611 template <typename T> std::string interpretIntegerAsString(T integer, int startOffset = 0)
612 {
613  char buffer[sizeof(T)];
614  ConversionUtilities::BE::getBytes(integer, buffer);
615  return std::string(buffer + startOffset, sizeof(T) - startOffset);
616 }
617 
/// Converts the specified data size in byte to its equivalent std::string representation.
618 CPP_UTILITIES_EXPORT std::string dataSizeToString(uint64 sizeInByte, bool includeByte = false);
/// Converts the specified bitrate in kbit/s to its equivalent std::string representation.
619 CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits = false);
/// Encodes the specified data to Base64.
620 CPP_UTILITIES_EXPORT std::string encodeBase64(const byte *data, uint32 dataSize);
/// Decodes the specified Base64 encoded string; returns the decoded bytes paired with a uint32
/// (presumably the number of decoded bytes - confirm against the implementation).
621 CPP_UTILITIES_EXPORT std::pair<std::unique_ptr<byte[]>, uint32> decodeBase64(const char *encodedStr, const uint32 strSize);
622 } // namespace ConversionUtilities
623 
624 #endif // CONVERSION_UTILITIES_STRINGCONVERSION_H
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (little-endian).
bool startsWith(const StringType &str, const StringType &phrase)
Returns whether str starts with phrase.
void operator()(char *stringData)
Deletes the specified stringData with std::free(), because the memory has been allocated using std::m...
CPP_UTILITIES_EXPORT std::string encodeBase64(const byte *data, uint32 dataSize)
Encodes the specified data to Base64.
std::pair< std::unique_ptr< char[], StringDataDeleter >, std::size_t > StringData
Type used to return string encoding conversion result.
IntegralType stringToNumber(const StringType &string, typename StringType::value_type base=10)
Converts the given string to an unsigned number assuming string uses the specified base...
Container splitStringSimple(const typename Container::value_type &string, const typename Container::value_type &delimiter, int maxParts=-1)
Splits the given string (which might also be a string view) at the specified delimiter.
bool containsSubstrings(const StringType &str, std::initializer_list< StringType > substrings)
Returns whether str contains the specified substrings.
The ConversionException class is thrown by the various conversion functions of this library when a co...
std::vector< std::string > toArrayOfLines(const std::string &multilineString)
Converts the specified multilineString to an array of lines.
CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (big-endian) string to UTF-8.
CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar='\0')
Truncates all characters after the first occurrence of the specified terminationChar and the terminat...
StringType numberToString(IntegralType number, typename StringType::value_type base=10)
Converts the given number to its equivalent string representation using the specified base...
std::uint64_t uint64
unsigned 64-bit integer
Definition: types.h:49
The StringDataDeleter struct deletes the data of a StringData instance.
typename std::enable_if< All< Condition... >::value, Detail::Enabler >::type EnableIf
Shortcut for std::enable_if to omit ::value and ::type.
Definition: traits.h:48
std::string interpretIntegerAsString(T integer, int startOffset=0)
Interprets the given integer at the specified position as std::string using the specified byte order...
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (big-endian).
EmptyPartsTreat
Specifies the role of empty parts when splitting strings.
constexpr int i
Definition: traitstests.cpp:97
CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to Latin-1.
void findAndReplace(StringType &str, const StringType &find, const StringType &replace)
Replaces all occurrences of find with replace in the specified str.
Contains several functions providing conversions between different data types.
std::uint32_t uint32
unsigned 32-bit integer
Definition: types.h:44
IntegralType bufferToNumber(const CharType *string, std::size_t size, unsigned char base=10)
Converts the given string of size characters to an unsigned numeric value using the specified base...
CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (little-endian) string to UTF-8.
CharType charToDigit(CharType character, CharType base)
Returns number/digit of the specified character representation using the specified base...
Container::value_type joinStrings(const Container &strings, const typename Container::value_type &delimiter=typename Container::value_type(), bool omitEmpty=false, const typename Container::value_type &leftClosure=typename Container::value_type(), const typename Container::value_type &rightClosure=typename Container::value_type())
Joins the given strings using the specified delimiter.
CPP_UTILITIES_EXPORT StringData convertString(const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor=1.0f)
Converts the specified string from one character set to another.
std::uint8_t byte
unsigned byte
Definition: types.h:14
CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified Latin-1 string to UTF-8.
std::vector< std::string > toMultiline(const Container &arrayOfLines)
Converts the specified arrayOfLines to a multiline string.
Container splitString(const typename Container::value_type &string, const typename Container::value_type &delimiter, EmptyPartsTreat emptyPartsRole=EmptyPartsTreat::Keep, int maxParts=-1)
Splits the given string at the specified delimiter.
CPP_UTILITIES_EXPORT std::string dataSizeToString(uint64 sizeInByte, bool includeByte=false)
Converts the specified data size in byte to its equivalent std::string representation.
#define CPP_UTILITIES_EXPORT
Marks the symbol to be exported by the c++utilities library.
CPP_UTILITIES_EXPORT std::pair< std::unique_ptr< byte[]>, uint32 > decodeBase64(const char *encodedStr, const uint32 strSize)
Decodes the specified Base64 encoded string.
constexpr CharType digitToChar(CharType digit)
Returns the character representation of the specified digit.
CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits=false)
Converts the specified bitrate in kbit/s to its equivalent std::string representation.