C++ Utilities  4.17.1
Useful C++ classes and routines such as argument parser, IO and conversion utilities
stringconversion.cpp
Go to the documentation of this file.
#include "./stringconversion.h"

#ifndef CPP_UTILITIES_NO_THREAD_LOCAL
#include "resources/features.h"
#else
#define CPP_UTILITIES_THREAD_LOCAL
#endif

#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <limits>
#include <memory>
#include <new>
#include <sstream>

#include <errno.h>
#include <iconv.h>

#ifdef PLATFORM_WINDOWS
#include <windows.h>
#endif
22 
23 using namespace std;
24 
32 namespace ConversionUtilities {
33 
35 
// Output size hint: suggests an output buffer of the same size as the input.
struct Keep {
    // Returns value unchanged; const so the hint can be invoked through const objects.
    std::size_t operator()(std::size_t value) const noexcept
    {
        return value;
    }
};
// Output size hint: suggests an output buffer twice as large as the input.
struct Double {
    // Returns value + value; const so the hint can be invoked through const objects.
    std::size_t operator()(std::size_t value) const noexcept
    {
        return value + value;
    }
};
// Output size hint: suggests an output buffer half as large as the input (rounded down).
struct Half {
    // Returns value / 2 (integer division); const so the hint can be invoked through const objects.
    std::size_t operator()(std::size_t value) const noexcept
    {
        return value / 2;
    }
};
// Output size hint: scales the input size by a caller-supplied floating point factor.
struct Factor {
    // Deliberately not explicit: convertString() passes a plain float where a Factor is expected.
    Factor(float factor)
        : factor(factor)
    {
    }
    // Returns value * factor, truncated towards zero.
    std::size_t operator()(std::size_t value) const noexcept
    {
        return static_cast<std::size_t>(value * factor);
    }
    float factor;
};
63 
64 template <class OutputSizeHint> class ConversionDescriptor {
65 public:
66  ConversionDescriptor(const char *fromCharset, const char *toCharset)
67  : m_ptr(iconv_open(toCharset, fromCharset))
68  , m_outputSizeHint(OutputSizeHint())
69  {
70  if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
71  throw ConversionException("Unable to allocate descriptor for character set conversion.");
72  }
73  }
74 
75  ConversionDescriptor(const char *fromCharset, const char *toCharset, OutputSizeHint outputSizeHint)
76  : m_ptr(iconv_open(toCharset, fromCharset))
77  , m_outputSizeHint(outputSizeHint)
78  {
79  if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
80  throw ConversionException("Unable to allocate descriptor for character set conversion.");
81  }
82  }
83 
84  ~ConversionDescriptor()
85  {
86  iconv_close(m_ptr);
87  }
88 
89 public:
90  StringData convertString(const char *inputBuffer, size_t inputBufferSize)
91  {
92  // setup input and output buffer
93  size_t inputBytesLeft = inputBufferSize;
94  size_t outputSize = m_outputSizeHint(inputBufferSize);
95  size_t outputBytesLeft = outputSize;
96  char *outputBuffer = reinterpret_cast<char *>(malloc(outputSize));
97  size_t bytesWritten;
98 
99  char *currentOutputOffset = outputBuffer;
100  for (;; currentOutputOffset = outputBuffer + bytesWritten) {
101  bytesWritten = iconv(m_ptr, const_cast<char **>(&inputBuffer), &inputBytesLeft, &currentOutputOffset, &outputBytesLeft);
102  if (bytesWritten == static_cast<size_t>(-1)) {
103  if (errno == EINVAL) {
104  // ignore incomplete multibyte sequence in the input
105  bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
106  break;
107  } else if (errno == E2BIG) {
108  // output buffer has no more room for next converted character
109  bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
110  outputBytesLeft = (outputSize += m_outputSizeHint(inputBytesLeft)) - bytesWritten;
111  outputBuffer = reinterpret_cast<char *>(realloc(outputBuffer, outputSize));
112  } else /*if(errno == EILSEQ)*/ {
113  // invalid multibyte sequence in the input
114  free(outputBuffer);
115  throw ConversionException("Invalid multibyte sequence in the input.");
116  }
117  } else {
118  // conversion completed without (further) errors
119  break;
120  }
121  }
122  return StringData(std::unique_ptr<char[], StringDataDeleter>(outputBuffer), currentOutputOffset - outputBuffer);
123  }
124 
125 private:
126  iconv_t m_ptr;
127  OutputSizeHint m_outputSizeHint;
128 };
129 
131 
142  const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor)
143 {
144  return ConversionDescriptor<Factor>(fromCharset, toCharset, outputBufferSizeFactor).convertString(inputBuffer, inputBufferSize);
145 }
146 
150 StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
151 {
152  CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16LE");
153  return descriptor.convertString(inputBuffer, inputBufferSize);
154 }
155 
159 StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
160 {
161  CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16LE", "UTF-8");
162  return descriptor.convertString(inputBuffer, inputBufferSize);
163 }
164 
168 StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
169 {
170  CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16BE");
171  return descriptor.convertString(inputBuffer, inputBufferSize);
172 }
173 
177 StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
178 {
179  CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16BE", "UTF-8");
180  return descriptor.convertString(inputBuffer, inputBufferSize);
181 }
182 
186 StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
187 {
188  CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("ISO-8859-1", "UTF-8");
189  return descriptor.convertString(inputBuffer, inputBufferSize);
190 }
191 
195 StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
196 {
197  CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("UTF-8", "ISO-8859-1");
198  return descriptor.convertString(inputBuffer, inputBufferSize);
199 }
200 
201 #ifdef PLATFORM_WINDOWS
202 
208 WideStringData convertMultiByteToWide(const char *inputBuffer, int inputBufferSize)
209 {
210  // calculate required size
211  WideStringData widePath;
212  widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, nullptr, 0);
213  if (widePath.second <= 0) {
214  return widePath;
215  }
216  // do the actual conversion
217  widePath.first = make_unique<wchar_t[]>(static_cast<size_t>(widePath.second));
218  widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, widePath.first.get(), widePath.second);
219  if (widePath.second <= 0) {
220  widePath.first.reset();
221  }
222  return widePath;
223 }
224 
229 WideStringData convertMultiByteToWide(const std::string &inputBuffer)
230 {
231  return convertMultiByteToWide(
232  inputBuffer.data(), inputBuffer.size() < (numeric_limits<int>::max() - 1) ? static_cast<int>(inputBuffer.size() + 1) : -1);
233 }
234 #endif
235 
/*!
 * \brief Truncates all characters after the first occurrence of the specified \a terminationChar
 *        and the termination character itself.
 * \remarks \a str is left untouched if it does not contain \a terminationChar.
 */
void truncateString(std::string &str, char terminationChar)
{
    const std::string::size_type terminationPos = str.find(terminationChar);
    if (terminationPos == std::string::npos) {
        return;
    }
    str.erase(terminationPos);
}
247 
/*!
 * \brief Converts the specified data size in byte to its equivalent std::string representation.
 * \param sizeInByte Size in bytes.
 * \param includeByte Whether the plain byte count is appended in parentheses. The count is only
 *        appended when a unit other than bytes is used, i.e. for sizes of at least 1024 bytes.
 * \returns Sizes below 1024 are printed as integer with " bytes" suffix; larger sizes are printed with
 *          two decimal places using the units KiB, MiB, GiB and TiB (powers of 1024).
 */
std::string dataSizeToString(uint64 sizeInByte, bool includeByte)
{
    std::stringstream res(std::stringstream::in | std::stringstream::out);
    res.setf(std::ios::fixed, std::ios::floatfield);
    res << std::setprecision(2);
    if (sizeInByte < 1024ULL) {
        res << sizeInByte << " bytes";
    } else if (sizeInByte < 1048576ULL) {
        res << (static_cast<double>(sizeInByte) / 1024.0) << " KiB";
    } else if (sizeInByte < 1073741824ULL) {
        res << (static_cast<double>(sizeInByte) / 1048576.0) << " MiB";
    } else if (sizeInByte < 1099511627776ULL) {
        res << (static_cast<double>(sizeInByte) / 1073741824.0) << " GiB";
    } else {
        res << (static_cast<double>(sizeInByte) / 1099511627776.0) << " TiB";
    }
    // same boundary as the unit selection above, so exactly 1024 bytes ("1.00 KiB") gets the suffix too
    if (includeByte && sizeInByte >= 1024ULL) {
        res << ' ' << '(' << sizeInByte << " byte)";
    }
    return res.str();
}
274 
/*!
 * \brief Converts the specified bitrate in kbit/s to its equivalent std::string representation.
 * \param bitrateInKbitsPerSecond Bitrate in kbit/s; NaN yields "indeterminable".
 * \param useIecBinaryPrefixes Selects the byte-based units (byte/s, KiB/s, MiB/s, GiB/s) instead of
 *        the bit-based units (bit/s, kbit/s, Mbit/s, Gbit/s).
 * \remarks Values are printed with a precision of three significant digits.
 */
std::string bitrateToString(double bitrateInKbitsPerSecond, bool useIecBinaryPrefixes)
{
    // a unit is used for values below its exclusive limit; the last unit of a table is the fallback
    struct Unit {
        double exclusiveLimit;
        double factor;
        const char *suffix;
    };
    // NOTE(review): these factors are decimal (1 "KiB" = 1000 bytes here) although the suffixes are
    // binary prefixes - confirm whether that is intended before touching the labels or factors
    static const Unit byteUnits[] = {
        { 8.0, 125.0, " byte/s" },
        { 8000.0, 0.125, " KiB/s" },
        { 8000000.0, 0.000125, " MiB/s" },
        { 0.0, 0.000000125, " GiB/s" },
    };
    static const Unit bitUnits[] = {
        { 1.0, 1000.0, " bit/s" },
        { 1000.0, 1.0, " kbit/s" },
        { 1000000.0, 0.001, " Mbit/s" },
        { 0.0, 0.000001, " Gbit/s" },
    };
    constexpr std::size_t fallbackIndex = 3;

    std::stringstream formatted(std::stringstream::in | std::stringstream::out);
    formatted << std::setprecision(3);
    if (std::isnan(bitrateInKbitsPerSecond)) {
        formatted << "indeterminable";
        return formatted.str();
    }

    // pick the first unit whose limit is not reached
    const Unit *const units = useIecBinaryPrefixes ? byteUnits : bitUnits;
    std::size_t unitIndex = 0;
    while (unitIndex < fallbackIndex && !(bitrateInKbitsPerSecond < units[unitIndex].exclusiveLimit)) {
        ++unitIndex;
    }
    formatted << (bitrateInKbitsPerSecond * units[unitIndex].factor) << units[unitIndex].suffix;
    return formatted.str();
}
314 
// Base64 alphabet, indexed by 6-bit value.
const char *const base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Character used to pad the last quantum to four characters.
const char base64Pad = '=';

/*!
 * \brief Encodes the specified data to Base64.
 * \param data Bytes to be encoded; may be null if \a dataSize is 0.
 * \param dataSize Number of bytes to encode.
 * \returns Returns the encoded string; its size is always a multiple of four due to padding.
 */
std::string encodeBase64(const byte *data, uint32 dataSize)
{
    // number of bytes not fitting into a full group of three
    const uint32 trailingBytes = dataSize % 3;
    std::string encoded;
    // compute in std::size_t so the multiplication cannot wrap around for large inputs
    encoded.reserve((static_cast<std::size_t>(dataSize / 3) + (trailingBytes ? 1 : 0)) * 4);

    // full groups: three bytes become four characters
    const byte *const fullGroupsEnd = data + (dataSize - trailingBytes);
    for (; data != fullGroupsEnd; data += 3) {
        const uint32 group = (static_cast<uint32>(data[0]) << 16) | (static_cast<uint32>(data[1]) << 8) | static_cast<uint32>(data[2]);
        encoded.push_back(base64Chars[(group >> 18) & 0x3F]);
        encoded.push_back(base64Chars[(group >> 12) & 0x3F]);
        encoded.push_back(base64Chars[(group >> 6) & 0x3F]);
        encoded.push_back(base64Chars[group & 0x3F]);
    }

    // remaining one or two bytes: missing bits are zero, missing characters are padding
    switch (trailingBytes) {
    case 1: {
        const uint32 group = static_cast<uint32>(data[0]) << 16;
        encoded.push_back(base64Chars[(group >> 18) & 0x3F]);
        encoded.push_back(base64Chars[(group >> 12) & 0x3F]);
        encoded.push_back(base64Pad);
        encoded.push_back(base64Pad);
        break;
    }
    case 2: {
        const uint32 group = (static_cast<uint32>(data[0]) << 16) | (static_cast<uint32>(data[1]) << 8);
        encoded.push_back(base64Chars[(group >> 18) & 0x3F]);
        encoded.push_back(base64Chars[(group >> 12) & 0x3F]);
        encoded.push_back(base64Chars[(group >> 6) & 0x3F]);
        encoded.push_back(base64Pad);
        break;
    }
    default:;
    }
    return encoded;
}
358 
364 pair<unique_ptr<byte[]>, uint32> decodeBase64(const char *encodedStr, const uint32 strSize)
365 {
366  if (strSize % 4) {
367  throw ConversionException("invalid size of base64");
368  }
369  uint32 decodedSize = (strSize / 4) * 3;
370  const char *const end = encodedStr + strSize;
371  if (strSize) {
372  if (*(end - 1) == base64Pad) {
373  --decodedSize;
374  }
375  if (*(end - 2) == base64Pad) {
376  --decodedSize;
377  }
378  }
379  auto buffer = make_unique<byte[]>(decodedSize);
380  auto *iter = buffer.get() - 1;
381  while (encodedStr < end) {
382  int32 temp = 0;
383  for (byte quantumPos = 0; quantumPos < 4; ++quantumPos, ++encodedStr) {
384  temp <<= 6;
385  if (*encodedStr >= 'A' && *encodedStr <= 'Z') {
386  temp |= *encodedStr - 'A';
387  } else if (*encodedStr >= 'a' && *encodedStr <= 'z') {
388  temp |= *encodedStr - 'a' + 26;
389  } else if (*encodedStr >= '0' && *encodedStr <= '9') {
390  temp |= *encodedStr - '0' + 2 * 26;
391  } else if (*encodedStr == '+') {
392  temp |= 2 * 26 + 10;
393  } else if (*encodedStr == '/') {
394  temp |= 2 * 26 + 10 + 1;
395  } else if (*encodedStr == base64Pad) {
396  switch (end - encodedStr) {
397  case 1:
398  *++iter = (temp >> 16) & 0xFF;
399  *++iter = (temp >> 8) & 0xFF;
400  return make_pair(move(buffer), decodedSize);
401  case 2:
402  *++iter = (temp >> 10) & 0xFF;
403  return make_pair(move(buffer), decodedSize);
404  default:
405  throw ConversionException("invalid padding in base64");
406  }
407  } else {
408  throw ConversionException("invalid character in base64");
409  }
410  }
411  *++iter = (temp >> 16) & 0xFF;
412  *++iter = (temp >> 8) & 0xFF;
413  *++iter = (temp)&0xFF;
414  }
415  return make_pair(move(buffer), decodedSize);
416 }
417 } // namespace ConversionUtilities
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (little-endian).
CPP_UTILITIES_EXPORT std::string encodeBase64(const byte *data, uint32 dataSize)
Encodes the specified data to Base64.
std::pair< std::unique_ptr< char[], StringDataDeleter >, std::size_t > StringData
Type used to return string encoding conversion result.
The ConversionException class is thrown by the various conversion functions of this library when a conversion error occurs.
CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (big-endian) string to UTF-8.
CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar='\0')
Truncates all characters after the first occurrence of the specified terminationChar and the termination character itself.
std::uint64_t uint64
unsigned 64-bit integer
Definition: types.h:49
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (big-endian).
constexpr T max(T first, T second)
Returns the greatest of the given items.
Definition: math.h:29
CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to Latin-1.
Contains several functions providing conversions between different data types.
std::uint32_t uint32
unsigned 32-bit integer
Definition: types.h:44
CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (little-endian) string to UTF-8.
std::int32_t int32
signed 32-bit integer
Definition: types.h:24
CPP_UTILITIES_EXPORT StringData convertString(const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor=1.0f)
Converts the specified string from one character set to another.
CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified Latin-1 string to UTF-8.
CPP_UTILITIES_EXPORT std::string dataSizeToString(uint64 sizeInByte, bool includeByte=false)
Converts the specified data size in byte to its equivalent std::string representation.
CPP_UTILITIES_EXPORT std::pair< std::unique_ptr< byte[]>, uint32 > decodeBase64(const char *encodedStr, const uint32 strSize)
Decodes the specified Base64 encoded string.
CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits=false)
Converts the specified bitrate in kbit/s to its equivalent std::string representation.