C++ Utilities 5.22.0
Useful C++ classes and routines such as argument parser, IO and conversion utilities
Loading...
Searching...
No Matches
stringconversion.cpp
Go to the documentation of this file.
2
3#ifndef CPP_UTILITIES_NO_THREAD_LOCAL
4#include "../feature_detection/features.h"
5#endif
6
7#ifndef CPP_UTILITIES_THREAD_LOCAL
8#define CPP_UTILITIES_THREAD_LOCAL
9#endif
10
11#include <cmath>
12#include <cstdlib>
13#include <iomanip>
14#include <limits>
15#include <memory>
16#include <sstream>
17
18#include <errno.h>
19#include <iconv.h>
20
21#ifdef PLATFORM_WINDOWS
22#include <windows.h>
23// note: The windows header seriously defines a macro called "max" breaking the (common) use
24// of std::numeric_limits in the subsequent code. So we need to undefine this macro. Note that
25// this is not the case using mingw-w64 but it is happening with windows.h from Windows Kits
26// version 10.0.22000.0 via Visual Studio 2022.
27#ifdef max
28#undef max
29#endif
30#endif
31
32using namespace std;
33
34namespace CppUtilities {
35
37
/*!
 * \brief Output size hint which assumes the output needs as many bytes as the input.
 *
 * The call operator is const (and constexpr/noexcept) because the functor is stateless;
 * this allows invoking it through const objects as well.
 */
struct Keep {
    /// Returns \a value unchanged.
    constexpr std::size_t operator()(std::size_t value) const noexcept
    {
        return value;
    }
};
/*!
 * \brief Output size hint which assumes the output needs twice as many bytes as the input.
 *
 * The call operator is const (and constexpr/noexcept) because the functor is stateless;
 * this allows invoking it through const objects as well.
 */
struct Double {
    /// Returns \a value added to itself (unsigned arithmetic, so it wraps around on overflow).
    constexpr std::size_t operator()(std::size_t value) const noexcept
    {
        return value + value;
    }
};
/*!
 * \brief Output size hint which assumes the output needs half as many bytes as the input.
 *
 * The call operator is const (and constexpr/noexcept) because the functor is stateless;
 * this allows invoking it through const objects as well.
 */
struct Half {
    /// Returns \a value divided by two (integer division, so the result is rounded down).
    constexpr std::size_t operator()(std::size_t value) const noexcept
    {
        return value / 2;
    }
};
/*!
 * \brief Output size hint which scales the input size by a user-defined factor.
 */
struct Factor {
    /*!
     * \brief Constructs the hint with the specified \a factor.
     * \remarks Deliberately not explicit so a plain float can be passed where a Factor is expected.
     */
    Factor(float factor)
        : factor(factor)
    {
    }

    /*!
     * \brief Returns \a value multiplied by the factor, truncated towards zero.
     *
     * Converting a floating point value which is negative, NaN or too big for the target type to
     * an integer is undefined behavior. Hence such products are clamped: negative products and NaN
     * yield 0 and too big products yield the maximum value of std::size_t.
     */
    std::size_t operator()(std::size_t value) const
    {
        const float scaled = static_cast<float>(value) * factor;
        // written as negated comparison so NaN ends up here as well
        if (!(scaled > 0.0f)) {
            return 0;
        }
        // the maximum of std::size_t is rounded up to the next power of two when converted to float,
        // so everything strictly below that limit is representable
        if (scaled >= static_cast<float>(std::numeric_limits<std::size_t>::max())) {
            return std::numeric_limits<std::size_t>::max();
        }
        return static_cast<std::size_t>(scaled);
    }

    float factor;
};
65
66template <class OutputSizeHint> class ConversionDescriptor {
67public:
68 ConversionDescriptor(const char *fromCharset, const char *toCharset)
69 : m_ptr(iconv_open(toCharset, fromCharset))
70 , m_outputSizeHint(OutputSizeHint())
71 {
72 if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
73 throw ConversionException("Unable to allocate descriptor for character set conversion.");
74 }
75 }
76
77 ConversionDescriptor(const char *fromCharset, const char *toCharset, OutputSizeHint outputSizeHint)
78 : m_ptr(iconv_open(toCharset, fromCharset))
79 , m_outputSizeHint(outputSizeHint)
80 {
81 if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
82 throw ConversionException("Unable to allocate descriptor for character set conversion.");
83 }
84 }
85
86 ~ConversionDescriptor()
87 {
88 iconv_close(m_ptr);
89 }
90
91public:
92 StringData convertString(const char *inputBuffer, size_t inputBufferSize)
93 {
94 // setup input and output buffer
95 size_t inputBytesLeft = inputBufferSize;
96 size_t outputSize = m_outputSizeHint(inputBufferSize);
97 size_t outputBytesLeft = outputSize;
98 char *outputBuffer = reinterpret_cast<char *>(malloc(outputSize));
99 size_t bytesWritten;
100
101 char *currentOutputOffset = outputBuffer;
102 for (;; currentOutputOffset = outputBuffer + bytesWritten) {
103 bytesWritten = iconv(m_ptr, const_cast<char **>(&inputBuffer), &inputBytesLeft, &currentOutputOffset, &outputBytesLeft);
104 if (bytesWritten == static_cast<size_t>(-1)) {
105 if (errno == EINVAL) {
106 // ignore incomplete multibyte sequence in the input
107 bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
108 break;
109 } else if (errno == E2BIG) {
110 // output buffer has no more room for next converted character
111 bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
112 outputBytesLeft = (outputSize += m_outputSizeHint(inputBytesLeft)) - bytesWritten;
113 outputBuffer = reinterpret_cast<char *>(realloc(outputBuffer, outputSize));
114 } else /*if(errno == EILSEQ)*/ {
115 // invalid multibyte sequence in the input
116 free(outputBuffer);
117 throw ConversionException("Invalid multibyte sequence in the input.");
118 }
119 } else {
120 // conversion completed without (further) errors
121 break;
122 }
123 }
124 return StringData(std::unique_ptr<char[], StringDataDeleter>(outputBuffer), currentOutputOffset - outputBuffer);
125 }
126
127private:
128 iconv_t m_ptr;
129 OutputSizeHint m_outputSizeHint;
130};
131
133
144 const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor)
145{
146 return ConversionDescriptor<Factor>(fromCharset, toCharset, outputBufferSizeFactor).convertString(inputBuffer, inputBufferSize);
147}
148
152StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
153{
154 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16LE");
155 return descriptor.convertString(inputBuffer, inputBufferSize);
156}
157
161StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
162{
163 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16LE", "UTF-8");
164 return descriptor.convertString(inputBuffer, inputBufferSize);
165}
166
170StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
171{
172 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16BE");
173 return descriptor.convertString(inputBuffer, inputBufferSize);
174}
175
179StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
180{
181 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16BE", "UTF-8");
182 return descriptor.convertString(inputBuffer, inputBufferSize);
183}
184
188StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
189{
190 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("ISO-8859-1", "UTF-8");
191 return descriptor.convertString(inputBuffer, inputBufferSize);
192}
193
197StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
198{
199 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("UTF-8", "ISO-8859-1");
200 return descriptor.convertString(inputBuffer, inputBufferSize);
201}
202
203#ifdef PLATFORM_WINDOWS
208std::wstring convertMultiByteToWide(std::error_code &ec, std::string_view inputBuffer)
209{
210 // calculate required size
211 auto widePath = std::wstring();
212 auto size = MultiByteToWideChar(CP_UTF8, 0, inputBuffer.data(), inputBuffer.size(), nullptr, 0);
213 if (size <= 0) {
214 ec = std::error_code(GetLastError(), std::system_category());
215 return widePath;
216 }
217 // do the actual conversion
218 widePath.resize(static_cast<std::wstring::size_type>(size));
219 size = MultiByteToWideChar(CP_UTF8, 0, inputBuffer.data(), inputBuffer.size(), widePath.data(), size);
220 if (size <= 0) {
221 ec = std::error_code(GetLastError(), std::system_category());
222 widePath.clear();
223 }
224 return widePath;
225}
226
233WideStringData convertMultiByteToWide(std::error_code &ec, const char *inputBuffer, int inputBufferSize)
234{
235 // calculate required size
236 WideStringData widePath;
237 widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, nullptr, 0);
238 if (widePath.second <= 0) {
239 ec = std::error_code(GetLastError(), std::system_category());
240 return widePath;
241 }
242 // do the actual conversion
243 widePath.first = make_unique<wchar_t[]>(static_cast<size_t>(widePath.second));
244 widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, widePath.first.get(), widePath.second);
245 if (widePath.second <= 0) {
246 ec = std::error_code(GetLastError(), std::system_category());
247 widePath.first.reset();
248 }
249 return widePath;
250}
251
256WideStringData convertMultiByteToWide(std::error_code &ec, const std::string &inputBuffer)
257{
258 return convertMultiByteToWide(ec, inputBuffer.data(),
259 inputBuffer.size() < static_cast<std::size_t>(std::numeric_limits<int>::max() - 1) ? static_cast<int>(inputBuffer.size() + 1) : -1);
260}
261
268WideStringData convertMultiByteToWide(const char *inputBuffer, int inputBufferSize)
269{
270 std::error_code ec;
271 return convertMultiByteToWide(ec, inputBuffer, inputBufferSize);
272}
273
278WideStringData convertMultiByteToWide(const std::string &inputBuffer)
279{
280 std::error_code ec;
281 return convertMultiByteToWide(ec, inputBuffer);
282}
283#endif
284
/*!
 * \brief Truncates all characters after the first occurrence of the specified
 *        \a terminationChar and the termination character itself.
 * \param str The string to truncate in place; left untouched if it does not contain \a terminationChar.
 * \param terminationChar The character to cut the string at.
 */
void truncateString(std::string &str, char terminationChar)
{
    if (const auto terminationPos = str.find(terminationChar); terminationPos != std::string::npos) {
        str.erase(terminationPos);
    }
}
296
/*!
 * \brief Converts the specified data size in byte to its equivalent std::string representation.
 * \param sizeInByte The size in bytes.
 * \param includeByte Whether the exact number of bytes should be appended in parentheses. This is
 *        only done if a unit other than "bytes" is used (so for sizes of at least 1024 bytes).
 * \returns Returns the size with two decimal places using the biggest fitting unit of KiB, MiB,
 *          GiB and TiB; sizes below 1024 are returned as plain number of bytes.
 */
std::string dataSizeToString(std::uint64_t sizeInByte, bool includeByte)
{
    constexpr std::uint64_t kibibyte = 1024u;
    constexpr std::uint64_t mebibyte = kibibyte * 1024u;
    constexpr std::uint64_t gibibyte = mebibyte * 1024u;
    constexpr std::uint64_t tebibyte = gibibyte * 1024u;

    std::ostringstream res;
    res.setf(std::ios::fixed, std::ios::floatfield);
    res << std::setprecision(2);
    if (sizeInByte < kibibyte) {
        res << sizeInByte << " bytes";
    } else if (sizeInByte < mebibyte) {
        res << (static_cast<double>(sizeInByte) / 1024.0) << " KiB";
    } else if (sizeInByte < gibibyte) {
        res << (static_cast<double>(sizeInByte) / 1048576.0) << " MiB";
    } else if (sizeInByte < tebibyte) {
        res << (static_cast<double>(sizeInByte) / 1073741824.0) << " GiB";
    } else {
        res << (static_cast<double>(sizeInByte) / 1099511627776.0) << " TiB";
    }
    // note: The comparison must include exactly 1024 bytes as this size is already shown as "1.00 KiB".
    if (includeByte && sizeInByte >= kibibyte) {
        res << ' ' << '(' << sizeInByte << " byte)";
    }
    return res.str();
}
323
/*!
 * \brief Converts the specified bitrate in kbit/s to its equivalent std::string representation.
 * \param bitrateInKbitsPerSecond The bitrate in kbit/s; "indeterminable" is returned for NaN.
 * \param useIecBinaryPrefixes Whether the byte-based units (byte/s, KiB/s, MiB/s, GiB/s) should be
 *        used instead of the bit-based units (bit/s, kbit/s, Mbit/s, Gbit/s).
 * \returns Returns the scaled value with up to three significant digits followed by the unit.
 */
std::string bitrateToString(double bitrateInKbitsPerSecond, bool useIecBinaryPrefixes)
{
    std::stringstream res(std::stringstream::in | std::stringstream::out);
    res << std::setprecision(3);
    if (std::isnan(bitrateInKbitsPerSecond)) {
        res << "indeterminable";
        return res.str();
    }

    // pick the scaled value and the unit first, then print both at once
    double scaledBitrate;
    const char *unit;
    if (useIecBinaryPrefixes) {
        if (bitrateInKbitsPerSecond < 8.0) {
            scaledBitrate = bitrateInKbitsPerSecond * 125.0;
            unit = " byte/s";
        } else if (bitrateInKbitsPerSecond < 8000.0) {
            scaledBitrate = bitrateInKbitsPerSecond * 0.125;
            unit = " KiB/s";
        } else if (bitrateInKbitsPerSecond < 8000000.0) {
            scaledBitrate = bitrateInKbitsPerSecond * 0.000125;
            unit = " MiB/s";
        } else {
            scaledBitrate = bitrateInKbitsPerSecond * 0.000000125;
            unit = " GiB/s";
        }
    } else {
        if (bitrateInKbitsPerSecond < 1.0) {
            scaledBitrate = bitrateInKbitsPerSecond * 1000.0;
            unit = " bit/s";
        } else if (bitrateInKbitsPerSecond < 1000.0) {
            scaledBitrate = bitrateInKbitsPerSecond;
            unit = " kbit/s";
        } else if (bitrateInKbitsPerSecond < 1000000.0) {
            scaledBitrate = bitrateInKbitsPerSecond * 0.001;
            unit = " Mbit/s";
        } else {
            scaledBitrate = bitrateInKbitsPerSecond * 0.000001;
            unit = " Gbit/s";
        }
    }
    res << scaledBitrate << unit;
    return res.str();
}
363
/// The Base64 alphabet; the index of a character is the 6-bit value it represents.
const char *const base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/// The character used to pad the last quantum to four characters.
const char base64Pad = '=';

/*!
 * \brief Encodes the specified data to Base64.
 * \param data Pointer to the bytes to encode; not accessed if \a dataSize is zero.
 * \param dataSize Number of bytes to encode.
 * \returns Returns the padded Base64 representation; its size is always a multiple of four.
 * \remarks The input is accessed by index only. So no pointer before the beginning of the input is
 *          formed (which would be undefined behavior, also for a null pointer and a size of zero).
 */
std::string encodeBase64(const std::uint8_t *data, std::uint32_t dataSize)
{
    auto encoded = std::string();
    const std::size_t remainingBytes = dataSize % 3u;
    const std::size_t fullQuantaSize = dataSize - remainingBytes;
    // four characters per (possibly incomplete) group of three bytes; computed as std::size_t so the
    // multiplication cannot wrap around in 32-bit arithmetic
    encoded.reserve((static_cast<std::size_t>(dataSize / 3u) + (remainingBytes ? 1u : 0u)) * 4u);

    // encode all complete groups of three bytes
    std::size_t index = 0;
    for (; index != fullQuantaSize; index += 3) {
        const std::uint32_t quantum = (static_cast<std::uint32_t>(data[index]) << 16) | (static_cast<std::uint32_t>(data[index + 1]) << 8)
            | static_cast<std::uint32_t>(data[index + 2]);
        encoded.push_back(base64Chars[(quantum & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(quantum & 0x0003F000) >> 12]);
        encoded.push_back(base64Chars[(quantum & 0x00000FC0) >> 6]);
        encoded.push_back(base64Chars[(quantum & 0x0000003F)]);
    }

    // encode the remaining one or two bytes and pad the output
    switch (remainingBytes) {
    case 1: {
        const std::uint32_t quantum = static_cast<std::uint32_t>(data[index]) << 16;
        encoded.push_back(base64Chars[(quantum & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(quantum & 0x0003F000) >> 12]);
        encoded.push_back(base64Pad);
        encoded.push_back(base64Pad);
        break;
    }
    case 2: {
        const std::uint32_t quantum = (static_cast<std::uint32_t>(data[index]) << 16) | (static_cast<std::uint32_t>(data[index + 1]) << 8);
        encoded.push_back(base64Chars[(quantum & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(quantum & 0x0003F000) >> 12]);
        encoded.push_back(base64Chars[(quantum & 0x00000FC0) >> 6]);
        encoded.push_back(base64Pad);
        break;
    }
    default:;
    }
    return encoded;
}
407
413pair<unique_ptr<std::uint8_t[]>, std::uint32_t> decodeBase64(const char *encodedStr, const std::uint32_t strSize)
414{
415 if (strSize % 4) {
416 throw ConversionException("invalid size of base64");
417 }
418 std::uint32_t decodedSize = (strSize / 4) * 3;
419 const char *const end = encodedStr + strSize;
420 if (strSize) {
421 if (*(end - 1) == base64Pad) {
422 --decodedSize;
423 }
424 if (*(end - 2) == base64Pad) {
425 --decodedSize;
426 }
427 }
428 auto buffer = make_unique<std::uint8_t[]>(decodedSize);
429 auto *iter = buffer.get() - 1;
430 while (encodedStr < end) {
431 std::int32_t temp = 0;
432 for (std::uint8_t quantumPos = 0; quantumPos < 4; ++quantumPos, ++encodedStr) {
433 temp <<= 6;
434 if (*encodedStr >= 'A' && *encodedStr <= 'Z') {
435 temp |= *encodedStr - 'A';
436 } else if (*encodedStr >= 'a' && *encodedStr <= 'z') {
437 temp |= *encodedStr - 'a' + 26;
438 } else if (*encodedStr >= '0' && *encodedStr <= '9') {
439 temp |= *encodedStr - '0' + 2 * 26;
440 } else if (*encodedStr == '+') {
441 temp |= 2 * 26 + 10;
442 } else if (*encodedStr == '/') {
443 temp |= 2 * 26 + 10 + 1;
444 } else if (*encodedStr == base64Pad) {
445 switch (end - encodedStr) {
446 case 1:
447 *++iter = static_cast<std::uint8_t>((temp >> 16) & 0xFF);
448 *++iter = static_cast<std::uint8_t>((temp >> 8) & 0xFF);
449 return make_pair(std::move(buffer), decodedSize);
450 case 2:
451 *++iter = static_cast<std::uint8_t>((temp >> 10) & 0xFF);
452 return make_pair(std::move(buffer), decodedSize);
453 default:
454 throw ConversionException("invalid padding in base64");
455 }
456 } else {
457 throw ConversionException("invalid character in base64");
458 }
459 }
460 *++iter = static_cast<std::uint8_t>((temp >> 16) & 0xFF);
461 *++iter = static_cast<std::uint8_t>((temp >> 8) & 0xFF);
462 *++iter = static_cast<std::uint8_t>(temp & 0xFF);
463 }
464 return make_pair(std::move(buffer), decodedSize);
465}
466} // namespace CppUtilities
The ConversionException class is thrown by the various conversion functions of this library when a co...
Contains all utilities provided by the c++utilities library.
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (big-endian).
CPP_UTILITIES_EXPORT StringData convertString(const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor=1.0f)
Converts the specified string from one character set to another.
CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified Latin-1 string to UTF-8.
CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (little-endian) string to UTF-8.
CPP_UTILITIES_EXPORT std::pair< std::unique_ptr< std::uint8_t[]>, std::uint32_t > decodeBase64(const char *encodedStr, const std::uint32_t strSize)
Decodes the specified Base64 encoded string.
CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar='\0')
Truncates all characters after the first occurrence of the specified terminationChar and the terminat...
std::pair< std::unique_ptr< char[], StringDataDeleter >, std::size_t > StringData
Type used to return string encoding conversion result.
CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (big-endian) string to UTF-8.
CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to Latin-1.
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (little-endian).
CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits=false)
Converts the specified bitrate in kbit/s to its equivalent std::string representation.
CPP_UTILITIES_EXPORT std::string encodeBase64(const std::uint8_t *data, std::uint32_t dataSize)
Encodes the specified data to Base64.
CPP_UTILITIES_EXPORT std::string dataSizeToString(std::uint64_t sizeInByte, bool includeByte=false)
Converts the specified data size in byte to its equivalent std::string representation.
STL namespace.
#define CPP_UTILITIES_THREAD_LOCAL