C++ Utilities 5.20.0
Useful C++ classes and routines such as argument parser, IO and conversion utilities
stringconversion.cpp
Go to the documentation of this file.
2
3#ifndef CPP_UTILITIES_NO_THREAD_LOCAL
4#include "../feature_detection/features.h"
5#else
6#define CPP_UTILITIES_THREAD_LOCAL
7#endif
8
9#include <cmath>
10#include <cstdlib>
11#include <iomanip>
12#include <limits>
13#include <memory>
14#include <sstream>
15
16#include <errno.h>
17#include <iconv.h>
18
19#ifdef PLATFORM_WINDOWS
20#include <windows.h>
21#endif
22
23using namespace std;
24
25namespace CppUtilities {
26
28
/*!
 * \brief Output size hint suggesting an output buffer of exactly the input size.
 */
struct Keep {
    std::size_t operator()(std::size_t inputSize)
    {
        // same number of bytes as the input
        return inputSize;
    }
};
/*!
 * \brief Output size hint suggesting an output buffer twice as large as the input.
 */
struct Double {
    std::size_t operator()(std::size_t inputSize)
    {
        // unsigned arithmetic: wraps around exactly like an addition of the value to itself
        return inputSize * 2u;
    }
};
/*!
 * \brief Output size hint suggesting an output buffer half as large as the input (rounded down).
 */
struct Half {
    std::size_t operator()(std::size_t inputSize)
    {
        // shifting an unsigned value right by one bit is the same as the integer division by two
        return inputSize >> 1;
    }
};
/*!
 * \brief Output size hint suggesting an output buffer of the input size scaled by a fixed factor.
 *
 * The product is computed in double precision so input sizes above 2^24 are not rounded
 * before scaling. The result is clamped to the range of std::size_t; a negative or NaN
 * product yields 0.
 */
struct Factor {
    // intentionally not explicit: a plain float is passed where a Factor is expected
    Factor(float factor)
        : factor(factor)
    {
    }
    std::size_t operator()(std::size_t value) const
    {
        const double scaled = static_cast<double>(value) * static_cast<double>(factor);
        // covers products below one as well as negative and NaN products (comparison with NaN is false)
        if (!(scaled >= 1.0)) {
            return 0;
        }
        // converting an out-of-range floating point value to an integer is undefined behavior
        constexpr auto limit = std::numeric_limits<std::size_t>::max();
        if (scaled >= static_cast<double>(limit)) {
            return limit;
        }
        return static_cast<std::size_t>(scaled);
    }
    float factor;
};
56
57template <class OutputSizeHint> class ConversionDescriptor {
58public:
59 ConversionDescriptor(const char *fromCharset, const char *toCharset)
60 : m_ptr(iconv_open(toCharset, fromCharset))
61 , m_outputSizeHint(OutputSizeHint())
62 {
63 if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
64 throw ConversionException("Unable to allocate descriptor for character set conversion.");
65 }
66 }
67
68 ConversionDescriptor(const char *fromCharset, const char *toCharset, OutputSizeHint outputSizeHint)
69 : m_ptr(iconv_open(toCharset, fromCharset))
70 , m_outputSizeHint(outputSizeHint)
71 {
72 if (m_ptr == reinterpret_cast<iconv_t>(-1)) {
73 throw ConversionException("Unable to allocate descriptor for character set conversion.");
74 }
75 }
76
77 ~ConversionDescriptor()
78 {
79 iconv_close(m_ptr);
80 }
81
82public:
83 StringData convertString(const char *inputBuffer, size_t inputBufferSize)
84 {
85 // setup input and output buffer
86 size_t inputBytesLeft = inputBufferSize;
87 size_t outputSize = m_outputSizeHint(inputBufferSize);
88 size_t outputBytesLeft = outputSize;
89 char *outputBuffer = reinterpret_cast<char *>(malloc(outputSize));
90 size_t bytesWritten;
91
92 char *currentOutputOffset = outputBuffer;
93 for (;; currentOutputOffset = outputBuffer + bytesWritten) {
94 bytesWritten = iconv(m_ptr, const_cast<char **>(&inputBuffer), &inputBytesLeft, &currentOutputOffset, &outputBytesLeft);
95 if (bytesWritten == static_cast<size_t>(-1)) {
96 if (errno == EINVAL) {
97 // ignore incomplete multibyte sequence in the input
98 bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
99 break;
100 } else if (errno == E2BIG) {
101 // output buffer has no more room for next converted character
102 bytesWritten = static_cast<size_t>(currentOutputOffset - outputBuffer);
103 outputBytesLeft = (outputSize += m_outputSizeHint(inputBytesLeft)) - bytesWritten;
104 outputBuffer = reinterpret_cast<char *>(realloc(outputBuffer, outputSize));
105 } else /*if(errno == EILSEQ)*/ {
106 // invalid multibyte sequence in the input
107 free(outputBuffer);
108 throw ConversionException("Invalid multibyte sequence in the input.");
109 }
110 } else {
111 // conversion completed without (further) errors
112 break;
113 }
114 }
115 return StringData(std::unique_ptr<char[], StringDataDeleter>(outputBuffer), currentOutputOffset - outputBuffer);
116 }
117
118private:
119 iconv_t m_ptr;
120 OutputSizeHint m_outputSizeHint;
121};
122
124
135 const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor)
136{
137 return ConversionDescriptor<Factor>(fromCharset, toCharset, outputBufferSizeFactor).convertString(inputBuffer, inputBufferSize);
138}
139
143StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
144{
145 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16LE");
146 return descriptor.convertString(inputBuffer, inputBufferSize);
147}
148
152StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
153{
154 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16LE", "UTF-8");
155 return descriptor.convertString(inputBuffer, inputBufferSize);
156}
157
161StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
162{
163 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Double> descriptor("UTF-8", "UTF-16BE");
164 return descriptor.convertString(inputBuffer, inputBufferSize);
165}
166
170StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
171{
172 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Half> descriptor("UTF-16BE", "UTF-8");
173 return descriptor.convertString(inputBuffer, inputBufferSize);
174}
175
179StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
180{
181 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("ISO-8859-1", "UTF-8");
182 return descriptor.convertString(inputBuffer, inputBufferSize);
183}
184
188StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
189{
190 CPP_UTILITIES_THREAD_LOCAL ConversionDescriptor<Keep> descriptor("UTF-8", "ISO-8859-1");
191 return descriptor.convertString(inputBuffer, inputBufferSize);
192}
193
194#ifdef PLATFORM_WINDOWS
199std::wstring convertMultiByteToWide(std::error_code &ec, std::string_view inputBuffer)
200{
201 // calculate required size
202 auto widePath = std::wstring();
203 auto size = MultiByteToWideChar(CP_UTF8, 0, inputBuffer.data(), inputBuffer.size(), nullptr, 0);
204 if (size <= 0) {
205 ec = std::error_code(GetLastError(), std::system_category());
206 return widePath;
207 }
208 // do the actual conversion
209 widePath.resize(static_cast<std::wstring::size_type>(size));
210 size = MultiByteToWideChar(CP_UTF8, 0, inputBuffer.data(), inputBuffer.size(), widePath.data(), size);
211 if (size <= 0) {
212 ec = std::error_code(GetLastError(), std::system_category());
213 widePath.clear();
214 }
215 return widePath;
216}
217
224WideStringData convertMultiByteToWide(std::error_code &ec, const char *inputBuffer, int inputBufferSize)
225{
226 // calculate required size
227 WideStringData widePath;
228 widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, nullptr, 0);
229 if (widePath.second <= 0) {
230 ec = std::error_code(GetLastError(), std::system_category());
231 return widePath;
232 }
233 // do the actual conversion
234 widePath.first = make_unique<wchar_t[]>(static_cast<size_t>(widePath.second));
235 widePath.second = MultiByteToWideChar(CP_UTF8, 0, inputBuffer, inputBufferSize, widePath.first.get(), widePath.second);
236 if (widePath.second <= 0) {
237 ec = std::error_code(GetLastError(), std::system_category());
238 widePath.first.reset();
239 }
240 return widePath;
241}
242
247WideStringData convertMultiByteToWide(std::error_code &ec, const std::string &inputBuffer)
248{
249 return convertMultiByteToWide(
250 ec, inputBuffer.data(), inputBuffer.size() < (numeric_limits<int>::max() - 1) ? static_cast<int>(inputBuffer.size() + 1) : -1);
251}
252
259WideStringData convertMultiByteToWide(const char *inputBuffer, int inputBufferSize)
260{
261 std::error_code ec;
262 return convertMultiByteToWide(ec, inputBuffer, inputBufferSize);
263}
264
269WideStringData convertMultiByteToWide(const std::string &inputBuffer)
270{
271 std::error_code ec;
272 return convertMultiByteToWide(ec, inputBuffer);
273}
274#endif
275
/*!
 * \brief Cuts \a str off at the first occurrence of \a terminationChar.
 * \param str String to truncate in place.
 * \param terminationChar Character marking the end of the string.
 * \remarks The termination character itself is removed as well. The string is left
 *          unchanged if it does not contain the termination character.
 */
void truncateString(std::string &str, char terminationChar)
{
    const auto terminationPos = str.find(terminationChar);
    if (terminationPos == std::string::npos) {
        return;
    }
    // drop the termination character and everything behind it
    str.erase(terminationPos);
}
287
/*!
 * \brief Converts the specified data size in byte to its equivalent string representation.
 * \param sizeInByte Data size in bytes.
 * \param includeByte Whether the exact number of bytes should be appended in parentheses
 *        when a unit bigger than bytes is used.
 * \returns Returns e.g. "512 bytes", "2.50 KiB" or "2.50 KiB (2560 byte)".
 * \remarks Units are binary (1 KiB = 1024 bytes); values in KiB and above are printed
 *          with two decimal places.
 */
std::string dataSizeToString(std::uint64_t sizeInByte, bool includeByte)
{
    constexpr std::uint64_t kib = 1024u;
    constexpr std::uint64_t mib = kib * 1024u;
    constexpr std::uint64_t gib = mib * 1024u;
    constexpr std::uint64_t tib = gib * 1024u;

    std::ostringstream res;
    res.setf(std::ios::fixed, std::ios::floatfield);
    res << std::setprecision(2);

    const auto size = static_cast<double>(sizeInByte);
    if (sizeInByte < kib) {
        res << sizeInByte << " bytes";
    } else if (sizeInByte < mib) {
        res << (size / static_cast<double>(kib)) << " KiB";
    } else if (sizeInByte < gib) {
        res << (size / static_cast<double>(mib)) << " MiB";
    } else if (sizeInByte < tib) {
        res << (size / static_cast<double>(gib)) << " GiB";
    } else {
        res << (size / static_cast<double>(tib)) << " TiB";
    }
    // append the exact size whenever a bigger unit has been used above; this includes
    // exactly 1024 bytes which is already printed as "1.00 KiB"
    if (includeByte && sizeInByte >= kib) {
        res << ' ' << '(' << sizeInByte << " byte)";
    }
    return res.str();
}
314
/*!
 * \brief Converts the specified bitrate in kbit/s to its equivalent string representation.
 * \param bitrateInKbitsPerSecond Bitrate in kbit/s.
 * \param useIecBinaryPrefixes Whether the result is expressed in byte/s, KiB/s, MiB/s and
 *        GiB/s instead of bit/s, kbit/s, Mbit/s and Gbit/s.
 * \returns Returns the scaled value printed with three significant digits followed by the
 *          unit or "indeterminable" if the bitrate is NaN.
 * \remarks Each step to the next unit is a factor of 1000, also for the byte-based units.
 */
std::string bitrateToString(double bitrateInKbitsPerSecond, bool useIecBinaryPrefixes)
{
    if (std::isnan(bitrateInKbitsPerSecond)) {
        return "indeterminable";
    }

    // a scale applies to bitrates below its (exclusive) upper bound; the last scale of
    // each table applies to everything else so its bound is never checked
    struct Scale {
        double upperBound;
        double factor;
        const char *unit;
    };
    static constexpr Scale byteScales[] = {
        { 8.0, 125.0, " byte/s" },
        { 8000.0, 0.125, " KiB/s" },
        { 8000000.0, 0.000125, " MiB/s" },
        { 0.0, 0.000000125, " GiB/s" },
    };
    static constexpr Scale bitScales[] = {
        { 1.0, 1000.0, " bit/s" },
        { 1000.0, 1.0, " kbit/s" },
        { 1000000.0, 0.001, " Mbit/s" },
        { 0.0, 0.000001, " Gbit/s" },
    };

    const Scale *const scales = useIecBinaryPrefixes ? byteScales : bitScales;
    const Scale *const fallback = scales + 3;
    const Scale *selected = fallback;
    for (const Scale *candidate = scales; candidate != fallback; ++candidate) {
        if (bitrateInKbitsPerSecond < candidate->upperBound) {
            selected = candidate;
            break;
        }
    }

    std::ostringstream formatted;
    formatted << std::setprecision(3) << (bitrateInKbitsPerSecond * selected->factor) << selected->unit;
    return formatted.str();
}
354
// Base64 alphabet; the index of a character is the 6-bit value it encodes
const char *const base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// character used to fill up the last group of four characters
const char base64Pad = '=';

/*!
 * \brief Encodes the specified data to Base64.
 * \param data Bytes to encode; not dereferenced if \a dataSize is zero.
 * \param dataSize Number of bytes to encode.
 * \returns Returns the padded Base64 representation; it is empty if \a dataSize is zero.
 */
std::string encodeBase64(const std::uint8_t *data, std::uint32_t dataSize)
{
    auto encoded = std::string();
    const std::uint32_t remainder = dataSize % 3u;
    const std::size_t fullGroupsEnd = dataSize - remainder;
    // four characters per (possibly incomplete) group of three bytes; computed as
    // std::size_t so the multiplication cannot wrap around in 32 bits
    encoded.reserve((static_cast<std::size_t>(dataSize / 3u) + (remainder ? 1u : 0u)) * 4u);

    // encode all complete groups of three bytes
    std::size_t index = 0;
    for (; index != fullGroupsEnd; index += 3) {
        const std::uint32_t group = (static_cast<std::uint32_t>(data[index]) << 16) | (static_cast<std::uint32_t>(data[index + 1]) << 8)
            | static_cast<std::uint32_t>(data[index + 2]);
        encoded.push_back(base64Chars[(group & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(group & 0x0003F000) >> 12]);
        encoded.push_back(base64Chars[(group & 0x00000FC0) >> 6]);
        encoded.push_back(base64Chars[(group & 0x0000003F)]);
    }

    // encode the one or two remaining bytes and pad the group to four characters
    switch (remainder) {
    case 1: {
        const std::uint32_t group = static_cast<std::uint32_t>(data[index]) << 16;
        encoded.push_back(base64Chars[(group & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(group & 0x0003F000) >> 12]);
        encoded.push_back(base64Pad);
        encoded.push_back(base64Pad);
        break;
    }
    case 2: {
        const std::uint32_t group = (static_cast<std::uint32_t>(data[index]) << 16) | (static_cast<std::uint32_t>(data[index + 1]) << 8);
        encoded.push_back(base64Chars[(group & 0x00FC0000) >> 18]);
        encoded.push_back(base64Chars[(group & 0x0003F000) >> 12]);
        encoded.push_back(base64Chars[(group & 0x00000FC0) >> 6]);
        encoded.push_back(base64Pad);
        break;
    }
    default:;
    }
    return encoded;
}
398
404pair<unique_ptr<std::uint8_t[]>, std::uint32_t> decodeBase64(const char *encodedStr, const std::uint32_t strSize)
405{
406 if (strSize % 4) {
407 throw ConversionException("invalid size of base64");
408 }
409 std::uint32_t decodedSize = (strSize / 4) * 3;
410 const char *const end = encodedStr + strSize;
411 if (strSize) {
412 if (*(end - 1) == base64Pad) {
413 --decodedSize;
414 }
415 if (*(end - 2) == base64Pad) {
416 --decodedSize;
417 }
418 }
419 auto buffer = make_unique<std::uint8_t[]>(decodedSize);
420 auto *iter = buffer.get() - 1;
421 while (encodedStr < end) {
422 std::int32_t temp = 0;
423 for (std::uint8_t quantumPos = 0; quantumPos < 4; ++quantumPos, ++encodedStr) {
424 temp <<= 6;
425 if (*encodedStr >= 'A' && *encodedStr <= 'Z') {
426 temp |= *encodedStr - 'A';
427 } else if (*encodedStr >= 'a' && *encodedStr <= 'z') {
428 temp |= *encodedStr - 'a' + 26;
429 } else if (*encodedStr >= '0' && *encodedStr <= '9') {
430 temp |= *encodedStr - '0' + 2 * 26;
431 } else if (*encodedStr == '+') {
432 temp |= 2 * 26 + 10;
433 } else if (*encodedStr == '/') {
434 temp |= 2 * 26 + 10 + 1;
435 } else if (*encodedStr == base64Pad) {
436 switch (end - encodedStr) {
437 case 1:
438 *++iter = static_cast<std::uint8_t>((temp >> 16) & 0xFF);
439 *++iter = static_cast<std::uint8_t>((temp >> 8) & 0xFF);
440 return make_pair(move(buffer), decodedSize);
441 case 2:
442 *++iter = static_cast<std::uint8_t>((temp >> 10) & 0xFF);
443 return make_pair(move(buffer), decodedSize);
444 default:
445 throw ConversionException("invalid padding in base64");
446 }
447 } else {
448 throw ConversionException("invalid character in base64");
449 }
450 }
451 *++iter = static_cast<std::uint8_t>((temp >> 16) & 0xFF);
452 *++iter = static_cast<std::uint8_t>((temp >> 8) & 0xFF);
453 *++iter = static_cast<std::uint8_t>(temp & 0xFF);
454 }
455 return make_pair(move(buffer), decodedSize);
456}
457} // namespace CppUtilities
The ConversionException class is thrown by the various conversion functions of this library when a conversion error occurs.
Contains all utilities provided by the c++utilities library.
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (big-endian).
CPP_UTILITIES_EXPORT StringData convertString(const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor=1.0f)
Converts the specified string from one character set to another.
CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified Latin-1 string to UTF-8.
CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (little-endian) string to UTF-8.
CPP_UTILITIES_EXPORT std::pair< std::unique_ptr< std::uint8_t[]>, std::uint32_t > decodeBase64(const char *encodedStr, const std::uint32_t strSize)
Decodes the specified Base64 encoded string.
CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar='\0')
Truncates all characters after the first occurrence of the specified terminationChar and the termination character itself.
std::pair< std::unique_ptr< char[], StringDataDeleter >, std::size_t > StringData
Type used to return string encoding conversion result.
CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (big-endian) string to UTF-8.
constexpr T max(T first, T second)
Returns the greatest of the given items.
Definition: math.h:100
CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to Latin-1.
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (little-endian).
CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits=false)
Converts the specified bitrate in kbit/s to its equivalent std::string representation.
CPP_UTILITIES_EXPORT std::string encodeBase64(const std::uint8_t *data, std::uint32_t dataSize)
Encodes the specified data to Base64.
CPP_UTILITIES_EXPORT std::string dataSizeToString(std::uint64_t sizeInByte, bool includeByte=false)
Converts the specified data size in byte to its equivalent std::string representation.
STL namespace.