#ifndef CONVERSION_UTILITIES_STRINGCONVERSION_H #define CONVERSION_UTILITIES_STRINGCONVERSION_H #include "./binaryconversion.h" #include "./conversionexception.h" #include "../misc/traits.h" #include #include #include #include #include #include #include #include #include #include #include #if __cplusplus >= 201709 && !defined(REFLECTIVE_RAPIDJSON_GENERATOR) #ifndef CPP_UTILITIES_USE_RANGES #define CPP_UTILITIES_USE_RANGES #endif #include #endif namespace CppUtilities { /*! * \brief The StringDataDeleter struct deletes the data of a StringData instance. */ struct CPP_UTILITIES_EXPORT StringDataDeleter { /*! * \brief Deletes the specified \a stringData with std::free(), because the memory has been * allocated using std::malloc()/std::realloc(). */ void operator()(char *stringData) { std::free(stringData); } }; /*! * \brief Type used to return string encoding conversion result. */ using StringData = std::pair, std::size_t>; //using StringData = std::pair, std::size_t>; // might work too CPP_UTILITIES_EXPORT StringData convertString( const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor = 1.0f); CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize); CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize); CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize); CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize); CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize); CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize); #ifdef PLATFORM_WINDOWS using WideStringData = std::pair, int>; CPP_UTILITIES_EXPORT WideStringData convertMultiByteToWide(std::error_code &ec, const char 
*inputBuffer, int inputBufferSize = -1); CPP_UTILITIES_EXPORT WideStringData convertMultiByteToWide(std::error_code &ec, const std::string &inputBuffer); CPP_UTILITIES_EXPORT WideStringData convertMultiByteToWide(const char *inputBuffer, int inputBufferSize = -1); CPP_UTILITIES_EXPORT WideStringData convertMultiByteToWide(const std::string &inputBuffer); #endif CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar = '\0'); /// \cond namespace Detail { #ifdef CPP_UTILITIES_USE_RANGES template using ContainerValueType = typename std::conditional_t, std::iterator_traits>>, Container>::value_type; #else template using ContainerValueType = typename Container::value_type; #endif template using DefaultReturnTypeForContainer = ContainerValueType; template using StringParamForContainer = std::basic_string_view::value_type>; } // namespace Detail /// \endcond /*! * \brief Joins the given \a strings using the specified \a delimiter. * * The strings will be enclosed using the provided closures \a leftClosure and \a rightClosure. * * \param strings The string parts to be joined. * \param delimiter Specifies a delimiter to be used (empty string by default). * \param omitEmpty Indicates whether empty part should be omitted. * \param leftClosure Specifies a string to be inserted before each string (empty string by default). * \param rightClosure Specifies a string to be appended after each string (empty string by default). * \tparam Container Container The STL-container used to provide the \a strings. * \tparam ReturnType Type to store the result; defaults to the container's element type. * \returns Returns the joined string. 
*/
// NOTE(review): the template parameter lists/arguments of joinStrings() and toMultiline() were missing
// from the file (text between '<' and '>' stripped) and have been reconstructed - confirm the defaults.
template <class Container = std::initializer_list<std::string>, class ReturnType = Detail::DefaultReturnTypeForContainer<Container>>
ReturnType joinStrings(const Container &strings,
    Detail::StringParamForContainer<Container> delimiter = Detail::StringParamForContainer<Container>(), bool omitEmpty = false,
    Detail::StringParamForContainer<Container> leftClosure = Detail::StringParamForContainer<Container>(),
    Detail::StringParamForContainer<Container> rightClosure = Detail::StringParamForContainer<Container>())
{
    ReturnType res;
    if (!strings.size()) {
        return res;
    }
    // first pass: count the parts which are going to be joined and sum up their sizes so the
    // result can be allocated at once
    std::size_t entries = 0, size = 0;
    for (const auto &str : strings) {
        if (omitEmpty && str.empty()) {
            continue;
        }
        size += str.size();
        ++entries;
    }
    if (!entries) {
        return res;
    }
    size += entries * (leftClosure.size() + rightClosure.size()) + (entries - 1) * delimiter.size();
    res.reserve(size);
    // second pass: concatenate the parts
    // note: Whether a delimiter is required is tracked explicitly. Checking whether the result is still
    //       empty is not sufficient because kept empty parts (omitEmpty = false) leave it empty and the
    //       delimiter following them would be lost.
    bool firstPart = true;
    for (const auto &str : strings) {
        if (omitEmpty && str.empty()) {
            continue;
        }
        if (firstPart) {
            firstPart = false;
        } else {
            res.append(delimiter);
        }
        res.append(leftClosure);
        res.append(str);
        res.append(rightClosure);
    }
    return res;
}

/*!
 * \brief Converts the specified \a arrayOfLines to a multiline string.
 * \remarks The lines are joined via joinStrings() using "\n" as delimiter; empty lines are kept.
 */
template <class Container = std::initializer_list<std::string>> inline auto toMultiline(const Container &arrayOfLines)
{
    return joinStrings(arrayOfLines, "\n", false);
}

/*!
 * \brief Specifies the role of empty parts when splitting strings.
 */
enum class EmptyPartsTreat {
    Keep, /**< empty parts are kept */
    Omit, /**< empty parts are omitted */
    Merge /**< empty parts are omitted but cause the adjacent parts being joined using the delimiter */
};

/*!
 * \brief Splits the given \a string at the specified \a delimiter.
 * \param string The string to be split.
 * \param delimiter Specifies the delimiter.
 * \param emptyPartsRole Specifies the treatment of empty parts.
 * \param maxParts Specifies the maximal number of parts. Values less or equal zero indicate an unlimited number of parts.
 * \tparam Container The STL-container used to return the parts.
 * \returns Returns the parts.
*/
// NOTE(review): the template parameter list (including the default container) and the target type of
// the static_cast were missing from the file and have been reconstructed - confirm.
template <class Container = std::list<std::string>>
Container splitString(Detail::StringParamForContainer<Container> string, Detail::StringParamForContainer<Container> delimiter,
    EmptyPartsTreat emptyPartsRole = EmptyPartsTreat::Keep, int maxParts = -1)
{
    using PartType = typename Container::value_type;
    using SizeType = typename PartType::size_type;

    // turn the maximal number of parts into the number of parts after which the remainder must be
    // taken as-is (negative value: no limit)
    --maxParts;
    Container res;
    PartType *last = nullptr; // most recently added part (target for merging)
    bool merge = false; // whether the next non-empty part must be appended to the last part
    // an empty delimiter would be "found" at the current position over and over again without ever
    // advancing; it is treated as not occurring at all so the string is returned as a single part
    const bool hasDelimiter = !delimiter.empty();
    const SizeType step = hasDelimiter ? delimiter.size() : 1;
    SizeType i = 0, end = string.size();
    for (SizeType delimPos; i < end; i = delimPos + step) {
        delimPos = hasDelimiter ? string.find(delimiter, i) : PartType::npos;
        if (!merge && maxParts >= 0 && res.size() == static_cast<typename Container::size_type>(maxParts)) {
            // limit reached: the remainder becomes the last part unless it starts with a delimiter
            // which is supposed to be merged
            if (delimPos == i && emptyPartsRole == EmptyPartsTreat::Merge) {
                if (last) {
                    merge = true;
                    continue;
                }
            }
            delimPos = PartType::npos;
        }
        if (delimPos == PartType::npos) {
            delimPos = string.size();
        }
        if (emptyPartsRole == EmptyPartsTreat::Keep || i != delimPos) {
            if (merge) {
                // re-insert the delimiter which belonged to the omitted empty part
                last->append(delimiter);
                last->append(string, i, delimPos - i);
                merge = false;
            } else {
                last = &res.emplace_back(string, i, delimPos - i);
            }
        } else if (emptyPartsRole == EmptyPartsTreat::Merge) {
            if (last) {
                merge = true;
            }
        }
    }
    // the string was empty or ended with a delimiter: add the trailing empty part
    if (i == end && emptyPartsRole == EmptyPartsTreat::Keep) {
        res.emplace_back();
    }
    return res;
}

/*!
 * \brief Splits the given \a string (which might also be a string view) at the specified \a delimiter.
 * \param string The string to be split.
 * \param delimiter Specifies the delimiter.
 * \param maxParts Specifies the maximal number of parts. Values less or equal zero indicate an unlimited number of parts.
 * \tparam Container The STL-container used to return the parts.
 * \returns Returns the parts.
 * \remarks This is a simplified version of splitString() where emptyPartsRole is always EmptyPartsTreat::Keep.
*/ template > Container splitStringSimple( Detail::StringParamForContainer string, Detail::StringParamForContainer delimiter, int maxParts = -1) { --maxParts; Container res; typename Container::value_type::size_type i = 0, end = string.size(); for (typename Container::value_type::size_type delimPos; i < end; i = delimPos + delimiter.size()) { delimPos = string.find(delimiter, i); if (maxParts >= 0 && res.size() == static_cast(maxParts)) { delimPos = Container::value_type::npos; } if (delimPos == Container::value_type::npos) { delimPos = string.size(); } #if __cplusplus >= 201709 if constexpr (requires { res.emplace_back(string); }) { #endif res.emplace_back(string.data() + i, delimPos - i); #if __cplusplus >= 201709 } else { res.emplace(string.data() + i, delimPos - i); } #endif } if (i == end) { #if __cplusplus >= 201709 if constexpr (requires { res.emplace_back(); }) { #endif res.emplace_back(); #if __cplusplus >= 201709 } else { res.emplace(); } #endif } return res; } /*! * \brief Converts the specified \a multilineString to an array of lines. */ template > inline auto toArrayOfLines(const std::string &multilineString) { return splitString(multilineString, "\n", EmptyPartsTreat::Keep); } /*! * \brief Returns whether \a str starts with \a phrase. */ template bool startsWith(const StringType &str, const StringType &phrase) { if (str.size() < phrase.size()) { return false; } for (auto stri = str.cbegin(), strend = str.cend(), phrasei = phrase.cbegin(), phraseend = phrase.cend();; ++stri, ++phrasei) { if (phrasei == phraseend) { return true; } else if (stri == strend) { return false; } else if (*stri != *phrasei) { return false; } } return false; } /*! * \brief Returns whether \a str starts with \a phrase. 
*/
// NOTE(review): the template parameter lists and the target types of the static_casts in this section
// were missing from the file and have been reconstructed - confirm.
template <typename StringType> bool startsWith(const StringType &str, const typename StringType::value_type *phrase)
{
    // \a phrase is null-terminated; walk both strings until the phrase has been consumed
    auto stri = str.cbegin();
    for (const auto strend = str.cend(); *phrase; ++stri, ++phrase) {
        if (stri == strend || *stri != *phrase) {
            return false;
        }
    }
    return true;
}

/*!
 * \brief Returns whether \a str ends with \a phrase.
 */
template <typename StringType> bool endsWith(const StringType &str, const StringType &phrase)
{
    if (str.size() < phrase.size()) {
        return false;
    }
    // compare the last phrase.size() characters of str with phrase
    auto phrasei = phrase.cbegin();
    for (auto stri = str.cend() - static_cast<typename StringType::difference_type>(phrase.size()), strend = str.cend(); stri != strend;
         ++stri, ++phrasei) {
        if (*stri != *phrasei) {
            return false;
        }
    }
    return true;
}

/*!
 * \brief Returns whether \a str ends with \a phrase.
 * \remarks The null-terminated \a phrase is measured via std::char_traits so character types other
 *          than char are supported as well.
 */
template <typename StringType> bool endsWith(const StringType &str, const typename StringType::value_type *phrase)
{
    const auto phraseSize = std::char_traits<typename StringType::value_type>::length(phrase);
    if (str.size() < phraseSize) {
        return false;
    }
    // compare the last phraseSize characters of str with phrase
    for (auto stri = str.cend() - static_cast<typename StringType::difference_type>(phraseSize), strend = str.cend(); stri != strend;
         ++stri, ++phrase) {
        if (*stri != *phrase) {
            return false;
        }
    }
    return true;
}

/*!
 * \brief Returns whether \a str contains the specified \a substrings.
 * \remarks The \a substrings must occur in the specified order.
 */
template <typename StringType> bool containsSubstrings(const StringType &str, std::initializer_list<StringType> substrings)
{
    typename StringType::size_type currentPos = 0;
    for (const auto &substr : substrings) {
        if ((currentPos = str.find(substr, currentPos)) == StringType::npos) {
            return false;
        }
        // continue searching behind the occurrence just found
        currentPos += substr.size();
    }
    return true;
}

/*!
 * \brief Returns whether \a str contains the specified \a substrings.
 * \remarks The \a substrings must occur in the specified order.
*/
// NOTE(review): the template parameter lists and template arguments in this section were missing from
// the file and have been reconstructed - confirm.
template <typename StringType>
bool containsSubstrings(const StringType &str, std::initializer_list<const typename StringType::value_type *> substrings)
{
    typename StringType::size_type currentPos = 0;
    for (const auto *substr : substrings) {
        if ((currentPos = str.find(substr, currentPos)) == StringType::npos) {
            return false;
        }
        // continue searching behind the occurrence just found; the length is determined via
        // std::char_traits so character types other than char are supported as well
        currentPos += std::char_traits<typename StringType::value_type>::length(substr);
    }
    return true;
}

/*!
 * \brief Replaces all occurrences of \a find with \a replace in the specified \a str.
 * \remarks
 * - Text inserted as replacement is not searched again.
 * - Does nothing if \a find is empty (an empty string would match at every position, forever).
 */
template <typename StringType1, typename StringType2, typename StringType3>
void findAndReplace(StringType1 &str, const StringType2 &find, const StringType3 &replace)
{
    if (find.size() == 0) {
        return;
    }
    for (typename StringType1::size_type i = 0; (i = str.find(find, i)) != StringType1::npos; i += replace.size()) {
        str.replace(i, find.size(), replace);
    }
}

/*!
 * \brief Replaces all occurrences of \a find with \a replace in the specified \a str.
 * \remarks Both \a find and \a replace must be null-terminated.
 */
template <typename StringType>
inline void findAndReplace(StringType &str, const typename StringType::value_type *find, const typename StringType::value_type *replace)
{
    findAndReplace(
        str, std::basic_string_view<typename StringType::value_type>(find), std::basic_string_view<typename StringType::value_type>(replace));
}

/*!
 * \brief Replaces all occurrences of \a find with \a replace in the specified \a str.
 * \remarks \a replace must be null-terminated.
 */
template <typename StringType1, typename StringType2>
inline void findAndReplace(StringType1 &str, const StringType2 &find, const typename StringType1::value_type *replace)
{
    findAndReplace(str, find, std::basic_string_view<typename StringType1::value_type>(replace));
}

/*!
 * \brief Replaces all occurrences of \a find with \a replace in the specified \a str.
 * \remarks \a find must be null-terminated.
 */
template <typename StringType1, typename StringType2>
inline void findAndReplace(StringType1 &str, const typename StringType1::value_type *find, const StringType2 &replace)
{
    findAndReplace(str, std::basic_string_view<typename StringType1::value_type>(find), replace);
}

/*!
 * \brief Returns the character representation of the specified \a digit.
 * \remarks
 * - Uses capital letters.
 * - Valid values for \a digit: 0 <= \a digit <= 35
 */
template <typename CharType> constexpr CharType digitToChar(CharType digit)
{
    // the arithmetic is carried out as int; convert back to the character type explicitly
    return static_cast<CharType>(digit <= 9 ? (digit + '0') : (digit + 'A' - 10));
}

/*!
 * \brief Converts the given \a number to its equivalent string representation using the specified \a base.
* \tparam IntegralType The data type of the given number. * \tparam StringType The string type (should be an instantiation of the basic_string class template). * \sa stringToNumber() */ template , std::is_unsigned> * = nullptr> StringType numberToString(IntegralType number, BaseType base = 10) { std::size_t resSize = 0; for (auto n = number; n; n /= static_cast(base), ++resSize) ; StringType res; res.reserve(resSize); do { res.insert(res.begin(), digitToChar(static_cast(number % base))); number /= static_cast(base); } while (number); return res; } /*! * \brief Converts the given \a number to its equivalent string representation using the specified \a base. * \tparam IntegralType The data type of the given number. * \tparam StringType The string type (should be an instantiation of the basic_string class template). * \sa stringToNumber() */ template , std::is_signed> * = nullptr> StringType numberToString(IntegralType number, BaseType base = 10) { const bool negative = number < 0; std::size_t resSize; if (negative) { number = -number, resSize = 1; } else { resSize = 0; } for (auto n = number; n; n /= static_cast(base), ++resSize) ; StringType res; res.reserve(resSize); do { res.insert(res.begin(), digitToChar(static_cast(number % static_cast(base)))); number /= static_cast(base); } while (number); if (negative) { res.insert(res.begin(), '-'); } return res; } /*! * \brief Converts the given \a number to its equivalent string representation using the specified \a base. * \tparam FloatingType The data type of the given number. * \tparam StringType The string type (should be an instantiation of the basic_string class template). * \remarks This function is using std::basic_stringstream internally and hence also has its limitations (eg. regarding * \a base and types). 
* \sa stringToNumber(), bufferToNumber() */ template > * = nullptr> StringType numberToString(FloatingType number, int base = 10) { std::basic_stringstream ss; ss << std::setbase(base) << number; return ss.str(); } /*! * \brief Returns number/digit of the specified \a character representation using the specified \a base. * \throws A ConversionException will be thrown if the provided \a character does not represent a valid digit for the specified \a base. */ template CharType charToDigit(CharType character, CharType base) { CharType res = base; if (character >= '0' && character <= '9') { res = character - '0'; } else if (character >= 'a' && character <= 'z') { res = character - 'a' + 10; } else if (character >= 'A' && character <= 'Z') { res = character - 'A' + 10; } if (res < base) { return res; } std::string errorMsg; errorMsg.reserve(36); errorMsg += "The character \""; errorMsg += character >= ' ' && character <= '~' ? static_cast(character) : '?'; errorMsg += "\" is no valid digit."; throw ConversionException(std::move(errorMsg)); } /// \cond namespace Detail { template void raiseAndAdd(IntegralType &result, BaseType base, CharType character) { if (character == ' ') { return; } #ifdef __GNUC__ // overflow detection only supported on GCC and Clang if (__builtin_mul_overflow(result, base, &result) || __builtin_add_overflow(result, charToDigit(character, static_cast(base)), &result)) { throw ConversionException("Number exceeds limit."); } #else result *= static_cast(base); result += static_cast(charToDigit(character, static_cast(base))); #endif } } // namespace Detail /// \endcond /*! * \brief Converts the given \a string of \a size characters to an unsigned numeric value using the specified \a base. * \tparam IntegralType The data type used to store the converted value. * \tparam CharType The character type. * \throws A ConversionException will be thrown if the provided \a string is not a valid number. 
* \sa numberToString(), stringToNumber() */ template , std::is_unsigned> * = nullptr> IntegralType bufferToNumber(const CharType *string, std::size_t size, BaseType base = 10) { IntegralType result = 0; for (const CharType *end = string + size; string != end; ++string) { Detail::raiseAndAdd(result, base, *string); } return result; } /*! * \brief Converts the given \a string of \a size characters to a signed numeric value using the specified \a base. * \tparam IntegralType The data type used to store the converted value. * \tparam CharType The character type. * \throws A ConversionException will be thrown if the provided \a string is not a valid number. * \sa numberToString(), stringToNumber() */ template , std::is_signed> * = nullptr> IntegralType bufferToNumber(const CharType *string, std::size_t size, BaseType base = 10) { if (!size) { return 0; } const CharType *end = string + size; for (; string != end && *string == ' '; ++string) ; if (string == end) { return 0; } const bool negative = (*string == '-'); if (negative) { ++string; } IntegralType result = 0; for (; string != end; ++string) { Detail::raiseAndAdd(result, base, *string); } return negative ? -result : result; } /*! * \brief Converts the given \a string to an unsigned/signed number assuming \a string uses the specified \a base. * \tparam IntegralType The data type used to store the converted value. * \tparam StringType The string type (should be an instantiation of the basic_string class template). * \throws A ConversionException will be thrown if the provided \a string is not a valid number. * \sa numberToString(), bufferToNumber() */ template , Traits::Not>>> * = nullptr> IntegralType stringToNumber(const StringType &string, BaseType base = 10) { return bufferToNumber(string.data(), string.size(), base); } /*! * \brief Converts the given \a stringView to a number assuming \a stringView uses the specified \a base. * \tparam FloatingType The data type used to store the converted value. 
* \tparam StringViewType The string view type (must be an instantiation of the basic_string_view class template). * \throws A ConversionException will be thrown if the provided \a string is not a valid number. * \remarks This function is using std::basic_stringstream internally and hence also has its limitations (eg. regarding * \a base and types). * \sa numberToString(), bufferToNumber() */ template , Traits::IsSpecializationOf> * = nullptr> FloatingType stringToNumber(StringViewType stringView, int base = 10) { std::basic_stringstream ss; ss << std::setbase(base) << stringView; FloatingType result; if ((ss >> result) && ss.eof()) { return result; } std::string errorMsg; errorMsg.reserve(48 + stringView.size()); errorMsg += "The string \""; errorMsg += stringView; errorMsg += "\" is no valid floating point number."; throw ConversionException(errorMsg); } /*! * \brief Converts the given \a string to a number assuming \a string uses the specified \a base. * \tparam FloatingType The data type used to store the converted value. * \tparam StringType The string type (should be an instantiation of the basic_string class template). * \throws A ConversionException will be thrown if the provided \a string is not a valid number. * \remarks This function is using std::basic_stringstream internally and hence also has its limitations (eg. regarding * \a base and types). * \sa numberToString(), bufferToNumber() */ template , Traits::Not>>, Traits::Not>> * = nullptr> FloatingType stringToNumber(const StringType &string, int base = 10) { using StringViewType = std::basic_string_view; return stringToNumber(StringViewType(string.data(), string.size()), base); } /*! * \brief Converts the given null-terminated \a string to an unsigned numeric value using the specified \a base. * \tparam IntegralType The data type used to store the converted value. * \tparam CharType The character type. * \throws A ConversionException will be thrown if the provided \a string is not a valid number. 
* \sa numberToString(), bufferToNumber() */ template , std::is_unsigned> * = nullptr> IntegralType stringToNumber(const CharType *string, BaseType base = 10) { IntegralType result = 0; for (; *string; ++string) { Detail::raiseAndAdd(result, base, *string); } return result; } /*! * \brief Converts the given null-terminated \a string to a number assuming \a string uses the specified \a base. * \tparam FloatingType The data type used to store the converted value. * \tparam CharType The character type. * \throws A ConversionException will be thrown if the provided \a string is not a valid number. * \remarks This function is using std::basic_stringstream internally and hence also has its limitations (eg. regarding * \a base and types). * \sa numberToString(), bufferToNumber() */ template > * = nullptr> FloatingType stringToNumber(const CharType *string, int base = 10) { return stringToNumber>(string, base); } /*! * \brief Converts the given null-terminated \a string to a signed numeric value using the specified \a base. * \tparam IntegralType The data type used to store the converted value. * \tparam CharType The character type. * \throws A ConversionException will be thrown if the provided \a string is not a valid number. * \sa numberToString(), bufferToNumber() */ template , std::is_signed> * = nullptr> IntegralType stringToNumber(const CharType *string, IntegralType base = 10) { if (!*string) { return 0; } for (; *string && *string == ' '; ++string) ; if (!*string) { return 0; } const bool negative = (*string == '-'); if (negative) { ++string; } IntegralType result = 0; for (; *string; ++string) { Detail::raiseAndAdd(result, base, *string); } return negative ? -result : result; } /*! * \brief Interprets the given \a integer at the specified position as std::string using the specified byte order. * * Example: interpretation of ID3v2 frame IDs (stored as 32-bit integer) as string * - 0x54495432/1414091826 will be interpreted as "TIT2". 
* - 0x00545432/5526578 will be interpreted as "TT2" using start offset 1 to omit the first byte. * * \tparam T The data type of the integer to be interpreted. */ template std::string interpretIntegerAsString(T integer, int startOffset = 0) { char buffer[sizeof(T)]; BE::getBytes(integer, buffer); return std::string(buffer + startOffset, sizeof(T) - static_cast(startOffset)); } CPP_UTILITIES_EXPORT std::string dataSizeToString(std::uint64_t sizeInByte, bool includeByte = false); CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits = false); CPP_UTILITIES_EXPORT std::string encodeBase64(const std::uint8_t *data, std::uint32_t dataSize); CPP_UTILITIES_EXPORT std::pair, std::uint32_t> decodeBase64(const char *encodedStr, const std::uint32_t strSize); } // namespace CppUtilities #endif // CONVERSION_UTILITIES_STRINGCONVERSION_H