From ccf77959b4cbc31a389ed474f6feed34fefdd486 Mon Sep 17 00:00:00 2001 From: Martchus Date: Thu, 21 Jul 2022 23:30:45 +0200 Subject: [PATCH] Convert text values to a supported encoding if needed on the fly * This makes it harder to use the library wrongly and does not lead to worse performance as character set conversions are only done as needed. * That's actually already done by serializers for most tag formats. This change ensures serializers for Matroska and Vorbis tag fields do this as well. * Update documentation accordingly. --- matroska/matroskatagfield.cpp | 2 +- tagvalue.cpp | 20 ++++++++++++++++---- vorbis/vorbiscommentfield.cpp | 2 +- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/matroska/matroskatagfield.cpp b/matroska/matroskatagfield.cpp index 900a77b..33cff07 100644 --- a/matroska/matroskatagfield.cpp +++ b/matroska/matroskatagfield.cpp @@ -220,7 +220,7 @@ MatroskaTagFieldMaker::MatroskaTagFieldMaker(MatroskaTagField &field, Diagnostic if (m_field.value().type() == TagDataType::Popularity) { m_stringValue = m_field.value().toScaledPopularity(TagType::MatroskaTag).toString(); } else { - m_stringValue = m_field.value().toString(); + m_stringValue = m_field.value().toString(TagTextEncoding::Utf8); } } catch (const ConversionException &) { diag.emplace_back(DiagLevel::Warning, diff --git a/tagvalue.cpp b/tagvalue.cpp index 28faec6..ee95e12 100644 --- a/tagvalue.cpp +++ b/tagvalue.cpp @@ -87,10 +87,22 @@ pair encodingParameter(TagTextEncoding tagTextEncoding) * TagValue class take care of neccassary conversions, eg. TagValue::toInteger() will attempt to convert a * string to a number (an possibly throw a ConversionException on failure). * - * Values of the type TagDataType::Text can be differently encoded. See TagParser::TagTextEncoding for a - * list of supported encodings. Be sure to use an encoding which is supported by the tag implementation. 
- * To ensure that, the functions Tag::canEncodingBeUsed(), Tag::proposedTextEncoding() and - * Tag::ensureTextValuesAreProperlyEncoded() can be used. + * Values of the type TagDataType::Text can be differently encoded. + * - See TagParser::TagTextEncoding for a list of encodings supported by this library. + * - Tag formats usually only support a subset of these encodings. The serializers for the various tag + * formats provided by this library will keep the encoding if possible and otherwise convert the assigned + * text to an encoding supported by the tag format on the fly. Note that ID3v1 does not specify which + * encodings are supported (or unsupported) so the serializer will just write text data as-is. + * - The deserializers will store text data in the encoding that is used in the tag. + * - The functions Tag::canEncodingBeUsed() and Tag::proposedTextEncoding() can be used to check + * whether an encoding can be used by a certain tag format to avoid any unnecessary character set + * conversions. + * - There's also the function Tag::ensureTextValuesAreProperlyEncoded() which can be used to convert all + * text values currently assigned to a tag to the encoding which is deemed best for the current tag format. + * This function is a bit more aggressive than the implicit conversions, e.g. it ensures no UTF-16 encoded + * text ends up in ID3v1 tags. + * - If you want to use UTF-8 everywhere, simply always assign UTF-8 text and use + * TagValue::toString(TagTextEncoding::Utf8) when reading text. * * Values of the type TagDataType::Popularity might use different rating scales depending on the tag * format. 
diff --git a/vorbis/vorbiscommentfield.cpp b/vorbis/vorbiscommentfield.cpp index 0232f22..c68dba8 100644 --- a/vorbis/vorbiscommentfield.cpp +++ b/vorbis/vorbiscommentfield.cpp @@ -213,7 +213,7 @@ bool VorbisCommentField::make(BinaryWriter &writer, VorbisCommentFlags flags, Di valueString = value().toScaledPopularity(TagType::VorbisComment).toString(); } else { // make normal string value - valueString = value().toString(); + valueString = value().toString(TagTextEncoding::Utf8); } const auto size(valueString.size() + id().size() + 1); if (size > numeric_limits::max()) {