From f042d216fdb35753cca72ba58073379037af33f5 Mon Sep 17 00:00:00 2001 From: Martchus Date: Sat, 1 Jun 2019 22:36:08 +0200 Subject: [PATCH] Do not completely refuse on UTF-8 in ID3v1 since it is apparently used by some software. But * Write at least a BOM so it can be interpreted later correctly as UTF-8 * Print a warning * Keep proposing Latin-1 The tag editor should allow to configure which encoding is used and whether the BOM is used and which encoding is assumed when parsing a file. --- id3/id3v1tag.cpp | 80 +++++++++++++++++++++++++++++++++++--------- id3/id3v2tag.cpp | 10 ++++++ id3/id3v2tag.h | 1 + tests/overallmp3.cpp | 14 ++++++++ 4 files changed, 89 insertions(+), 16 deletions(-) diff --git a/id3/id3v1tag.cpp b/id3/id3v1tag.cpp index efc4117..450f0ca 100644 --- a/id3/id3v1tag.cpp +++ b/id3/id3v1tag.cpp @@ -5,8 +5,10 @@ #include "../exceptions.h" #include +#include #include +#include using namespace std; using namespace ConversionUtilities; @@ -37,7 +39,7 @@ const char *Id3v1Tag::typeName() const bool Id3v1Tag::canEncodingBeUsed(TagTextEncoding encoding) const { - return Tag::canEncodingBeUsed(encoding); + return encoding == TagTextEncoding::Latin1 || encoding == TagTextEncoding::Utf8; } /*! @@ -48,7 +50,7 @@ bool Id3v1Tag::canEncodingBeUsed(TagTextEncoding encoding) const */ void Id3v1Tag::parse(std::istream &stream, Diagnostics &diag) { - VAR_UNUSED(diag); + VAR_UNUSED(diag) char buffer[128]; stream.read(buffer, 128); if (buffer[0] != 0x54 || buffer[1] != 0x41 || buffer[2] != 0x47) { @@ -59,15 +61,16 @@ void Id3v1Tag::parse(std::istream &stream, Diagnostics &diag) readValue(m_artist, 30, buffer + 33); readValue(m_album, 30, buffer + 63); readValue(m_year, 4, buffer + 93); - if (buffer[125] == 0) { + const auto is11 = buffer[125] == 0; + if (is11) { readValue(m_comment, 28, buffer + 97); m_version = "1.1"; } else { readValue(m_comment, 30, buffer + 97); m_version = "1.0"; } - readValue(m_comment, buffer[125] == 0 ? 28 : 30, buffer + 97); - if (buffer[125] == 0) { + readValue(m_comment, is11 ? 28 : 30, buffer + 97); + if (is11) { m_trackPos.assignPosition(PositionInSet(*reinterpret_cast(buffer + 126), 0)); } m_genre.assignStandardGenreIndex(*reinterpret_cast(buffer + 127)); @@ -250,13 +253,17 @@ bool Id3v1Tag::supportsField(KnownField field) const void Id3v1Tag::ensureTextValuesAreProperlyEncoded() { - m_title.convertDataEncodingForTag(this); - m_artist.convertDataEncodingForTag(this); - m_album.convertDataEncodingForTag(this); - m_year.convertDataEncodingForTag(this); - m_comment.convertDataEncodingForTag(this); - m_trackPos.convertDataEncodingForTag(this); - m_genre.convertDataEncodingForTag(this); + for (auto *value : initializer_list{ &m_title, &m_artist, &m_album, &m_year, &m_comment, &m_trackPos, &m_genre }) { + // convert UTF-16 to UTF-8 + switch (value->dataEncoding()) { + case TagTextEncoding::Latin1: + case TagTextEncoding::Utf8: + case TagTextEncoding::Unspecified: + break; + default: + value->convertDataEncoding(TagTextEncoding::Utf8); + } + } } /*! @@ -265,11 +272,15 @@ void Id3v1Tag::ensureTextValuesAreProperlyEncoded() void Id3v1Tag::readValue(TagValue &value, size_t maxLength, const char *buffer) { const char *end = buffer + maxLength - 1; - while ((*end == 0x0 || *end == ' ') && end >= buffer) { + while ((*end == 0x0 || *end == ' ') && end > buffer) { --end; --maxLength; } - value.assignData(buffer, maxLength, TagDataType::Text, TagTextEncoding::Latin1); + if (maxLength >= 3 && ConversionUtilities::BE::toUInt24(buffer) == 0x00EFBBBF) { + value.assignData(buffer + 3, maxLength - 3, TagDataType::Text, TagTextEncoding::Utf8); + } else { + value.assignData(buffer, maxLength, TagDataType::Text, TagTextEncoding::Latin1); + } } /*! @@ -277,14 +288,51 @@ void Id3v1Tag::readValue(TagValue &value, size_t maxLength, const char *buffer) */ void Id3v1Tag::writeValue(const TagValue &value, size_t length, char *buffer, ostream &targetStream, Diagnostics &diag) { + // initialize buffer with zeroes memset(buffer, 0, length); + + // stringify value + string valueAsString; try { - value.toString().copy(buffer, length); + valueAsString = value.toString(); } catch (const ConversionException &) { diag.emplace_back( DiagLevel::Warning, "Field can not be set because given value can not be converted appropriately.", "making ID3v1 tag field"); } - targetStream.write(buffer, length); + + // handle encoding + auto *valueStart = buffer; + auto valueLength = length; + switch (value.dataEncoding()) { + case TagTextEncoding::Latin1: + case TagTextEncoding::Unspecified: + break; + case TagTextEncoding::Utf8: + // write + for (const auto c : valueAsString) { + if ((c & 0x80) == 0) { + continue; + } + buffer[0] = static_cast(0xEF); + buffer[1] = static_cast(0xBB); + buffer[2] = static_cast(0xBF); + valueStart += 3; + valueLength -= 3; + break; + } + FALLTHROUGH; + default: + diag.emplace_back(DiagLevel::Warning, "The used encoding is unlikely to be supported by other software.", "making ID3v1 tag field"); + } + + // copy the string + if (valueAsString.size() > length) { + diag.emplace_back( + DiagLevel::Warning, argsToString("Value has been truncated. Max. ", length, " characters supported."), "making ID3v1 tag field"); + } + valueAsString.copy(valueStart, valueLength); + + targetStream.write(buffer, static_cast(length)); } } // namespace TagParser diff --git a/id3/id3v2tag.cpp b/id3/id3v2tag.cpp index a0c9c7f..c7ccaf6 100644 --- a/id3/id3v2tag.cpp +++ b/id3/id3v2tag.cpp @@ -58,6 +58,16 @@ bool Id3v2Tag::supportsMultipleValues(KnownField field) const } } +void Id3v2Tag::ensureTextValuesAreProperlyEncoded() +{ + const auto encoding = proposedTextEncoding(); + for (auto &field : fields()) { + auto &value = field.second.value(); + value.convertDataEncoding(encoding); + value.convertDescriptionEncoding(encoding); + } +} + /*! * \brief Works like the default implementation but adds additional values as well. */ diff --git a/id3/id3v2tag.h b/id3/id3v2tag.h index 0631585..ca33278 100644 --- a/id3/id3v2tag.h +++ b/id3/id3v2tag.h @@ -72,6 +72,7 @@ public: bool supportsDescription(KnownField field) const override; bool supportsMimeType(KnownField field) const override; bool supportsMultipleValues(KnownField field) const override; + void ensureTextValuesAreProperlyEncoded() override; void parse(std::istream &sourceStream, const std::uint64_t maximalSize, Diagnostics &diag); Id3v2TagMaker prepareMaking(Diagnostics &diag); diff --git a/tests/overallmp3.cpp b/tests/overallmp3.cpp index ea4794b..d20ace8 100644 --- a/tests/overallmp3.cpp +++ b/tests/overallmp3.cpp @@ -80,6 +80,17 @@ void OverallTests::checkMp3Testfile1() CPPUNIT_ASSERT_EQUAL(0_st, tracks.size()); } + auto warningAboutEncoding = false; + for (auto &msg : m_diag) { + if (msg.message() == "The used encoding is unlikely to be supported by other software.") { + CPPUNIT_ASSERT_EQUAL(DiagLevel::Warning, msg.level()); + warningAboutEncoding = true; + msg = DiagMessage(DiagLevel::Information, string(), string()); + } + } + const auto encodingWarningExpected + = m_tagStatus == TagStatus::TestMetaDataPresent && (m_mode & Mp3TestFlags::Id3v1Only || m_mode & Mp3TestFlags::Id3v2AndId3v1); + CPPUNIT_ASSERT_EQUAL(encodingWarningExpected, warningAboutEncoding); CPPUNIT_ASSERT(m_diag.level() <= DiagLevel::Information); } @@ -325,6 +336,9 @@ void OverallTests::setMp3TestMetaData1() tag->setValue(KnownField::DiskPosition, m_testPosition); // TODO: set more fields } + if (id3v1Tag) { + id3v1Tag->ensureTextValuesAreProperlyEncoded(); + } } /*!