From 943123afa1b99e2a3598561d3cf93b9340f441dc Mon Sep 17 00:00:00 2001 From: Martchus Date: Sun, 1 Jul 2018 02:04:29 +0200 Subject: [PATCH] Warn about ID3v2 text frame with multiple strings First step to support multiple strings within ID3v2 text frame. See * https://github.com/Martchus/tagparser/issues/10 * https://github.com/Martchus/tageditor/issues/38 --- id3/id3v2frame.cpp | 203 +++++++++++++++++++++++----------- id3/id3v2frame.h | 2 +- scripts/download_testfiles.sh | 6 + tests/overall.h | 1 + tests/overallmp3.cpp | 62 +++++++++++ tests/testfilecheck.cpp | 1 + 6 files changed, 211 insertions(+), 64 deletions(-) diff --git a/id3/id3v2frame.cpp b/id3/id3v2frame.cpp index a553d77..e002e9c 100644 --- a/id3/id3v2frame.cpp +++ b/id3/id3v2frame.cpp @@ -102,6 +102,24 @@ template int parseGenreIndex(const stringtype &denotation) return index; } +/*! + * \brief Returns an std::string instance for the substring parsed using parseSubstring(). + */ +string stringFromSubstring(tuple substr) +{ + return string(get<0>(substr), get<1>(substr)); +} + +/*! + * \brief Returns an std::u16string instance for the substring parsed using parseSubstring(). + */ +u16string wideStringFromSubstring(tuple substr, TagTextEncoding encoding) +{ + u16string res(reinterpret_cast(get<0>(substr)), get<1>(substr) / 2); + TagValue::ensureHostByteOrder(res, encoding); + return res; +} + /*! * \brief Parses a frame from the stream read using the specified \a reader. * @@ -226,85 +244,148 @@ void Id3v2Frame::parse(BinaryReader &reader, uint32 version, uint32 maximalSize, reader.read(buffer.get(), m_dataSize); } - // -> get tag value depending of field type + // read tag value depending on frame ID/type if (Id3v2FrameIds::isTextFrame(id())) { - // frame contains text - TagTextEncoding dataEncoding = parseTextEncodingByte(static_cast(*buffer.get()), diag); // the first byte stores the encoding - if ((version >= 3 && (id() == Id3v2FrameIds::lTrackPosition || id() == Id3v2FrameIds::lDiskPosition)) - || (version < 3 && (id() == Id3v2FrameIds::sTrackPosition || id() == Id3v2FrameIds::sDiskPosition))) { - // the track number or the disk number frame - try { - if (characterSize(dataEncoding) > 1) { - value().assignPosition(PositionInSet(parseWideString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag))); + // parse text encoding byte + TagTextEncoding dataEncoding = parseTextEncodingByte(static_cast(*buffer.get()), diag); + + // parse string values (since ID3v2.4 a text frame may contain multiple strings) + std::vector additionalValues; + const char *currentOffset = buffer.get() + 1; + for (size_t currentIndex = 1; currentIndex < m_dataSize;) { + // determine the next substring + const auto substr(parseSubstring(currentOffset, m_dataSize - currentIndex, dataEncoding, false, diag)); + + // handle case when string is empty + if (!get<1>(substr)) { + if (currentIndex == 1) { + value().clearDataAndMetadata(); + } + currentIndex = static_cast(get<2>(substr) - buffer.get()); + currentOffset = get<2>(substr); + continue; + } + + // determine the TagValue instance to store the value + TagValue *const value = [&] { + if (this->value().isEmpty()) { + return &this->value(); + } + additionalValues.emplace_back(); + return &additionalValues.back(); + }(); + + // apply further parsing for some text frame types (eg. convert track number to PositionInSet) + if ((version >= 3 && (id() == Id3v2FrameIds::lTrackPosition || id() == Id3v2FrameIds::lDiskPosition)) + || (version < 3 && (id() == Id3v2FrameIds::sTrackPosition || id() == Id3v2FrameIds::sDiskPosition))) { + // parse the track number or the disk number frame + try { + if (characterSize(dataEncoding) > 1) { + value->assignPosition(PositionInSet(wideStringFromSubstring(substr, dataEncoding))); + } else { + value->assignPosition(PositionInSet(stringFromSubstring(substr))); + } + } catch (const ConversionException &) { + diag.emplace_back(DiagLevel::Warning, "The value of track/disk position frame is not numeric and will be ignored.", context); + } + + } else if ((version >= 3 && id() == Id3v2FrameIds::lLength) || (version < 3 && id() == Id3v2FrameIds::sLength)) { + // parse frame contains length + try { + const auto milliseconds = [&] { + if (dataEncoding == TagTextEncoding::Utf16BigEndian || dataEncoding == TagTextEncoding::Utf16LittleEndian) { + const auto parsedStringRef = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag); + const auto convertedStringData = dataEncoding == TagTextEncoding::Utf16BigEndian + ? convertUtf16BEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef)) + : convertUtf16LEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef)); + return string(convertedStringData.first.get(), convertedStringData.second); + } else { // Latin-1 or UTF-8 + return stringFromSubstring(substr); + } + }(); + value->assignTimeSpan(TimeSpan::fromMilliseconds(stringToNumber(milliseconds))); + } catch (const ConversionException &) { + diag.emplace_back(DiagLevel::Warning, "The value of the length frame is not numeric and will be ignored.", context); + } + + } else if ((version >= 3 && id() == Id3v2FrameIds::lGenre) || (version < 3 && id() == Id3v2FrameIds::sGenre)) { + // parse genre/content type + const auto genreIndex = [&] { + if (characterSize(dataEncoding) > 1) { + return parseGenreIndex(wideStringFromSubstring(substr, dataEncoding)); + } else { + return parseGenreIndex(stringFromSubstring(substr)); + } + }(); + if (genreIndex != -1) { + // genre is specified as ID3 genre number + value->assignStandardGenreIndex(genreIndex); } else { - value().assignPosition(PositionInSet(parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag))); + // genre is specified as string + value->assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding); } - } catch (const ConversionException &) { - diag.emplace_back(DiagLevel::Warning, "The value of track/disk position frame is not numeric and will be ignored.", context); + } else { + // store any other text frames as-is + value->assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding); } - } else if ((version >= 3 && id() == Id3v2FrameIds::lLength) || (version < 3 && id() == Id3v2FrameIds::sLength)) { - // frame contains length - try { - string milliseconds; - if (dataEncoding == TagTextEncoding::Utf16BigEndian || dataEncoding == TagTextEncoding::Utf16LittleEndian) { - const auto parsedStringRef = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag); - const auto convertedStringData = dataEncoding == TagTextEncoding::Utf16BigEndian - ? convertUtf16BEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef)) - : convertUtf16LEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef)); - milliseconds = string(convertedStringData.first.get(), convertedStringData.second); - } else { // Latin-1 or UTF-8 - milliseconds = parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag); - } - value().assignTimeSpan(TimeSpan::fromMilliseconds(stringToNumber(milliseconds))); - } catch (const ConversionException &) { - diag.emplace_back(DiagLevel::Warning, "The value of the length frame is not numeric and will be ignored.", context); - } + currentIndex = static_cast(get<2>(substr) - buffer.get()); + currentOffset = get<2>(substr); + } - } else if ((version >= 3 && id() == Id3v2FrameIds::lGenre) || (version < 3 && id() == Id3v2FrameIds::sGenre)) { - // genre/content type - int genreIndex; - if (characterSize(dataEncoding) > 1) { - const auto genreDenotation = parseWideString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag); - genreIndex = parseGenreIndex(genreDenotation); + // add warning about additional values + if (!additionalValues.empty()) { + // format list of values + const auto valuesString = [&]() -> string { + if (additionalValues.size() == 1) { + return argsToString("value \"", additionalValues.front().toString(TagTextEncoding::Utf8), "\" is ignored."); + } + string valuesString = "values"; + for (auto value = additionalValues.cbegin(), end = additionalValues.cend() - 1; value != end; ++value) { + valuesString += ' '; + valuesString += '\"'; + valuesString += value->toString(TagTextEncoding::Utf8); + valuesString += '\"'; + if (value != end) { + valuesString += ','; + } + } + valuesString += " and \""; + valuesString += additionalValues.back().toString(TagTextEncoding::Utf8); + valuesString += "\" are ignored."; + return valuesString; + }(); + + // emplace diag message + if (version < 4) { + diag.emplace_back( + DiagLevel::Warning, argsToString("Multiple strings found though the tag is pre-ID3v2.4. Additional ", valuesString), context); + additionalValues.clear(); } else { - const auto genreDenotation = parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag); - genreIndex = parseGenreIndex(genreDenotation); + diag.emplace_back(DiagLevel::Warning, + argsToString("Multiple strings found. This is not supported so far. Hence the additional ", valuesString), context); } - if (genreIndex != -1) { - // genre is specified as ID3 genre number - value().assignStandardGenreIndex(genreIndex); - } else { - // genre is specified as string - // string might be null terminated - const auto substr = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag); - value().assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding); - } - } else { - // any other text frame - const auto substr = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag); - value().assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding); } } else if (version >= 3 && id() == Id3v2FrameIds::lCover) { - // frame stores picture + // parse picture frame byte type; parsePicture(buffer.get(), m_dataSize, value(), type, diag); setTypeInfo(type); } else if (version < 3 && id() == Id3v2FrameIds::sCover) { - // frame stores legacy picutre + // parse legacy picutre byte type; parseLegacyPicture(buffer.get(), m_dataSize, value(), type, diag); setTypeInfo(type); } else if (((version >= 3 && id() == Id3v2FrameIds::lComment) || (version < 3 && id() == Id3v2FrameIds::sComment)) || ((version >= 3 && id() == Id3v2FrameIds::lUnsynchronizedLyrics) || (version < 3 && id() == Id3v2FrameIds::sUnsynchronizedLyrics))) { - // comment frame or unsynchronized lyrics frame (these two frame types have the same structure) + // parse comment frame or unsynchronized lyrics frame (these two frame types have the same structure) parseComment(buffer.get(), m_dataSize, value(), diag); } else { - // unknown frame + // parse unknown/unsupported frame value().assignData(buffer.get(), m_dataSize, TagDataType::Undefined); } } @@ -609,7 +690,7 @@ byte Id3v2Frame::makeTextEncodingByte(TagTextEncoding textEncoding) } /*! - * \brief Parses a substring in the specified \a buffer. + * \brief Parses a substring from the specified \a buffer. * * This method ensures that byte order marks and termination characters for the specified \a encoding are omitted. * It might add a waring if the substring is not terminated. @@ -691,18 +772,17 @@ tuple Id3v2Frame::parseSubstring( } /*! - * \brief Parses a substring in the specified \a buffer. + * \brief Parses a substring from the specified \a buffer. * * Same as Id3v2Frame::parseSubstring() but returns the substring as string object. */ string Id3v2Frame::parseString(const char *buffer, size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag) { - const auto substr = parseSubstring(buffer, dataSize, encoding, addWarnings, diag); - return string(get<0>(substr), get<1>(substr)); + return stringFromSubstring(parseSubstring(buffer, dataSize, encoding, addWarnings, diag)); } /*! - * \brief Parses a substring in the specified \a buffer. + * \brief Parses a substring from the specified \a buffer. * * Same as Id3v2Frame::parseSubstring() but returns the substring as u16string object * @@ -710,10 +790,7 @@ string Id3v2Frame::parseString(const char *buffer, size_t dataSize, TagTextEncod */ u16string Id3v2Frame::parseWideString(const char *buffer, size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag) { - const auto substr = parseSubstring(buffer, dataSize, encoding, addWarnings, diag); - u16string res(reinterpret_cast(get<0>(substr)), get<1>(substr) / 2); - TagValue::ensureHostByteOrder(res, encoding); - return res; + return wideStringFromSubstring(parseSubstring(buffer, dataSize, encoding, addWarnings, diag), encoding); } /*! diff --git a/id3/id3v2frame.h b/id3/id3v2frame.h index c0d0059..74a66ad 100644 --- a/id3/id3v2frame.h +++ b/id3/id3v2frame.h @@ -117,7 +117,7 @@ public: // parsing helper TagTextEncoding parseTextEncodingByte(byte textEncodingByte, Diagnostics &diag); - std::tuple parseSubstring( + std::tuple parseSubstring( const char *buffer, std::size_t maxSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag); std::string parseString(const char *buffer, std::size_t maxSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag); std::u16string parseWideString(const char *buffer, std::size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag); diff --git a/scripts/download_testfiles.sh b/scripts/download_testfiles.sh index ee46600..ca52a65 100755 --- a/scripts/download_testfiles.sh +++ b/scripts/download_testfiles.sh @@ -102,6 +102,12 @@ download_custom \ 'MPlayer samples' \ 'wget -r -np -R index.html* http://samples.mplayerhq.hu/A-codecs/lossless' +mkdir -p misc && pushd misc +download_custom \ + 'ID3v2.4 with multiple strings' \ + 'wget https://trac.ffmpeg.org/raw-attachment/ticket/6949/multiple_id3v2_4_values.mp3' +popd + # convert FLAC files for FLAC tests with ffmpeg mkdir -p flac [[ ! -f flac/test.flac ]] && ffmpeg -i mtx-test-data/alac/othertest-itunes.m4a -c:a flac flac/test.flac # raw FLAC stream diff --git a/tests/overall.h b/tests/overall.h index 2e79218..9b1900e 100644 --- a/tests/overall.h +++ b/tests/overall.h @@ -89,6 +89,7 @@ private: void checkMp4Constraints(); void checkMp3Testfile1(); + void checkMp3Testfile2(); void checkMp3TestMetaData(); void checkMp3PaddingConstraints(); diff --git a/tests/overallmp3.cpp b/tests/overallmp3.cpp index 527e5f2..35c7162 100644 --- a/tests/overallmp3.cpp +++ b/tests/overallmp3.cpp @@ -81,6 +81,67 @@ void OverallTests::checkMp3Testfile1() CPPUNIT_ASSERT(m_diag.level() <= DiagLevel::Information); } +/*! + * \brief Checks "misc/multiple_id3v2_4_values.mp3" (from https://trac.ffmpeg.org/ticket/6949). + */ +void OverallTests::checkMp3Testfile2() +{ + CPPUNIT_ASSERT_EQUAL(ContainerFormat::MpegAudioFrames, m_fileInfo.containerFormat()); + const auto tracks = m_fileInfo.tracks(); + CPPUNIT_ASSERT_EQUAL(1_st, tracks.size()); + for (const auto &track : tracks) { + CPPUNIT_ASSERT_EQUAL(MediaType::Audio, track->mediaType()); + CPPUNIT_ASSERT_EQUAL(GeneralMediaFormat::Mpeg1Audio, track->format().general); + CPPUNIT_ASSERT_EQUAL(static_cast(SubFormats::Mpeg1Layer3), track->format().sub); + CPPUNIT_ASSERT_EQUAL(static_cast(2), track->channelCount()); + CPPUNIT_ASSERT_EQUAL(static_cast(MpegChannelMode::Stereo), track->channelConfig()); + CPPUNIT_ASSERT_EQUAL(44100u, track->samplingFrequency()); + CPPUNIT_ASSERT_EQUAL(20, track->duration().seconds()); + } + const auto tags = m_fileInfo.tags(); + switch (m_tagStatus) { + case TagStatus::Original: + CPPUNIT_ASSERT(!m_fileInfo.id3v1Tag()); + CPPUNIT_ASSERT_EQUAL(1_st, m_fileInfo.id3v2Tags().size()); + CPPUNIT_ASSERT_EQUAL(1_st, tags.size()); + for (const auto &tag : tags) { + switch (tag->type()) { + case TagType::Id3v1Tag: + CPPUNIT_FAIL("no ID3v1 tag expected"); + case TagType::Id3v2Tag: + CPPUNIT_ASSERT_EQUAL(TagTextEncoding::Utf8, tag->value(KnownField::Title).dataEncoding()); + CPPUNIT_ASSERT_EQUAL("Infinite (Original Mix)"s, tag->value(KnownField::Title).toString(TagTextEncoding::Utf8)); + CPPUNIT_ASSERT_EQUAL("B-Front"s, tag->value(KnownField::Artist).toString(TagTextEncoding::Utf8)); + CPPUNIT_ASSERT_EQUAL("Infinite"s, tag->value(KnownField::Album).toString(TagTextEncoding::Utf8)); + CPPUNIT_ASSERT_EQUAL("Hardstyle"s, tag->value(KnownField::Genre).toString(TagTextEncoding::Utf8)); + CPPUNIT_ASSERT_EQUAL("Lavf57.83.100"s, tag->value(KnownField::EncoderSettings).toString(TagTextEncoding::Utf8)); + CPPUNIT_ASSERT_EQUAL("Roughstate"s, tag->value(KnownField::RecordLabel).toString(TagTextEncoding::Utf8)); + CPPUNIT_ASSERT_EQUAL("2017"s, tag->value(KnownField::RecordDate).toString(TagTextEncoding::Utf8)); + CPPUNIT_ASSERT_EQUAL(1, tag->value(KnownField::TrackPosition).toPositionInSet().position()); + CPPUNIT_ASSERT(tag->value(KnownField::Length).toTimeSpan().isNull()); + CPPUNIT_ASSERT(tag->value(KnownField::Lyricist).isEmpty()); + break; + default:; + } + } + CPPUNIT_ASSERT_GREATEREQUAL(2_st, m_diag.size()); + CPPUNIT_ASSERT_EQUAL(DiagLevel::Warning, m_diag[0].level()); + CPPUNIT_ASSERT_EQUAL(DiagLevel::Warning, m_diag[1].level()); + CPPUNIT_ASSERT_EQUAL("parsing TCON frame"s, m_diag[1].context()); + CPPUNIT_ASSERT_EQUAL( + "Multiple strings found. This is not supported so far. Hence the additional values \"Test\", \"Example\", and \"Hard Dance\" are ignored."s, + m_diag[1].message()); + break; + case TagStatus::TestMetaDataPresent: + checkMp3TestMetaData(); + break; + case TagStatus::Removed: + CPPUNIT_ASSERT_EQUAL(0_st, tracks.size()); + } + + CPPUNIT_ASSERT(m_diag.level() <= DiagLevel::Warning); +} + /*! * \brief Checks whether test meta data for MP3 files has been applied correctly. */ @@ -213,6 +274,7 @@ void OverallTests::testMp3Parsing() m_fileInfo.setForceFullParse(false); m_tagStatus = TagStatus::Original; parseFile(TestUtilities::testFilePath("mtx-test-data/mp3/id3-tag-and-xing-header.mp3"), &OverallTests::checkMp3Testfile1); + parseFile(TestUtilities::testFilePath("misc/multiple_id3v2_4_values.mp3"), &OverallTests::checkMp3Testfile2); } #ifdef PLATFORM_UNIX diff --git a/tests/testfilecheck.cpp b/tests/testfilecheck.cpp index 5428e3b..86023a9 100644 --- a/tests/testfilecheck.cpp +++ b/tests/testfilecheck.cpp @@ -77,6 +77,7 @@ struct TestFile { { "mtx-test-data/mp4/dash/dragon-age-inquisition-H1LkM6IVlm4-video.mp4", { "864891f4510f3fa9c49c19e671171cec08ceb331362cf7161419b957be090d47" } }, { "mtx-test-data/ogg/qt4dance_medium.ogg", { "0b5429da9713be171c6ae0da69621261e8d5ddc9db3da872e5ade1a1c883decd" } }, { "mtx-test-data/opus/v-opus.ogg", { "e12adece4dbcccf2471b61c3ebd7c6576dee351d85809ab6f01d6f324d65b417" } }, + { "misc/multiple_id3v2_4_values.mp3", { "da012a41213cdc49b2afe1457625d8baced1a64e2351f17b520bf82c6bfe4e03" } }, }; /*!