Warn about ID3v2 text frame with multiple strings

First step to support multiple strings within ID3v2 text
frame.

See
* https://github.com/Martchus/tagparser/issues/10
* https://github.com/Martchus/tageditor/issues/38
This commit is contained in:
Martchus 2018-07-01 02:04:29 +02:00
parent 262b823030
commit 943123afa1
6 changed files with 211 additions and 64 deletions

View File

@ -102,6 +102,24 @@ template <class stringtype> int parseGenreIndex(const stringtype &denotation)
return index;
}
/*!
* \brief Returns an std::string instance for the substring parsed using parseSubstring().
*/
string stringFromSubstring(tuple<const char *, size_t, const char *> substr)
{
return string(get<0>(substr), get<1>(substr));
}
/*!
* \brief Returns an std::u16string instance for the substring parsed using parseSubstring().
*/
u16string wideStringFromSubstring(tuple<const char *, size_t, const char *> substr, TagTextEncoding encoding)
{
u16string res(reinterpret_cast<u16string::const_pointer>(get<0>(substr)), get<1>(substr) / 2);
TagValue::ensureHostByteOrder(res, encoding);
return res;
}
/*!
* \brief Parses a frame from the stream read using the specified \a reader.
*
@ -226,85 +244,148 @@ void Id3v2Frame::parse(BinaryReader &reader, uint32 version, uint32 maximalSize,
reader.read(buffer.get(), m_dataSize);
}
// -> get tag value depending of field type
// read tag value depending on frame ID/type
if (Id3v2FrameIds::isTextFrame(id())) {
// frame contains text
TagTextEncoding dataEncoding = parseTextEncodingByte(static_cast<byte>(*buffer.get()), diag); // the first byte stores the encoding
if ((version >= 3 && (id() == Id3v2FrameIds::lTrackPosition || id() == Id3v2FrameIds::lDiskPosition))
|| (version < 3 && (id() == Id3v2FrameIds::sTrackPosition || id() == Id3v2FrameIds::sDiskPosition))) {
// the track number or the disk number frame
try {
if (characterSize(dataEncoding) > 1) {
value().assignPosition(PositionInSet(parseWideString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag)));
// parse text encoding byte
TagTextEncoding dataEncoding = parseTextEncodingByte(static_cast<byte>(*buffer.get()), diag);
// parse string values (since ID3v2.4 a text frame may contain multiple strings)
std::vector<TagValue> additionalValues;
const char *currentOffset = buffer.get() + 1;
for (size_t currentIndex = 1; currentIndex < m_dataSize;) {
// determine the next substring
const auto substr(parseSubstring(currentOffset, m_dataSize - currentIndex, dataEncoding, false, diag));
// handle case when string is empty
if (!get<1>(substr)) {
if (currentIndex == 1) {
value().clearDataAndMetadata();
}
currentIndex = static_cast<size_t>(get<2>(substr) - buffer.get());
currentOffset = get<2>(substr);
continue;
}
// determine the TagValue instance to store the value
TagValue *const value = [&] {
if (this->value().isEmpty()) {
return &this->value();
}
additionalValues.emplace_back();
return &additionalValues.back();
}();
// apply further parsing for some text frame types (eg. convert track number to PositionInSet)
if ((version >= 3 && (id() == Id3v2FrameIds::lTrackPosition || id() == Id3v2FrameIds::lDiskPosition))
|| (version < 3 && (id() == Id3v2FrameIds::sTrackPosition || id() == Id3v2FrameIds::sDiskPosition))) {
// parse the track number or the disk number frame
try {
if (characterSize(dataEncoding) > 1) {
value->assignPosition(PositionInSet(wideStringFromSubstring(substr, dataEncoding)));
} else {
value->assignPosition(PositionInSet(stringFromSubstring(substr)));
}
} catch (const ConversionException &) {
diag.emplace_back(DiagLevel::Warning, "The value of track/disk position frame is not numeric and will be ignored.", context);
}
} else if ((version >= 3 && id() == Id3v2FrameIds::lLength) || (version < 3 && id() == Id3v2FrameIds::sLength)) {
// parse frame contains length
try {
const auto milliseconds = [&] {
if (dataEncoding == TagTextEncoding::Utf16BigEndian || dataEncoding == TagTextEncoding::Utf16LittleEndian) {
const auto parsedStringRef = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
const auto convertedStringData = dataEncoding == TagTextEncoding::Utf16BigEndian
? convertUtf16BEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef))
: convertUtf16LEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef));
return string(convertedStringData.first.get(), convertedStringData.second);
} else { // Latin-1 or UTF-8
return stringFromSubstring(substr);
}
}();
value->assignTimeSpan(TimeSpan::fromMilliseconds(stringToNumber<double>(milliseconds)));
} catch (const ConversionException &) {
diag.emplace_back(DiagLevel::Warning, "The value of the length frame is not numeric and will be ignored.", context);
}
} else if ((version >= 3 && id() == Id3v2FrameIds::lGenre) || (version < 3 && id() == Id3v2FrameIds::sGenre)) {
// parse genre/content type
const auto genreIndex = [&] {
if (characterSize(dataEncoding) > 1) {
return parseGenreIndex(wideStringFromSubstring(substr, dataEncoding));
} else {
return parseGenreIndex(stringFromSubstring(substr));
}
}();
if (genreIndex != -1) {
// genre is specified as ID3 genre number
value->assignStandardGenreIndex(genreIndex);
} else {
value().assignPosition(PositionInSet(parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag)));
// genre is specified as string
value->assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding);
}
} catch (const ConversionException &) {
diag.emplace_back(DiagLevel::Warning, "The value of track/disk position frame is not numeric and will be ignored.", context);
} else {
// store any other text frames as-is
value->assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding);
}
} else if ((version >= 3 && id() == Id3v2FrameIds::lLength) || (version < 3 && id() == Id3v2FrameIds::sLength)) {
// frame contains length
try {
string milliseconds;
if (dataEncoding == TagTextEncoding::Utf16BigEndian || dataEncoding == TagTextEncoding::Utf16LittleEndian) {
const auto parsedStringRef = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
const auto convertedStringData = dataEncoding == TagTextEncoding::Utf16BigEndian
? convertUtf16BEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef))
: convertUtf16LEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef));
milliseconds = string(convertedStringData.first.get(), convertedStringData.second);
} else { // Latin-1 or UTF-8
milliseconds = parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
}
value().assignTimeSpan(TimeSpan::fromMilliseconds(stringToNumber<double>(milliseconds)));
} catch (const ConversionException &) {
diag.emplace_back(DiagLevel::Warning, "The value of the length frame is not numeric and will be ignored.", context);
}
currentIndex = static_cast<size_t>(get<2>(substr) - buffer.get());
currentOffset = get<2>(substr);
}
} else if ((version >= 3 && id() == Id3v2FrameIds::lGenre) || (version < 3 && id() == Id3v2FrameIds::sGenre)) {
// genre/content type
int genreIndex;
if (characterSize(dataEncoding) > 1) {
const auto genreDenotation = parseWideString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
genreIndex = parseGenreIndex(genreDenotation);
// add warning about additional values
if (!additionalValues.empty()) {
// format list of values
const auto valuesString = [&]() -> string {
if (additionalValues.size() == 1) {
return argsToString("value \"", additionalValues.front().toString(TagTextEncoding::Utf8), "\" is ignored.");
}
string valuesString = "values";
for (auto value = additionalValues.cbegin(), end = additionalValues.cend() - 1; value != end; ++value) {
valuesString += ' ';
valuesString += '\"';
valuesString += value->toString(TagTextEncoding::Utf8);
valuesString += '\"';
if (value != end) {
valuesString += ',';
}
}
valuesString += " and \"";
valuesString += additionalValues.back().toString(TagTextEncoding::Utf8);
valuesString += "\" are ignored.";
return valuesString;
}();
// emplace diag message
if (version < 4) {
diag.emplace_back(
DiagLevel::Warning, argsToString("Multiple strings found though the tag is pre-ID3v2.4. Additional ", valuesString), context);
additionalValues.clear();
} else {
const auto genreDenotation = parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
genreIndex = parseGenreIndex(genreDenotation);
diag.emplace_back(DiagLevel::Warning,
argsToString("Multiple strings found. This is not supported so far. Hence the additional ", valuesString), context);
}
if (genreIndex != -1) {
// genre is specified as ID3 genre number
value().assignStandardGenreIndex(genreIndex);
} else {
// genre is specified as string
// string might be null terminated
const auto substr = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
value().assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding);
}
} else {
// any other text frame
const auto substr = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
value().assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding);
}
} else if (version >= 3 && id() == Id3v2FrameIds::lCover) {
// frame stores picture
// parse picture frame
byte type;
parsePicture(buffer.get(), m_dataSize, value(), type, diag);
setTypeInfo(type);
} else if (version < 3 && id() == Id3v2FrameIds::sCover) {
// frame stores legacy picutre
// parse legacy picutre
byte type;
parseLegacyPicture(buffer.get(), m_dataSize, value(), type, diag);
setTypeInfo(type);
} else if (((version >= 3 && id() == Id3v2FrameIds::lComment) || (version < 3 && id() == Id3v2FrameIds::sComment))
|| ((version >= 3 && id() == Id3v2FrameIds::lUnsynchronizedLyrics) || (version < 3 && id() == Id3v2FrameIds::sUnsynchronizedLyrics))) {
// comment frame or unsynchronized lyrics frame (these two frame types have the same structure)
// parse comment frame or unsynchronized lyrics frame (these two frame types have the same structure)
parseComment(buffer.get(), m_dataSize, value(), diag);
} else {
// unknown frame
// parse unknown/unsupported frame
value().assignData(buffer.get(), m_dataSize, TagDataType::Undefined);
}
}
@ -609,7 +690,7 @@ byte Id3v2Frame::makeTextEncodingByte(TagTextEncoding textEncoding)
}
/*!
* \brief Parses a substring in the specified \a buffer.
* \brief Parses a substring from the specified \a buffer.
*
* This method ensures that byte order marks and termination characters for the specified \a encoding are omitted.
* It might add a waring if the substring is not terminated.
@ -691,18 +772,17 @@ tuple<const char *, size_t, const char *> Id3v2Frame::parseSubstring(
}
/*!
* \brief Parses a substring in the specified \a buffer.
* \brief Parses a substring from the specified \a buffer.
*
* Same as Id3v2Frame::parseSubstring() but returns the substring as string object.
*/
string Id3v2Frame::parseString(const char *buffer, size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag)
{
const auto substr = parseSubstring(buffer, dataSize, encoding, addWarnings, diag);
return string(get<0>(substr), get<1>(substr));
return stringFromSubstring(parseSubstring(buffer, dataSize, encoding, addWarnings, diag));
}
/*!
* \brief Parses a substring in the specified \a buffer.
* \brief Parses a substring from the specified \a buffer.
*
* Same as Id3v2Frame::parseSubstring() but returns the substring as u16string object
*
@ -710,10 +790,7 @@ string Id3v2Frame::parseString(const char *buffer, size_t dataSize, TagTextEncod
*/
u16string Id3v2Frame::parseWideString(const char *buffer, size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag)
{
const auto substr = parseSubstring(buffer, dataSize, encoding, addWarnings, diag);
u16string res(reinterpret_cast<u16string::const_pointer>(get<0>(substr)), get<1>(substr) / 2);
TagValue::ensureHostByteOrder(res, encoding);
return res;
return wideStringFromSubstring(parseSubstring(buffer, dataSize, encoding, addWarnings, diag), encoding);
}
/*!

View File

@ -117,7 +117,7 @@ public:
// parsing helper
TagTextEncoding parseTextEncodingByte(byte textEncodingByte, Diagnostics &diag);
std::tuple<const char *, size_t, const char *> parseSubstring(
std::tuple<const char *, std::size_t, const char *> parseSubstring(
const char *buffer, std::size_t maxSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag);
std::string parseString(const char *buffer, std::size_t maxSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag);
std::u16string parseWideString(const char *buffer, std::size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag);

View File

@ -102,6 +102,12 @@ download_custom \
'MPlayer samples' \
'wget -r -np -R index.html* http://samples.mplayerhq.hu/A-codecs/lossless'
mkdir -p misc && pushd misc
download_custom \
'ID3v2.4 with multiple strings' \
'wget https://trac.ffmpeg.org/raw-attachment/ticket/6949/multiple_id3v2_4_values.mp3'
popd
# convert FLAC files for FLAC tests with ffmpeg
mkdir -p flac
[[ ! -f flac/test.flac ]] && ffmpeg -i mtx-test-data/alac/othertest-itunes.m4a -c:a flac flac/test.flac # raw FLAC stream

View File

@ -89,6 +89,7 @@ private:
void checkMp4Constraints();
void checkMp3Testfile1();
void checkMp3Testfile2();
void checkMp3TestMetaData();
void checkMp3PaddingConstraints();

View File

@ -81,6 +81,67 @@ void OverallTests::checkMp3Testfile1()
CPPUNIT_ASSERT(m_diag.level() <= DiagLevel::Information);
}
/*!
* \brief Checks "misc/multiple_id3v2_4_values.mp3" (from https://trac.ffmpeg.org/ticket/6949).
*/
void OverallTests::checkMp3Testfile2()
{
CPPUNIT_ASSERT_EQUAL(ContainerFormat::MpegAudioFrames, m_fileInfo.containerFormat());
const auto tracks = m_fileInfo.tracks();
CPPUNIT_ASSERT_EQUAL(1_st, tracks.size());
for (const auto &track : tracks) {
CPPUNIT_ASSERT_EQUAL(MediaType::Audio, track->mediaType());
CPPUNIT_ASSERT_EQUAL(GeneralMediaFormat::Mpeg1Audio, track->format().general);
CPPUNIT_ASSERT_EQUAL(static_cast<unsigned char>(SubFormats::Mpeg1Layer3), track->format().sub);
CPPUNIT_ASSERT_EQUAL(static_cast<uint16>(2), track->channelCount());
CPPUNIT_ASSERT_EQUAL(static_cast<byte>(MpegChannelMode::Stereo), track->channelConfig());
CPPUNIT_ASSERT_EQUAL(44100u, track->samplingFrequency());
CPPUNIT_ASSERT_EQUAL(20, track->duration().seconds());
}
const auto tags = m_fileInfo.tags();
switch (m_tagStatus) {
case TagStatus::Original:
CPPUNIT_ASSERT(!m_fileInfo.id3v1Tag());
CPPUNIT_ASSERT_EQUAL(1_st, m_fileInfo.id3v2Tags().size());
CPPUNIT_ASSERT_EQUAL(1_st, tags.size());
for (const auto &tag : tags) {
switch (tag->type()) {
case TagType::Id3v1Tag:
CPPUNIT_FAIL("no ID3v1 tag expected");
case TagType::Id3v2Tag:
CPPUNIT_ASSERT_EQUAL(TagTextEncoding::Utf8, tag->value(KnownField::Title).dataEncoding());
CPPUNIT_ASSERT_EQUAL("Infinite (Original Mix)"s, tag->value(KnownField::Title).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("B-Front"s, tag->value(KnownField::Artist).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("Infinite"s, tag->value(KnownField::Album).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("Hardstyle"s, tag->value(KnownField::Genre).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("Lavf57.83.100"s, tag->value(KnownField::EncoderSettings).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("Roughstate"s, tag->value(KnownField::RecordLabel).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("2017"s, tag->value(KnownField::RecordDate).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL(1, tag->value(KnownField::TrackPosition).toPositionInSet().position());
CPPUNIT_ASSERT(tag->value(KnownField::Length).toTimeSpan().isNull());
CPPUNIT_ASSERT(tag->value(KnownField::Lyricist).isEmpty());
break;
default:;
}
}
CPPUNIT_ASSERT_GREATEREQUAL(2_st, m_diag.size());
CPPUNIT_ASSERT_EQUAL(DiagLevel::Warning, m_diag[0].level());
CPPUNIT_ASSERT_EQUAL(DiagLevel::Warning, m_diag[1].level());
CPPUNIT_ASSERT_EQUAL("parsing TCON frame"s, m_diag[1].context());
CPPUNIT_ASSERT_EQUAL(
"Multiple strings found. This is not supported so far. Hence the additional values \"Test\", \"Example\", and \"Hard Dance\" are ignored."s,
m_diag[1].message());
break;
case TagStatus::TestMetaDataPresent:
checkMp3TestMetaData();
break;
case TagStatus::Removed:
CPPUNIT_ASSERT_EQUAL(0_st, tracks.size());
}
CPPUNIT_ASSERT(m_diag.level() <= DiagLevel::Warning);
}
/*!
* \brief Checks whether test meta data for MP3 files has been applied correctly.
*/
@ -213,6 +274,7 @@ void OverallTests::testMp3Parsing()
m_fileInfo.setForceFullParse(false);
m_tagStatus = TagStatus::Original;
parseFile(TestUtilities::testFilePath("mtx-test-data/mp3/id3-tag-and-xing-header.mp3"), &OverallTests::checkMp3Testfile1);
parseFile(TestUtilities::testFilePath("misc/multiple_id3v2_4_values.mp3"), &OverallTests::checkMp3Testfile2);
}
#ifdef PLATFORM_UNIX

View File

@ -77,6 +77,7 @@ struct TestFile {
{ "mtx-test-data/mp4/dash/dragon-age-inquisition-H1LkM6IVlm4-video.mp4", { "864891f4510f3fa9c49c19e671171cec08ceb331362cf7161419b957be090d47" } },
{ "mtx-test-data/ogg/qt4dance_medium.ogg", { "0b5429da9713be171c6ae0da69621261e8d5ddc9db3da872e5ade1a1c883decd" } },
{ "mtx-test-data/opus/v-opus.ogg", { "e12adece4dbcccf2471b61c3ebd7c6576dee351d85809ab6f01d6f324d65b417" } },
{ "misc/multiple_id3v2_4_values.mp3", { "da012a41213cdc49b2afe1457625d8baced1a64e2351f17b520bf82c6bfe4e03" } },
};
/*!