Normalize known Matroska tag fields so they can be recognized despite different casing

* Do this by default with an opt-out; changing only known fields should not
  be very intrusive
* Fix recognizing known fields when only the case differs, see
  https://github.com/Martchus/tageditor/issues/72
This commit is contained in:
Martchus 2021-08-25 18:53:20 +02:00
parent dd787e2f2a
commit 14b4185023
8 changed files with 88 additions and 6 deletions

View File

@ -38,6 +38,7 @@ public:
TagField(const IdentifierType &id, const TagValue &value);
~TagField();
IdentifierType &id();
const IdentifierType &id() const;
std::string idToString() const;
void setId(const IdentifierType &id);
@ -110,6 +111,14 @@ template <class ImplementationType> TagField<ImplementationType>::~TagField()
{
}
/*!
 * \brief Provides mutable access to the id of the current TagField.
 * \returns Returns a non-const reference to the stored id so callers can modify it in place.
 */
template <class ImplementationType>
inline auto TagField<ImplementationType>::id() -> typename TagField<ImplementationType>::IdentifierType &
{
    return m_id;
}
/*!
* \brief Returns the id of the current TagField.
*/

View File

@ -663,7 +663,11 @@ void MatroskaContainer::internalParseTags(Diagnostics &diag, AbortableProgressFe
CPP_UTILITIES_UNUSED(progress)
static const string context("parsing tags of Matroska container");
for (EbmlElement *element : m_tagsElements) {
auto flags = MatroskaTagFlags::None;
if (fileInfo().fileHandlingFlags() & MediaFileHandlingFlags::NormalizeKnownTagFieldIds) {
flags += MatroskaTagFlags::NormalizeKnownFieldIds;
}
for (EbmlElement *const element : m_tagsElements) {
try {
element->parse(diag);
for (EbmlElement *subElement = element->firstChild(); subElement; subElement = subElement->nextSibling()) {
@ -672,7 +676,7 @@ void MatroskaContainer::internalParseTags(Diagnostics &diag, AbortableProgressFe
case MatroskaIds::Tag:
m_tags.emplace_back(make_unique<MatroskaTag>());
try {
m_tags.back()->parse(*subElement, diag);
m_tags.back()->parse2(*subElement, flags, diag);
} catch (const NoDataFoundException &) {
m_tags.pop_back();
} catch (const Failure &) {

View File

@ -108,6 +108,18 @@ KnownField MatroskaTag::internallyGetKnownField(const IdentifierType &id) const
* error occurs.
*/
void MatroskaTag::parse(EbmlElement &tagElement, Diagnostics &diag)
{
    // forward to the flag-aware variant without enabling any optional behavior
    constexpr auto noFlags = MatroskaTagFlags::None;
    parse2(tagElement, noFlags, diag);
}
/*!
* \brief Parses tag information from the specified \a tagElement.
*
* \throws Throws std::ios_base::failure when an IO error occurs.
* \throws Throws TagParser::Failure or a derived exception when a parsing
* error occurs.
*/
void MatroskaTag::parse2(EbmlElement &tagElement, MatroskaTagFlags flags, Diagnostics &diag)
{
static const string context("parsing Matroska tag");
m_size = tagElement.totalSize();
@ -117,15 +129,24 @@ void MatroskaTag::parse(EbmlElement &tagElement, Diagnostics &diag)
diag.emplace_back(DiagLevel::Critical, "Matroska tag is too big.", context);
throw NotImplementedException();
}
const auto normalize = flags & MatroskaTagFlags::NormalizeKnownFieldIds;
for (EbmlElement *child = tagElement.firstChild(); child; child = child->nextSibling()) {
child->parse(diag);
switch (child->id()) {
case MatroskaIds::SimpleTag:
try {
MatroskaTagField field;
auto field = MatroskaTagField();
field.reparse(*child, diag, true);
fields().emplace(field.id(), move(field));
if (normalize) {
auto normalizedId = field.id();
MatroskaTagField::normalizeId(normalizedId);
if (internallyGetKnownField(normalizedId) != KnownField::Invalid) {
field.id() = std::move(normalizedId);
}
}
fields().emplace(field.id(), std::move(field));
} catch (const Failure &) {
// message will be added to diag anyways
}
break;
case MatroskaIds::Targets:

View File

@ -11,6 +11,20 @@ namespace TagParser {
class EbmlElement;
class MatroskaTag;
/*!
 * \brief The MatroskaTagFlags enum specifies flags that control the parsing and making of Matroska tags.
 * \remarks Each enabled behavior occupies its own bit so flags can be combined.
 */
enum class MatroskaTagFlags : std::uint64_t {
    None = 0, /**< Regular parsing/making. */
    NormalizeKnownFieldIds = (1 << 0), /**< Normalize known field IDs when parsing. */
};
} // namespace TagParser
CPP_UTILITIES_MARK_FLAG_ENUM_CLASS(TagParser, TagParser::MatroskaTagFlags)
namespace TagParser {
class TAG_PARSER_EXPORT MatroskaTagMaker {
friend class MatroskaTag;
@ -70,6 +84,7 @@ public:
TagTargetLevel targetLevel() const override;
void parse(EbmlElement &tagElement, Diagnostics &diag);
void parse2(EbmlElement &tagElement, MatroskaTagFlags flags, Diagnostics &diag);
MatroskaTagMaker prepareMaking(Diagnostics &diag);
void make(std::ostream &stream, Diagnostics &diag);

View File

@ -176,6 +176,19 @@ void MatroskaTagField::make(ostream &stream, Diagnostics &diag)
prepareMaking(diag).make(stream);
}
/*!
 * \brief Converts the specified \a id to upper-case in place, as recommended by the Matroska spec.
 * \remarks Only the ASCII letters 'a' to 'z' are converted; all other characters are left unchanged.
 * \sa https://matroska.org/technical/tagging.html#tag-formatting
 */
void MatroskaTagField::normalizeId(std::string &id)
{
    // distance between a lower-case ASCII letter and its upper-case counterpart
    constexpr char caseOffset = 'a' - 'A';
    for (auto it = id.begin(), end = id.end(); it != end; ++it) {
        const bool isLowerCaseAscii = *it >= 'a' && *it <= 'z';
        if (isLowerCaseAscii) {
            *it = static_cast<char>(*it - caseOffset);
        }
    }
}
/*!
* \class TagParser::MatroskaTagFieldMaker
* \brief The MatroskaTagFieldMaker class helps making tag fields.

View File

@ -79,6 +79,7 @@ public:
static typename std::string fieldIdFromString(std::string_view idString);
static std::string fieldIdToString(const std::string &id);
static void normalizeId(std::string &id);
};
/*!

View File

@ -90,8 +90,8 @@ MediaFileInfo::MediaFileInfo(std::string &&path)
, m_preferredPadding(0)
, m_tagPosition(ElementPosition::BeforeData)
, m_indexPosition(ElementPosition::BeforeData)
, m_fileHandlingFlags(
MediaFileHandlingFlags::ForceRewrite | MediaFileHandlingFlags::ForceTagPosition | MediaFileHandlingFlags::ForceIndexPosition)
, m_fileHandlingFlags(MediaFileHandlingFlags::ForceRewrite | MediaFileHandlingFlags::ForceTagPosition | MediaFileHandlingFlags::ForceIndexPosition
| MediaFileHandlingFlags::NormalizeKnownTagFieldIds)
{
}

View File

@ -62,6 +62,7 @@ enum class MediaFileHandlingFlags : std::uint64_t {
ForceRewrite = (1 << 1), /**< enforces a re-write of the file when applying changes */
ForceTagPosition = (1 << 2), /**< enforces the tag position when applying changes, see remarks of MediaFileInfo::setTagPosition() */
ForceIndexPosition = (1 << 3), /**< enforces the index position when applying changes, see remarks of MediaFileInfo::setIndexPosition() */
NormalizeKnownTagFieldIds = (1 << 4), /**< normalizes known tag field IDs when parsing to match the tag specification's recommendations */
};
} // namespace TagParser
@ -162,6 +163,8 @@ public:
void setSaveFilePath(std::string &&saveFilePath);
const std::string &writingApplication() const;
void setWritingApplication(std::string_view writingApplication);
MediaFileHandlingFlags fileHandlingFlags();
void setFileHandlingFlags(MediaFileHandlingFlags flags);
bool isForcingFullParse() const;
void setForceFullParse(bool forceFullParse);
bool isForcingRewrite() const;
@ -472,6 +475,22 @@ inline AbstractContainer *MediaFileInfo::container() const
return m_container.get();
}
/*!
* \brief Returns the currently configured file handling flags.
*/
inline MediaFileHandlingFlags MediaFileInfo::fileHandlingFlags()
{
return m_fileHandlingFlags;
}
/*!
 * \brief Overwrites the configured file handling flags with the specified \a flags.
 * \remarks The previous flags are discarded entirely; nothing is merged with the former value.
 * \sa fileHandlingFlags()
 */
inline void MediaFileInfo::setFileHandlingFlags(MediaFileHandlingFlags flags)
{
    this->m_fileHandlingFlags = flags;
}
/*!
* \brief Returns an indication whether forcing a full parse is enabled.
*