From 480857b1b63a233d8a77112ad92eb48c5618115d Mon Sep 17 00:00:00 2001 From: Martchus Date: Mon, 17 Jun 2019 19:11:00 +0200 Subject: [PATCH] Parse "SeekHead" elements referenced by "Seek" elements Follow at least one level of indirection by default --- matroska/matroskacontainer.cpp | 8 ++--- matroska/matroskaseekinfo.cpp | 63 ++++++++++++++++++++++++++-------- matroska/matroskaseekinfo.h | 16 +++++---- 3 files changed, 60 insertions(+), 27 deletions(-) diff --git a/matroska/matroskacontainer.cpp b/matroska/matroskacontainer.cpp index b149be5..450a991 100644 --- a/matroska/matroskacontainer.cpp +++ b/matroska/matroskacontainer.cpp @@ -500,8 +500,8 @@ void MatroskaContainer::internalParseHeader(Diagnostics &diag) } break; case MatroskaIds::Cluster: - // cluster reached - // stop here if all relevant information has been gathered + // stop as soon as the first cluster has been reached if all relevant information has been gathered + // -> take elements from seek tables within this segment into account for (auto i = m_seekInfos.cbegin() + seekInfosIndex, end = m_seekInfos.cend(); i != end; ++i, ++seekInfosIndex) { for (const auto &infoPair : (*i)->info()) { std::uint64_t offset = currentOffset + topLevelElement->dataOffset() + infoPair.second; @@ -559,9 +559,7 @@ void MatroskaContainer::internalParseHeader(Diagnostics &diag) } } } - // not checking if m_tagsElements is empty avoids long parsing times when loading big files - // but also has the disadvantage that the parser relies on the presence of a SeekHead element - // (which is not mandatory) to detect tags at the end of the segment + // -> stop if tracks and tags have been found or the file exceeds the max. 
size to fully process if (((!m_tracksElements.empty() && !m_tagsElements.empty()) || fileInfo().size() > m_maxFullParseSize) && !m_segmentInfoElements.empty()) { goto finish; diff --git a/matroska/matroskaseekinfo.cpp b/matroska/matroskaseekinfo.cpp index 2e6b919..e9602df 100644 --- a/matroska/matroskaseekinfo.cpp +++ b/matroska/matroskaseekinfo.cpp @@ -36,25 +36,30 @@ void MatroskaSeekInfo::shift(std::uint64_t start, std::int64_t amount) } /*! - * \brief Parses the specified \a seekHeadElement. + * \brief Parses the specified \a seekHeadElement and populates info() with the gathered information. * \throws Throws ios_base::failure when an IO error occurs. * \throws Throws Failure or a derived exception when a parsing error occurs. - * \remarks The object does not take ownership over the specified \a seekHeadElement. + * \remarks + * - The object does not take ownership of the specified \a seekHeadElement. + * - Previously parsed info() is not cleared, so subsequent calls can be used to gather seek + * information from multiple seek head elements. Use clear() manually if that is not wanted. + * - If the specified \a seekHeadElement references another seek head element, the referenced seek head + * element is parsed as well. One can set \a maxNesting to 0 to prevent that or even increase the value + * to allow following references even more deeply. References to elements which have already been visited + * are never followed, though. 
*/ -void MatroskaSeekInfo::parse(EbmlElement *seekHeadElement, Diagnostics &diag) +void MatroskaSeekInfo::parse(EbmlElement *seekHeadElement, Diagnostics &diag, size_t maxNesting) { static const string context("parsing \"SeekHead\"-element"); - m_seekHeadElement = seekHeadElement; - m_info.clear(); - EbmlElement *seekElement = seekHeadElement->firstChild(); - EbmlElement *seekElementChild, *seekIdElement, *seekPositionElement; - while (seekElement) { + + m_seekHeadElements.emplace_back(seekHeadElement); + + for (EbmlElement *seekElement = seekHeadElement->firstChild(), *seekIdElement, *seekPositionElement; seekElement; seekElement = seekElement->nextSibling()) { seekElement->parse(diag); switch (seekElement->id()) { case MatroskaIds::Seek: - seekElementChild = seekElement->firstChild(); seekIdElement = seekPositionElement = nullptr; - while (seekElementChild) { + for (auto *seekElementChild = seekElement->firstChild(); seekElementChild; seekElementChild = seekElementChild->nextSibling()) { seekElementChild->parse(diag); switch (seekElementChild->id()) { case MatroskaIds::SeekID: @@ -80,13 +85,42 @@ void MatroskaSeekInfo::parse(EbmlElement *seekHeadElement, Diagnostics &diag) + "\" within the \"Seek\" element is not a \"SeekID\"-element nor a \"SeekPosition\"-element and will be ignored.", context); } - seekElementChild = seekElementChild->nextSibling(); } - if (seekIdElement && seekPositionElement) { - m_info.emplace_back(seekIdElement->readUInteger(), seekPositionElement->readUInteger()); - } else { + + if (!seekIdElement || !seekPositionElement) { diag.emplace_back(DiagLevel::Warning, "The \"Seek\"-element does not contain a \"SeekID\"- and a \"SeekPosition\"-element.", context); + break; } + + m_info.emplace_back(seekIdElement->readUInteger(), seekPositionElement->readUInteger()); + + // follow possibly referenced seek head element + if (m_info.back().first == MatroskaIds::SeekHead) { + const auto startOffset = m_info.back().second; + if (!maxNesting) { + 
diag.emplace_back(DiagLevel::Warning, + argsToString("Not following reference by \"Seek\" element at ", seekElement->startOffset(), " contains to another \"SeekHead\" element at ", startOffset, '.'), + context); + break; + } + + auto visited = false; + for (const auto *const visitedSeekHeadElement : m_seekHeadElements) { + if (visitedSeekHeadElement->startOffset() == startOffset) { + diag.emplace_back(DiagLevel::Warning, + argsToString("The \"Seek\" element at ", seekElement->startOffset(), " contains a loop to the \"SeekHead\" element at ", visitedSeekHeadElement->startOffset(), '.'), + context); + visited = true; + break; + } + } + if (visited) { + break; + } + m_additionalSeekHeadElements.emplace_back(make_unique(seekHeadElement->container(), startOffset)); + parse(m_additionalSeekHeadElements.back().get(), diag, maxNesting - 1); + } + break; case EbmlIds::Crc32: case EbmlIds::Void: @@ -95,7 +129,6 @@ void MatroskaSeekInfo::parse(EbmlElement *seekHeadElement, Diagnostics &diag) diag.emplace_back( DiagLevel::Warning, "The element " % seekElement->idToString() + " is not a seek element and will be ignored.", context); } - seekElement = seekElement->nextSibling(); } if (m_info.empty()) { diag.emplace_back(DiagLevel::Warning, "No seek information found.", context); diff --git a/matroska/matroskaseekinfo.h b/matroska/matroskaseekinfo.h index fe4dc7d..6cef8ac 100644 --- a/matroska/matroskaseekinfo.h +++ b/matroska/matroskaseekinfo.h @@ -11,11 +11,11 @@ class TAG_PARSER_EXPORT MatroskaSeekInfo { public: MatroskaSeekInfo(); - EbmlElement *seekHeadElement() const; + const std::vector &seekHeadElements() const; const std::vector> &info() const; std::vector> &info(); void shift(std::uint64_t start, std::int64_t amount); - void parse(EbmlElement *seekHeadElement, Diagnostics &diag); + void parse(EbmlElement *seekHeadElements, Diagnostics &diag, std::size_t maxNesting = 1); void make(std::ostream &stream, Diagnostics &diag); std::uint64_t minSize() const; std::uint64_t 
maxSize() const; @@ -30,7 +30,8 @@ public: static bool updateSeekInfo(std::vector &newSeekInfos, std::uint64_t oldOffset, std::uint64_t newOffset); private: - EbmlElement *m_seekHeadElement; + std::vector m_seekHeadElements; + std::vector> m_additionalSeekHeadElements; std::vector> m_info; }; @@ -38,16 +39,17 @@ private: * \brief Constructs a new MatroskaSeekInfo. */ inline MatroskaSeekInfo::MatroskaSeekInfo() - : m_seekHeadElement(nullptr) { } /*! - * \brief Returns a pointer to the \a seekHeadElement specified when the parse() method was called. + * \brief Returns the seek head elements the seek information is composed of. + * \remarks This list is initially empty. When calling parse() it is at least populated with the specified seek head element (ownership remains + * with the caller). In case that seek table references another seek table, those elements are also returned (the MatroskaSeekInfo has ownership). */ -inline EbmlElement *MatroskaSeekInfo::seekHeadElement() const +inline const std::vector &MatroskaSeekInfo::seekHeadElements() const { - return m_seekHeadElement; + return m_seekHeadElements; } /*!