Parse "SeekHead" elements referenced by "Seek" elements
Follow at least one level of indirection by default
This commit is contained in:
parent
0c2056c2f9
commit
480857b1b6
|
@ -500,8 +500,8 @@ void MatroskaContainer::internalParseHeader(Diagnostics &diag)
|
|||
}
|
||||
break;
|
||||
case MatroskaIds::Cluster:
|
||||
// cluster reached
|
||||
// stop here if all relevant information has been gathered
|
||||
// stop as soon as the first cluster has been reached if all relevant information has been gathered
|
||||
// -> take elements from seek tables within this segment into account
|
||||
for (auto i = m_seekInfos.cbegin() + seekInfosIndex, end = m_seekInfos.cend(); i != end; ++i, ++seekInfosIndex) {
|
||||
for (const auto &infoPair : (*i)->info()) {
|
||||
std::uint64_t offset = currentOffset + topLevelElement->dataOffset() + infoPair.second;
|
||||
|
@ -559,9 +559,7 @@ void MatroskaContainer::internalParseHeader(Diagnostics &diag)
|
|||
}
|
||||
}
|
||||
}
|
||||
// not checking if m_tagsElements is empty avoids long parsing times when loading big files
|
||||
// but also has the disadvantage that the parser relies on the presence of a SeekHead element
|
||||
// (which is not mandatory) to detect tags at the end of the segment
|
||||
// -> stop if tracks and tags have been found or the file exceeds the max. size to fully process
|
||||
if (((!m_tracksElements.empty() && !m_tagsElements.empty()) || fileInfo().size() > m_maxFullParseSize)
|
||||
&& !m_segmentInfoElements.empty()) {
|
||||
goto finish;
|
||||
|
|
|
@ -36,25 +36,30 @@ void MatroskaSeekInfo::shift(std::uint64_t start, std::int64_t amount)
|
|||
}
|
||||
|
||||
/*!
|
||||
* \brief Parses the specified \a seekHeadElement.
|
||||
* \brief Parses the specified \a seekHeadElement and populates info() with the gathered information.
|
||||
* \throws Throws ios_base::failure when an IO error occurs.
|
||||
* \throws Throws Failure or a derived exception when a parsing error occurs.
|
||||
* \remarks The object does not take ownership over the specified \a seekHeadElement.
|
||||
* \remarks
|
||||
* - The object does not take ownership over the specified \a seekHeadElement.
|
||||
* - Previously parsed info() is not cleared, so subsequent calls can be used to gather seek
|
||||
* information from multiple seek head elements. Use clear() manually if that is not wanted.
|
||||
* - If the specified \a seekHeadElement references another seek head element, the referenced seek head
|
||||
* element is parsed as well. One can set \a maxNesting to 0 to prevent that or even increase the value
|
||||
* to allow following references even more deeply. References to elements which have already been visited
|
||||
* are never followed, though.
|
||||
*/
|
||||
void MatroskaSeekInfo::parse(EbmlElement *seekHeadElement, Diagnostics &diag)
|
||||
void MatroskaSeekInfo::parse(EbmlElement *seekHeadElement, Diagnostics &diag, size_t maxNesting)
|
||||
{
|
||||
static const string context("parsing \"SeekHead\"-element");
|
||||
m_seekHeadElement = seekHeadElement;
|
||||
m_info.clear();
|
||||
EbmlElement *seekElement = seekHeadElement->firstChild();
|
||||
EbmlElement *seekElementChild, *seekIdElement, *seekPositionElement;
|
||||
while (seekElement) {
|
||||
|
||||
m_seekHeadElements.emplace_back(seekHeadElement);
|
||||
|
||||
for (EbmlElement *seekElement = seekHeadElement->firstChild(), *seekIdElement, *seekPositionElement; seekElement; seekElement = seekElement->nextSibling()) {
|
||||
seekElement->parse(diag);
|
||||
switch (seekElement->id()) {
|
||||
case MatroskaIds::Seek:
|
||||
seekElementChild = seekElement->firstChild();
|
||||
seekIdElement = seekPositionElement = nullptr;
|
||||
while (seekElementChild) {
|
||||
for (auto *seekElementChild = seekElement->firstChild(); seekElementChild; seekElementChild = seekElementChild->nextSibling()) {
|
||||
seekElementChild->parse(diag);
|
||||
switch (seekElementChild->id()) {
|
||||
case MatroskaIds::SeekID:
|
||||
|
@ -80,13 +85,42 @@ void MatroskaSeekInfo::parse(EbmlElement *seekHeadElement, Diagnostics &diag)
|
|||
+ "\" within the \"Seek\" element is not a \"SeekID\"-element nor a \"SeekPosition\"-element and will be ignored.",
|
||||
context);
|
||||
}
|
||||
seekElementChild = seekElementChild->nextSibling();
|
||||
}
|
||||
if (seekIdElement && seekPositionElement) {
|
||||
m_info.emplace_back(seekIdElement->readUInteger(), seekPositionElement->readUInteger());
|
||||
} else {
|
||||
|
||||
if (!seekIdElement || !seekPositionElement) {
|
||||
diag.emplace_back(DiagLevel::Warning, "The \"Seek\"-element does not contain a \"SeekID\"- and a \"SeekPosition\"-element.", context);
|
||||
break;
|
||||
}
|
||||
|
||||
m_info.emplace_back(seekIdElement->readUInteger(), seekPositionElement->readUInteger());
|
||||
|
||||
// follow possibly referenced seek head element
|
||||
if (m_info.back().first == MatroskaIds::SeekHead) {
|
||||
const auto startOffset = m_info.back().second;
|
||||
if (!maxNesting) {
|
||||
diag.emplace_back(DiagLevel::Warning,
|
||||
argsToString("Not following reference by \"Seek\" element at ", seekElement->startOffset(), " contains to another \"SeekHead\" element at ", startOffset, '.'),
|
||||
context);
|
||||
break;
|
||||
}
|
||||
|
||||
auto visited = false;
|
||||
for (const auto *const visitedSeekHeadElement : m_seekHeadElements) {
|
||||
if (visitedSeekHeadElement->startOffset() == startOffset) {
|
||||
diag.emplace_back(DiagLevel::Warning,
|
||||
argsToString("The \"Seek\" element at ", seekElement->startOffset(), " contains a loop to the \"SeekHead\" element at ", visitedSeekHeadElement->startOffset(), '.'),
|
||||
context);
|
||||
visited = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (visited) {
|
||||
break;
|
||||
}
|
||||
m_additionalSeekHeadElements.emplace_back(make_unique<EbmlElement>(seekHeadElement->container(), startOffset));
|
||||
parse(m_additionalSeekHeadElements.back().get(), diag, maxNesting - 1);
|
||||
}
|
||||
|
||||
break;
|
||||
case EbmlIds::Crc32:
|
||||
case EbmlIds::Void:
|
||||
|
@ -95,7 +129,6 @@ void MatroskaSeekInfo::parse(EbmlElement *seekHeadElement, Diagnostics &diag)
|
|||
diag.emplace_back(
|
||||
DiagLevel::Warning, "The element " % seekElement->idToString() + " is not a seek element and will be ignored.", context);
|
||||
}
|
||||
seekElement = seekElement->nextSibling();
|
||||
}
|
||||
if (m_info.empty()) {
|
||||
diag.emplace_back(DiagLevel::Warning, "No seek information found.", context);
|
||||
|
|
|
@ -11,11 +11,11 @@ class TAG_PARSER_EXPORT MatroskaSeekInfo {
|
|||
public:
|
||||
MatroskaSeekInfo();
|
||||
|
||||
EbmlElement *seekHeadElement() const;
|
||||
const std::vector<EbmlElement *> &seekHeadElements() const;
|
||||
const std::vector<std::pair<EbmlElement::IdentifierType, std::uint64_t>> &info() const;
|
||||
std::vector<std::pair<EbmlElement::IdentifierType, std::uint64_t>> &info();
|
||||
void shift(std::uint64_t start, std::int64_t amount);
|
||||
void parse(EbmlElement *seekHeadElement, Diagnostics &diag);
|
||||
void parse(EbmlElement *seekHeadElements, Diagnostics &diag, std::size_t maxNesting = 1);
|
||||
void make(std::ostream &stream, Diagnostics &diag);
|
||||
std::uint64_t minSize() const;
|
||||
std::uint64_t maxSize() const;
|
||||
|
@ -30,7 +30,8 @@ public:
|
|||
static bool updateSeekInfo(std::vector<MatroskaSeekInfo> &newSeekInfos, std::uint64_t oldOffset, std::uint64_t newOffset);
|
||||
|
||||
private:
|
||||
EbmlElement *m_seekHeadElement;
|
||||
std::vector<EbmlElement *> m_seekHeadElements;
|
||||
std::vector<std::unique_ptr<EbmlElement>> m_additionalSeekHeadElements;
|
||||
std::vector<std::pair<EbmlElement::IdentifierType, std::uint64_t>> m_info;
|
||||
};
|
||||
|
||||
|
@ -38,16 +39,17 @@ private:
|
|||
* \brief Constructs a new MatroskaSeekInfo.
|
||||
*/
|
||||
inline MatroskaSeekInfo::MatroskaSeekInfo()
|
||||
: m_seekHeadElement(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Returns a pointer to the \a seekHeadElement specified when the parse() method was called.
|
||||
* \brief Returns the list of seek head elements the seek information is composed of.
|
||||
* \remarks This list is initially empty. When calling parse(), it is at least populated with the specified seek head element (ownership remains
|
||||
* with the caller). If that seek table references another seek table, those elements are also returned (the MatroskaSeekInfo has ownership).
|
||||
*/
|
||||
inline EbmlElement *MatroskaSeekInfo::seekHeadElement() const
|
||||
inline const std::vector<EbmlElement *> &MatroskaSeekInfo::seekHeadElements() const
|
||||
{
|
||||
return m_seekHeadElement;
|
||||
return m_seekHeadElements;
|
||||
}
|
||||
|
||||
/*!
|
||||
|
|
Loading…
Reference in New Issue