skip invalid bytes when parsing EBML

This commit is contained in:
Martchus 2016-03-13 22:00:23 +01:00
parent 9016097d36
commit e0437c0a43
4 changed files with 262 additions and 221 deletions

View File

@ -200,6 +200,7 @@ public:
protected:
identifierType m_id;
uint64 m_startOffset;
uint64 m_maxSize;
uint32 m_idLength;
dataSizeType m_dataSize;
uint32 m_sizeLength;
@ -212,7 +213,6 @@ private:
void copyInternal(std::ostream &targetStream, uint64 startOffset, uint64 bytesToCopy);
containerType* m_container;
uint64 m_maxSize;
bool m_parsed;
};
@ -248,12 +248,12 @@ template <class ImplementationType>
/*!
 * \brief Constructs a new element located at \a startOffset which has the specified \a parent.
 *
 * The container is taken from \a parent. The maximum size is initialized to the
 * distance between \a startOffset and the end of the parent as computed from
 * parent.startOffset() + parent.totalSize(). ID, ID length, data size and size length
 * are zero-initialized and the element is flagged as not parsed yet.
 *
 * \remarks NOTE(review): m_maxSize occurs twice in the initializer list below (once after
 *          m_startOffset and once after m_container). This looks like the previous and the
 *          new position of the same initializer being shown together; a mem-initializer
 *          list may name each member only once, so only one of them can remain - confirm
 *          against the member declaration order.
 */
GenericFileElement<ImplementationType>::GenericFileElement(GenericFileElement<ImplementationType>::implementationType &parent, uint64 startOffset) :
m_id(identifierType()),
m_startOffset(startOffset),
// number of bytes between startOffset and the parent's end offset
m_maxSize(parent.startOffset() + parent.totalSize() - startOffset),
m_idLength(0),
m_dataSize(0),
m_sizeLength(0),
m_parent(&parent),
m_container(&parent.container()),
// same expression as above (see review note in the header comment)
m_maxSize(parent.startOffset() + parent.totalSize() - startOffset),
m_parsed(false)
{}
@ -264,12 +264,12 @@ template <class ImplementationType>
/*!
 * \brief Constructs a new element without parent located at \a startOffset in the
 *        specified \a container.
 *
 * The parent is set to nullptr and the maximum size is taken from \a maxSize as is.
 * ID, ID length, data size and size length are zero-initialized and the element is
 * flagged as not parsed yet.
 *
 * \remarks NOTE(review): m_maxSize occurs twice in the initializer list below (once after
 *          m_startOffset and once after m_container). This looks like the previous and the
 *          new position of the same initializer being shown together; a mem-initializer
 *          list may name each member only once, so only one of them can remain - confirm
 *          against the member declaration order.
 */
GenericFileElement<ImplementationType>::GenericFileElement(GenericFileElement<ImplementationType>::containerType &container, uint64 startOffset, uint64 maxSize) :
m_id(identifierType()),
m_startOffset(startOffset),
m_maxSize(maxSize),
m_idLength(0),
m_dataSize(0),
m_sizeLength(0),
// no parent is associated with an element constructed this way
m_parent(nullptr),
m_container(&container),
// same value as above (see review note in the header comment)
m_maxSize(maxSize),
m_parsed(false)
{}

View File

@ -64,6 +64,9 @@ void EbmlElement::internalParse()
{
invalidateStatus();
static const string context("parsing EBML element header");
byte skipped;
for(skipped = 0; /* TODO: add a sane limit here */; ++m_startOffset, --m_maxSize, ++skipped) {
// check whether max size is valid
if(maxTotalSize() < 2) {
addNotification(NotificationType::Critical, "The EBML element at " + numberToString(startOffset()) + " is truncated or does not exist.", context);
@ -80,30 +83,44 @@ void EbmlElement::internalParse()
mask >>= 1;
}
if(m_idLength > GenericFileElement<implementationType>::maximumIdLengthSupported()) {
addNotification(NotificationType::Critical, "EBML ID length is not supported.", context);
throw VersionNotSupportedException();
if(!skipped) {
addNotification(NotificationType::Critical, "EBML ID length is not supported, trying to skip.", context);
}
continue; // try again
}
if(m_idLength > container().maxIdLength()) {
if(!skipped) {
addNotification(NotificationType::Critical, "EBML ID length is invalid.", context);
throw InvalidDataException();
}
continue; // try again
}
reader().read(buf + (GenericFileElement<implementationType>::maximumIdLengthSupported() - m_idLength), m_idLength);
m_id = BE::toUInt32(buf);
// read size
mask = 0x80;
m_sizeLength = 1;
beg = stream().peek();
if(beg == 0xFF) {
// this indicates that the element size is unknown
// -> just assume the element takes the maximum available size
m_dataSize = maxTotalSize() - headerSize();
} else {
while(m_sizeLength <= GenericFileElement<implementationType>::maximumSizeLengthSupported() && (beg & mask) == 0) {
++m_sizeLength;
mask >>= 1;
}
if(m_sizeLength > GenericFileElement<implementationType>::maximumSizeLengthSupported()) {
if(!skipped) {
addNotification(NotificationType::Critical, "EBML size length is not supported.", parsingContext());
throw VersionNotSupportedException();
}
continue; // try again
}
if(m_sizeLength > container().maxSizeLength()) {
if(!skipped) {
addNotification(NotificationType::Critical, "EBML size length is invalid.", parsingContext());
throw InvalidDataException();
}
continue; // try again
}
// read size into buffer
memset(buf, 0, sizeof(dataSizeType)); // reset buffer
@ -113,15 +130,19 @@ void EbmlElement::internalParse()
// check if element is truncated
if(totalSize() > maxTotalSize()) {
if(m_idLength + m_sizeLength > maxTotalSize()) { // header truncated
if(!skipped) {
addNotification(NotificationType::Critical, "EBML header seems to be truncated.", parsingContext());
throw TruncatedDataException();
}
continue; // try again
} else { // data truncated
addNotification(NotificationType::Warning, "Data of EBML element seems to be truncated; unable to parse siblings of that element.", parsingContext());
m_dataSize = maxTotalSize() - m_idLength - m_sizeLength; // using max size instead
}
}
}
// check if there's a first child
if(uint64 firstChildOffset = this->firstChildOffset()) {
if(const uint64 firstChildOffset = this->firstChildOffset()) {
if(firstChildOffset < dataSize()) {
m_firstChild.reset(new EbmlElement(static_cast<EbmlElement &>(*this), startOffset() + firstChildOffset));
} else {
@ -130,6 +151,7 @@ void EbmlElement::internalParse()
} else {
m_firstChild.reset();
}
// check if there's a sibling
if(totalSize() < maxTotalSize()) {
if(parent()) {
@ -140,6 +162,18 @@ void EbmlElement::internalParse()
} else {
m_nextSibling.reset();
}
// no critical errors occurred
// -> add a warning if bytes have been skipped
if(skipped) {
addNotification(NotificationType::Warning, numberToString<unsigned int>(skipped) + " bytes have been skipped", parsingContext());
}
// -> don't need another try, return here
return;
}
// critical errors occurred and skipping some bytes wasn't successful
throw InvalidDataException();
}
/*!

View File

@ -136,6 +136,7 @@ inline bool EbmlElement::isPadding() const
/*!
* \brief Returns the offset of the first child of the element.
* \remarks The returned offset is relative to the start offset of this element.
*/
inline uint64 EbmlElement::firstChildOffset() const
{

View File

@ -354,19 +354,11 @@ void MatroskaContainer::internalParseHeader()
for(EbmlElement *topLevelElement = m_firstElement.get(); topLevelElement; topLevelElement = topLevelElement->nextSibling()) {
try {
topLevelElement->parse();
} catch(const Failure &) {
addNotification(NotificationType::Critical, "Unable to parse top-level element at " + numberToString(topLevelElement->startOffset()) + ".", context);
break;
}
switch(topLevelElement->id()) {
case EbmlIds::Header:
for(EbmlElement *subElement = topLevelElement->firstChild(); subElement; subElement = subElement->nextSibling()) {
try {
subElement->parse();
} catch (Failure &) {
addNotification(NotificationType::Critical, "Unable to parse all childs of EBML header.", context);
break;
}
switch(subElement->id()) {
case EbmlIds::Version:
m_version = subElement->readUInteger();
@ -402,6 +394,12 @@ void MatroskaContainer::internalParseHeader()
}
break;
}
addNotifications(*subElement);
} catch(const Failure &) {
addNotifications(*subElement);
addNotification(NotificationType::Critical, "Unable to parse all childs of EBML header.", context);
break;
}
}
break;
case MatroskaIds::Segment:
@ -409,10 +407,6 @@ void MatroskaContainer::internalParseHeader()
for(EbmlElement *subElement = topLevelElement->firstChild(); subElement; subElement = subElement->nextSibling()) {
try {
subElement->parse();
} catch (Failure &) {
addNotification(NotificationType::Critical, "Unable to parse all childs of \"Segment\"-element.", context);
break;
}
switch(subElement->id()) {
case MatroskaIds::SeekHead:
m_seekInfos.emplace_back(make_unique<MatroskaSeekInfo>());
@ -507,18 +501,30 @@ void MatroskaContainer::internalParseHeader()
}
break;
}
addNotifications(*subElement);
} catch(const Failure &) {
addNotifications(*subElement);
addNotification(NotificationType::Critical, "Unable to parse all childs of \"Segment\"-element.", context);
break;
}
}
currentOffset += topLevelElement->totalSize();
break;
default:
;
}
addNotifications(*topLevelElement);
} catch(const Failure &) {
addNotifications(*topLevelElement);
addNotification(NotificationType::Critical, "Unable to parse top-level element at " + numberToString(topLevelElement->startOffset()) + ".", context);
break;
}
}
// finally parse the "Info"-element and fetch "EditionEntry"-elements
finish:
try {
parseSegmentInfo();
} catch (Failure &) {
} catch(const Failure &) {
addNotification(NotificationType::Critical, "Unable to parse EBML (segment) \"Info\"-element.", context);
}
}