Tag Parser 12.1.0
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
Loading...
Searching...
No Matches
ebmlelement.cpp
Go to the documentation of this file.
1#include "./ebmlelement.h"
2#include "./ebmlid.h"
4#include "./matroskaid.h"
5
6#include "../exceptions.h"
7#include "../mediafileinfo.h"
8
9#include <c++utilities/conversion/binaryconversion.h>
10#include <c++utilities/io/binaryreader.h>
11#include <c++utilities/io/binarywriter.h>
12
13#include <cstdint>
14#include <cstring>
15#include <memory>
16#include <sstream>
17#include <string>
18
19using namespace std;
20using namespace CppUtilities;
21
22namespace TagParser {
23
// Specifies the number of bytes to be skipped till a valid EBML element is found in the stream.
// internalParse() uses this value as the upper bound for its retry loop.
32std::uint64_t EbmlElement::bytesToBeSkipped = 0x4000;
33
37EbmlElement::EbmlElement(MatroskaContainer &container, std::uint64_t startOffset)
38 : GenericFileElement<EbmlElement>(container, startOffset)
39{
40}
41
45EbmlElement::EbmlElement(MatroskaContainer &container, std::uint64_t startOffset, std::uint64_t maxSize)
46 : GenericFileElement<EbmlElement>(container, startOffset, maxSize)
47{
48}
49
53EbmlElement::EbmlElement(EbmlElement &parent, std::uint64_t startOffset)
54 : GenericFileElement<EbmlElement>(parent, startOffset)
55{
56}
57
61string EbmlElement::parsingContext() const
62{
63 return ("parsing header of EBML element " % idToString() % " at ") + startOffset();
64}
65
// Parses the EBML element: reads the ID and the size denotation at the current start offset and
// sets up the first child and the next sibling accordingly.
// If the header at the current offset is not usable, the start offset is moved forward byte by byte
// (at most bytesToBeSkipped attempts); if no attempt succeeds an InvalidDataException is thrown.
// NOTE(review): the embedded line numbers of this listing have gaps (e.g. 76 -> 78, 81 -> 83, 88 -> 90),
// so the function header and several statements are not visible here - confirm against the original file.
70{
71 static const string context("parsing EBML element header");
72
// each iteration is one attempt; "continue" advances the start offset by one byte and shrinks the max size
73 for (std::uint64_t skipped = 0; skipped < bytesToBeSkipped; ++m_startOffset, --m_maxSize, ++skipped) {
74 // check whether max size is valid
75 if (maxTotalSize() < 2) {
76 diag.emplace_back(DiagLevel::Critical, argsToString("The EBML element at ", startOffset(), " is truncated or does not exist."), context);
78 }
79 stream().seekg(static_cast<streamoff>(startOffset()));
80
81 // read ID
// the number of leading zero bits of the first byte determines the ID length
83 std::uint8_t beg = static_cast<std::uint8_t>(stream().peek()), mask = 0x80;
84 m_idLength = 1;
85 while (m_idLength <= maximumIdLengthSupported() && (beg & mask) == 0) {
86 ++m_idLength;
87 mask >>= 1;
88 }
// the message is only added on the first attempt (skipped == 0)
90 if (!skipped) {
91 diag.emplace_back(
92 DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is not supported, trying to skip."), context);
93 }
94 continue; // try again
95 }
96 if (m_idLength > container().maxIdLength()) {
97 if (!skipped) {
98 diag.emplace_back(DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is invalid, trying to skip."), context);
99 }
100 continue; // try again
101 }
103 m_id = BE::toInt<std::uint32_t>(buf);
104
105 // check whether this element is actually a sibling of one of its parents rather than a child
106 // (might be the case if the parent's size is unknown and hence assumed to be the max file size)
108 // check at which level in the hierarchy the element is supposed to occur using its ID
109 // (the only chance to find out whether the element belongs higher up in the hierarchy)
110 const MatroskaElementLevel supposedLevel = matroskaIdLevel(m_id);
111 const std::uint8_t actualLevel = level();
112 if (actualLevel > supposedLevel) {
113 // the element belongs higher up in the hierarchy so find a better parent
114 if (EbmlElement *betterParent = m_parent->parent(actualLevel - static_cast<std::uint8_t>(supposedLevel))) {
115 // recompute the parent size (assumption - which was rest of the available space - was wrong)
117 m_parent->m_sizeUnknown = false;
118 // detach from ...
119 if (m_parent->firstChild() == this) {
120 // ... parent
// release() gives up ownership without deleting this element; the former next sibling takes its place
121 m_parent->m_firstChild.release();
122 m_parent->m_firstChild = std::move(m_nextSibling);
123 } else {
124 // ... previous sibling
125 for (EbmlElement *sibling = m_parent->firstChild(); sibling; sibling = sibling->nextSibling()) {
126 if (sibling->nextSibling() == this) {
127 sibling->m_nextSibling.release();
128 sibling->m_nextSibling = std::move(m_nextSibling);
129 break;
130 }
131 }
132 }
133 // insert as child of better parent
134 if (EbmlElement *previousSibling = betterParent->lastChild()) {
135 previousSibling->m_nextSibling.reset(this);
136 } else {
137 betterParent->m_firstChild.reset(this);
138 }
139 // update own reference to parent
140 m_parent = betterParent;
141 }
142 }
143 }
144
145 // read size
146 beg = static_cast<std::uint8_t>(stream().peek());
147 mask = 0x80;
148 m_sizeLength = 1;
149 if ((m_sizeUnknown = (beg == 0xFF))) {
150 // this indicates that the element size is unknown
151 // -> just assume the element takes the maximum available size
153 } else {
// the number of leading zero bits of the first byte determines the length of the size denotation
154 while (m_sizeLength <= maximumSizeLengthSupported() && (beg & mask) == 0) {
155 ++m_sizeLength;
156 mask >>= 1;
157 }
159 if (!skipped) {
160 diag.emplace_back(DiagLevel::Critical, "EBML size length is not supported.", parsingContext());
161 }
162 continue; // try again
163 }
164 if (m_sizeLength > container().maxSizeLength()) {
165 if (!skipped) {
166 diag.emplace_back(DiagLevel::Critical, "EBML size length is invalid.", parsingContext());
167 }
168 continue; // try again
169 }
170 // read size into buffer
171 memset(buf, 0, sizeof(DataSizeType)); // reset buffer
173 // xor the first byte in buffer which has been read from the file with mask
174 *(buf + (maximumSizeLengthSupported() - m_sizeLength)) ^= static_cast<char>(mask);
175 m_dataSize = BE::toInt<std::uint64_t>(buf);
176 // check if element is truncated
177 if (totalSize() > maxTotalSize()) {
178 if (m_idLength + m_sizeLength > maxTotalSize()) { // header truncated
179 if (!skipped) {
180 diag.emplace_back(DiagLevel::Critical, "EBML header seems to be truncated.", parsingContext());
181 }
182 continue; // try again
183 } else { // data truncated
184 diag.emplace_back(DiagLevel::Warning, "Data of EBML element seems to be truncated; unable to parse siblings of that element.",
185 parsingContext());
186 m_dataSize = maxTotalSize() - m_idLength - m_sizeLength; // using max size instead
187 }
188 }
189 }
190
191 // check if there's a first child
192 const std::uint64_t firstChildOffset = this->firstChildOffset();
193 if (firstChildOffset && firstChildOffset < totalSize()) {
194 m_firstChild.reset(new EbmlElement(static_cast<EbmlElement &>(*this), startOffset() + firstChildOffset));
195 } else {
196 m_firstChild.reset();
197 }
198
199 // check if there's a sibling
200 if (totalSize() < maxTotalSize()) {
201 if (parent()) {
202 m_nextSibling.reset(new EbmlElement(*(parent()), startOffset() + totalSize()));
203 } else {
205 }
206 } else {
207 m_nextSibling.reset();
208 }
209
210 // no critical errors occurred
211 // -> add a warning if bytes have been skipped
212 if (skipped) {
213 diag.emplace_back(DiagLevel::Warning, argsToString(skipped, " bytes have been skipped"), parsingContext());
214 }
215 // -> don't need another try, return here
216 return;
217 }
218
219 // critical errors occurred and skipping some bytes wasn't successful
220 throw InvalidDataException();
221}
222
227{
228 stream().seekg(static_cast<streamoff>(dataOffset()));
229 return reader().readString(dataSize());
230}
231
239{
240 constexpr DataSizeType maxBytesToRead = 8;
241 char buff[maxBytesToRead] = { 0 };
242 const auto bytesToSkip = maxBytesToRead - min(dataSize(), maxBytesToRead);
243 stream().seekg(static_cast<streamoff>(dataOffset()), ios_base::beg);
244 stream().read(buff + bytesToSkip, static_cast<streamoff>(sizeof(buff) - bytesToSkip));
245 return BE::toInt<std::uint64_t>(buff);
246}
247
253{
254 stream().seekg(static_cast<streamoff>(dataOffset()));
255 switch (dataSize()) {
256 case sizeof(float):
257 return static_cast<double>(reader().readFloat32BE());
258 case sizeof(double):
259 return reader().readFloat64BE();
260 default:
261 return 0.0;
262 }
263}
264
270{
271 if (id <= 0xFF) {
272 return 1;
273 } else if (id <= 0x7FFF) {
274 return 2;
275 } else if (id <= 0x3FFFFF) {
276 return 3;
277 } else if (id <= 0x1FFFFFFF) {
278 return 4;
279 } else {
280 throw InvalidDataException();
281 }
282}
283
288std::uint8_t EbmlElement::calculateSizeDenotationLength(std::uint64_t size)
289{
290 if (size < 126) {
291 return 1;
292 } else if (size <= 16382ul) {
293 return 2;
294 } else if (size <= 2097150ul) {
295 return 3;
296 } else if (size <= 268435454ul) {
297 return 4;
298 } else if (size <= 34359738366ul) {
299 return 5;
300 } else if (size <= 4398046511102ul) {
301 return 6;
302 } else if (size <= 562949953421310ul) {
303 return 7;
304 } else if (size <= 72057594037927934ul) {
305 return 8;
306 } else {
307 throw InvalidDataException();
308 }
309}
310
318{
319 if (id <= 0xFF) {
320 *buff = static_cast<char>(id);
321 return 1;
322 } else if (id <= 0x7FFF) {
323 BE::getBytes(static_cast<std::uint16_t>(id), buff);
324 return 2;
325 } else if (id <= 0x3FFFFF) {
326 BE::getBytes(static_cast<std::uint32_t>(id << 0x8), buff);
327 return 3;
328 } else if (id <= 0x1FFFFFFF) {
329 BE::getBytes(static_cast<std::uint32_t>(id), buff);
330 return 4;
331 } else {
332 throw InvalidDataException();
333 }
334}
335
343std::uint8_t EbmlElement::makeSizeDenotation(std::uint64_t size, char *buff)
344{
345 if (size < 126) {
346 *buff = static_cast<char>(size | 0x80);
347 return 1;
348 } else if (size <= 16382ul) {
349 BE::getBytes(static_cast<std::uint16_t>(size | 0x4000), buff);
350 return 2;
351 } else if (size <= 2097150ul) {
352 BE::getBytes(static_cast<std::uint32_t>((size | 0x200000) << 0x08), buff);
353 return 3;
354 } else if (size <= 268435454ul) {
355 BE::getBytes(static_cast<std::uint32_t>(size | 0x10000000), buff);
356 return 4;
357 } else if (size <= 34359738366ul) {
358 BE::getBytes(static_cast<std::uint64_t>((size | 0x800000000) << 0x18), buff);
359 return 5;
360 } else if (size <= 4398046511102ul) {
361 BE::getBytes(static_cast<std::uint64_t>((size | 0x40000000000) << 0x10), buff);
362 return 6;
363 } else if (size <= 562949953421310ul) {
364 BE::getBytes(static_cast<std::uint64_t>((size | 0x2000000000000) << 0x08), buff);
365 return 7;
366 } else if (size <= 72057594037927934ul) {
367 BE::getBytes(static_cast<std::uint64_t>(size | 0x100000000000000), buff);
368 return 8;
369 }
370 throw InvalidDataException();
371}
372
381std::uint8_t EbmlElement::makeSizeDenotation(std::uint64_t size, char *buff, std::uint8_t minBytes)
382{
383 if (minBytes <= 1 && size < 126) {
384 *buff = static_cast<char>(size | 0x80);
385 return 1;
386 } else if (minBytes <= 2 && size <= 16382ul) {
387 BE::getBytes(static_cast<std::uint16_t>(size | 0x4000), buff);
388 return 2;
389 } else if (minBytes <= 3 && size <= 2097150ul) {
390 BE::getBytes(static_cast<std::uint32_t>((size | 0x200000) << 0x08), buff);
391 return 3;
392 } else if (minBytes <= 4 && size <= 268435454ul) {
393 BE::getBytes(static_cast<std::uint32_t>(size | 0x10000000), buff);
394 return 4;
395 } else if (minBytes <= 5 && size <= 34359738366ul) {
396 BE::getBytes(static_cast<std::uint64_t>((size | 0x800000000) << 0x18), buff);
397 return 5;
398 } else if (minBytes <= 6 && size <= 4398046511102ul) {
399 BE::getBytes(static_cast<std::uint64_t>((size | 0x40000000000) << 0x10), buff);
400 return 6;
401 } else if (minBytes <= 7 && size <= 562949953421310ul) {
402 BE::getBytes(static_cast<std::uint64_t>((size | 0x2000000000000) << 0x08), buff);
403 return 7;
404 } else if (minBytes <= 8 && size <= 72057594037927934ul) {
405 BE::getBytes(static_cast<std::uint64_t>(size | 0x100000000000000), buff);
406 return 8;
407 }
408 throw InvalidDataException();
409}
410
415std::uint8_t EbmlElement::calculateUIntegerLength(std::uint64_t integer)
416{
417 if (integer <= 0xFFul) {
418 return 1;
419 } else if (integer <= 0xFFFFul) {
420 return 2;
421 } else if (integer <= 0xFFFFFFul) {
422 return 3;
423 } else if (integer <= 0xFFFFFFFFul) {
424 return 4;
425 } else if (integer <= 0xFFFFFFFFFFul) {
426 return 5;
427 } else if (integer <= 0xFFFFFFFFFFFFul) {
428 return 6;
429 } else if (integer <= 0xFFFFFFFFFFFFFFul) {
430 return 7;
431 } else {
432 return 8;
433 }
434}
435
440std::uint8_t EbmlElement::makeUInteger(std::uint64_t value, char *buff)
441{
442 if (value <= 0xFFul) {
443 *buff = static_cast<char>(value);
444 return 1;
445 } else if (value <= 0xFFFFul) {
446 BE::getBytes(static_cast<std::uint16_t>(value), buff);
447 return 2;
448 } else if (value <= 0xFFFFFFul) {
449 BE::getBytes(static_cast<std::uint32_t>(value << 0x08), buff);
450 return 3;
451 } else if (value <= 0xFFFFFFFFul) {
452 BE::getBytes(static_cast<std::uint32_t>(value), buff);
453 return 4;
454 } else if (value <= 0xFFFFFFFFFFul) {
455 BE::getBytes(static_cast<std::uint64_t>(value << 0x18), buff);
456 return 5;
457 } else if (value <= 0xFFFFFFFFFFFFul) {
458 BE::getBytes(static_cast<std::uint64_t>(value << 0x10), buff);
459 return 6;
460 } else if (value <= 0xFFFFFFFFFFFFFFul) {
461 BE::getBytes(static_cast<std::uint64_t>(value << 0x08), buff);
462 return 7;
463 } else {
464 BE::getBytes(static_cast<std::uint64_t>(value), buff);
465 return 8;
466 }
467}
468
478std::uint8_t EbmlElement::makeUInteger(std::uint64_t value, char *buff, std::uint8_t minBytes)
479{
480 if (minBytes <= 1 && value <= 0xFFul) {
481 *buff = static_cast<char>(value);
482 return 1;
483 } else if (minBytes <= 2 && value <= 0xFFFFul) {
484 BE::getBytes(static_cast<std::uint16_t>(value), buff);
485 return 2;
486 } else if (minBytes <= 3 && value <= 0xFFFFFFul) {
487 BE::getBytes(static_cast<std::uint32_t>(value << 0x08), buff);
488 return 3;
489 } else if (minBytes <= 4 && value <= 0xFFFFFFFFul) {
490 BE::getBytes(static_cast<std::uint32_t>(value), buff);
491 return 4;
492 } else if (minBytes <= 5 && value <= 0xFFFFFFFFFFul) {
493 BE::getBytes(static_cast<std::uint64_t>(value << 0x18), buff);
494 return 5;
495 } else if (minBytes <= 6 && value <= 0xFFFFFFFFFFFFul) {
496 BE::getBytes(static_cast<std::uint64_t>(value << 0x10), buff);
497 return 6;
498 } else if (minBytes <= 7 && value <= 0xFFFFFFFFFFFFFFul) {
499 BE::getBytes(static_cast<std::uint64_t>(value << 0x08), buff);
500 return 7;
501 } else {
502 BE::getBytes(static_cast<std::uint64_t>(value), buff);
503 return 8;
504 }
505}
506
513void EbmlElement::makeSimpleElement(ostream &stream, IdentifierType id, std::uint64_t content)
514{
515 char buff1[8];
516 char buff2[8];
517 std::uint8_t sizeLength = EbmlElement::makeId(id, buff1);
518 stream.write(buff1, sizeLength);
519 std::uint8_t elementSize = EbmlElement::makeUInteger(content, buff2);
520 sizeLength = EbmlElement::makeSizeDenotation(elementSize, buff1);
521 stream.write(buff1, sizeLength);
522 stream.write(buff2, elementSize);
523}
524
531void EbmlElement::makeSimpleElement(std::ostream &stream, GenericFileElement::IdentifierType id, string_view content)
532{
533 char buff1[8];
534 std::uint8_t sizeLength = EbmlElement::makeId(id, buff1);
535 stream.write(buff1, sizeLength);
536 sizeLength = EbmlElement::makeSizeDenotation(content.size(), buff1);
537 stream.write(buff1, sizeLength);
538 stream.write(content.data(), static_cast<std::streamsize>(content.size()));
539}
540
541} // namespace TagParser
The Diagnostics class is a container for DiagMessage.
The EbmlElement class helps to parse EBML files such as Matroska files.
Definition ebmlelement.h:32
std::uint64_t firstChildOffset() const
Returns the offset of the first child of the element.
static std::uint8_t makeUInteger(std::uint64_t value, char *buff)
Writes value to buff.
EbmlElement(MatroskaContainer &container, std::uint64_t startOffset)
Constructs a new top level element with the specified container at the specified startOffset.
static std::uint8_t calculateIdLength(IdentifierType id)
Returns the length of the specified id in byte.
static void makeSimpleElement(std::ostream &stream, IdentifierType id, std::uint64_t content)
Makes a simple EBML element.
static std::uint8_t calculateSizeDenotationLength(std::uint64_t size)
Returns the length of the size denotation for the specified size in byte.
static std::uint8_t makeId(IdentifierType id, char *buff)
Stores the specified id in the specified buffer which must be at least 8 bytes long.
double readFloat()
Reads the content of the element as float.
static std::uint64_t bytesToBeSkipped
Specifies the number of bytes to be skipped till a valid EBML element is found in the stream.
Definition ebmlelement.h:56
std::string idToString() const
Converts the specified EBML ID to a printable string.
Definition ebmlelement.h:71
std::string readString()
Reads the content of the element as string.
std::uint64_t readUInteger()
Reads the content of the element as unsigned integer.
static std::uint8_t makeSizeDenotation(std::uint64_t size, char *buff)
Makes the size denotation for the specified size and stores it to buff.
void internalParse(Diagnostics &diag)
Parses the EBML element.
static std::uint8_t calculateUIntegerLength(std::uint64_t integer)
Returns the length of the specified unsigned integer in byte.
The GenericFileElement class helps to parse binary files which consist of an arboreal element structu...
ImplementationType * lastChild()
Returns the last child of the element.
std::uint64_t startOffset() const
Returns the start offset in the related stream.
std::uint32_t headerSize() const
Returns the header size of the element in byte.
const IdentifierType & id() const
Returns the element ID.
typename FileElementTraits< EbmlElement >::DataSizeType DataSizeType
Specifies the type used to store data sizes.
std::uint8_t level() const
Returns how deep the element is nested (0 for top-level elements, 1 for children of top-level element...
std::iostream & stream()
Returns the related stream.
ImplementationType * parent()
Returns the parent of the element.
ImplementationType * firstChild()
Returns the first child of the element.
std::uint32_t sizeLength() const
Returns the length of the size denotation of the element in byte.
std::unique_ptr< ImplementationType > m_firstChild
static constexpr std::uint32_t maximumIdLengthSupported()
Returns the maximum id length supported by the class in byte.
DataSizeType dataSize() const
Returns the data size of the element in byte.
std::uint64_t totalSize() const
Returns the total size of the element.
typename FileElementTraits< ImplementationType >::IdentifierType IdentifierType
Specifies the type used to store identifiers.
static constexpr std::uint32_t maximumSizeLengthSupported()
Returns the maximum size length supported by the class in byte.
std::uint64_t dataOffset() const
Returns the data offset of the element in the related stream.
ContainerType & container()
Returns the related container.
CppUtilities::BinaryReader & reader()
Returns the related BinaryReader.
std::uint64_t maxTotalSize() const
Returns maximum total size.
The exception that is thrown when the data to be parsed or to be made seems invalid and therefore can...
Definition exceptions.h:25
Implementation of GenericContainer<MediaFileInfo, MatroskaTag, MatroskaTrack, EbmlElement>.
The exception that is thrown when the data to be parsed is truncated and therefore can not be parsed ...
Definition exceptions.h:39
Contains all classes and functions of the Tag Parser library.
Definition aaccodebook.h:10
TAG_PARSER_EXPORT MatroskaElementLevel matroskaIdLevel(std::uint32_t matroskaId)
Returns the level at which elements with the specified matroskaId are supposed to occur in a Matroska...