Tag Parser  7.0.0
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
ebmlelement.cpp
Go to the documentation of this file.
1 #include "./ebmlelement.h"
2 #include "./ebmlid.h"
3 #include "./matroskacontainer.h"
4 #include "./matroskaid.h"
5 
6 #include "../exceptions.h"
7 #include "../mediafileinfo.h"
8 
9 #include <c++utilities/conversion/binaryconversion.h>
10 #include <c++utilities/conversion/types.h>
11 #include <c++utilities/io/binaryreader.h>
12 #include <c++utilities/io/binarywriter.h>
13 
14 #include <cstring>
15 #include <memory>
16 #include <sstream>
17 #include <string>
18 
19 using namespace std;
20 using namespace IoUtilities;
21 using namespace ConversionUtilities;
22 
23 namespace TagParser {
24 
// Upper bound for the number of bytes the header parser skips (one byte per retry) while
// looking for a parsable EBML element; the initial value is 0x4000.
33 uint64 EbmlElement::bytesToBeSkipped = 0x4000;
34 
38 EbmlElement::EbmlElement(MatroskaContainer &container, uint64 startOffset)
39  : GenericFileElement<EbmlElement>(container, startOffset)
40 {
41 }
42 
46 EbmlElement::EbmlElement(MatroskaContainer &container, uint64 startOffset, uint64 maxSize)
47  : GenericFileElement<EbmlElement>(container, startOffset, maxSize)
48 {
49 }
50 
54 EbmlElement::EbmlElement(EbmlElement &parent, uint64 startOffset)
55  : GenericFileElement<EbmlElement>(parent, startOffset)
56 {
57 }
58 
62 string EbmlElement::parsingContext() const
63 {
64  return ("parsing header of EBML element " % idToString() % " at ") + startOffset();
65 }
66 
// Body of the element header parser. The member index of this listing gives the signature as
// `void internalParse(Diagnostics &diag)` defined at listing line 70; that line is not part of
// this extract.
//
// The loop makes one parsing attempt per start offset. When an attempt fails, the start offset is
// advanced by one byte (and the max size reduced by one) and parsing is retried, at most
// bytesToBeSkipped times. Critical diagnostics for a failed attempt are only added on the first
// attempt (`!skipped`).
//
// Throws TruncatedDataException if fewer than 2 bytes are available and InvalidDataException if
// no attempt succeeded.
//
// NOTE(review): listing lines 83, 90, 103, 152, 158, 172 and 204 are absent from this extract, so
// a few statements below (e.g. the declaration of `buf` and the opening of some `if` blocks) are
// not visible here.
71 {
72  static const string context("parsing EBML element header");
73 
74  for (uint64 skipped = 0; skipped < bytesToBeSkipped; ++m_startOffset, --m_maxSize, ++skipped) {
75  // check whether max size is valid
76  if (maxTotalSize() < 2) {
77  diag.emplace_back(DiagLevel::Critical, argsToString("The EBML element at ", startOffset(), " is truncated or does not exist."), context);
78  throw TruncatedDataException();
79  }
80  stream().seekg(startOffset());
81 
82  // read ID
// the number of leading zero bits in the first byte determines the length of the ID
84  byte beg = static_cast<byte>(stream().peek()), mask = 0x80;
85  m_idLength = 1;
86  while (m_idLength <= maximumIdLengthSupported() && (beg & mask) == 0) {
87  ++m_idLength;
88  mask >>= 1;
89  }
// NOTE(review): listing line 90 (presumably the condition opening the block below) is missing here
91  if (!skipped) {
92  diag.emplace_back(
93  DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is not supported, trying to skip."), context);
94  }
95  continue; // try again
96  }
97  if (m_idLength > container().maxIdLength()) {
98  if (!skipped) {
99  diag.emplace_back(DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is invalid, trying to skip."), context);
100  }
101  continue; // try again
102  }
// NOTE(review): listing line 103 (presumably reading the ID bytes into buf) is missing here
104  m_id = BE::toUInt32(buf);
105 
106  // check whether this element is actually a sibling of one of its parents rather than a child
107  // (might be the case if the parent's size is unknown and hence assumed to be the max file size)
108  if (m_parent && m_parent->m_sizeUnknown) {
109  // check at which level in the hierarchy the element is supposed to occur using its ID
110  // (the only chance to find out whether the element belongs higher up in the hierarchy)
111  const MatroskaElementLevel supposedLevel = matroskaIdLevel(m_id);
112  const byte actualLevel = level();
113  if (actualLevel > supposedLevel) {
114  // the element belongs higher up in the hierarchy so find a better parent
115  if (EbmlElement *betterParent = m_parent->parent(actualLevel - static_cast<byte>(supposedLevel))) {
116  // recompute the parent size (assumption - which was rest of the available space - was wrong)
117  m_parent->m_dataSize = m_startOffset - m_parent->m_startOffset - m_parent->headerSize();
118  m_parent->m_sizeUnknown = false;
119  // detach from ...
// (release() gives up ownership of this element without deleting it so it can be re-attached below)
120  if (m_parent->firstChild() == this) {
121  // ... parent
122  m_parent->m_firstChild.release();
123  m_parent->m_firstChild = move(m_nextSibling);
124  } else {
125  // ... previous sibling
126  for (EbmlElement *sibling = m_parent->firstChild(); sibling; sibling = sibling->nextSibling()) {
127  if (sibling->nextSibling() == this) {
128  sibling->m_nextSibling.release();
129  sibling->m_nextSibling = move(m_nextSibling);
130  break;
131  }
132  }
133  }
134  // insert as child of better parent
135  if (EbmlElement *previousSibling = betterParent->lastChild()) {
136  previousSibling->m_nextSibling.reset(this);
137  } else {
138  betterParent->m_firstChild.reset(this);
139  }
140  // update own reference to parent
141  m_parent = betterParent;
142  }
143  }
144  }
145 
146  // read size
147  beg = static_cast<byte>(stream().peek()), mask = 0x80;
148  m_sizeLength = 1;
149  if ((m_sizeUnknown = (beg == 0xFF))) {
150  // this indicates that the element size is unknown
151  // -> just assume the element takes the maximum available size
// NOTE(review): listing line 152 (presumably the assignment of the assumed data size) is missing here
153  } else {
154  while (m_sizeLength <= maximumSizeLengthSupported() && (beg & mask) == 0) {
155  ++m_sizeLength;
156  mask >>= 1;
157  }
// NOTE(review): listing line 158 (presumably the condition opening the block below) is missing here
159  if (!skipped) {
160  diag.emplace_back(DiagLevel::Critical, "EBML size length is not supported.", parsingContext());
161  }
162  continue; // try again
163  }
164  if (m_sizeLength > container().maxSizeLength()) {
165  if (!skipped) {
166  diag.emplace_back(DiagLevel::Critical, "EBML size length is invalid.", parsingContext());
167  }
168  continue; // try again
169  }
170  // read size into buffer
171  memset(buf, 0, sizeof(DataSizeType)); // reset buffer
// NOTE(review): listing line 172 (presumably reading the size bytes into buf) is missing here
173  // xor the first byte in buffer which has been read from the file with mask
// (mask is the length marker bit found by the loop above; clearing it leaves only the size value)
174  *(buf + (maximumSizeLengthSupported() - m_sizeLength)) ^= mask;
175  m_dataSize = ConversionUtilities::BE::toUInt64(buf);
176  // check if element is truncated
177  if (totalSize() > maxTotalSize()) {
178  if (m_idLength + m_sizeLength > maxTotalSize()) { // header truncated
179  if (!skipped) {
180  diag.emplace_back(DiagLevel::Critical, "EBML header seems to be truncated.", parsingContext());
181  }
182  continue; // try again
183  } else { // data truncated
184  diag.emplace_back(DiagLevel::Warning, "Data of EBML element seems to be truncated; unable to parse siblings of that element.",
185  parsingContext());
186  m_dataSize = maxTotalSize() - m_idLength - m_sizeLength; // using max size instead
187  }
188  }
189  }
190 
191  // check if there's a first child
192  const uint64 firstChildOffset = this->firstChildOffset();
193  if (firstChildOffset && firstChildOffset < totalSize()) {
194  m_firstChild.reset(new EbmlElement(static_cast<EbmlElement &>(*this), startOffset() + firstChildOffset));
195  } else {
196  m_firstChild.reset();
197  }
198 
199  // check if there's a sibling
200  if (totalSize() < maxTotalSize()) {
201  if (parent()) {
202  m_nextSibling.reset(new EbmlElement(*(parent()), startOffset() + totalSize()));
203  } else {
// NOTE(review): listing line 204 (the sibling creation for elements without parent) is missing here
205  }
206  } else {
207  m_nextSibling.reset();
208  }
209 
210  // no critical errors occurred
211  // -> add a warning if bytes have been skipped
212  if (skipped) {
213  diag.emplace_back(DiagLevel::Warning, argsToString(skipped, " bytes have been skipped"), parsingContext());
214  }
215  // -> don't need another try, return here
216  return;
217  }
218 
219  // critical errors occurred and skipping some bytes wasn't successful
220  throw InvalidDataException();
221 }
222 
// Body of readString() according to the member index of this listing (the signature line is
// absent from this extract): positions the stream at the element's data offset and reads
// dataSize() bytes as string.
227 {
228  stream().seekg(dataOffset());
229  return reader().readString(dataSize());
230 }
231 
// Body of readUInteger() according to the member index of this listing (the signature line is
// absent from this extract): reads the element's data as big-endian unsigned integer.
//
// The data is read into the tail of a zeroed 8-byte buffer, so data shorter than 8 bytes ends up
// zero-extended. If the computed offset is negative, it is clamped to 0 and 8 bytes are read.
239 {
240  char buff[sizeof(uint64)] = { 0 };
// NOTE(review): if dataSize() returns an unsigned 64-bit type, this subtraction is carried out
// unsigned and then narrowed to int; for very large data sizes the narrowed value might come out
// positive and greater than sizeof(buff), which the check below does not catch - confirm.
241  int i = static_cast<int>(sizeof(buff)) - dataSize();
242  if (i < 0) {
243  i = 0;
244  }
245  stream().seekg(dataOffset(), ios_base::beg);
246  stream().read(buff + i, sizeof(buff) - i);
247  return BE::toUInt64(buff);
248 }
249 
// Body of readFloat() according to the member index of this listing (the signature line is
// absent from this extract): reads the element's data as big-endian float.
//
// Only data sizes equal to sizeof(float32) and sizeof(float64) are read; any other data size
// yields 0.0 without reading anything.
255 {
256  stream().seekg(dataOffset());
257  switch (dataSize()) {
258  case sizeof(float32):
259  return reader().readFloat32BE();
260  case sizeof(float64):
261  return reader().readFloat64BE();
262  default:
263  return 0.0;
264  }
265 }
266 
// Body of calculateIdLength(id) according to the member index of this listing (the signature
// line is absent from this extract): returns the number of bytes (1 to 4) of the specified id.
//
// The thresholds are compared against the id as a whole.
// Throws InvalidDataException if the id is greater than 0x1FFFFFFF.
272 {
273  if (id <= 0xFF) {
274  return 1;
275  } else if (id <= 0x7FFF) {
276  return 2;
277  } else if (id <= 0x3FFFFF) {
278  return 3;
279  } else if (id <= 0x1FFFFFFF) {
280  return 4;
281  } else {
282  throw InvalidDataException();
283  }
284 }
285 
// Body of calculateSizeDenotationLength(size) according to the member index of this listing (the
// signature line is absent from this extract): returns the number of bytes (1 to 8) the size
// denotation for the specified size takes.
//
// Apart from the first one, each limit equals 2^(7 * length) - 2; the one-byte case is taken for
// sizes below 126. The same limits are used by the makeSizeDenotation() overloads in this file.
// Throws InvalidDataException if the size is greater than 72057594037927934.
291 {
292  if (size < 126) {
293  return 1;
294  } else if (size <= 16382ul) {
295  return 2;
296  } else if (size <= 2097150ul) {
297  return 3;
298  } else if (size <= 268435454ul) {
299  return 4;
300  } else if (size <= 34359738366ul) {
301  return 5;
302  } else if (size <= 4398046511102ul) {
303  return 6;
304  } else if (size <= 562949953421310ul) {
305  return 7;
306  } else if (size <= 72057594037927934ul) {
307  return 8;
308  } else {
309  throw InvalidDataException();
310  }
311 }
312 
// Body of makeId(id, buff) according to the member index of this listing (the signature line is
// absent from this extract): stores the specified id in buff and returns the number of bytes
// (1 to 4) the id takes. The member index states that buff must be at least 8 bytes long.
//
// Throws InvalidDataException if the id is greater than 0x1FFFFFFF.
320 {
321  if (id <= 0xFF) {
322  *buff = static_cast<byte>(id);
323  return 1;
324  } else if (id <= 0x7FFF) {
325  BE::getBytes(static_cast<uint16>(id), buff);
326  return 2;
327  } else if (id <= 0x3FFFFF) {
// a 3 byte id is shifted by one byte and converted as 32-bit value
328  BE::getBytes(static_cast<uint32>(id << 0x8), buff);
329  return 3;
330  } else if (id <= 0x1FFFFFFF) {
331  BE::getBytes(static_cast<uint32>(id), buff);
332  return 4;
333  } else {
334  throw InvalidDataException();
335  }
336 }
337 
345 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff)
346 {
347  if (size < 126) {
348  *buff = static_cast<byte>(size | 0x80);
349  return 1;
350  } else if (size <= 16382ul) {
351  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
352  return 2;
353  } else if (size <= 2097150ul) {
354  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
355  return 3;
356  } else if (size <= 268435454ul) {
357  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
358  return 4;
359  } else if (size <= 34359738366ul) {
360  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
361  return 5;
362  } else if (size <= 4398046511102ul) {
363  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
364  return 6;
365  } else if (size <= 562949953421310ul) {
366  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
367  return 7;
368  } else if (size <= 72057594037927934ul) {
369  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
370  return 8;
371  }
372  throw InvalidDataException();
373 }
374 
383 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff, byte minBytes)
384 {
385  if (minBytes <= 1 && size < 126) {
386  *buff = static_cast<byte>(size | 0x80);
387  return 1;
388  } else if (minBytes <= 2 && size <= 16382ul) {
389  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
390  return 2;
391  } else if (minBytes <= 3 && size <= 2097150ul) {
392  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
393  return 3;
394  } else if (minBytes <= 4 && size <= 268435454ul) {
395  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
396  return 4;
397  } else if (minBytes <= 5 && size <= 34359738366ul) {
398  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
399  return 5;
400  } else if (minBytes <= 6 && size <= 4398046511102ul) {
401  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
402  return 6;
403  } else if (minBytes <= 7 && size <= 562949953421310ul) {
404  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
405  return 7;
406  } else if (minBytes <= 8 && size <= 72057594037927934ul) {
407  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
408  return 8;
409  }
410  throw InvalidDataException();
411 }
412 
// Body of calculateUIntegerLength(integer) according to the member index of this listing (the
// signature line is absent from this extract): returns the number of bytes (1 to 8) required to
// hold the specified unsigned integer; 0 takes one byte.
418 {
419  if (integer <= 0xFFul) {
420  return 1;
421  } else if (integer <= 0xFFFFul) {
422  return 2;
423  } else if (integer <= 0xFFFFFFul) {
424  return 3;
425  } else if (integer <= 0xFFFFFFFFul) {
426  return 4;
427  } else if (integer <= 0xFFFFFFFFFFul) {
428  return 5;
429  } else if (integer <= 0xFFFFFFFFFFFFul) {
430  return 6;
431  } else if (integer <= 0xFFFFFFFFFFFFFFul) {
432  return 7;
433  } else {
434  return 8;
435  }
436 }
437 
442 byte EbmlElement::makeUInteger(uint64 value, char *buff)
443 {
444  if (value <= 0xFFul) {
445  *buff = static_cast<char>(value);
446  return 1;
447  } else if (value <= 0xFFFFul) {
448  BE::getBytes(static_cast<uint16>(value), buff);
449  return 2;
450  } else if (value <= 0xFFFFFFul) {
451  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
452  return 3;
453  } else if (value <= 0xFFFFFFFFul) {
454  BE::getBytes(static_cast<uint32>(value), buff);
455  return 4;
456  } else if (value <= 0xFFFFFFFFFFul) {
457  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
458  return 5;
459  } else if (value <= 0xFFFFFFFFFFFFul) {
460  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
461  return 6;
462  } else if (value <= 0xFFFFFFFFFFFFFFul) {
463  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
464  return 7;
465  } else {
466  BE::getBytes(static_cast<uint64>(value), buff);
467  return 8;
468  }
469 }
470 
478 byte EbmlElement::makeUInteger(uint64 value, char *buff, byte minBytes)
479 {
480  if (minBytes <= 1 && value <= 0xFFul) {
481  *buff = static_cast<char>(value);
482  return 1;
483  } else if (minBytes <= 2 && value <= 0xFFFFul) {
484  BE::getBytes(static_cast<uint16>(value), buff);
485  return 2;
486  } else if (minBytes <= 3 && value <= 0xFFFFFFul) {
487  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
488  return 3;
489  } else if (minBytes <= 4 && value <= 0xFFFFFFFFul) {
490  BE::getBytes(static_cast<uint32>(value), buff);
491  return 4;
492  } else if (minBytes <= 5 && value <= 0xFFFFFFFFFFul) {
493  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
494  return 5;
495  } else if (minBytes <= 6 && value <= 0xFFFFFFFFFFFFul) {
496  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
497  return 6;
498  } else if (minBytes <= 7 && value <= 0xFFFFFFFFFFFFFFul) {
499  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
500  return 7;
501  } else {
502  BE::getBytes(static_cast<uint64>(value), buff);
503  return 8;
504  }
505 }
506 
513 void EbmlElement::makeSimpleElement(ostream &stream, IdentifierType id, uint64 content)
514 {
515  char buff1[8];
516  char buff2[8];
517  byte sizeLength = EbmlElement::makeId(id, buff1);
518  stream.write(buff1, sizeLength);
519  byte elementSize = EbmlElement::makeUInteger(content, buff2);
520  sizeLength = EbmlElement::makeSizeDenotation(elementSize, buff1);
521  stream.write(buff1, sizeLength);
522  stream.write(buff2, elementSize);
523 }
524 
531 void EbmlElement::makeSimpleElement(std::ostream &stream, GenericFileElement::IdentifierType id, const std::string &content)
532 {
533  char buff1[8];
534  byte sizeLength = EbmlElement::makeId(id, buff1);
535  stream.write(buff1, sizeLength);
536  sizeLength = EbmlElement::makeSizeDenotation(content.size(), buff1);
537  stream.write(buff1, sizeLength);
538  stream.write(content.c_str(), content.size());
539 }
540 
548 void EbmlElement::makeSimpleElement(ostream &stream, GenericFileElement::IdentifierType id, const char *data, std::size_t dataSize)
549 {
550  char buff1[8];
551  byte sizeLength = EbmlElement::makeId(id, buff1);
552  stream.write(buff1, sizeLength);
554  stream.write(buff1, sizeLength);
555  stream.write(data, dataSize);
556 }
557 
558 } // namespace TagParser
TAG_PARSER_EXPORT MatroskaElementLevel matroskaIdLevel(uint32 matroskaId)
Returns the level at which elements with the specified matroskaId are supposed to occur in a Matroska...
Definition: matroskaid.cpp:523
static byte makeId(IdentifierType id, char *buff)
Stores the specified id in the specified buffer which must be at least 8 bytes long.
static void makeSimpleElement(std::ostream &stream, IdentifierType id, uint64 content)
Makes a simple EBML element.
std::unique_ptr< ImplementationType > m_firstChild
uint32 sizeLength() const
Returns the length of the size denotation of the element in bytes.
static byte calculateSizeDenotationLength(uint64 size)
Returns the length of the size denotation for the specified size in bytes.
static constexpr uint32 maximumIdLengthSupported()
Returns the maximum ID length supported by the class in bytes.
std::string readString()
Reads the content of the element as string.
std::unique_ptr< ImplementationType > m_nextSibling
DataSizeType dataSize() const
Returns the data size of the element in bytes.
STL namespace.
uint64 startOffset() const
Returns the start offset in the related stream.
static byte calculateUIntegerLength(uint64 integer)
Returns the length of the specified unsigned integer in bytes.
uint64 totalSize() const
Returns the total size of the element.
std::string idToString() const
Converts the specified EBML ID to a printable string.
Definition: ebmlelement.h:71
uint64 dataOffset() const
Returns the data offset of the element in the related stream.
IoUtilities::BinaryReader & reader()
Returns the related BinaryReader.
uint64 firstChildOffset() const
Returns the offset of the first child of the element.
Definition: ebmlelement.h:158
MatroskaElementLevel
Definition: matroskaid.h:406
Contains utility classes helping to read and write streams.
ContainerType & container()
Returns the related container.
static byte makeUInteger(uint64 value, char *buff)
Writes value to buff.
static byte makeSizeDenotation(uint64 size, char *buff)
Makes the size denotation for the specified size and stores it to buff.
uint64 readUInteger()
Reads the content of the element as unsigned integer.
uint32 headerSize() const
Returns the header size of the element in bytes.
ImplementationType * lastChild()
Returns the last child of the element.
void internalParse(Diagnostics &diag)
Parses the EBML element.
Definition: ebmlelement.cpp:70
uint64 maxTotalSize() const
Returns maximum total size.
static byte calculateIdLength(IdentifierType id)
Returns the length of the specified ID in bytes.
float64 readFloat()
Reads the content of the element as float.
static uint64 bytesToBeSkipped
Specifies the number of bytes to be skipped till a valid EBML element is found in the stream...
Definition: ebmlelement.h:56
std::iostream & stream()
Returns the related stream.
static constexpr uint32 maximumSizeLengthSupported()
Returns the maximum size length supported by the class in bytes.
const IdentifierType & id() const
Returns the element ID.
FileElementTraits< ImplementationType >::IdentifierType IdentifierType
Specifies the type used to store identifiers.
byte level() const
Returns how deep the element is nested (0 for top-level elements, 1 for children of top-level element...
FileElementTraits< ImplementationType >::DataSizeType DataSizeType
Specifies the type used to store data sizes.
ImplementationType * parent()
Returns the parent of the element.
EbmlElement(MatroskaContainer &container, uint64 startOffset)
Constructs a new top level element with the specified container at the specified startOffset.
Definition: ebmlelement.cpp:38