Tag Parser  8.0.0
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
ebmlelement.cpp
Go to the documentation of this file.
1 #include "./ebmlelement.h"
2 #include "./ebmlid.h"
3 #include "./matroskacontainer.h"
4 #include "./matroskaid.h"
5 
6 #include "../exceptions.h"
7 #include "../mediafileinfo.h"
8 
9 #include <c++utilities/conversion/binaryconversion.h>
10 #include <c++utilities/conversion/types.h>
11 #include <c++utilities/io/binaryreader.h>
12 #include <c++utilities/io/binarywriter.h>
13 
14 #include <cstring>
15 #include <memory>
16 #include <sstream>
17 #include <string>
18 
19 using namespace std;
20 using namespace IoUtilities;
21 using namespace ConversionUtilities;
22 
23 namespace TagParser {
24 
// Upper bound for the number of bytes the header parsing loop may skip (one byte per retry) while
// searching for a valid EBML element header; 0x4000 = 16384 bytes.
33 uint64 EbmlElement::bytesToBeSkipped = 0x4000;
34 
38 EbmlElement::EbmlElement(MatroskaContainer &container, uint64 startOffset)
39  : GenericFileElement<EbmlElement>(container, startOffset)
40 {
41 }
42 
46 EbmlElement::EbmlElement(MatroskaContainer &container, uint64 startOffset, uint64 maxSize)
47  : GenericFileElement<EbmlElement>(container, startOffset, maxSize)
48 {
49 }
50 
54 EbmlElement::EbmlElement(EbmlElement &parent, uint64 startOffset)
55  : GenericFileElement<EbmlElement>(parent, startOffset)
56 {
57 }
58 
// Returns the context string passed along with diagnostic messages about this element. It is
// composed of the literal "parsing header of EBML element ", the result of idToString(), the
// literal " at " and startOffset().
// NOTE(review): the '%' and '+' operators are presumably the string-builder operators of
// c++utilities; how startOffset() is formatted depends on them - confirm.
62 string EbmlElement::parsingContext() const
63 {
64  return ("parsing header of EBML element " % idToString() % " at ") + startOffset();
65 }
66 
// Body of the EBML element header parser.
// NOTE(review): the signature line (listing line 70) is missing from this listing; according to
// the member index at the end of the page it is `void internalParse(Diagnostics &diag)` -
// confirm against the upstream source.
//
// Outline of what the visible code does:
//  - up to bytesToBeSkipped attempts are made; every retry advances m_startOffset by one byte
//    and shrinks m_maxSize by one byte
//  - each attempt determines the ID length and the size length from the leading zero bits of
//    the first byte of the respective field, validates them and stores ID and data size
//  - on success m_firstChild/m_nextSibling are set up (or reset) and the function returns
//  - TruncatedDataException is thrown if fewer than 2 bytes are available; InvalidDataException
//    is thrown if no attempt succeeded
71 {
72  static const string context("parsing EBML element header");
73 
74  for (uint64 skipped = 0; skipped < bytesToBeSkipped; ++m_startOffset, --m_maxSize, ++skipped) {
75  // check whether max size is valid (need at least one byte for the ID and one for the size)
76  if (maxTotalSize() < 2) {
77  diag.emplace_back(DiagLevel::Critical, argsToString("The EBML element at ", startOffset(), " is truncated or does not exist."), context);
78  throw TruncatedDataException();
79  }
80  stream().seekg(static_cast<streamoff>(startOffset()));
81 
82  // read ID
// NOTE(review): listing line 83 is missing here; presumably it declares the buffer `buf` used
// further below - confirm.
// the position of the first set bit in the first byte determines the length of the ID
84  byte beg = static_cast<byte>(stream().peek()), mask = 0x80;
85  m_idLength = 1;
86  while (m_idLength <= maximumIdLengthSupported() && (beg & mask) == 0) {
87  ++m_idLength;
88  mask >>= 1;
89  }
// NOTE(review): listing line 90 is missing here; the closing brace at listing line 96 has no
// visible opener, so presumably a check against maximumIdLengthSupported() was lost - confirm.
91  if (!skipped) {
92  diag.emplace_back(
93  DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is not supported, trying to skip."), context);
94  }
95  continue; // try again (one byte further)
96  }
97  if (m_idLength > container().maxIdLength()) {
98  if (!skipped) {
99  diag.emplace_back(DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is invalid, trying to skip."), context);
100  }
101  continue; // try again (one byte further)
102  }
// NOTE(review): listing line 103 is missing here; presumably it reads the ID bytes from the
// stream into `buf` - confirm.
104  m_id = BE::toUInt32(buf);
105 
106  // check whether this element is actually a sibling of one of its parents rather than a child
107  // (might be the case if the parent's size is unknown and hence assumed to be the max file size)
108  if (m_parent && m_parent->m_sizeUnknown) {
109  // check at which level in the hierarchy the element is supposed to occur using its ID
110  // (the only chance to find out whether the element belongs higher up in the hierarchy)
111  const MatroskaElementLevel supposedLevel = matroskaIdLevel(m_id);
112  const byte actualLevel = level();
113  if (actualLevel > supposedLevel) {
114  // the element belongs higher up in the hierarchy so find a better parent
115  if (EbmlElement *betterParent = m_parent->parent(actualLevel - static_cast<byte>(supposedLevel))) {
116  // recompute the parent size (assumption - which was rest of the available space - was wrong)
117  m_parent->m_dataSize = m_startOffset - m_parent->m_startOffset - m_parent->headerSize();
118  m_parent->m_sizeUnknown = false;
119  // detach from ...
120  if (m_parent->firstChild() == this) {
121  // ... parent (release() gives up ownership of this element without deleting it)
122  m_parent->m_firstChild.release();
123  m_parent->m_firstChild = move(m_nextSibling);
124  } else {
125  // ... previous sibling (release() gives up ownership of this element without deleting it)
126  for (EbmlElement *sibling = m_parent->firstChild(); sibling; sibling = sibling->nextSibling()) {
127  if (sibling->nextSibling() == this) {
128  sibling->m_nextSibling.release();
129  sibling->m_nextSibling = move(m_nextSibling);
130  break;
131  }
132  }
133  }
134  // insert as child of better parent (which takes over ownership of this element)
135  if (EbmlElement *previousSibling = betterParent->lastChild()) {
136  previousSibling->m_nextSibling.reset(this);
137  } else {
138  betterParent->m_firstChild.reset(this);
139  }
140  // update own reference to parent
141  m_parent = betterParent;
142  }
143  }
144  }
145 
146  // read size
147  beg = static_cast<byte>(stream().peek());
148  mask = 0x80;
149  m_sizeLength = 1;
150  if ((m_sizeUnknown = (beg == 0xFF))) {
151  // this indicates that the element size is unknown
152  // -> just assume the element takes the maximum available size
// NOTE(review): listing line 153 is missing here; presumably it assigns m_dataSize according
// to the comment above - confirm.
154  } else {
// the position of the first set bit in the first byte determines the length of the size denotation
155  while (m_sizeLength <= maximumSizeLengthSupported() && (beg & mask) == 0) {
156  ++m_sizeLength;
157  mask >>= 1;
158  }
// NOTE(review): listing line 159 is missing here; the closing brace at listing line 164 has no
// visible opener, so presumably a check against maximumSizeLengthSupported() was lost - confirm.
160  if (!skipped) {
161  diag.emplace_back(DiagLevel::Critical, "EBML size length is not supported.", parsingContext());
162  }
163  continue; // try again (one byte further)
164  }
165  if (m_sizeLength > container().maxSizeLength()) {
166  if (!skipped) {
167  diag.emplace_back(DiagLevel::Critical, "EBML size length is invalid.", parsingContext());
168  }
169  continue; // try again (one byte further)
170  }
171  // read size into buffer
172  memset(buf, 0, sizeof(DataSizeType)); // reset buffer
// NOTE(review): listing line 173 is missing here; presumably it reads the size bytes from the
// stream into the end of `buf` - confirm.
174  // xor the first byte in buffer which has been read from the file with mask
//   (clears the length marker bit so only the size value remains)
175  *(buf + (maximumSizeLengthSupported() - m_sizeLength)) ^= mask;
176  m_dataSize = ConversionUtilities::BE::toUInt64(buf);
177  // check if element is truncated
178  if (totalSize() > maxTotalSize()) {
179  if (m_idLength + m_sizeLength > maxTotalSize()) { // header truncated
180  if (!skipped) {
181  diag.emplace_back(DiagLevel::Critical, "EBML header seems to be truncated.", parsingContext());
182  }
183  continue; // try again (one byte further)
184  } else { // data truncated
185  diag.emplace_back(DiagLevel::Warning, "Data of EBML element seems to be truncated; unable to parse siblings of that element.",
186  parsingContext());
187  m_dataSize = maxTotalSize() - m_idLength - m_sizeLength; // using max size instead
188  }
189  }
190  }
191 
192  // check if there's a first child (created unparsed at the offset reported by firstChildOffset())
193  const uint64 firstChildOffset = this->firstChildOffset();
194  if (firstChildOffset && firstChildOffset < totalSize()) {
195  m_firstChild.reset(new EbmlElement(static_cast<EbmlElement &>(*this), startOffset() + firstChildOffset));
196  } else {
197  m_firstChild.reset();
198  }
199 
200  // check if there's a sibling (space left after this element within the maximum total size)
201  if (totalSize() < maxTotalSize()) {
202  if (parent()) {
203  m_nextSibling.reset(new EbmlElement(*(parent()), startOffset() + totalSize()));
204  } else {
// NOTE(review): listing line 205 is missing here; presumably it creates the next sibling for
// an element without parent - confirm.
206  }
207  } else {
208  m_nextSibling.reset();
209  }
210 
211  // no critical errors occurred
212  // -> add a warning if bytes have been skipped
213  if (skipped) {
214  diag.emplace_back(DiagLevel::Warning, argsToString(skipped, " bytes have been skipped"), parsingContext());
215  }
216  // -> don't need another try, return here
217  return;
218  }
219 
220  // critical errors occurred and skipping some bytes wasn't successful
221  throw InvalidDataException();
222 }
223 
228 {
229  stream().seekg(static_cast<streamoff>(dataOffset()));
230  return reader().readString(dataSize());
231 }
232 
240 {
241  constexpr DataSizeType maxBytesToRead = 8;
242  char buff[maxBytesToRead] = { 0 };
243  const auto bytesToSkip = maxBytesToRead - min(dataSize(), maxBytesToRead);
244  stream().seekg(static_cast<streamoff>(dataOffset()), ios_base::beg);
245  stream().read(buff + bytesToSkip, static_cast<streamoff>(sizeof(buff) - bytesToSkip));
246  return BE::toUInt64(buff);
247 }
248 
254 {
255  stream().seekg(static_cast<streamoff>(dataOffset()));
256  switch (dataSize()) {
257  case sizeof(float32):
258  return static_cast<float64>(reader().readFloat32BE());
259  case sizeof(float64):
260  return reader().readFloat64BE();
261  default:
262  return 0.0;
263  }
264 }
265 
271 {
272  if (id <= 0xFF) {
273  return 1;
274  } else if (id <= 0x7FFF) {
275  return 2;
276  } else if (id <= 0x3FFFFF) {
277  return 3;
278  } else if (id <= 0x1FFFFFFF) {
279  return 4;
280  } else {
281  throw InvalidDataException();
282  }
283 }
284 
290 {
291  if (size < 126) {
292  return 1;
293  } else if (size <= 16382ul) {
294  return 2;
295  } else if (size <= 2097150ul) {
296  return 3;
297  } else if (size <= 268435454ul) {
298  return 4;
299  } else if (size <= 34359738366ul) {
300  return 5;
301  } else if (size <= 4398046511102ul) {
302  return 6;
303  } else if (size <= 562949953421310ul) {
304  return 7;
305  } else if (size <= 72057594037927934ul) {
306  return 8;
307  } else {
308  throw InvalidDataException();
309  }
310 }
311 
319 {
320  if (id <= 0xFF) {
321  *buff = static_cast<char>(id);
322  return 1;
323  } else if (id <= 0x7FFF) {
324  BE::getBytes(static_cast<uint16>(id), buff);
325  return 2;
326  } else if (id <= 0x3FFFFF) {
327  BE::getBytes(static_cast<uint32>(id << 0x8), buff);
328  return 3;
329  } else if (id <= 0x1FFFFFFF) {
330  BE::getBytes(static_cast<uint32>(id), buff);
331  return 4;
332  } else {
333  throw InvalidDataException();
334  }
335 }
336 
344 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff)
345 {
346  if (size < 126) {
347  *buff = static_cast<char>(size | 0x80);
348  return 1;
349  } else if (size <= 16382ul) {
350  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
351  return 2;
352  } else if (size <= 2097150ul) {
353  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
354  return 3;
355  } else if (size <= 268435454ul) {
356  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
357  return 4;
358  } else if (size <= 34359738366ul) {
359  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
360  return 5;
361  } else if (size <= 4398046511102ul) {
362  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
363  return 6;
364  } else if (size <= 562949953421310ul) {
365  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
366  return 7;
367  } else if (size <= 72057594037927934ul) {
368  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
369  return 8;
370  }
371  throw InvalidDataException();
372 }
373 
382 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff, byte minBytes)
383 {
384  if (minBytes <= 1 && size < 126) {
385  *buff = static_cast<char>(size | 0x80);
386  return 1;
387  } else if (minBytes <= 2 && size <= 16382ul) {
388  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
389  return 2;
390  } else if (minBytes <= 3 && size <= 2097150ul) {
391  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
392  return 3;
393  } else if (minBytes <= 4 && size <= 268435454ul) {
394  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
395  return 4;
396  } else if (minBytes <= 5 && size <= 34359738366ul) {
397  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
398  return 5;
399  } else if (minBytes <= 6 && size <= 4398046511102ul) {
400  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
401  return 6;
402  } else if (minBytes <= 7 && size <= 562949953421310ul) {
403  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
404  return 7;
405  } else if (minBytes <= 8 && size <= 72057594037927934ul) {
406  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
407  return 8;
408  }
409  throw InvalidDataException();
410 }
411 
417 {
418  if (integer <= 0xFFul) {
419  return 1;
420  } else if (integer <= 0xFFFFul) {
421  return 2;
422  } else if (integer <= 0xFFFFFFul) {
423  return 3;
424  } else if (integer <= 0xFFFFFFFFul) {
425  return 4;
426  } else if (integer <= 0xFFFFFFFFFFul) {
427  return 5;
428  } else if (integer <= 0xFFFFFFFFFFFFul) {
429  return 6;
430  } else if (integer <= 0xFFFFFFFFFFFFFFul) {
431  return 7;
432  } else {
433  return 8;
434  }
435 }
436 
441 byte EbmlElement::makeUInteger(uint64 value, char *buff)
442 {
443  if (value <= 0xFFul) {
444  *buff = static_cast<char>(value);
445  return 1;
446  } else if (value <= 0xFFFFul) {
447  BE::getBytes(static_cast<uint16>(value), buff);
448  return 2;
449  } else if (value <= 0xFFFFFFul) {
450  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
451  return 3;
452  } else if (value <= 0xFFFFFFFFul) {
453  BE::getBytes(static_cast<uint32>(value), buff);
454  return 4;
455  } else if (value <= 0xFFFFFFFFFFul) {
456  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
457  return 5;
458  } else if (value <= 0xFFFFFFFFFFFFul) {
459  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
460  return 6;
461  } else if (value <= 0xFFFFFFFFFFFFFFul) {
462  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
463  return 7;
464  } else {
465  BE::getBytes(static_cast<uint64>(value), buff);
466  return 8;
467  }
468 }
469 
479 byte EbmlElement::makeUInteger(uint64 value, char *buff, byte minBytes)
480 {
481  if (minBytes <= 1 && value <= 0xFFul) {
482  *buff = static_cast<char>(value);
483  return 1;
484  } else if (minBytes <= 2 && value <= 0xFFFFul) {
485  BE::getBytes(static_cast<uint16>(value), buff);
486  return 2;
487  } else if (minBytes <= 3 && value <= 0xFFFFFFul) {
488  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
489  return 3;
490  } else if (minBytes <= 4 && value <= 0xFFFFFFFFul) {
491  BE::getBytes(static_cast<uint32>(value), buff);
492  return 4;
493  } else if (minBytes <= 5 && value <= 0xFFFFFFFFFFul) {
494  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
495  return 5;
496  } else if (minBytes <= 6 && value <= 0xFFFFFFFFFFFFul) {
497  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
498  return 6;
499  } else if (minBytes <= 7 && value <= 0xFFFFFFFFFFFFFFul) {
500  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
501  return 7;
502  } else {
503  BE::getBytes(static_cast<uint64>(value), buff);
504  return 8;
505  }
506 }
507 
514 void EbmlElement::makeSimpleElement(ostream &stream, IdentifierType id, uint64 content)
515 {
516  char buff1[8];
517  char buff2[8];
518  byte sizeLength = EbmlElement::makeId(id, buff1);
519  stream.write(buff1, sizeLength);
520  byte elementSize = EbmlElement::makeUInteger(content, buff2);
521  sizeLength = EbmlElement::makeSizeDenotation(elementSize, buff1);
522  stream.write(buff1, sizeLength);
523  stream.write(buff2, elementSize);
524 }
525 
532 void EbmlElement::makeSimpleElement(std::ostream &stream, GenericFileElement::IdentifierType id, const std::string &content)
533 {
534  char buff1[8];
535  byte sizeLength = EbmlElement::makeId(id, buff1);
536  stream.write(buff1, sizeLength);
537  sizeLength = EbmlElement::makeSizeDenotation(content.size(), buff1);
538  stream.write(buff1, sizeLength);
539  stream.write(content.c_str(), content.size());
540 }
541 
549 void EbmlElement::makeSimpleElement(ostream &stream, GenericFileElement::IdentifierType id, const char *data, std::size_t dataSize)
550 {
551  char buff1[8];
552  byte sizeLength = EbmlElement::makeId(id, buff1);
553  stream.write(buff1, sizeLength);
555  stream.write(buff1, sizeLength);
556  stream.write(data, dataSize);
557 }
558 
559 } // namespace TagParser
TAG_PARSER_EXPORT MatroskaElementLevel matroskaIdLevel(uint32 matroskaId)
Returns the level at which elements with the specified matroskaId are supposed to occur in a Matroska...
Definition: matroskaid.cpp:523
static byte makeId(IdentifierType id, char *buff)
Stores the specified id in the specified buffer which must be at least 8 bytes long.
The exception that is thrown when the data to be parsed is truncated and therefore can not be parsed ...
Definition: exceptions.h:32
static void makeSimpleElement(std::ostream &stream, IdentifierType id, uint64 content)
Makes a simple EBML element.
std::unique_ptr< ImplementationType > m_firstChild
Implementation of GenericContainer<MediaFileInfo, MatroskaTag, MatroskaTrack, EbmlElement>.
uint32 sizeLength() const
Returns the length of the size denotation of the element in byte.
static byte calculateSizeDenotationLength(uint64 size)
Returns the length of the size denotation for the specified size in byte.
static constexpr uint32 maximumIdLengthSupported()
Returns the maximum id length supported by the class in byte.
std::string readString()
Reads the content of the element as string.
std::unique_ptr< ImplementationType > m_nextSibling
DataSizeType dataSize() const
Returns the data size of the element in byte.
STL namespace.
uint64 startOffset() const
Returns the start offset in the related stream.
static byte calculateUIntegerLength(uint64 integer)
Returns the length of the specified unsigned integer in byte.
The EbmlElement class helps to parse EBML files such as Matroska files.
Definition: ebmlelement.h:31
uint64 totalSize() const
Returns the total size of the element.
The GenericFileElement class helps to parse binary files which consist of an arboreal element structure.
std::string idToString() const
Converts the specified EBML ID to a printable string.
Definition: ebmlelement.h:71
uint64 dataOffset() const
Returns the data offset of the element in the related stream.
IoUtilities::BinaryReader & reader()
Returns the related BinaryReader.
uint64 firstChildOffset() const
Returns the offset of the first child of the element.
Definition: ebmlelement.h:158
MatroskaElementLevel
Definition: matroskaid.h:406
Contains utility classes helping to read and write streams.
ContainerType & container()
Returns the related container.
static byte makeUInteger(uint64 value, char *buff)
Writes value to buff.
static byte makeSizeDenotation(uint64 size, char *buff)
Makes the size denotation for the specified size and stores it to buff.
uint64 readUInteger()
Reads the content of the element as unsigned integer.
uint32 headerSize() const
Returns the header size of the element in byte.
ImplementationType * lastChild()
Returns the last child of the element.
void internalParse(Diagnostics &diag)
Parses the EBML element.
Definition: ebmlelement.cpp:70
uint64 maxTotalSize() const
Returns maximum total size.
static byte calculateIdLength(IdentifierType id)
Returns the length of the specified id in byte.
The exception that is thrown when the data to be parsed or to be made seems invalid and therefore can...
Definition: exceptions.h:25
float64 readFloat()
Reads the content of the element as float.
static uint64 bytesToBeSkipped
Specifies the number of bytes to be skipped till a valid EBML element is found in the stream...
Definition: ebmlelement.h:56
std::iostream & stream()
Returns the related stream.
typename FileElementTraits< ImplementationType >::DataSizeType DataSizeType
Specifies the type used to store data sizes.
typename FileElementTraits< ImplementationType >::IdentifierType IdentifierType
Specifies the type used to store identifiers.
static constexpr uint32 maximumSizeLengthSupported()
Returns the maximum size length supported by the class in byte.
const IdentifierType & id() const
Returns the element ID.
Contains all classes and functions of the TagInfo library.
Definition: aaccodebook.h:9
byte level() const
Returns how deep the element is nested (0 for top-level elements, 1 for children of top-level element...
The Diagnostics class is a container for DiagMessage.
Definition: diagnostics.h:156
ImplementationType * parent()
Returns the parent of the element.
EbmlElement(MatroskaContainer &container, uint64 startOffset)
Constructs a new top level element with the specified container at the specified startOffset.
Definition: ebmlelement.cpp:38