Tag Parser  6.5.1
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
ebmlelement.cpp
Go to the documentation of this file.
1 #include "./ebmlelement.h"
2 #include "./ebmlid.h"
3 #include "./matroskacontainer.h"
4 #include "./matroskaid.h"
5 
6 #include "../mediafileinfo.h"
7 #include "../exceptions.h"
8 
9 #include <c++utilities/conversion/types.h>
10 #include <c++utilities/conversion/binaryconversion.h>
11 #include <c++utilities/io/binaryreader.h>
12 #include <c++utilities/io/binarywriter.h>
13 
14 #include <string>
15 #include <sstream>
16 #include <cstring>
17 #include <memory>
18 
19 using namespace std;
20 using namespace IoUtilities;
21 using namespace ConversionUtilities;
22 
23 
24 namespace Media {
25 
34 uint64 EbmlElement::bytesToBeSkipped = 0x4000;
35 
39 EbmlElement::EbmlElement(MatroskaContainer &container, uint64 startOffset) :
40  GenericFileElement<EbmlElement>(container, startOffset)
41 {}
42 
46 EbmlElement::EbmlElement(MatroskaContainer &container, uint64 startOffset, uint64 maxSize) :
47  GenericFileElement<EbmlElement>(container, startOffset, maxSize)
48 {}
49 
53 EbmlElement::EbmlElement(EbmlElement &parent, uint64 startOffset) :
54  GenericFileElement<EbmlElement>(parent, startOffset)
55 {}
56 
60 string EbmlElement::parsingContext() const
61 {
62  return ("parsing header of EBML element " % idToString() % " at ") + startOffset();
63 }
64 
69 {
71  static const string context("parsing EBML element header");
72 
73  for(uint64 skipped = 0; skipped < bytesToBeSkipped; ++m_startOffset, --m_maxSize, ++skipped) {
74  // check whether max size is valid
75  if(maxTotalSize() < 2) {
76  addNotification(NotificationType::Critical, argsToString("The EBML element at ", startOffset(), " is truncated or does not exist."), context);
77  throw TruncatedDataException();
78  }
79  stream().seekg(startOffset());
80  // read ID
82  byte beg = static_cast<byte>(stream().peek()), mask = 0x80;
83  m_idLength = 1;
85  ++m_idLength;
86  mask >>= 1;
87  }
89  if(!skipped) {
90  addNotification(NotificationType::Critical, argsToString("EBML ID length at ", startOffset(), " is not supported, trying to skip."), context);
91  }
92  continue; // try again
93  }
94  if(m_idLength > container().maxIdLength()) {
95  if(!skipped) {
96  addNotification(NotificationType::Critical, argsToString("EBML ID length at ", startOffset(), " is invalid, trying to skip."), context);
97  }
98  continue; // try again
99  }
101  m_id = BE::toUInt32(buf);
102 
103  // read size
104  beg = static_cast<byte>(stream().peek()), mask = 0x80;
105  m_sizeLength = 1;
106  if(beg == 0xFF) {
107  // this indicates that the element size is unknown
108  // -> just assume the element takes the maximum available size
110  } else {
112  ++m_sizeLength;
113  mask >>= 1;
114  }
116  if(!skipped) {
117  addNotification(NotificationType::Critical, "EBML size length is not supported.", parsingContext());
118  }
119  continue; // try again
120  }
121  if(m_sizeLength > container().maxSizeLength()) {
122  if(!skipped) {
123  addNotification(NotificationType::Critical, "EBML size length is invalid.", parsingContext());
124  }
125  continue; // try again
126  }
127  // read size into buffer
128  memset(buf, 0, sizeof(dataSizeType)); // reset buffer
130  *(buf + (GenericFileElement<implementationType>::maximumSizeLengthSupported() - m_sizeLength)) ^= mask; // xor the first byte in buffer which has been read from the file with mask
131  m_dataSize = ConversionUtilities::BE::toUInt64(buf);
132  // check if element is truncated
133  if(totalSize() > maxTotalSize()) {
134  if(m_idLength + m_sizeLength > maxTotalSize()) { // header truncated
135  if(!skipped) {
136  addNotification(NotificationType::Critical, "EBML header seems to be truncated.", parsingContext());
137  }
138  continue; // try again
139  } else { // data truncated
140  addNotification(NotificationType::Warning, "Data of EBML element seems to be truncated; unable to parse siblings of that element.", parsingContext());
141  m_dataSize = maxTotalSize() - m_idLength - m_sizeLength; // using max size instead
142  }
143  }
144  }
145 
146  // check if there's a first child
147  const uint64 firstChildOffset = this->firstChildOffset();
148  if(firstChildOffset && firstChildOffset < totalSize()) {
149  m_firstChild.reset(new EbmlElement(static_cast<EbmlElement &>(*this), startOffset() + firstChildOffset));
150  } else {
151  m_firstChild.reset();
152  }
153 
154  // check if there's a sibling
155  if(totalSize() < maxTotalSize()) {
156  if(parent()) {
157  m_nextSibling.reset(new EbmlElement(*(parent()), startOffset() + totalSize()));
158  } else {
160  }
161  } else {
162  m_nextSibling.reset();
163  }
164 
165  // no critical errors occurred
166  // -> add a warning if bytes have been skipped
167  if(skipped) {
168  addNotification(NotificationType::Warning, argsToString(skipped, " bytes have been skipped"), parsingContext());
169  }
170  // -> don't need another try, return here
171  return;
172  }
173 
174  // critical errors occurred and skipping some bytes wasn't successful
175  throw InvalidDataException();
176 }
177 
182 {
183  stream().seekg(dataOffset());
184  return reader().readString(dataSize());
185 }
186 
194 {
195  char buff[sizeof(uint64)] = {0};
196  int i = static_cast<int>(sizeof(buff)) - dataSize();
197  if(i < 0) {
198  i = 0;
199  }
200  stream().seekg(dataOffset(), ios_base::beg);
201  stream().read(buff + i, sizeof(buff) - i);
202  return BE::toUInt64(buff);
203 }
204 
210 {
211  stream().seekg(dataOffset());
212  switch(dataSize()) {
213  case sizeof(float32):
214  return reader().readFloat32BE();
215  case sizeof(float64):
216  return reader().readFloat64BE();
217  default:
218  return 0.0;
219  }
220 }
221 
227 {
228  if(id <= 0xFF) {
229  return 1;
230  } else if(id <= 0x7FFF) {
231  return 2;
232  } else if(id <= 0x3FFFFF) {
233  return 3;
234  } else if(id <= 0x1FFFFFFF) {
235  return 4;
236  } else {
237  throw InvalidDataException();
238  }
239 }
240 
246 {
247  if(size < 126) {
248  return 1;
249  } else if(size <= 16382ul) {
250  return 2;
251  } else if(size <= 2097150ul) {
252  return 3;
253  } else if(size <= 268435454ul) {
254  return 4;
255  } else if(size <= 34359738366ul) {
256  return 5;
257  } else if(size <= 4398046511102ul) {
258  return 6;
259  } else if(size <= 562949953421310ul) {
260  return 7;
261  } else if(size <= 72057594037927934ul) {
262  return 8;
263  } else {
264  throw InvalidDataException();
265  }
266 }
267 
275 {
276  if(id <= 0xFF) {
277  *buff = static_cast<byte>(id);
278  return 1;
279  } else if(id <= 0x7FFF) {
280  BE::getBytes(static_cast<uint16>(id), buff);
281  return 2;
282  } else if(id <= 0x3FFFFF) {
283  BE::getBytes(static_cast<uint32>(id << 0x8), buff);
284  return 3;
285  } else if(id <= 0x1FFFFFFF) {
286  BE::getBytes(static_cast<uint32>(id), buff);
287  return 4;
288  } else {
289  throw InvalidDataException();
290  }
291 }
292 
300 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff)
301 {
302  if(size < 126) {
303  *buff = static_cast<byte>(size | 0x80);
304  return 1;
305  } else if(size <= 16382ul) {
306  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
307  return 2;
308  } else if(size <= 2097150ul) {
309  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
310  return 3;
311  } else if(size <= 268435454ul) {
312  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
313  return 4;
314  } else if(size <= 34359738366ul) {
315  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
316  return 5;
317  } else if(size <= 4398046511102ul) {
318  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
319  return 6;
320  } else if(size <= 562949953421310ul) {
321  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
322  return 7;
323  } else if(size <= 72057594037927934ul) {
324  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
325  return 8;
326  }
327  throw InvalidDataException();
328 }
329 
338 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff, byte minBytes)
339 {
340  if(minBytes <= 1 && size < 126) {
341  *buff = static_cast<byte>(size | 0x80);
342  return 1;
343  } else if(minBytes <= 2 && size <= 16382ul) {
344  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
345  return 2;
346  } else if(minBytes <= 3 && size <= 2097150ul) {
347  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
348  return 3;
349  } else if(minBytes <= 4 && size <= 268435454ul) {
350  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
351  return 4;
352  } else if(minBytes <= 5 && size <= 34359738366ul) {
353  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
354  return 5;
355  } else if(minBytes <= 6 && size <= 4398046511102ul) {
356  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
357  return 6;
358  } else if(minBytes <= 7 && size <= 562949953421310ul) {
359  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
360  return 7;
361  } else if(minBytes <= 8 && size <= 72057594037927934ul) {
362  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
363  return 8;
364  }
365  throw InvalidDataException();
366 }
367 
373 {
374  if(integer <= 0xFFul) {
375  return 1;
376  } else if(integer <= 0xFFFFul) {
377  return 2;
378  } else if(integer <= 0xFFFFFFul) {
379  return 3;
380  } else if(integer <= 0xFFFFFFFFul) {
381  return 4;
382  } else if(integer <= 0xFFFFFFFFFFul) {
383  return 5;
384  } else if(integer <= 0xFFFFFFFFFFFFul) {
385  return 6;
386  } else if(integer <= 0xFFFFFFFFFFFFFFul) {
387  return 7;
388  } else {
389  return 8;
390  }
391 }
392 
397 byte EbmlElement::makeUInteger(uint64 value, char *buff)
398 {
399  if(value <= 0xFFul) {
400  *buff = static_cast<char>(value);
401  return 1;
402  } else if(value <= 0xFFFFul) {
403  BE::getBytes(static_cast<uint16>(value), buff);
404  return 2;
405  } else if(value <= 0xFFFFFFul) {
406  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
407  return 3;
408  } else if(value <= 0xFFFFFFFFul) {
409  BE::getBytes(static_cast<uint32>(value), buff);
410  return 4;
411  } else if(value <= 0xFFFFFFFFFFul) {
412  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
413  return 5;
414  } else if(value <= 0xFFFFFFFFFFFFul) {
415  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
416  return 6;
417  } else if(value <= 0xFFFFFFFFFFFFFFul) {
418  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
419  return 7;
420  } else {
421  BE::getBytes(static_cast<uint64>(value), buff);
422  return 8;
423  }
424 }
425 
433 byte EbmlElement::makeUInteger(uint64 value, char *buff, byte minBytes)
434 {
435  if(minBytes <= 1 && value <= 0xFFul) {
436  *buff = static_cast<char>(value);
437  return 1;
438  } else if(minBytes <= 2 && value <= 0xFFFFul) {
439  BE::getBytes(static_cast<uint16>(value), buff);
440  return 2;
441  } else if(minBytes <= 3 && value <= 0xFFFFFFul) {
442  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
443  return 3;
444  } else if(minBytes <= 4 && value <= 0xFFFFFFFFul) {
445  BE::getBytes(static_cast<uint32>(value), buff);
446  return 4;
447  } else if(minBytes <= 5 && value <= 0xFFFFFFFFFFul) {
448  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
449  return 5;
450  } else if(minBytes <= 6 && value <= 0xFFFFFFFFFFFFul) {
451  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
452  return 6;
453  } else if(minBytes <= 7 && value <= 0xFFFFFFFFFFFFFFul) {
454  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
455  return 7;
456  } else {
457  BE::getBytes(static_cast<uint64>(value), buff);
458  return 8;
459  }
460 }
461 
468 void EbmlElement::makeSimpleElement(ostream &stream, identifierType id, uint64 content)
469 {
470  char buff1[8];
471  char buff2[8];
472  byte sizeLength = EbmlElement::makeId(id, buff1);
473  stream.write(buff1, sizeLength);
474  byte elementSize = EbmlElement::makeUInteger(content, buff2);
475  sizeLength = EbmlElement::makeSizeDenotation(elementSize, buff1);
476  stream.write(buff1, sizeLength);
477  stream.write(buff2, elementSize);
478 }
479 
486 void EbmlElement::makeSimpleElement(std::ostream &stream, GenericFileElement::identifierType id, const std::string &content)
487 {
488  char buff1[8];
489  byte sizeLength = EbmlElement::makeId(id, buff1);
490  stream.write(buff1, sizeLength);
491  sizeLength = EbmlElement::makeSizeDenotation(content.size(), buff1);
492  stream.write(buff1, sizeLength);
493  stream.write(content.c_str(), content.size());
494 }
495 
503 void EbmlElement::makeSimpleElement(ostream &stream, GenericFileElement::identifierType id, const char *data, std::size_t dataSize)
504 {
505  char buff1[8];
506  byte sizeLength = EbmlElement::makeId(id, buff1);
507  stream.write(buff1, sizeLength);
509  stream.write(buff1, sizeLength);
510  stream.write(data, dataSize);
511 }
512 
513 }
514 
515 
516 
static constexpr uint32 maximumIdLengthSupported()
Returns the maximum id length supported by the class in byte.
uint64 startOffset() const
Returns the start offset in the related stream.
void invalidateStatus()
Invalidates the current status.
uint64 dataOffset() const
Returns the data offset of the element in the related stream.
std::iostream & stream()
Returns the related stream.
FileElementTraits< ImplementationType >::identifierType identifierType
Specifies the type used to store identifiers.
static byte calculateSizeDenotationLength(uint64 size)
Returns the length of the size denotation for the specified size in byte.
uint32 headerSize() const
Returns the header size of the element in byte.
Implementation of GenericContainer<MediaFileInfo, MatroskaTag, MatroskaTrack, EbmlElement>.
uint64 readUInteger()
Reads the content of the element as unsigned integer.
The EbmlElement class helps to parse EBML files such as Matroska files.
Definition: ebmlelement.h:50
IoUtilities::BinaryReader & reader()
Returns the related BinaryReader.
dataSizeType dataSize() const
Returns the data size of the element in byte.
static byte makeUInteger(uint64 value, char *buff)
Writes value to buff.
std::unique_ptr< implementationType > m_firstChild
EbmlElement(MatroskaContainer &container, uint64 startOffset)
Constructs a new top level element with the specified container at the specified startOffset.
Definition: ebmlelement.cpp:39
uint64 totalSize() const
Returns the total size of the element.
STL namespace.
void internalParse()
Parses the EBML element.
Definition: ebmlelement.cpp:68
void addNotification(const Notification &notification)
This method is meant to be called by the derived class to add a notification.
static void makeSimpleElement(std::ostream &stream, identifierType id, uint64 content)
Makes a simple EBML element.
FileElementTraits< EbmlElement >::dataSizeType dataSizeType
Specifies the type used to store data sizes.
static byte calculateUIntegerLength(uint64 integer)
Returns the length of the specified unsigned integer in byte.
std::unique_ptr< implementationType > m_nextSibling
static uint64 bytesToBeSkipped
Specifies the number of bytes to be skipped till a valid EBML element is found in the stream...
Definition: ebmlelement.h:76
static byte makeSizeDenotation(uint64 size, char *buff)
Makes the size denotation for the specified size and stores it to buff.
Contains utility classes helping to read and write streams.
The GenericFileElement class helps to parse binary files which consist of an arboreal element structure.
The exception that is thrown when the data to be parsed or to be made seems invalid and therefore can...
Definition: exceptions.h:27
std::string readString()
Reads the content of the element as string.
std::string idToString() const
Converts the specified EBML ID to a printable string.
Definition: ebmlelement.h:91
static byte makeId(identifierType id, char *buff)
Stores the specified id in the specified buffer which must be at least 8 bytes long.
const identifierType & id() const
Returns the element ID.
static byte calculateIdLength(identifierType id)
Returns the length of the specified id in byte.
float64 readFloat()
Reads the content of the element as float.
uint64 maxTotalSize() const
Returns maximum total size.
static constexpr uint32 maximumSizeLengthSupported()
Returns the maximum size length supported by the class in byte.
The exception that is thrown when the data to be parsed is truncated and therefore can not be parsed ...
Definition: exceptions.h:35
implementationType * parent()
Returns the parent of the element.
uint64 firstChildOffset() const
Returns the offset of the first child of the element.
Definition: ebmlelement.h:142
Contains all classes and functions of the TagInfo library.
Definition: exceptions.h:9
containerType & container()
Returns the related container.
uint32 sizeLength() const
Returns the length of the size denotation of the element in byte.