Tag Parser  6.1.1
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
ebmlelement.cpp
Go to the documentation of this file.
1 #include "./ebmlelement.h"
2 #include "./ebmlid.h"
3 #include "./matroskacontainer.h"
4 #include "./matroskaid.h"
5 
6 #include "../exceptions.h"
7 
8 #include <c++utilities/conversion/types.h>
9 #include <c++utilities/conversion/stringconversion.h>
10 #include <c++utilities/conversion/binaryconversion.h>
11 #include <c++utilities/io/binaryreader.h>
12 #include <c++utilities/io/binarywriter.h>
13 #include <c++utilities/misc/memory.h>
14 
15 #include <string>
16 #include <sstream>
17 #include <cstring>
18 
19 using namespace std;
20 using namespace IoUtilities;
21 using namespace ConversionUtilities;
22 
23 
24 namespace Media {
25 
34 EbmlElement::EbmlElement(MatroskaContainer &container, uint64 startOffset) :
35  GenericFileElement<EbmlElement>(container, startOffset)
36 {}
37 
42  GenericFileElement<EbmlElement>(container, startOffset, maxSize)
43 {}
44 
49  GenericFileElement<EbmlElement>(parent, startOffset)
50 {}
51 
55 string EbmlElement::parsingContext() const
56 {
57  return "parsing header of EBML element " + idToString() + " at " + numberToString(startOffset());
58 }
59 
64 {
66  static const string context("parsing EBML element header");
67 
68  byte skipped;
69  for(skipped = 0; /* TODO: add a sane limit here */; ++m_startOffset, --m_maxSize, ++skipped) {
70  // check whether max size is valid
71  if(maxTotalSize() < 2) {
72  addNotification(NotificationType::Critical, "The EBML element at " + numberToString(startOffset()) + " is truncated or does not exist.", context);
73  throw TruncatedDataException();
74  }
75  stream().seekg(startOffset());
76  // read ID
78  byte beg, mask = 0x80;
79  beg = stream().peek();
80  m_idLength = 1;
82  ++m_idLength;
83  mask >>= 1;
84  }
86  if(!skipped) {
87  addNotification(NotificationType::Critical, "EBML ID length is not supported, trying to skip.", context);
88  }
89  continue; // try again
90  }
91  if(m_idLength > container().maxIdLength()) {
92  if(!skipped) {
93  addNotification(NotificationType::Critical, "EBML ID length is invalid.", context);
94  }
95  continue; // try again
96  }
98  m_id = BE::toUInt32(buf);
99 
100  // read size
101  mask = 0x80;
102  m_sizeLength = 1;
103  beg = stream().peek();
104  if(beg == 0xFF) {
105  // this indicates that the element size is unknown
106  // -> just assume the element takes the maximum available size
108  } else {
110  ++m_sizeLength;
111  mask >>= 1;
112  }
114  if(!skipped) {
115  addNotification(NotificationType::Critical, "EBML size length is not supported.", parsingContext());
116  }
117  continue; // try again
118  }
119  if(m_sizeLength > container().maxSizeLength()) {
120  if(!skipped) {
121  addNotification(NotificationType::Critical, "EBML size length is invalid.", parsingContext());
122  }
123  continue; // try again
124  }
125  // read size into buffer
126  memset(buf, 0, sizeof(dataSizeType)); // reset buffer
128  *(buf + (GenericFileElement<implementationType>::maximumSizeLengthSupported() - m_sizeLength)) ^= mask; // xor the first byte in buffer which has been read from the file with mask
129  m_dataSize = ConversionUtilities::BE::toUInt64(buf);
130  // check if element is truncated
131  if(totalSize() > maxTotalSize()) {
132  if(m_idLength + m_sizeLength > maxTotalSize()) { // header truncated
133  if(!skipped) {
134  addNotification(NotificationType::Critical, "EBML header seems to be truncated.", parsingContext());
135  }
136  continue; // try again
137  } else { // data truncated
138  addNotification(NotificationType::Warning, "Data of EBML element seems to be truncated; unable to parse siblings of that element.", parsingContext());
139  m_dataSize = maxTotalSize() - m_idLength - m_sizeLength; // using max size instead
140  }
141  }
142  }
143 
144  // check if there's a first child
145  if(const uint64 firstChildOffset = this->firstChildOffset()) {
146  if(firstChildOffset < dataSize()) {
147  m_firstChild.reset(new EbmlElement(static_cast<EbmlElement &>(*this), startOffset() + firstChildOffset));
148  } else {
149  m_firstChild.reset();
150  }
151  } else {
152  m_firstChild.reset();
153  }
154 
155  // check if there's a sibling
156  if(totalSize() < maxTotalSize()) {
157  if(parent()) {
158  m_nextSibling.reset(new EbmlElement(*(parent()), startOffset() + totalSize()));
159  } else {
161  }
162  } else {
163  m_nextSibling.reset();
164  }
165 
166  // no critical errors occurred
167  // -> add a warning if bytes have been skipped
168  if(skipped) {
169  addNotification(NotificationType::Warning, numberToString<unsigned int>(skipped) + " bytes have been skipped", parsingContext());
170  }
171  // -> don't need another try, return here
172  return;
173  }
174 
175  // critical errors occurred and skipping some bytes wasn't successful
176  throw InvalidDataException();
177 }
178 
183 {
184  stream().seekg(dataOffset());
185  return reader().readString(dataSize());
186 }
187 
195 {
196  char buff[sizeof(uint64)] = {0};
197  int i = static_cast<int>(sizeof(buff)) - dataSize();
198  if(i < 0) {
199  i = 0;
200  }
201  stream().seekg(dataOffset(), ios_base::beg);
202  stream().read(buff + i, sizeof(buff) - i);
203  return BE::toUInt64(buff);
204 }
205 
211 {
212  stream().seekg(dataOffset());
213  switch(dataSize()) {
214  case sizeof(float32):
215  return reader().readFloat32BE();
216  case sizeof(float64):
217  return reader().readFloat64BE();
218  default:
219  return 0.0;
220  }
221 }
222 
228 {
229  if(id <= 0xFF) {
230  return 1;
231  } else if(id <= 0x7FFF) {
232  return 2;
233  } else if(id <= 0x3FFFFF) {
234  return 3;
235  } else if(id <= 0x1FFFFFFF) {
236  return 4;
237  } else {
238  throw InvalidDataException();
239  }
240 }
241 
247 {
248  if(size < 126) {
249  return 1;
250  } else if(size <= 16382ul) {
251  return 2;
252  } else if(size <= 2097150ul) {
253  return 3;
254  } else if(size <= 268435454ul) {
255  return 4;
256  } else if(size <= 34359738366ul) {
257  return 5;
258  } else if(size <= 4398046511102ul) {
259  return 6;
260  } else if(size <= 562949953421310ul) {
261  return 7;
262  } else if(size <= 72057594037927934ul) {
263  return 8;
264  } else {
265  throw InvalidDataException();
266  }
267 }
268 
276 {
277  if(id <= 0xFF) {
278  *buff = static_cast<byte>(id);
279  return 1;
280  } else if(id <= 0x7FFF) {
281  BE::getBytes(static_cast<uint16>(id), buff);
282  return 2;
283  } else if(id <= 0x3FFFFF) {
284  BE::getBytes(static_cast<uint32>(id << 0x8), buff);
285  return 3;
286  } else if(id <= 0x1FFFFFFF) {
287  BE::getBytes(static_cast<uint32>(id), buff);
288  return 4;
289  } else {
290  throw InvalidDataException();
291  }
292 }
293 
301 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff)
302 {
303  if(size < 126) {
304  *buff = static_cast<byte>(size | 0x80);
305  return 1;
306  } else if(size <= 16382ul) {
307  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
308  return 2;
309  } else if(size <= 2097150ul) {
310  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
311  return 3;
312  } else if(size <= 268435454ul) {
313  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
314  return 4;
315  } else if(size <= 34359738366ul) {
316  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
317  return 5;
318  } else if(size <= 4398046511102ul) {
319  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
320  return 6;
321  } else if(size <= 562949953421310ul) {
322  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
323  return 7;
324  } else if(size <= 72057594037927934ul) {
325  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
326  return 8;
327  }
328  throw InvalidDataException();
329 }
330 
339 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff, byte minBytes)
340 {
341  if(minBytes <= 1 && size < 126) {
342  *buff = static_cast<byte>(size | 0x80);
343  return 1;
344  } else if(minBytes <= 2 && size <= 16382ul) {
345  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
346  return 2;
347  } else if(minBytes <= 3 && size <= 2097150ul) {
348  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
349  return 3;
350  } else if(minBytes <= 4 && size <= 268435454ul) {
351  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
352  return 4;
353  } else if(minBytes <= 5 && size <= 34359738366ul) {
354  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
355  return 5;
356  } else if(minBytes <= 6 && size <= 4398046511102ul) {
357  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
358  return 6;
359  } else if(minBytes <= 7 && size <= 562949953421310ul) {
360  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
361  return 7;
362  } else if(minBytes <= 8 && size <= 72057594037927934ul) {
363  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
364  return 8;
365  }
366  throw InvalidDataException();
367 }
368 
374 {
375  if(integer <= 0xFFul) {
376  return 1;
377  } else if(integer <= 0xFFFFul) {
378  return 2;
379  } else if(integer <= 0xFFFFFFul) {
380  return 3;
381  } else if(integer <= 0xFFFFFFFFul) {
382  return 4;
383  } else if(integer <= 0xFFFFFFFFFFul) {
384  return 5;
385  } else if(integer <= 0xFFFFFFFFFFFFul) {
386  return 6;
387  } else if(integer <= 0xFFFFFFFFFFFFFFul) {
388  return 7;
389  } else {
390  return 8;
391  }
392 }
393 
398 byte EbmlElement::makeUInteger(uint64 value, char *buff)
399 {
400  if(value <= 0xFFul) {
401  *buff = static_cast<char>(value);
402  return 1;
403  } else if(value <= 0xFFFFul) {
404  BE::getBytes(static_cast<uint16>(value), buff);
405  return 2;
406  } else if(value <= 0xFFFFFFul) {
407  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
408  return 3;
409  } else if(value <= 0xFFFFFFFFul) {
410  BE::getBytes(static_cast<uint32>(value), buff);
411  return 4;
412  } else if(value <= 0xFFFFFFFFFFul) {
413  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
414  return 5;
415  } else if(value <= 0xFFFFFFFFFFFFul) {
416  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
417  return 6;
418  } else if(value <= 0xFFFFFFFFFFFFFFul) {
419  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
420  return 7;
421  } else {
422  BE::getBytes(static_cast<uint64>(value), buff);
423  return 8;
424  }
425 }
426 
434 byte EbmlElement::makeUInteger(uint64 value, char *buff, byte minBytes)
435 {
436  if(minBytes <= 1 && value <= 0xFFul) {
437  *buff = static_cast<char>(value);
438  return 1;
439  } else if(minBytes <= 2 && value <= 0xFFFFul) {
440  BE::getBytes(static_cast<uint16>(value), buff);
441  return 2;
442  } else if(minBytes <= 3 && value <= 0xFFFFFFul) {
443  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
444  return 3;
445  } else if(minBytes <= 4 && value <= 0xFFFFFFFFul) {
446  BE::getBytes(static_cast<uint32>(value), buff);
447  return 4;
448  } else if(minBytes <= 5 && value <= 0xFFFFFFFFFFul) {
449  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
450  return 5;
451  } else if(minBytes <= 6 && value <= 0xFFFFFFFFFFFFul) {
452  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
453  return 6;
454  } else if(minBytes <= 7 && value <= 0xFFFFFFFFFFFFFFul) {
455  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
456  return 7;
457  } else {
458  BE::getBytes(static_cast<uint64>(value), buff);
459  return 8;
460  }
461 }
462 
469 void EbmlElement::makeSimpleElement(ostream &stream, identifierType id, uint64 content)
470 {
471  char buff1[8];
472  char buff2[8];
473  byte sizeLength = EbmlElement::makeId(id, buff1);
474  stream.write(buff1, sizeLength);
475  byte elementSize = EbmlElement::makeUInteger(content, buff2);
476  sizeLength = EbmlElement::makeSizeDenotation(elementSize, buff1);
477  stream.write(buff1, sizeLength);
478  stream.write(buff2, elementSize);
479 }
480 
487 void EbmlElement::makeSimpleElement(std::ostream &stream, GenericFileElement::identifierType id, const std::string &content)
488 {
489  char buff1[8];
490  byte sizeLength = EbmlElement::makeId(id, buff1);
491  stream.write(buff1, sizeLength);
492  sizeLength = EbmlElement::makeSizeDenotation(content.size(), buff1);
493  stream.write(buff1, sizeLength);
494  stream.write(content.c_str(), content.size());
495 }
496 
504 void EbmlElement::makeSimpleElement(ostream &stream, GenericFileElement::identifierType id, const char *data, std::size_t dataSize)
505 {
506  char buff1[8];
507  byte sizeLength = EbmlElement::makeId(id, buff1);
508  stream.write(buff1, sizeLength);
509  sizeLength = EbmlElement::makeSizeDenotation(dataSize, buff1);
510  stream.write(buff1, sizeLength);
511  stream.write(data, dataSize);
512 }
513 
514 }
515 
516 
517 
static constexpr uint32 maximumIdLengthSupported()
Returns the maximum id length supported by the class in byte.
uint64 startOffset() const
Returns the start offset in the related stream.
void invalidateStatus()
Invalidates the current status.
uint64 dataOffset() const
Returns the data offset of the element in the related stream.
std::iostream & stream()
Returns the related stream.
FileElementTraits< ImplementationType >::identifierType identifierType
Specifies the type used to store identifiers.
static byte calculateSizeDenotationLength(uint64 size)
Returns the length of the size denotation for the specified size in byte.
uint32 headerSize() const
Returns the header size of the element in byte.
Implementation of GenericContainer<MediaFileInfo, MatroskaTag, MatroskaTrack, EbmlElement>.
uint64 readUInteger()
Reads the content of the element as unsigned integer.
The EbmlElement class helps to parse EBML files such as Matroska files.
Definition: ebmlelement.h:50
IoUtilities::BinaryReader & reader()
Returns the related BinaryReader.
dataSizeType dataSize() const
Returns the data size of the element in byte.
static byte makeUInteger(uint64 value, char *buff)
Writes value to buff.
std::unique_ptr< implementationType > m_firstChild
EbmlElement(MatroskaContainer &container, uint64 startOffset)
Constructs a new top level element with the specified container at the specified startOffset.
Definition: ebmlelement.cpp:34
uint64 totalSize() const
Returns the total size of the element.
STL namespace.
void internalParse()
Parses the EBML element.
Definition: ebmlelement.cpp:63
void addNotification(const Notification &notification)
This protected method is meant to be called by the derived class to add a notification.
static void makeSimpleElement(std::ostream &stream, identifierType id, uint64 content)
Makes a simple EBML element.
FileElementTraits< EbmlElement >::dataSizeType dataSizeType
Specifies the type used to store data sizes.
static byte calculateUIntegerLength(uint64 integer)
Returns the length of the specified unsigned integer in byte.
std::unique_ptr< implementationType > m_nextSibling
static byte makeSizeDenotation(uint64 size, char *buff)
Makes the size denotation for the specified size and stores it to buff.
Contains utility classes helping to read and write streams.
The GenericFileElement class helps to parse binary files which consist of an arboreal element strucut...
The exception that is thrown when the data to be parsed or to be made seems invalid and therefore can...
Definition: exceptions.h:27
std::string readString()
Reads the content of the element as string.
std::string idToString() const
Converts the specified EBML ID to a printable string.
Definition: ebmlelement.h:90
static byte makeId(identifierType id, char *buff)
Stores the specified id in the specified buffer which must be at least 8 bytes long.
const identifierType & id() const
Returns the element ID.
static byte calculateIdLength(identifierType id)
Returns the length of the specified id in byte.
float64 readFloat()
Reads the content of the element as float.
uint64 maxTotalSize() const
Returns maximum total size.
static constexpr uint32 maximumSizeLengthSupported()
Returns the maximum size length supported by the class in byte.
The exception that is thrown when the data to be parsed is truncated and therefore can not be parsed ...
Definition: exceptions.h:35
implementationType * parent()
Returns the parent of the element.
uint64 firstChildOffset() const
Returns the offset of the first child of the element.
Definition: ebmlelement.h:141
Contains all classes and functions of the TagInfo library.
Definition: exceptions.h:9
containerType & container()
Returns the related container.
uint32 sizeLength() const
Returns the length of the size denotation of the element in byte.