Tag Parser  6.3.0
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
ebmlelement.cpp
Go to the documentation of this file.
1 #include "./ebmlelement.h"
2 #include "./ebmlid.h"
3 #include "./matroskacontainer.h"
4 #include "./matroskaid.h"
5 
6 #include "../exceptions.h"
7 
8 #include <c++utilities/conversion/types.h>
9 #include <c++utilities/conversion/binaryconversion.h>
10 #include <c++utilities/io/binaryreader.h>
11 #include <c++utilities/io/binarywriter.h>
12 
13 #include <string>
14 #include <sstream>
15 #include <cstring>
16 #include <memory>
17 
18 using namespace std;
19 using namespace IoUtilities;
20 using namespace ConversionUtilities;
21 
22 
23 namespace Media {
24 
33 uint64 EbmlElement::bytesToBeSkipped = 0x4000;
34 
38 EbmlElement::EbmlElement(MatroskaContainer &container, uint64 startOffset) :
39  GenericFileElement<EbmlElement>(container, startOffset)
40 {}
41 
46  GenericFileElement<EbmlElement>(container, startOffset, maxSize)
47 {}
48 
53  GenericFileElement<EbmlElement>(parent, startOffset)
54 {}
55 
59 string EbmlElement::parsingContext() const
60 {
61  return ("parsing header of EBML element " % idToString() % " at ") + startOffset();
62 }
63 
68 {
70  static const string context("parsing EBML element header");
71 
72  for(uint64 skipped = 0; skipped < bytesToBeSkipped; ++m_startOffset, --m_maxSize, ++skipped) {
73  // check whether max size is valid
74  if(maxTotalSize() < 2) {
75  addNotification(NotificationType::Critical, argsToString("The EBML element at ", startOffset(), " is truncated or does not exist."), context);
76  throw TruncatedDataException();
77  }
78  stream().seekg(startOffset());
79  // read ID
81  byte beg = stream().peek(), mask = 0x80;
82  m_idLength = 1;
84  ++m_idLength;
85  mask >>= 1;
86  }
88  if(!skipped) {
89  addNotification(NotificationType::Critical, "EBML ID length is not supported, trying to skip.", context);
90  }
91  continue; // try again
92  }
93  if(m_idLength > container().maxIdLength()) {
94  if(!skipped) {
95  addNotification(NotificationType::Critical, "EBML ID length is invalid.", context);
96  }
97  continue; // try again
98  }
100  m_id = BE::toUInt32(buf);
101 
102  // read size
103  beg = stream().peek(), mask = 0x80;
104  m_sizeLength = 1;
105  if(beg == 0xFF) {
106  // this indicates that the element size is unknown
107  // -> just assume the element takes the maximum available size
109  } else {
111  ++m_sizeLength;
112  mask >>= 1;
113  }
115  if(!skipped) {
116  addNotification(NotificationType::Critical, "EBML size length is not supported.", parsingContext());
117  }
118  continue; // try again
119  }
120  if(m_sizeLength > container().maxSizeLength()) {
121  if(!skipped) {
122  addNotification(NotificationType::Critical, "EBML size length is invalid.", parsingContext());
123  }
124  continue; // try again
125  }
126  // read size into buffer
127  memset(buf, 0, sizeof(dataSizeType)); // reset buffer
129  *(buf + (GenericFileElement<implementationType>::maximumSizeLengthSupported() - m_sizeLength)) ^= mask; // xor the first byte in buffer which has been read from the file with mask
130  m_dataSize = ConversionUtilities::BE::toUInt64(buf);
131  // check if element is truncated
132  if(totalSize() > maxTotalSize()) {
133  if(m_idLength + m_sizeLength > maxTotalSize()) { // header truncated
134  if(!skipped) {
135  addNotification(NotificationType::Critical, "EBML header seems to be truncated.", parsingContext());
136  }
137  continue; // try again
138  } else { // data truncated
139  addNotification(NotificationType::Warning, "Data of EBML element seems to be truncated; unable to parse siblings of that element.", parsingContext());
140  m_dataSize = maxTotalSize() - m_idLength - m_sizeLength; // using max size instead
141  }
142  }
143  }
144 
145  // check if there's a first child
146  const uint64 firstChildOffset = this->firstChildOffset();
147  if(firstChildOffset && firstChildOffset < totalSize()) {
148  m_firstChild.reset(new EbmlElement(static_cast<EbmlElement &>(*this), startOffset() + firstChildOffset));
149  } else {
150  m_firstChild.reset();
151  }
152 
153  // check if there's a sibling
154  if(totalSize() < maxTotalSize()) {
155  if(parent()) {
156  m_nextSibling.reset(new EbmlElement(*(parent()), startOffset() + totalSize()));
157  } else {
159  }
160  } else {
161  m_nextSibling.reset();
162  }
163 
164  // no critical errors occurred
165  // -> add a warning if bytes have been skipped
166  if(skipped) {
167  addNotification(NotificationType::Warning, numberToString<unsigned int>(skipped) + " bytes have been skipped", parsingContext());
168  }
169  // -> don't need another try, return here
170  return;
171  }
172 
173  // critical errors occurred and skipping some bytes wasn't successful
174  throw InvalidDataException();
175 }
176 
181 {
182  stream().seekg(dataOffset());
183  return reader().readString(dataSize());
184 }
185 
193 {
194  char buff[sizeof(uint64)] = {0};
195  int i = static_cast<int>(sizeof(buff)) - dataSize();
196  if(i < 0) {
197  i = 0;
198  }
199  stream().seekg(dataOffset(), ios_base::beg);
200  stream().read(buff + i, sizeof(buff) - i);
201  return BE::toUInt64(buff);
202 }
203 
209 {
210  stream().seekg(dataOffset());
211  switch(dataSize()) {
212  case sizeof(float32):
213  return reader().readFloat32BE();
214  case sizeof(float64):
215  return reader().readFloat64BE();
216  default:
217  return 0.0;
218  }
219 }
220 
226 {
227  if(id <= 0xFF) {
228  return 1;
229  } else if(id <= 0x7FFF) {
230  return 2;
231  } else if(id <= 0x3FFFFF) {
232  return 3;
233  } else if(id <= 0x1FFFFFFF) {
234  return 4;
235  } else {
236  throw InvalidDataException();
237  }
238 }
239 
245 {
246  if(size < 126) {
247  return 1;
248  } else if(size <= 16382ul) {
249  return 2;
250  } else if(size <= 2097150ul) {
251  return 3;
252  } else if(size <= 268435454ul) {
253  return 4;
254  } else if(size <= 34359738366ul) {
255  return 5;
256  } else if(size <= 4398046511102ul) {
257  return 6;
258  } else if(size <= 562949953421310ul) {
259  return 7;
260  } else if(size <= 72057594037927934ul) {
261  return 8;
262  } else {
263  throw InvalidDataException();
264  }
265 }
266 
274 {
275  if(id <= 0xFF) {
276  *buff = static_cast<byte>(id);
277  return 1;
278  } else if(id <= 0x7FFF) {
279  BE::getBytes(static_cast<uint16>(id), buff);
280  return 2;
281  } else if(id <= 0x3FFFFF) {
282  BE::getBytes(static_cast<uint32>(id << 0x8), buff);
283  return 3;
284  } else if(id <= 0x1FFFFFFF) {
285  BE::getBytes(static_cast<uint32>(id), buff);
286  return 4;
287  } else {
288  throw InvalidDataException();
289  }
290 }
291 
299 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff)
300 {
301  if(size < 126) {
302  *buff = static_cast<byte>(size | 0x80);
303  return 1;
304  } else if(size <= 16382ul) {
305  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
306  return 2;
307  } else if(size <= 2097150ul) {
308  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
309  return 3;
310  } else if(size <= 268435454ul) {
311  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
312  return 4;
313  } else if(size <= 34359738366ul) {
314  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
315  return 5;
316  } else if(size <= 4398046511102ul) {
317  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
318  return 6;
319  } else if(size <= 562949953421310ul) {
320  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
321  return 7;
322  } else if(size <= 72057594037927934ul) {
323  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
324  return 8;
325  }
326  throw InvalidDataException();
327 }
328 
337 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff, byte minBytes)
338 {
339  if(minBytes <= 1 && size < 126) {
340  *buff = static_cast<byte>(size | 0x80);
341  return 1;
342  } else if(minBytes <= 2 && size <= 16382ul) {
343  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
344  return 2;
345  } else if(minBytes <= 3 && size <= 2097150ul) {
346  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
347  return 3;
348  } else if(minBytes <= 4 && size <= 268435454ul) {
349  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
350  return 4;
351  } else if(minBytes <= 5 && size <= 34359738366ul) {
352  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
353  return 5;
354  } else if(minBytes <= 6 && size <= 4398046511102ul) {
355  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
356  return 6;
357  } else if(minBytes <= 7 && size <= 562949953421310ul) {
358  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
359  return 7;
360  } else if(minBytes <= 8 && size <= 72057594037927934ul) {
361  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
362  return 8;
363  }
364  throw InvalidDataException();
365 }
366 
372 {
373  if(integer <= 0xFFul) {
374  return 1;
375  } else if(integer <= 0xFFFFul) {
376  return 2;
377  } else if(integer <= 0xFFFFFFul) {
378  return 3;
379  } else if(integer <= 0xFFFFFFFFul) {
380  return 4;
381  } else if(integer <= 0xFFFFFFFFFFul) {
382  return 5;
383  } else if(integer <= 0xFFFFFFFFFFFFul) {
384  return 6;
385  } else if(integer <= 0xFFFFFFFFFFFFFFul) {
386  return 7;
387  } else {
388  return 8;
389  }
390 }
391 
396 byte EbmlElement::makeUInteger(uint64 value, char *buff)
397 {
398  if(value <= 0xFFul) {
399  *buff = static_cast<char>(value);
400  return 1;
401  } else if(value <= 0xFFFFul) {
402  BE::getBytes(static_cast<uint16>(value), buff);
403  return 2;
404  } else if(value <= 0xFFFFFFul) {
405  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
406  return 3;
407  } else if(value <= 0xFFFFFFFFul) {
408  BE::getBytes(static_cast<uint32>(value), buff);
409  return 4;
410  } else if(value <= 0xFFFFFFFFFFul) {
411  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
412  return 5;
413  } else if(value <= 0xFFFFFFFFFFFFul) {
414  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
415  return 6;
416  } else if(value <= 0xFFFFFFFFFFFFFFul) {
417  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
418  return 7;
419  } else {
420  BE::getBytes(static_cast<uint64>(value), buff);
421  return 8;
422  }
423 }
424 
432 byte EbmlElement::makeUInteger(uint64 value, char *buff, byte minBytes)
433 {
434  if(minBytes <= 1 && value <= 0xFFul) {
435  *buff = static_cast<char>(value);
436  return 1;
437  } else if(minBytes <= 2 && value <= 0xFFFFul) {
438  BE::getBytes(static_cast<uint16>(value), buff);
439  return 2;
440  } else if(minBytes <= 3 && value <= 0xFFFFFFul) {
441  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
442  return 3;
443  } else if(minBytes <= 4 && value <= 0xFFFFFFFFul) {
444  BE::getBytes(static_cast<uint32>(value), buff);
445  return 4;
446  } else if(minBytes <= 5 && value <= 0xFFFFFFFFFFul) {
447  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
448  return 5;
449  } else if(minBytes <= 6 && value <= 0xFFFFFFFFFFFFul) {
450  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
451  return 6;
452  } else if(minBytes <= 7 && value <= 0xFFFFFFFFFFFFFFul) {
453  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
454  return 7;
455  } else {
456  BE::getBytes(static_cast<uint64>(value), buff);
457  return 8;
458  }
459 }
460 
467 void EbmlElement::makeSimpleElement(ostream &stream, identifierType id, uint64 content)
468 {
469  char buff1[8];
470  char buff2[8];
471  byte sizeLength = EbmlElement::makeId(id, buff1);
472  stream.write(buff1, sizeLength);
473  byte elementSize = EbmlElement::makeUInteger(content, buff2);
474  sizeLength = EbmlElement::makeSizeDenotation(elementSize, buff1);
475  stream.write(buff1, sizeLength);
476  stream.write(buff2, elementSize);
477 }
478 
485 void EbmlElement::makeSimpleElement(std::ostream &stream, GenericFileElement::identifierType id, const std::string &content)
486 {
487  char buff1[8];
488  byte sizeLength = EbmlElement::makeId(id, buff1);
489  stream.write(buff1, sizeLength);
490  sizeLength = EbmlElement::makeSizeDenotation(content.size(), buff1);
491  stream.write(buff1, sizeLength);
492  stream.write(content.c_str(), content.size());
493 }
494 
502 void EbmlElement::makeSimpleElement(ostream &stream, GenericFileElement::identifierType id, const char *data, std::size_t dataSize)
503 {
504  char buff1[8];
505  byte sizeLength = EbmlElement::makeId(id, buff1);
506  stream.write(buff1, sizeLength);
507  sizeLength = EbmlElement::makeSizeDenotation(dataSize, buff1);
508  stream.write(buff1, sizeLength);
509  stream.write(data, dataSize);
510 }
511 
512 }
513 
514 
515 
static constexpr uint32 maximumIdLengthSupported()
Returns the maximum id length supported by the class in byte.
uint64 startOffset() const
Returns the start offset in the related stream.
void invalidateStatus()
Invalidates the current status.
uint64 dataOffset() const
Returns the data offset of the element in the related stream.
std::iostream & stream()
Returns the related stream.
FileElementTraits< ImplementationType >::identifierType identifierType
Specifies the type used to store identifiers.
static byte calculateSizeDenotationLength(uint64 size)
Returns the length of the size denotation for the specified size in byte.
uint32 headerSize() const
Returns the header size of the element in byte.
Implementation of GenericContainer<MediaFileInfo, MatroskaTag, MatroskaTrack, EbmlElement>.
uint64 readUInteger()
Reads the content of the element as unsigned integer.
The EbmlElement class helps to parse EBML files such as Matroska files.
Definition: ebmlelement.h:50
IoUtilities::BinaryReader & reader()
Returns the related BinaryReader.
dataSizeType dataSize() const
Returns the data size of the element in byte.
static byte makeUInteger(uint64 value, char *buff)
Writes value to buff.
std::unique_ptr< implementationType > m_firstChild
EbmlElement(MatroskaContainer &container, uint64 startOffset)
Constructs a new top level element with the specified container at the specified startOffset.
Definition: ebmlelement.cpp:38
uint64 totalSize() const
Returns the total size of the element.
STL namespace.
void internalParse()
Parses the EBML element.
Definition: ebmlelement.cpp:67
void addNotification(const Notification &notification)
This protected method is meant to be called by the derived class to add a notification.
static void makeSimpleElement(std::ostream &stream, identifierType id, uint64 content)
Makes a simple EBML element.
FileElementTraits< EbmlElement >::dataSizeType dataSizeType
Specifies the type used to store data sizes.
static byte calculateUIntegerLength(uint64 integer)
Returns the length of the specified unsigned integer in byte.
std::unique_ptr< implementationType > m_nextSibling
static uint64 bytesToBeSkipped
Specifies the number of bytes to be skipped till a valid EBML element is found in the stream...
Definition: ebmlelement.h:76
static byte makeSizeDenotation(uint64 size, char *buff)
Makes the size denotation for the specified size and stores it to buff.
Contains utility classes helping to read and write streams.
The GenericFileElement class helps to parse binary files which consist of an arboreal element structure.
The exception that is thrown when the data to be parsed or to be made seems invalid and therefore can...
Definition: exceptions.h:27
std::string readString()
Reads the content of the element as string.
std::string idToString() const
Converts the specified EBML ID to a printable string.
Definition: ebmlelement.h:91
static byte makeId(identifierType id, char *buff)
Stores the specified id in the specified buffer which must be at least 8 bytes long.
const identifierType & id() const
Returns the element ID.
static byte calculateIdLength(identifierType id)
Returns the length of the specified id in byte.
float64 readFloat()
Reads the content of the element as float.
uint64 maxTotalSize() const
Returns maximum total size.
static constexpr uint32 maximumSizeLengthSupported()
Returns the maximum size length supported by the class in byte.
The exception that is thrown when the data to be parsed is truncated and therefore can not be parsed ...
Definition: exceptions.h:35
implementationType * parent()
Returns the parent of the element.
uint64 firstChildOffset() const
Returns the offset of the first child of the element.
Definition: ebmlelement.h:142
Contains all classes and functions of the TagInfo library.
Definition: exceptions.h:9
containerType & container()
Returns the related container.
uint32 sizeLength() const
Returns the length of the size denotation of the element in byte.