Tag Parser  6.2.2
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
ebmlelement.cpp
Go to the documentation of this file.
1 #include "./ebmlelement.h"
2 #include "./ebmlid.h"
3 #include "./matroskacontainer.h"
4 #include "./matroskaid.h"
5 
6 #include "../exceptions.h"
7 
8 #include <c++utilities/conversion/types.h>
9 #include <c++utilities/conversion/binaryconversion.h>
10 #include <c++utilities/io/binaryreader.h>
11 #include <c++utilities/io/binarywriter.h>
12 
13 #include <string>
14 #include <sstream>
15 #include <cstring>
16 #include <memory>
17 
18 using namespace std;
19 using namespace IoUtilities;
20 using namespace ConversionUtilities;
21 
22 
23 namespace Media {
24 
33 EbmlElement::EbmlElement(MatroskaContainer &container, uint64 startOffset) :
34  GenericFileElement<EbmlElement>(container, startOffset)
35 {}
36 
41  GenericFileElement<EbmlElement>(container, startOffset, maxSize)
42 {}
43 
48  GenericFileElement<EbmlElement>(parent, startOffset)
49 {}
50 
54 string EbmlElement::parsingContext() const
55 {
56  return "parsing header of EBML element " % idToString() % " at " + numberToString(startOffset());
57 }
58 
63 {
65  static const string context("parsing EBML element header");
66 
67  for(byte skipped = 0; /* TODO: add a sane limit here */; ++m_startOffset, --m_maxSize, ++skipped) {
68  // check whether max size is valid
69  if(maxTotalSize() < 2) {
70  addNotification(NotificationType::Critical, argsToString("The EBML element at ", startOffset(), " is truncated or does not exist."), context);
71  throw TruncatedDataException();
72  }
73  stream().seekg(startOffset());
74  // read ID
76  byte beg = stream().peek(), mask = 0x80;
77  m_idLength = 1;
79  ++m_idLength;
80  mask >>= 1;
81  }
83  if(!skipped) {
84  addNotification(NotificationType::Critical, "EBML ID length is not supported, trying to skip.", context);
85  }
86  continue; // try again
87  }
88  if(m_idLength > container().maxIdLength()) {
89  if(!skipped) {
90  addNotification(NotificationType::Critical, "EBML ID length is invalid.", context);
91  }
92  continue; // try again
93  }
95  m_id = BE::toUInt32(buf);
96 
97  // read size
98  beg = stream().peek(), mask = 0x80;
99  m_sizeLength = 1;
100  if(beg == 0xFF) {
101  // this indicates that the element size is unknown
102  // -> just assume the element takes the maximum available size
104  } else {
106  ++m_sizeLength;
107  mask >>= 1;
108  }
110  if(!skipped) {
111  addNotification(NotificationType::Critical, "EBML size length is not supported.", parsingContext());
112  }
113  continue; // try again
114  }
115  if(m_sizeLength > container().maxSizeLength()) {
116  if(!skipped) {
117  addNotification(NotificationType::Critical, "EBML size length is invalid.", parsingContext());
118  }
119  continue; // try again
120  }
121  // read size into buffer
122  memset(buf, 0, sizeof(dataSizeType)); // reset buffer
124  *(buf + (GenericFileElement<implementationType>::maximumSizeLengthSupported() - m_sizeLength)) ^= mask; // xor the first byte in buffer which has been read from the file with mask
125  m_dataSize = ConversionUtilities::BE::toUInt64(buf);
126  // check if element is truncated
127  if(totalSize() > maxTotalSize()) {
128  if(m_idLength + m_sizeLength > maxTotalSize()) { // header truncated
129  if(!skipped) {
130  addNotification(NotificationType::Critical, "EBML header seems to be truncated.", parsingContext());
131  }
132  continue; // try again
133  } else { // data truncated
134  addNotification(NotificationType::Warning, "Data of EBML element seems to be truncated; unable to parse siblings of that element.", parsingContext());
135  m_dataSize = maxTotalSize() - m_idLength - m_sizeLength; // using max size instead
136  }
137  }
138  }
139 
140  // check if there's a first child
141  const uint64 firstChildOffset = this->firstChildOffset();
142  if(firstChildOffset && firstChildOffset < totalSize()) {
143  m_firstChild.reset(new EbmlElement(static_cast<EbmlElement &>(*this), startOffset() + firstChildOffset));
144  } else {
145  m_firstChild.reset();
146  }
147 
148  // check if there's a sibling
149  if(totalSize() < maxTotalSize()) {
150  if(parent()) {
151  m_nextSibling.reset(new EbmlElement(*(parent()), startOffset() + totalSize()));
152  } else {
154  }
155  } else {
156  m_nextSibling.reset();
157  }
158 
159  // no critical errors occurred
160  // -> add a warning if bytes have been skipped
161  if(skipped) {
162  addNotification(NotificationType::Warning, numberToString<unsigned int>(skipped) + " bytes have been skipped", parsingContext());
163  }
164  // -> don't need another try, return here
165  return;
166  }
167 
168  // critical errors occurred and skipping some bytes wasn't successful
169  throw InvalidDataException();
170 }
171 
176 {
177  stream().seekg(dataOffset());
178  return reader().readString(dataSize());
179 }
180 
188 {
189  char buff[sizeof(uint64)] = {0};
190  int i = static_cast<int>(sizeof(buff)) - dataSize();
191  if(i < 0) {
192  i = 0;
193  }
194  stream().seekg(dataOffset(), ios_base::beg);
195  stream().read(buff + i, sizeof(buff) - i);
196  return BE::toUInt64(buff);
197 }
198 
204 {
205  stream().seekg(dataOffset());
206  switch(dataSize()) {
207  case sizeof(float32):
208  return reader().readFloat32BE();
209  case sizeof(float64):
210  return reader().readFloat64BE();
211  default:
212  return 0.0;
213  }
214 }
215 
221 {
222  if(id <= 0xFF) {
223  return 1;
224  } else if(id <= 0x7FFF) {
225  return 2;
226  } else if(id <= 0x3FFFFF) {
227  return 3;
228  } else if(id <= 0x1FFFFFFF) {
229  return 4;
230  } else {
231  throw InvalidDataException();
232  }
233 }
234 
240 {
241  if(size < 126) {
242  return 1;
243  } else if(size <= 16382ul) {
244  return 2;
245  } else if(size <= 2097150ul) {
246  return 3;
247  } else if(size <= 268435454ul) {
248  return 4;
249  } else if(size <= 34359738366ul) {
250  return 5;
251  } else if(size <= 4398046511102ul) {
252  return 6;
253  } else if(size <= 562949953421310ul) {
254  return 7;
255  } else if(size <= 72057594037927934ul) {
256  return 8;
257  } else {
258  throw InvalidDataException();
259  }
260 }
261 
269 {
270  if(id <= 0xFF) {
271  *buff = static_cast<byte>(id);
272  return 1;
273  } else if(id <= 0x7FFF) {
274  BE::getBytes(static_cast<uint16>(id), buff);
275  return 2;
276  } else if(id <= 0x3FFFFF) {
277  BE::getBytes(static_cast<uint32>(id << 0x8), buff);
278  return 3;
279  } else if(id <= 0x1FFFFFFF) {
280  BE::getBytes(static_cast<uint32>(id), buff);
281  return 4;
282  } else {
283  throw InvalidDataException();
284  }
285 }
286 
294 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff)
295 {
296  if(size < 126) {
297  *buff = static_cast<byte>(size | 0x80);
298  return 1;
299  } else if(size <= 16382ul) {
300  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
301  return 2;
302  } else if(size <= 2097150ul) {
303  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
304  return 3;
305  } else if(size <= 268435454ul) {
306  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
307  return 4;
308  } else if(size <= 34359738366ul) {
309  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
310  return 5;
311  } else if(size <= 4398046511102ul) {
312  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
313  return 6;
314  } else if(size <= 562949953421310ul) {
315  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
316  return 7;
317  } else if(size <= 72057594037927934ul) {
318  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
319  return 8;
320  }
321  throw InvalidDataException();
322 }
323 
332 byte EbmlElement::makeSizeDenotation(uint64 size, char *buff, byte minBytes)
333 {
334  if(minBytes <= 1 && size < 126) {
335  *buff = static_cast<byte>(size | 0x80);
336  return 1;
337  } else if(minBytes <= 2 && size <= 16382ul) {
338  BE::getBytes(static_cast<uint16>(size | 0x4000), buff);
339  return 2;
340  } else if(minBytes <= 3 && size <= 2097150ul) {
341  BE::getBytes(static_cast<uint32>((size | 0x200000) << 0x08), buff);
342  return 3;
343  } else if(minBytes <= 4 && size <= 268435454ul) {
344  BE::getBytes(static_cast<uint32>(size | 0x10000000), buff);
345  return 4;
346  } else if(minBytes <= 5 && size <= 34359738366ul) {
347  BE::getBytes(static_cast<uint64>((size | 0x800000000) << 0x18), buff);
348  return 5;
349  } else if(minBytes <= 6 && size <= 4398046511102ul) {
350  BE::getBytes(static_cast<uint64>((size | 0x40000000000) << 0x10), buff);
351  return 6;
352  } else if(minBytes <= 7 && size <= 562949953421310ul) {
353  BE::getBytes(static_cast<uint64>((size | 0x2000000000000) << 0x08), buff);
354  return 7;
355  } else if(minBytes <= 8 && size <= 72057594037927934ul) {
356  BE::getBytes(static_cast<uint64>(size | 0x100000000000000), buff);
357  return 8;
358  }
359  throw InvalidDataException();
360 }
361 
367 {
368  if(integer <= 0xFFul) {
369  return 1;
370  } else if(integer <= 0xFFFFul) {
371  return 2;
372  } else if(integer <= 0xFFFFFFul) {
373  return 3;
374  } else if(integer <= 0xFFFFFFFFul) {
375  return 4;
376  } else if(integer <= 0xFFFFFFFFFFul) {
377  return 5;
378  } else if(integer <= 0xFFFFFFFFFFFFul) {
379  return 6;
380  } else if(integer <= 0xFFFFFFFFFFFFFFul) {
381  return 7;
382  } else {
383  return 8;
384  }
385 }
386 
391 byte EbmlElement::makeUInteger(uint64 value, char *buff)
392 {
393  if(value <= 0xFFul) {
394  *buff = static_cast<char>(value);
395  return 1;
396  } else if(value <= 0xFFFFul) {
397  BE::getBytes(static_cast<uint16>(value), buff);
398  return 2;
399  } else if(value <= 0xFFFFFFul) {
400  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
401  return 3;
402  } else if(value <= 0xFFFFFFFFul) {
403  BE::getBytes(static_cast<uint32>(value), buff);
404  return 4;
405  } else if(value <= 0xFFFFFFFFFFul) {
406  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
407  return 5;
408  } else if(value <= 0xFFFFFFFFFFFFul) {
409  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
410  return 6;
411  } else if(value <= 0xFFFFFFFFFFFFFFul) {
412  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
413  return 7;
414  } else {
415  BE::getBytes(static_cast<uint64>(value), buff);
416  return 8;
417  }
418 }
419 
427 byte EbmlElement::makeUInteger(uint64 value, char *buff, byte minBytes)
428 {
429  if(minBytes <= 1 && value <= 0xFFul) {
430  *buff = static_cast<char>(value);
431  return 1;
432  } else if(minBytes <= 2 && value <= 0xFFFFul) {
433  BE::getBytes(static_cast<uint16>(value), buff);
434  return 2;
435  } else if(minBytes <= 3 && value <= 0xFFFFFFul) {
436  BE::getBytes(static_cast<uint32>(value << 0x08), buff);
437  return 3;
438  } else if(minBytes <= 4 && value <= 0xFFFFFFFFul) {
439  BE::getBytes(static_cast<uint32>(value), buff);
440  return 4;
441  } else if(minBytes <= 5 && value <= 0xFFFFFFFFFFul) {
442  BE::getBytes(static_cast<uint64>(value << 0x18), buff);
443  return 5;
444  } else if(minBytes <= 6 && value <= 0xFFFFFFFFFFFFul) {
445  BE::getBytes(static_cast<uint64>(value << 0x10), buff);
446  return 6;
447  } else if(minBytes <= 7 && value <= 0xFFFFFFFFFFFFFFul) {
448  BE::getBytes(static_cast<uint64>(value << 0x08), buff);
449  return 7;
450  } else {
451  BE::getBytes(static_cast<uint64>(value), buff);
452  return 8;
453  }
454 }
455 
462 void EbmlElement::makeSimpleElement(ostream &stream, identifierType id, uint64 content)
463 {
464  char buff1[8];
465  char buff2[8];
466  byte sizeLength = EbmlElement::makeId(id, buff1);
467  stream.write(buff1, sizeLength);
468  byte elementSize = EbmlElement::makeUInteger(content, buff2);
469  sizeLength = EbmlElement::makeSizeDenotation(elementSize, buff1);
470  stream.write(buff1, sizeLength);
471  stream.write(buff2, elementSize);
472 }
473 
480 void EbmlElement::makeSimpleElement(std::ostream &stream, GenericFileElement::identifierType id, const std::string &content)
481 {
482  char buff1[8];
483  byte sizeLength = EbmlElement::makeId(id, buff1);
484  stream.write(buff1, sizeLength);
485  sizeLength = EbmlElement::makeSizeDenotation(content.size(), buff1);
486  stream.write(buff1, sizeLength);
487  stream.write(content.c_str(), content.size());
488 }
489 
497 void EbmlElement::makeSimpleElement(ostream &stream, GenericFileElement::identifierType id, const char *data, std::size_t dataSize)
498 {
499  char buff1[8];
500  byte sizeLength = EbmlElement::makeId(id, buff1);
501  stream.write(buff1, sizeLength);
502  sizeLength = EbmlElement::makeSizeDenotation(dataSize, buff1);
503  stream.write(buff1, sizeLength);
504  stream.write(data, dataSize);
505 }
506 
507 }
508 
509 
510 
static constexpr uint32 maximumIdLengthSupported()
Returns the maximum id length supported by the class in byte.
uint64 startOffset() const
Returns the start offset in the related stream.
void invalidateStatus()
Invalidates the current status.
uint64 dataOffset() const
Returns the data offset of the element in the related stream.
std::iostream & stream()
Returns the related stream.
FileElementTraits< ImplementationType >::identifierType identifierType
Specifies the type used to store identifiers.
static byte calculateSizeDenotationLength(uint64 size)
Returns the length of the size denotation for the specified size in byte.
uint32 headerSize() const
Returns the header size of the element in byte.
Implementation of GenericContainer<MediaFileInfo, MatroskaTag, MatroskaTrack, EbmlElement>.
uint64 readUInteger()
Reads the content of the element as unsigned integer.
The EbmlElement class helps to parse EBML files such as Matroska files.
Definition: ebmlelement.h:50
IoUtilities::BinaryReader & reader()
Returns the related BinaryReader.
dataSizeType dataSize() const
Returns the data size of the element in byte.
static byte makeUInteger(uint64 value, char *buff)
Writes value to buff.
std::unique_ptr< implementationType > m_firstChild
EbmlElement(MatroskaContainer &container, uint64 startOffset)
Constructs a new top level element with the specified container at the specified startOffset.
Definition: ebmlelement.cpp:33
uint64 totalSize() const
Returns the total size of the element.
STL namespace.
void internalParse()
Parses the EBML element.
Definition: ebmlelement.cpp:62
void addNotification(const Notification &notification)
This protected method is meant to be called by the derived class to add a notification.
static void makeSimpleElement(std::ostream &stream, identifierType id, uint64 content)
Makes a simple EBML element.
FileElementTraits< EbmlElement >::dataSizeType dataSizeType
Specifies the type used to store data sizes.
static byte calculateUIntegerLength(uint64 integer)
Returns the length of the specified unsigned integer in byte.
std::unique_ptr< implementationType > m_nextSibling
static byte makeSizeDenotation(uint64 size, char *buff)
Makes the size denotation for the specified size and stores it to buff.
Contains utility classes helping to read and write streams.
The GenericFileElement class helps to parse binary files which consist of an arboreal element structure.
The exception that is thrown when the data to be parsed or to be made seems invalid and therefore can...
Definition: exceptions.h:27
std::string readString()
Reads the content of the element as string.
std::string idToString() const
Converts the specified EBML ID to a printable string.
Definition: ebmlelement.h:90
static byte makeId(identifierType id, char *buff)
Stores the specified id in the specified buffer which must be at least 8 bytes long.
const identifierType & id() const
Returns the element ID.
static byte calculateIdLength(identifierType id)
Returns the length of the specified id in byte.
float64 readFloat()
Reads the content of the element as float.
uint64 maxTotalSize() const
Returns maximum total size.
static constexpr uint32 maximumSizeLengthSupported()
Returns the maximum size length supported by the class in byte.
The exception that is thrown when the data to be parsed is truncated and therefore can not be parsed ...
Definition: exceptions.h:35
implementationType * parent()
Returns the parent of the element.
uint64 firstChildOffset() const
Returns the offset of the first child of the element.
Definition: ebmlelement.h:141
Contains all classes and functions of the TagInfo library.
Definition: exceptions.h:9
containerType & container()
Returns the related container.
uint32 sizeLength() const
Returns the length of the size denotation of the element in byte.