Tag Parser  10.0.1
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
matroskacontainer.cpp
Go to the documentation of this file.
1 #include "./matroskacontainer.h"
2 #include "./ebmlid.h"
3 #include "./matroskacues.h"
5 #include "./matroskaid.h"
6 #include "./matroskaseekinfo.h"
7 
8 #include "../backuphelper.h"
9 #include "../exceptions.h"
10 #include "../mediafileinfo.h"
11 
12 #include "resources/config.h"
13 
14 #include <c++utilities/conversion/stringbuilder.h>
15 #include <c++utilities/conversion/stringconversion.h>
16 
17 #include <unistd.h>
18 
19 #include <chrono>
20 #include <functional>
21 #include <initializer_list>
22 #include <limits>
23 #include <memory>
24 #include <random>
25 #include <unordered_set>
26 
27 using namespace std;
28 using namespace std::placeholders;
29 using namespace CppUtilities;
30 
31 namespace TagParser {
32 
// Definition of the static member m_maxFullParseSize with its default of 0x3200000 bytes (50 MiB).
// The header parsing code in this file compares the file size against this value when deciding
// whether parsing may stop at the first "Cluster"-element.
38 std::uint64_t MatroskaContainer::m_maxFullParseSize = 0x3200000;
39 
// Constructs a MatroskaContainer for the specified fileInfo at the specified startOffset.
// Sets the maximum ID length to 4, the maximum size length to 8 and the segment count to 0.
// The version, read version and doctype version start at 1 and the doctype at "matroska".
// NOTE(review): the embedded listing numbering jumps 43 -> 45 and 52 -> 54, so two lines are not
// visible here (presumably the base class initializer and one more assignment) - confirm against the real source.
43 MatroskaContainer::MatroskaContainer(MediaFileInfo &fileInfo, std::uint64_t startOffset)
45  , m_maxIdLength(4)
46  , m_maxSizeLength(8)
47  , m_segmentCount(0)
48 {
49  m_version = 1;
50  m_readVersion = 1;
51  m_doctype = "matroska";
52  m_doctypeVersion = 1;
54 }
55 
// Empty function body.
// NOTE(review): the declaration line (embedded listing number 56) is not visible here;
// presumably this is the destructor - confirm against the real source.
57 {
58 }
59 
// Restores the same defaults the constructor assigns (ID/size length limits, versions, doctype),
// clears the cached element lists, seek infos, edition entries and attachments and zeroes the segment count.
// NOTE(review): the signature line and the lines with embedded numbers 62 and 69 are not visible in this
// listing (presumably a call to the base class and one more assignment) - confirm against the real source.
61 {
63  m_maxIdLength = 4;
64  m_maxSizeLength = 8;
65  m_version = 1;
66  m_readVersion = 1;
67  m_doctype = "matroska";
68  m_doctypeVersion = 1;
70  m_tracksElements.clear();
71  m_segmentInfoElements.clear();
72  m_tagsElements.clear();
73  m_chaptersElements.clear();
74  m_attachmentsElements.clear();
75  m_seekInfos.clear();
76  m_editionEntries.clear();
77  m_attachments.clear();
78  m_segmentCount = 0;
79 }
80 
// Validates the file index ("Cues"-elements) and the "Position"/"PrevSize" bookkeeping of clusters.
// All findings are appended to diag; progress.stopIfAborted() is called while iterating.
// Does nothing but emit the final information message if m_firstElement is not set.
// NOTE(review): the signature line and several "case" labels of this body are not visible in this listing
// (the embedded numbering has gaps, e.g. 118 -> 120, 124 -> 126, 133 -> 135, 253 -> 255). The comments
// below only describe what the remaining lines show.
86 {
87  static const string context("validating Matroska file index (cues)");
88  bool cuesElementsFound = false;
89  if (m_firstElement) {
90  unordered_set<EbmlElement::IdentifierType> ids;
91  bool cueTimeFound = false, cueTrackPositionsFound = false;
92  unique_ptr<EbmlElement> clusterElement;
// currentOffset sums up the total size of the segments already processed; prevClusterSize holds the
// total size of the most recently visited cluster
93  std::uint64_t pos, prevClusterSize = 0, currentOffset = 0;
94  // iterate through all segments
95  for (EbmlElement *segmentElement = m_firstElement->siblingById(MatroskaIds::Segment, diag); segmentElement;
96  segmentElement = segmentElement->siblingById(MatroskaIds::Segment, diag)) {
97  segmentElement->parse(diag);
98  // iterate through all child elements of the segment (only "Cues"- and "Cluster"-elements are relevant for this method)
99  for (EbmlElement *segmentChildElement = segmentElement->firstChild(); segmentChildElement;
100  segmentChildElement = segmentChildElement->nextSibling()) {
101  progress.stopIfAborted();
102  segmentChildElement->parse(diag);
103  switch (segmentChildElement->id()) {
104  case EbmlIds::Void:
105  case EbmlIds::Crc32:
106  break;
107  case MatroskaIds::Cues:
108  cuesElementsFound = true;
109  // parse children of "Cues"-element ("CuePoint"-elements)
110  for (EbmlElement *cuePointElement = segmentChildElement->firstChild(); cuePointElement;
111  cuePointElement = cuePointElement->nextSibling()) {
112  progress.stopIfAborted();
113  cuePointElement->parse(diag);
114  cueTimeFound = cueTrackPositionsFound = false; // to validate quantity of these elements
115  switch (cuePointElement->id()) {
116  case EbmlIds::Void:
117  case EbmlIds::Crc32:
118  break;
120  // parse children of "CuePoint"-element
121  for (EbmlElement *cuePointChildElement = cuePointElement->firstChild(); cuePointChildElement;
122  cuePointChildElement = cuePointChildElement->nextSibling()) {
123  cuePointChildElement->parse(diag);
124  switch (cuePointChildElement->id()) {
126  // validate uniqueness
127  if (cueTimeFound) {
128  diag.emplace_back(
129  DiagLevel::Warning, "\"CuePoint\"-element contains multiple \"CueTime\" elements.", context);
130  } else {
131  cueTimeFound = true;
132  }
133  break;
// start of the handling of a "CueTrackPositions"-element (judging by the messages below): reset the
// per-element state before inspecting its children
135  cueTrackPositionsFound = true;
136  ids.clear();
137  clusterElement.reset();
138  for (EbmlElement *subElement = cuePointChildElement->firstChild(); subElement;
139  subElement = subElement->nextSibling()) {
140  subElement->parse(diag);
// first switch: bookkeeping of the IDs seen so far (uniqueness / unknown elements)
141  switch (subElement->id()) {
148  // validate uniqueness
149  if (ids.count(subElement->id())) {
150  diag.emplace_back(DiagLevel::Warning,
151  "\"CueTrackPositions\"-element contains multiple \"" % subElement->idToString() + "\" elements.",
152  context);
153  } else {
154  ids.insert(subElement->id());
155  }
156  break;
157  case EbmlIds::Crc32:
158  case EbmlIds::Void:
160  break;
161  default:
162  diag.emplace_back(DiagLevel::Warning,
163  "\"CueTrackPositions\"-element contains unknown element \"" % subElement->idToString() + "\".",
164  context);
165  }
// second switch: validation of the values of the individual sub elements
166  switch (subElement->id()) {
167  case EbmlIds::Void:
168  case EbmlIds::Crc32:
170  break;
172  // validate "Cluster" position denoted by "CueClusterPosition"-element
173  clusterElement = make_unique<EbmlElement>(
174  *this, segmentElement->dataOffset() + subElement->readUInteger() - currentOffset);
175  try {
176  clusterElement->parse(diag);
177  if (clusterElement->id() != MatroskaIds::Cluster) {
178  diag.emplace_back(DiagLevel::Critical,
179  "\"CueClusterPosition\" element at " % numberToString(subElement->startOffset())
180  + " does not point to \"Cluster\"-element (points to "
181  + numberToString(clusterElement->startOffset()) + ").",
182  context);
183  }
// a failure to parse the referenced element is deliberately ignored here
184  } catch (const Failure &) {
185  }
186  break;
188  // read "Block" position denoted by "CueRelativePosition"-element (validate later since the "Cluster"-element is needed to validate)
189  pos = subElement->readUInteger();
190  break;
192  break;
194  break;
196  break;
198  break;
199  default:;
200  }
201  }
202  // validate existence of mandatory elements
203  if (!ids.count(MatroskaIds::CueTrack)) {
204  diag.emplace_back(DiagLevel::Warning,
205  "\"CueTrackPositions\"-element does not contain mandatory element \"CueTrack\".", context);
206  }
207  if (!clusterElement) {
208  diag.emplace_back(DiagLevel::Warning,
209  "\"CueTrackPositions\"-element does not contain mandatory element \"CueClusterPosition\".", context);
210  } else if (ids.count(MatroskaIds::CueRelativePosition)) {
211  // validate "Block" position denoted by "CueRelativePosition"-element
212  EbmlElement referenceElement(*this, clusterElement->dataOffset() + pos);
213  try {
214  referenceElement.parse(diag);
215  switch (referenceElement.id()) {
217  case MatroskaIds::Block:
219  break;
220  default:
221  diag.emplace_back(DiagLevel::Critical,
222  "\"CueRelativePosition\" element does not point to \"Block\"-, \"BlockGroup\", or "
223  "\"SimpleBlock\"-element (points to "
224  % numberToString(referenceElement.startOffset())
225  + ").",
226  context);
227  }
// a failure to parse the referenced element is deliberately ignored here
228  } catch (const Failure &) {
229  }
230  }
231  break;
232  case EbmlIds::Crc32:
233  case EbmlIds::Void:
234  break;
235  default:
// NOTE(review): this reports the ID of cuePointElement although the switch inspects
// cuePointChildElement - looks like the child's ID was intended; confirm.
236  diag.emplace_back(DiagLevel::Warning,
237  "\"CuePoint\"-element contains unknown element \"" % cuePointElement->idToString() + "\".", context);
238  }
239  }
240  // validate existence of mandatory elements
241  if (!cueTimeFound) {
242  diag.emplace_back(
243  DiagLevel::Warning, "\"CuePoint\"-element does not contain mandatory element \"CueTime\".", context);
244  }
// NOTE(review): the condition checks cueTrackPositionsFound but the message names "CueClusterPosition" -
// presumably "CueTrackPositions" was meant; confirm.
245  if (!cueTrackPositionsFound) {
246  diag.emplace_back(
247  DiagLevel::Warning, "\"CuePoint\"-element does not contain mandatory element \"CueClusterPosition\".", context);
248  }
249  break;
250  default:;
251  }
252  }
253  break;
255  // parse children of "Cluster"-element
256  for (EbmlElement *clusterElementChild = segmentChildElement->firstChild(); clusterElementChild;
257  clusterElementChild = clusterElementChild->nextSibling()) {
258  clusterElementChild->parse(diag);
259  switch (clusterElementChild->id()) {
260  case EbmlIds::Void:
261  case EbmlIds::Crc32:
262  break;
264  // validate position
265  if ((pos = clusterElementChild->readUInteger()) > 0
266  && (segmentChildElement->startOffset() - segmentElement->dataOffset() + currentOffset) != pos) {
267  diag.emplace_back(DiagLevel::Critical,
268  argsToString("\"Position\"-element at ", clusterElementChild->startOffset(), " points to ", pos,
269  " which is not the offset of the containing \"Cluster\"-element."),
270  context);
271  }
272  break;
274  // validate prev size
275  if ((pos = clusterElementChild->readUInteger()) != prevClusterSize) {
276  diag.emplace_back(DiagLevel::Critical,
277  argsToString("\"PrevSize\"-element at ", clusterElementChild->startOffset(), " should be ", prevClusterSize,
278  " but is ", pos, "."),
279  context);
280  }
281  break;
282  default:;
283  }
284  }
// remember the size of this cluster for the "PrevSize" check of the next one
285  prevClusterSize = segmentChildElement->totalSize();
286  break;
287  default:;
288  }
289  }
290  currentOffset += segmentElement->totalSize();
291  }
292  }
293  // add a warning when no index could be found
294  if (!cuesElementsFound) {
295  diag.emplace_back(DiagLevel::Information, "No \"Cues\"-elements (index) found.", context);
296  }
297 }
298 
302 bool sameOffset(std::uint64_t offset, const EbmlElement *element)
303 {
304  return element->startOffset() == offset;
305 }
306 
311 inline bool excludesOffset(const vector<EbmlElement *> &elements, std::uint64_t offset)
312 {
313  return find_if(elements.cbegin(), elements.cend(), std::bind(sameOffset, offset, _1)) == elements.cend();
314 }
315 
317 {
318  for (const auto &entry : m_editionEntries) {
319  const auto &chapters = entry->chapters();
320  if (index < chapters.size()) {
321  return chapters[index].get();
322  } else {
323  index -= chapters.size();
324  }
325  }
326  return nullptr;
327 }
328 
330 {
331  size_t count = 0;
332  for (const auto &entry : m_editionEntries) {
333  count += entry->chapters().size();
334  }
335  return count;
336 }
337 
339 {
340  // generate unique ID
341  static const auto randomEngine(
342  default_random_engine(static_cast<default_random_engine::result_type>(chrono::system_clock::now().time_since_epoch().count())));
343  std::uint64_t attachmentId;
344  auto dice(bind(uniform_int_distribution<decltype(attachmentId)>(), randomEngine));
345  std::uint8_t tries = 0;
346 generateRandomId:
347  attachmentId = dice();
348  if (tries < 0xFF) {
349  for (const auto &attachment : m_attachments) {
350  if (attachmentId == attachment->id()) {
351  ++tries;
352  goto generateRandomId;
353  }
354  }
355  }
356  // create new attachment, set ID
357  m_attachments.emplace_back(make_unique<MatroskaAttachment>());
358  auto &attachment = m_attachments.back();
359  attachment->setId(attachmentId);
360  return attachment.get();
361 }
362 
// Determines the position of the element with the ID elementId within the (single) segment of the file.
// Returns ElementPosition::Keep if there is no first element, if the file does not contain exactly one
// segment, if no "Segment"-element can be found or if nothing conclusive is found.
// NOTE(review): the signature line and the statements with embedded numbers 378 and 383 are not visible
// in this listing; presumably those are the returns for "found before the first cluster" and "referenced
// by a seek info once the first cluster has been reached" - confirm against the real source.
368 {
369  if (!m_firstElement || m_segmentCount != 1) {
370  return ElementPosition::Keep;
371  }
372  const auto *const segmentElement = m_firstElement->siblingByIdIncludingThis(MatroskaIds::Segment, diag);
373  if (!segmentElement) {
374  return ElementPosition::Keep;
375  }
376  for (const EbmlElement *childElement = segmentElement->firstChild(); childElement; childElement = childElement->nextSibling()) {
377  if (childElement->id() == elementId) {
379  } else if (childElement->id() == MatroskaIds::Cluster) {
// the first cluster has been reached: consult the seek infos for an entry denoting elementId
380  for (const auto &seekInfo : m_seekInfos) {
381  for (const auto &info : seekInfo->info()) {
382  if (info.first == elementId) {
384  }
385  }
386  }
// iteration never continues past the first cluster
387  return ElementPosition::Keep;
388  }
389  }
390  return ElementPosition::Keep;
391 }
392 
// NOTE(review): neither the signature line nor the only statement of this body (embedded listing
// number 395) is visible in this listing, so nothing can be said about its behavior from here.
394 {
396 }
397 
// NOTE(review): neither the signature line nor the only statement of this body (embedded listing
// number 400) is visible in this listing, so nothing can be said about its behavior from here.
399 {
401 }
402 
// Parses the top-level structure of the file: reads the values of the EBML header and collects the
// "Tracks"-, "Tags"-, segment info-, chapter- and attachment-related elements of every segment, either
// by walking the children of the segment or by following the offsets of parsed seek infos.
// Problems are appended to diag; the progress argument is not used. Finally parseSegmentInfo() is invoked.
// NOTE(review): the signature line and many "case" labels of this body are not visible in this listing
// (gaps in the embedded numbering, e.g. 431 -> 433, 470 -> 472, 476 -> 478, 505 -> 507). The comments
// below only describe what the remaining lines show.
404 {
405  CPP_UTILITIES_UNUSED(progress)
406 
407  static const string context("parsing header of Matroska container");
408  // reset old results
// NOTE(review): m_chaptersElements and m_attachmentsElements are not cleared here although
// m_firstElement is re-created - confirm whether that is intentional.
409  m_firstElement = make_unique<EbmlElement>(*this, startOffset());
410  m_additionalElements.clear();
411  m_tracksElements.clear();
412  m_segmentInfoElements.clear();
413  m_tagsElements.clear();
414  m_seekInfos.clear();
415  m_segmentCount = 0;
// currentOffset sums up the total size of the segments already processed; seekInfosIndex is the index of
// the first seek info which has not been evaluated yet
416  std::uint64_t currentOffset = 0;
417  vector<MatroskaSeekInfo>::difference_type seekInfosIndex = 0;
418 
419  // loop through all top level elements
420  for (EbmlElement *topLevelElement = m_firstElement.get(); topLevelElement; topLevelElement = topLevelElement->nextSibling()) {
421  try {
422  topLevelElement->parse(diag);
423  switch (topLevelElement->id()) {
424  case EbmlIds::Header:
425  for (EbmlElement *subElement = topLevelElement->firstChild(); subElement; subElement = subElement->nextSibling()) {
426  try {
427  subElement->parse(diag);
428  switch (subElement->id()) {
429  case EbmlIds::Version:
430  m_version = subElement->readUInteger();
431  break;
433  m_readVersion = subElement->readUInteger();
434  break;
435  case EbmlIds::DocType:
436  m_doctype = subElement->readString();
437  break;
439  m_doctypeVersion = subElement->readUInteger();
440  break;
442  m_doctypeReadVersion = subElement->readUInteger();
443  break;
445  m_maxIdLength = subElement->readUInteger();
446  if (m_maxIdLength > EbmlElement::maximumIdLengthSupported()) {
447  diag.emplace_back(DiagLevel::Critical,
448  argsToString("Maximum EBML element ID length greater than ", EbmlElement::maximumIdLengthSupported(),
449  " bytes is not supported."),
450  context);
451  throw InvalidDataException();
452  }
453  break;
455  m_maxSizeLength = subElement->readUInteger();
456  if (m_maxSizeLength > EbmlElement::maximumSizeLengthSupported()) {
457  diag.emplace_back(DiagLevel::Critical,
458  argsToString("Maximum EBML element size length greater than ", EbmlElement::maximumSizeLengthSupported(),
459  " bytes is not supported."),
460  context);
461  throw InvalidDataException();
462  }
463  break;
464  }
465  } catch (const Failure &) {
466  diag.emplace_back(DiagLevel::Critical, "Unable to parse all children of EBML header.", context);
467  break;
468  }
469  }
470  break;
472  ++m_segmentCount;
473  for (EbmlElement *subElement = topLevelElement->firstChild(); subElement; subElement = subElement->nextSibling()) {
474  try {
475  subElement->parse(diag);
476  switch (subElement->id()) {
478  m_seekInfos.emplace_back(make_unique<MatroskaSeekInfo>());
479  m_seekInfos.back()->parse(subElement, diag);
480  break;
481  case MatroskaIds::Tracks:
// excludesOffset() prevents registering the same element twice
482  if (excludesOffset(m_tracksElements, subElement->startOffset())) {
483  m_tracksElements.push_back(subElement);
484  }
485  break;
487  if (excludesOffset(m_segmentInfoElements, subElement->startOffset())) {
488  m_segmentInfoElements.push_back(subElement);
489  }
490  break;
491  case MatroskaIds::Tags:
492  if (excludesOffset(m_tagsElements, subElement->startOffset())) {
493  m_tagsElements.push_back(subElement);
494  }
495  break;
497  if (excludesOffset(m_chaptersElements, subElement->startOffset())) {
498  m_chaptersElements.push_back(subElement);
499  }
500  break;
502  if (excludesOffset(m_attachmentsElements, subElement->startOffset())) {
503  m_attachmentsElements.push_back(subElement);
504  }
505  break;
507  // stop as soon as the first cluster has been reached if all relevant information has been gathered
508  // -> take elements from seek tables within this segment into account
509  for (auto i = m_seekInfos.cbegin() + seekInfosIndex, end = m_seekInfos.cend(); i != end; ++i, ++seekInfosIndex) {
510  for (const auto &infoPair : (*i)->info()) {
511  std::uint64_t offset = currentOffset + topLevelElement->dataOffset() + infoPair.second;
512  if (offset >= fileInfo().size()) {
513  diag.emplace_back(DiagLevel::Critical,
514  argsToString("Offset (", offset, ") denoted by \"SeekHead\" element is invalid."), context);
515  } else {
// elements found via a seek info are owned by m_additionalElements; the element lists only
// store non-owning pointers
516  auto element = make_unique<EbmlElement>(*this, offset);
517  try {
518  element->parse(diag);
519  if (element->id() != infoPair.first) {
520  diag.emplace_back(DiagLevel::Critical,
521  argsToString("ID of element ", element->idToString(), " at ", offset,
522  " does not match the ID denoted in the \"SeekHead\" element (0x",
523  numberToString(infoPair.first, 16u), ")."),
524  context);
525  }
526  switch (element->id()) {
528  if (excludesOffset(m_segmentInfoElements, offset)) {
529  m_additionalElements.emplace_back(move(element));
530  m_segmentInfoElements.emplace_back(m_additionalElements.back().get());
531  }
532  break;
533  case MatroskaIds::Tracks:
534  if (excludesOffset(m_tracksElements, offset)) {
535  m_additionalElements.emplace_back(move(element));
536  m_tracksElements.emplace_back(m_additionalElements.back().get());
537  }
538  break;
539  case MatroskaIds::Tags:
540  if (excludesOffset(m_tagsElements, offset)) {
541  m_additionalElements.emplace_back(move(element));
542  m_tagsElements.emplace_back(m_additionalElements.back().get());
543  }
544  break;
546  if (excludesOffset(m_chaptersElements, offset)) {
547  m_additionalElements.emplace_back(move(element));
548  m_chaptersElements.emplace_back(m_additionalElements.back().get());
549  }
550  break;
552  if (excludesOffset(m_attachmentsElements, offset)) {
553  m_additionalElements.emplace_back(move(element));
554  m_attachmentsElements.emplace_back(m_additionalElements.back().get());
555  }
556  break;
557  default:;
558  }
559  } catch (const Failure &) {
560  diag.emplace_back(DiagLevel::Critical,
561  argsToString("Can not parse element at ", offset, " (denoted using \"SeekHead\" element)."), context);
562  }
563  }
564  }
565  }
566  // -> stop if tracks and tags have been found or the file exceeds the max. size to fully process
// (in either case a segment info element must have been found as well)
567  if (((!m_tracksElements.empty() && !m_tagsElements.empty()) || fileInfo().size() > m_maxFullParseSize)
568  && !m_segmentInfoElements.empty()) {
569  goto finish;
570  }
571  break;
572  }
573  } catch (const Failure &) {
574  diag.emplace_back(DiagLevel::Critical, "Unable to parse all children of \"Segment\"-element.", context);
575  break;
576  }
577  }
578  currentOffset += topLevelElement->totalSize();
579  break;
580  default:;
581  }
582  } catch (const Failure &) {
583  diag.emplace_back(
584  DiagLevel::Critical, argsToString("Unable to parse top-level element at ", topLevelElement->startOffset(), '.'), context);
585  break;
586  }
587  }
588 
589  // finally parse the "Info"-element and fetch "EditionEntry"-elements
590 finish:
591  try {
592  parseSegmentInfo(diag);
593  } catch (const Failure &) {
594  diag.emplace_back(DiagLevel::Critical, "Unable to parse EBML (segment) \"Info\"-element.", context);
595  }
596 }
597 
// Parses all elements in m_segmentInfoElements: appends one title per element to m_titles (an empty
// string if the element has no "Title"-element) and sets m_duration to the sum of the durations of all
// elements. Parsing problems of the elements themselves are reported via diag.
// Throws NoDataFoundException if m_segmentInfoElements is empty.
// NOTE(review): the "case" labels with embedded numbers 626 and 629 are not visible in this listing;
// judging by the assigned variables they presumably match the duration and the time scale element.
// NOTE(review): m_titles is only appended to here, never cleared - confirm callers reset it beforehand.
607 void MatroskaContainer::parseSegmentInfo(Diagnostics &diag)
608 {
609  if (m_segmentInfoElements.empty()) {
610  throw NoDataFoundException();
611  }
612  m_duration = TimeSpan();
613  for (EbmlElement *element : m_segmentInfoElements) {
614  element->parse(diag);
615  EbmlElement *subElement = element->firstChild();
616  double rawDuration = 0.0;
// default time scale used when the element does not specify one
617  std::uint64_t timeScale = 1000000;
618  bool hasTitle = false;
619  while (subElement) {
620  subElement->parse(diag);
621  switch (subElement->id()) {
622  case MatroskaIds::Title:
623  m_titles.emplace_back(subElement->readString());
624  hasTitle = true;
625  break;
627  rawDuration = subElement->readFloat();
628  break;
630  timeScale = subElement->readUInteger();
631  break;
632  }
633  subElement = subElement->nextSibling();
634  }
635  // add empty string as title for segment if no
636  // "Title"-element has been specified
637  if (!hasTitle) {
638  m_titles.emplace_back();
639  }
// convert to seconds: raw duration times time scale, divided by 10^9
640  if (rawDuration > 0.0) {
641  m_duration += TimeSpan::fromSeconds(rawDuration * static_cast<double>(timeScale) / 1000000000.0);
642  }
643  }
644 }
645 
// Iterates over all tracks; returns immediately if there are no tracks or no tags.
// NOTE(review): the statement inside the loop (embedded listing number 657) is not visible in this
// listing; presumably it lets each track read its statistics from the tags - confirm against the real source.
651 void MatroskaContainer::readTrackStatisticsFromTags(Diagnostics &diag)
652 {
653  if (tracks().empty() || tags().empty()) {
654  return;
655  }
656  for (const auto &track : tracks()) {
658  }
659 }
660 
662 {
663  CPP_UTILITIES_UNUSED(progress)
664 
665  static const string context("parsing tags of Matroska container");
666  for (EbmlElement *element : m_tagsElements) {
667  try {
668  element->parse(diag);
669  for (EbmlElement *subElement = element->firstChild(); subElement; subElement = subElement->nextSibling()) {
670  subElement->parse(diag);
671  switch (subElement->id()) {
672  case MatroskaIds::Tag:
673  m_tags.emplace_back(make_unique<MatroskaTag>());
674  try {
675  m_tags.back()->parse(*subElement, diag);
676  } catch (const NoDataFoundException &) {
677  m_tags.pop_back();
678  } catch (const Failure &) {
679  diag.emplace_back(DiagLevel::Critical, argsToString("Unable to parse tag ", m_tags.size(), '.'), context);
680  }
681  break;
682  case EbmlIds::Crc32:
683  case EbmlIds::Void:
684  break;
685  default:
686  diag.emplace_back(DiagLevel::Warning, "\"Tags\"-element contains unknown child. It will be ignored.", context);
687  }
688  }
689  } catch (const Failure &) {
690  diag.emplace_back(DiagLevel::Critical, "Element structure seems to be invalid.", context);
691  readTrackStatisticsFromTags(diag);
692  throw;
693  }
694  }
695  readTrackStatisticsFromTags(diag);
696 }
697 
// Parses the children of all elements in m_tracksElements and appends the resulting tracks to m_tracks.
// Tracks whose header contains no data are dropped; other failures of a single track are reported via diag.
// A failure regarding the element structure is reported and re-thrown after readTrackStatisticsFromTags()
// has been invoked for the tracks parsed so far.
// NOTE(review): the signature line and the "case" label with embedded number 707 are not visible in this
// listing (presumably the label of the track entry element) - confirm against the real source.
699 {
700  static const string context("parsing tracks of Matroska container");
701  for (EbmlElement *element : m_tracksElements) {
702  try {
703  element->parse(diag);
704  for (EbmlElement *subElement = element->firstChild(); subElement; subElement = subElement->nextSibling()) {
705  subElement->parse(diag);
706  switch (subElement->id()) {
708  m_tracks.emplace_back(make_unique<MatroskaTrack>(*subElement));
709  try {
710  m_tracks.back()->parseHeader(diag, progress);
711  } catch (const NoDataFoundException &) {
// discard tracks which do not contain any data
712  m_tracks.pop_back();
713  } catch (const Failure &) {
714  diag.emplace_back(DiagLevel::Critical, argsToString("Unable to parse track ", m_tracks.size(), '.'), context);
715  }
716  break;
717  case EbmlIds::Crc32:
718  case EbmlIds::Void:
719  break;
720  default:
721  diag.emplace_back(DiagLevel::Warning,
722  "\"Tracks\"-element contains unknown child element \"" % subElement->idToString() + "\". It will be ignored.", context);
723  }
724  }
725  } catch (const Failure &) {
726  diag.emplace_back(DiagLevel::Critical, "Element structure seems to be invalid.", context);
727  readTrackStatisticsFromTags(diag);
728  throw;
729  }
730  }
731  readTrackStatisticsFromTags(diag);
732 }
733 
// Parses the children of all elements in m_chaptersElements and appends the resulting edition entries
// to m_editionEntries. Entries without data are dropped; other failures of a single entry are reported
// via diag. A failure regarding the element structure is reported and re-thrown.
// NOTE(review): the signature line and the "case" label with embedded number 743 are not visible in this
// listing (presumably the label of the edition entry element) - confirm against the real source.
735 {
736  static const string context("parsing editions/chapters of Matroska container");
737  for (EbmlElement *element : m_chaptersElements) {
738  try {
739  element->parse(diag);
740  for (EbmlElement *subElement = element->firstChild(); subElement; subElement = subElement->nextSibling()) {
741  subElement->parse(diag);
742  switch (subElement->id()) {
744  m_editionEntries.emplace_back(make_unique<MatroskaEditionEntry>(subElement));
745  try {
746  m_editionEntries.back()->parseNested(diag, progress);
747  } catch (const NoDataFoundException &) {
// discard edition entries which do not contain any data
748  m_editionEntries.pop_back();
749  } catch (const Failure &) {
750  diag.emplace_back(DiagLevel::Critical, argsToString("Unable to parse edition entry ", m_editionEntries.size(), '.'), context);
751  }
752  break;
753  case EbmlIds::Crc32:
754  case EbmlIds::Void:
755  break;
756  default:
757  diag.emplace_back(DiagLevel::Warning,
758  "\"Chapters\"-element contains unknown child element \"" % subElement->idToString() + "\". It will be ignored.", context);
759  }
760  }
761  } catch (const Failure &) {
762  diag.emplace_back(DiagLevel::Critical, "Element structure seems to be invalid.", context);
763  throw;
764  }
765  }
766 }
767 
// Parses the children of all elements in m_attachmentsElements and appends the resulting attachments to
// m_attachments. Attachments without data are dropped; other failures of a single attachment are reported
// via diag. A failure regarding the element structure is reported and re-thrown. The progress argument
// is not used.
// NOTE(review): the signature line and the "case" label with embedded number 779 are not visible in this
// listing (presumably the label of the attached file element) - confirm against the real source.
769 {
770  CPP_UTILITIES_UNUSED(progress)
771 
772  static const string context("parsing attachments of Matroska container");
773  for (EbmlElement *element : m_attachmentsElements) {
774  try {
775  element->parse(diag);
776  for (EbmlElement *subElement = element->firstChild(); subElement; subElement = subElement->nextSibling()) {
777  subElement->parse(diag);
778  switch (subElement->id()) {
780  m_attachments.emplace_back(make_unique<MatroskaAttachment>());
781  try {
782  m_attachments.back()->parse(subElement, diag);
783  } catch (const NoDataFoundException &) {
// discard attachments which do not contain any data
784  m_attachments.pop_back();
785  } catch (const Failure &) {
786  diag.emplace_back(DiagLevel::Critical, argsToString("Unable to parse attached file ", m_attachments.size(), '.'), context);
787  }
788  break;
789  case EbmlIds::Crc32:
790  case EbmlIds::Void:
791  break;
792  default:
793  diag.emplace_back(DiagLevel::Warning,
794  "\"Attachments\"-element contains unknown child element \"" % subElement->idToString() + "\". It will be ignored.", context);
795  }
796  }
797  } catch (const Failure &) {
798  diag.emplace_back(DiagLevel::Critical, "Element structure seems to be invalid.", context);
799  throw;
800  }
801  }
802 }
803 
// Per-segment bookkeeping used by the code that makes the file (one instance per segment is allocated
// there via a vector<SegmentData>). The visible part of the constructor zero-initializes the members.
// NOTE(review): the constructor header, the declarations of some members referenced in the initializer
// list (cuesElement, firstClusterElement) and the initializer with embedded number 818 are not visible in
// this listing; the member descriptions below are derived from the names only - confirm against the real source.
805 struct SegmentData {
808  : hasCrc32(false)
809  , cuesElement(nullptr)
810  , infoDataSize(0)
811  , firstClusterElement(nullptr)
812  , clusterEndOffset(0)
813  , startOffset(0)
814  , newPadding(0)
815  , totalDataSize(0)
816  , totalSize(0)
817  , newDataOffset(0)
819  {
820  }
821 
// presumably whether the segment contains a CRC-32 element
823  bool hasCrc32;
// presumably the data size of the segment info element
831  std::uint64_t infoDataSize;
// presumably the sizes of the clusters of the segment
833  vector<std::uint64_t> clusterSizes;
// presumably the offset at which the clusters of the segment end
837  std::uint64_t clusterEndOffset;
// presumably the start offset of the segment
839  std::uint64_t startOffset;
// presumably the padding to be written for the segment
841  std::uint64_t newPadding;
// presumably the data size of the segment (without ID and size denotation)
843  std::uint64_t totalDataSize;
// presumably the total size of the segment (including ID and size denotation)
845  std::uint64_t totalSize;
// presumably the data offset of the segment in the file to be written
847  std::uint64_t newDataOffset;
// presumably the length of the size denotation of the segment; its initializer is not visible here
849  std::uint8_t sizeDenotationLength;
850 };
851 
853 {
854  static const string context("making Matroska container");
855  progress.updateStep("Calculating element sizes ...");
856 
857  // basic validation of original file
858  if (!isHeaderParsed()) {
859  diag.emplace_back(DiagLevel::Critical, "The header has not been parsed yet.", context);
860  throw InvalidDataException();
861  }
862  switch (fileInfo().attachmentsParsingStatus()) {
863  case ParsingStatus::Ok:
865  break;
866  default:
867  diag.emplace_back(DiagLevel::Critical, "Attachments have to be parsed without critical errors before changes can be applied.", context);
868  throw InvalidDataException();
869  }
870 
871  // define variables for parsing the elements of the original file
872  EbmlElement *level0Element = firstElement();
873  if (!level0Element) {
874  diag.emplace_back(DiagLevel::Critical, "No EBML elements could be found.", context);
875  throw InvalidDataException();
876  }
877  EbmlElement *level1Element, *level2Element;
878 
879  // define variables needed for precalculation of "Tags"- and "Attachments"-element
880  vector<MatroskaTagMaker> tagMaker;
881  tagMaker.reserve(tags().size());
882  std::uint64_t tagElementsSize = 0;
883  std::uint64_t tagsSize;
884  vector<MatroskaAttachmentMaker> attachmentMaker;
885  attachmentMaker.reserve(m_attachments.size());
886  std::uint64_t attachedFileElementsSize = 0;
887  std::uint64_t attachmentsSize;
888  vector<MatroskaTrackHeaderMaker> trackHeaderMaker;
889  trackHeaderMaker.reserve(tracks().size());
890  std::uint64_t trackHeaderElementsSize = 0;
891  std::uint64_t trackHeaderSize;
892 
893  // define variables to store sizes, offsets and other information required to make a header and "Segment"-elements
894  // current segment index
895  unsigned int segmentIndex = 0;
896  // segment specific data
897  vector<SegmentData> segmentData;
898  // offset of the segment which is currently written / offset of "Cues"-element in segment
899  std::uint64_t offset;
900  // current total offset (including EBML header)
901  std::uint64_t totalOffset;
902  // current write offset (used to calculate positions)
903  std::uint64_t currentPosition = 0;
904  // holds the offsets of all CRC-32 elements and the length of the enclosing block
905  vector<tuple<std::uint64_t, std::uint64_t>> crc32Offsets;
906  // size length used to make size denotations
907  std::uint8_t sizeLength;
908  // sizes and offsets for cluster calculation
909  std::uint64_t clusterSize, clusterReadSize, clusterReadOffset;
910 
911  // define variables needed to manage file layout
912  // -> use the preferred tag position by default (might be changed later if not forced)
913  ElementPosition newTagPos = fileInfo().tagPosition();
914  // -> current tag position (determined later)
915  ElementPosition currentTagPos = ElementPosition::Keep;
916  // -> use the preferred cue position by default (might be changed later if not forced)
917  ElementPosition newCuesPos = fileInfo().indexPosition();
918  // --> current cue position (determined later)
919  ElementPosition currentCuesPos = ElementPosition::Keep;
920  // -> index of the last segment
921  unsigned int lastSegmentIndex = numeric_limits<unsigned int>::max();
922  // -> holds new padding
923  std::uint64_t newPadding;
924  // -> whether rewrite is required (always required when forced to rewrite)
925  bool rewriteRequired = fileInfo().isForcingRewrite() || !fileInfo().saveFilePath().empty();
926 
927  // calculate EBML header size
928  // -> sub element ID sizes
929  std::uint64_t ebmlHeaderDataSize = 2 * 7;
930  // -> content and size denotation length of numeric sub elements
931  for (auto headerValue :
932  initializer_list<std::uint64_t>{ m_version, m_readVersion, m_maxIdLength, m_maxSizeLength, m_doctypeVersion, m_doctypeReadVersion }) {
933  ebmlHeaderDataSize += sizeLength = EbmlElement::calculateUIntegerLength(headerValue);
934  ebmlHeaderDataSize += EbmlElement::calculateSizeDenotationLength(sizeLength);
935  }
936  // -> content and size denotation length of string sub elements
937  ebmlHeaderDataSize += m_doctype.size();
938  ebmlHeaderDataSize += EbmlElement::calculateSizeDenotationLength(m_doctype.size());
939  const std::uint64_t ebmlHeaderSize = 4 + EbmlElement::calculateSizeDenotationLength(ebmlHeaderDataSize) + ebmlHeaderDataSize;
940 
941  // calculate size of "WritingLib"-element
942  constexpr std::string_view muxingAppName = APP_NAME " v" APP_VERSION;
943  constexpr std::uint64_t muxingAppElementTotalSize = 2 + 1 + muxingAppName.size();
944 
945  // calculate size of "WritingApp"-element
946  const std::uint64_t writingAppElementDataSize
947  = fileInfo().writingApplication().empty() ? muxingAppName.size() : fileInfo().writingApplication().size();
948  const std::uint64_t writingAppElementTotalSize = 2 + 1 + writingAppElementDataSize;
949 
950  try {
951  // calculate size of "Tags"-element
952  for (auto &tag : tags()) {
953  try {
954  tagMaker.emplace_back(tag->prepareMaking(diag));
955  if (tagMaker.back().requiredSize() > 3) {
956  // a tag of 3 bytes size is empty and can be skipped
957  tagElementsSize += tagMaker.back().requiredSize();
958  }
959  } catch (const Failure &) {
960  }
961  }
962  tagsSize = tagElementsSize ? 4 + EbmlElement::calculateSizeDenotationLength(tagElementsSize) + tagElementsSize : 0;
963 
964  // calculate size of "Attachments"-element
965  for (auto &attachment : m_attachments) {
966  if (!attachment->isIgnored()) {
967  try {
968  attachmentMaker.emplace_back(attachment->prepareMaking(diag));
969  if (attachmentMaker.back().requiredSize() > 3) {
970  // an attachment of 3 bytes size is empty and can be skipped
971  attachedFileElementsSize += attachmentMaker.back().requiredSize();
972  }
973  } catch (const Failure &) {
974  }
975  }
976  }
977  attachmentsSize
978  = attachedFileElementsSize ? 4 + EbmlElement::calculateSizeDenotationLength(attachedFileElementsSize) + attachedFileElementsSize : 0;
979 
980  // calculate size of "Tracks"-element
981  for (auto &track : tracks()) {
982  try {
983  trackHeaderMaker.emplace_back(track->prepareMakingHeader(diag));
984  if (trackHeaderMaker.back().requiredSize() > 3) {
985  // a track header of 3 bytes size is empty and can be skipped
986  trackHeaderElementsSize += trackHeaderMaker.back().requiredSize();
987  }
988  } catch (const Failure &) {
989  }
990  }
991  trackHeaderSize
992  = trackHeaderElementsSize ? 4 + EbmlElement::calculateSizeDenotationLength(trackHeaderElementsSize) + trackHeaderElementsSize : 0;
993 
994  // inspect layout of original file
995  // - number of segments
996  // - position of tags relative to the media data
997  try {
998  for (bool firstClusterFound = false, firstTagFound = false; level0Element; level0Element = level0Element->nextSibling()) {
999  level0Element->parse(diag);
1000  switch (level0Element->id()) {
1001  case MatroskaIds::Segment:
1002  ++lastSegmentIndex;
1003  for (level1Element = level0Element->firstChild(); level1Element && !firstClusterFound && !firstTagFound;
1004  level1Element = level1Element->nextSibling()) {
1005  level1Element->parse(diag);
1006  switch (level1Element->id()) {
1007  case MatroskaIds::Tags:
1009  firstTagFound = true;
1010  break;
1011  case MatroskaIds::Cluster:
1012  firstClusterFound = true;
1013  }
1014  }
1015  if (firstTagFound) {
1016  currentTagPos = ElementPosition::BeforeData;
1017  } else if (firstClusterFound) {
1018  currentTagPos = ElementPosition::AfterData;
1019  }
1020  }
1021  }
1022 
1023  // now the number of segments is known -> allocate segment specific data
1024  segmentData.resize(lastSegmentIndex + 1);
1025 
1026  // now the current tag/cue position might be known
1027  if (newTagPos == ElementPosition::Keep) {
1028  if ((newTagPos = currentTagPos) == ElementPosition::Keep) {
1029  newTagPos = ElementPosition::BeforeData;
1030  }
1031  }
1032 
1033  } catch (const Failure &) {
1034  diag.emplace_back(DiagLevel::Critical,
1035  "Unable to parse content in top-level element at " % numberToString(level0Element->startOffset()) + " of original file.", context);
1036  throw;
1037  }
1038 
1039  progress.nextStepOrStop("Calculating offsets of elements before cluster ...");
1040  calculateSegmentData:
1041  // define variables to store sizes, offsets and other information required to make a header and "Segment"-elements
1042  // -> current "pretend" write offset
1043  std::uint64_t currentOffset = ebmlHeaderSize;
1044  // -> current read offset (used to calculate positions)
1045  std::uint64_t readOffset = 0;
1046  // -> index of current element during iteration
1047  unsigned int index;
1048 
1049  // if rewriting is required always use the preferred tag/cue position
1050  if (rewriteRequired) {
1051  newTagPos = fileInfo().tagPosition();
1052  if (newTagPos == ElementPosition::Keep) {
1053  if ((newTagPos = currentTagPos) == ElementPosition::Keep) {
1054  newTagPos = ElementPosition::BeforeData;
1055  }
1056  }
1057  newCuesPos = fileInfo().indexPosition();
1058  }
1059 
1060  // calculate sizes and other information required to make segments
1061  for (level0Element = firstElement(), currentPosition = newPadding = segmentIndex = 0; level0Element;
1062  level0Element = level0Element->nextSibling()) {
1063  switch (level0Element->id()) {
1064  case EbmlIds::Header:
1065  // header size has already been calculated
1066  break;
1067 
1068  case EbmlIds::Void:
1069  case EbmlIds::Crc32:
1070  // level 0 "Void"- and "Checksum"-elements are omitted
1071  break;
1072 
1073  case MatroskaIds::Segment: {
1074  // get reference to the current segment data instance
1075  SegmentData &segment = segmentData[segmentIndex];
1076 
1077  // parse original "Cues"-element (if present)
1078  if (!segment.cuesElement && (segment.cuesElement = level0Element->childById(MatroskaIds::Cues, diag))) {
1079  segment.cuesUpdater.parse(segment.cuesElement, diag);
1080  }
1081 
1082  // get first "Cluster"-element
1083  if (!segment.firstClusterElement) {
1084  segment.firstClusterElement = level0Element->childById(MatroskaIds::Cluster, diag);
1085  }
1086 
1087  // determine current/new cue position
1088  if (segment.cuesElement && segment.firstClusterElement) {
1089  currentCuesPos = segment.cuesElement->startOffset() < segment.firstClusterElement->startOffset() ? ElementPosition::BeforeData
1091  if (newCuesPos == ElementPosition::Keep) {
1092  newCuesPos = currentCuesPos;
1093  }
1094  } else if (newCuesPos == ElementPosition::Keep) {
1095  newCuesPos = ElementPosition::BeforeData;
1096  }
1097 
1098  // set start offset of the segment in the new file
1099  segment.startOffset = currentOffset;
1100 
1101  // check whether the segment has a CRC-32 element
1102  segment.hasCrc32 = level0Element->firstChild() && level0Element->firstChild()->id() == EbmlIds::Crc32;
1103 
1104  // precalculate the size of the segment
1105  calculateSegmentSize:
1106 
1107  // pretend writing "CRC-32"-element (which is either present and 6 bytes long or omitted)
1108  segment.totalDataSize = segment.hasCrc32 ? 6 : 0;
1109 
1110  // pretend writing "SeekHead"-element
1111  segment.totalDataSize += segment.seekInfo.actualSize();
1112 
1113  // pretend writing "SegmentInfo"-element
1114  for (level1Element = level0Element->childById(MatroskaIds::SegmentInfo, diag), index = 0; level1Element;
1115  level1Element = level1Element->siblingById(MatroskaIds::SegmentInfo, diag), ++index) {
1116  // update offset in "SeekHead"-element
1117  if (segment.seekInfo.push(index, MatroskaIds::SegmentInfo, currentPosition + segment.totalDataSize)) {
1118  goto calculateSegmentSize;
1119  } else {
1120  // add size of "SegmentInfo"-element
1121  // -> size of "MuxingApp"- and "WritingApp"-element
1122  segment.infoDataSize = muxingAppElementTotalSize + writingAppElementTotalSize;
1123  // -> add size of "Title"-element
1124  if (segmentIndex < m_titles.size()) {
1125  const auto &title = m_titles[segmentIndex];
1126  if (!title.empty()) {
1127  segment.infoDataSize += 2 + EbmlElement::calculateSizeDenotationLength(title.size()) + title.size();
1128  }
1129  }
1130  // -> add size of other children
1131  for (level2Element = level1Element->firstChild(); level2Element; level2Element = level2Element->nextSibling()) {
1132  level2Element->parse(diag);
1133  switch (level2Element->id()) {
1134  case EbmlIds::Void: // skipped
1135  case EbmlIds::Crc32: // skipped
1136  case MatroskaIds::Title: // calculated separately
1137  case MatroskaIds::MuxingApp: // calculated separately
1138  case MatroskaIds::WrittingApp: // calculated separately
1139  break;
1140  default:
1141  level2Element->makeBuffer();
1142  segment.infoDataSize += level2Element->totalSize();
1143  }
1144  }
1145  // -> calculate total size
1147  }
1148  }
1149 
1150  // pretend writing "Tracks"-element
1151  if (trackHeaderSize) {
1152  // update offsets in "SeekHead"-element
1153  if (segment.seekInfo.push(0, MatroskaIds::Tracks, currentPosition + segment.totalDataSize)) {
1154  goto calculateSegmentSize;
1155  } else {
1156  // add size of "Tracks"-element
1157  segment.totalDataSize += trackHeaderSize;
1158  }
1159  }
1160 
1161  // pretend writing "Chapters"-element
1162  for (level1Element = level0Element->childById(MatroskaIds::Chapters, diag), index = 0; level1Element;
1163  level1Element = level1Element->siblingById(MatroskaIds::Chapters, diag), ++index) {
1164  // update offset in "SeekHead"-element
1165  if (segment.seekInfo.push(index, MatroskaIds::Chapters, currentPosition + segment.totalDataSize)) {
1166  goto calculateSegmentSize;
1167  } else {
1168  // add size of element
1169  level1Element->makeBuffer();
1170  segment.totalDataSize += level1Element->totalSize();
1171  }
1172  }
1173 
1174  // "Tags"- and "Attachments"-element are written in either the first or the last segment
1175  // and either before "Cues"- and "Cluster"-elements or after these elements
1176  // depending on the desired tag position (at the front/at the end)
1177  if (newTagPos == ElementPosition::BeforeData && segmentIndex == 0) {
1178  // pretend writing "Tags"-element
1179  if (tagsSize) {
1180  // update offsets in "SeekHead"-element
1181  if (segment.seekInfo.push(0, MatroskaIds::Tags, currentPosition + segment.totalDataSize)) {
1182  goto calculateSegmentSize;
1183  } else {
1184  // add size of "Tags"-element
1185  segment.totalDataSize += tagsSize;
1186  }
1187  }
1188  // pretend writing "Attachments"-element
1189  if (attachmentsSize) {
1190  // update offsets in "SeekHead"-element
1191  if (segment.seekInfo.push(0, MatroskaIds::Attachments, currentPosition + segment.totalDataSize)) {
1192  goto calculateSegmentSize;
1193  } else {
1194  // add size of "Attachments"-element
1195  segment.totalDataSize += attachmentsSize;
1196  }
1197  }
1198  }
1199 
1200  offset = segment.totalDataSize; // save current offset (offset before "Cues"-element)
1201 
1202  // pretend writing "Cues"-element
1203  if (newCuesPos == ElementPosition::BeforeData && segment.cuesElement) {
1204  // update offset of "Cues"-element in "SeekHead"-element
1205  if (segment.seekInfo.push(0, MatroskaIds::Cues, currentPosition + segment.totalDataSize)) {
1206  goto calculateSegmentSize;
1207  } else {
1208  // add size of "Cues"-element
1209  progress.updateStep("Calculating cluster offsets and index size ...");
1210  addCuesElementSize:
1211  segment.totalDataSize += segment.cuesUpdater.totalSize();
1212  }
1213  } else {
1214  progress.updateStep("Calculating cluster offsets ...");
1215  }
1216 
1217  // decide whether it is necessary to rewrite the entire file (if not already rewriting)
1218  if (!rewriteRequired) {
1219  // find first "Cluster"-element
1220  if ((level1Element = segment.firstClusterElement)) {
1221  // just before the first "Cluster"-element
1222  // -> calculate total offset (excluding size denotation and incomplete index)
1223  totalOffset = currentOffset + 4 + segment.totalDataSize;
1224 
1225  if (totalOffset <= segment.firstClusterElement->startOffset()) {
1226  // the padding might be big enough, but
1227  // - the segment might become bigger (subsequent tags and attachments)
1228  // - the header size hasn't been taken into account yet
1229  // - seek information for first cluster and subsequent tags and attachments hasn't been taken into account
1230 
1231  // assume the size denotation length doesn't change -> use length from original file
1232  if (level0Element->headerSize() <= 4 || level0Element->headerSize() > 12) {
1233  // validate original header size
1234  diag.emplace_back(DiagLevel::Critical, "Header size of \"Segment\"-element from original file is invalid.", context);
1235  throw InvalidDataException();
1236  }
1237  segment.sizeDenotationLength = static_cast<std::uint8_t>(level0Element->headerSize() - 4u);
1238 
1239  nonRewriteCalculations:
1240  // pretend writing "Cluster"-elements assuming there is no rewrite required
1241  // -> update offset in "SeekHead"-element
1242  if (segment.seekInfo.push(
1243  0, MatroskaIds::Cluster, level1Element->startOffset() - 4 - segment.sizeDenotationLength - ebmlHeaderSize)) {
1244  goto calculateSegmentSize;
1245  }
1246  // -> update offset of "Cluster"-element in "Cues"-element and get end offset of last "Cluster"-element
1247  bool cuesInvalidated = false;
1248  for (index = 0; level1Element; level1Element = level1Element->siblingById(MatroskaIds::Cluster, diag), ++index) {
1249  clusterReadOffset = level1Element->startOffset() - level0Element->dataOffset() + readOffset;
1250  segment.clusterEndOffset = level1Element->endOffset();
1251  if (segment.cuesElement
1252  && segment.cuesUpdater.updateOffsets(
1253  clusterReadOffset, level1Element->startOffset() - 4 - segment.sizeDenotationLength - ebmlHeaderSize)
1254  && newCuesPos == ElementPosition::BeforeData) {
1255  cuesInvalidated = true;
1256  }
1257  // check whether aborted (because this loop might take some seconds to process)
1258  progress.stopIfAborted();
1259  // update the progress percentage (using offset / file size should be accurate enough)
1260  if (index % 50 == 0) {
1261  progress.updateStepPercentage(static_cast<std::uint8_t>(level1Element->dataOffset() * 100 / fileInfo().size()));
1262  }
1263  }
1264  if (cuesInvalidated) {
1265  segment.totalDataSize = offset;
1266  goto addCuesElementSize;
1267  }
1268  segment.totalDataSize = segment.clusterEndOffset - currentOffset - 4 - segment.sizeDenotationLength;
1269 
1270  // pretend writing "Cues"-element
1271  progress.updateStep("Calculating offsets of elements after cluster ...");
1272  if (newCuesPos == ElementPosition::AfterData && segment.cuesElement) {
1273  // update offset of "Cues"-element in "SeekHead"-element
1274  if (segment.seekInfo.push(0, MatroskaIds::Cues, currentPosition + segment.totalDataSize)) {
1275  goto calculateSegmentSize;
1276  } else {
1277  // add size of "Cues"-element
1278  segment.totalDataSize += segment.cuesUpdater.totalSize();
1279  }
1280  }
1281 
1282  if (newTagPos == ElementPosition::AfterData && segmentIndex == lastSegmentIndex) {
1283  // pretend writing "Tags"-element
1284  if (tagsSize) {
1285  // update offsets in "SeekHead"-element
1286  if (segment.seekInfo.push(0, MatroskaIds::Tags, currentPosition + segment.totalDataSize)) {
1287  goto calculateSegmentSize;
1288  } else {
1289  // add size of "Tags"-element
1290  segment.totalDataSize += tagsSize;
1291  }
1292  }
1293  // pretend writing "Attachments"-element
1294  if (attachmentsSize) {
1295  // update offsets in "SeekHead"-element
1296  if (segment.seekInfo.push(0, MatroskaIds::Attachments, currentPosition + segment.totalDataSize)) {
1297  goto calculateSegmentSize;
1298  } else {
1299  // add size of "Attachments"-element
1300  segment.totalDataSize += attachmentsSize;
1301  }
1302  }
1303  }
1304 
1305  // calculate total offset again (taking everything into account)
1306  // -> check whether assumed size denotation was correct
1307  if (segment.sizeDenotationLength != (sizeLength = EbmlElement::calculateSizeDenotationLength(segment.totalDataSize))) {
1308  // assumption was wrong -> recalculate with new length
1309  segment.sizeDenotationLength = sizeLength;
1310  level1Element = segment.firstClusterElement;
1311  goto nonRewriteCalculations;
1312  }
1313 
1314  totalOffset = currentOffset + 4 + sizeLength + offset;
1315  // offset does not include size of "Cues"-element
1316  if (newCuesPos == ElementPosition::BeforeData) {
1317  totalOffset += segment.cuesUpdater.totalSize();
1318  }
1319  if (totalOffset <= segment.firstClusterElement->startOffset()) {
1320  // calculate new padding
1321  if (segment.newPadding != 1) {
1322  // "Void"-element is at least 2 byte long -> can't add 1 byte padding
1323  newPadding += (segment.newPadding = segment.firstClusterElement->startOffset() - totalOffset);
1324  } else {
1325  rewriteRequired = true;
1326  }
1327  } else {
1328  rewriteRequired = true;
1329  }
1330  } else {
1331  rewriteRequired = true;
1332  }
1333  } else {
1334  diag.emplace_back(DiagLevel::Warning, argsToString("There are no clusters in segment ", segmentIndex, "."), context);
1335  }
1336 
1337  if (rewriteRequired) {
1338  if (newTagPos != ElementPosition::AfterData
1339  && (!fileInfo().forceTagPosition()
1340  || (fileInfo().tagPosition() == ElementPosition::Keep && currentTagPos == ElementPosition::Keep))) {
1341  // rewriting might be avoided by writing the tags at the end
1342  newTagPos = ElementPosition::AfterData;
1343  rewriteRequired = false;
1344  } else if (newCuesPos != ElementPosition::AfterData
1345  && (!fileInfo().forceIndexPosition()
1346  || (fileInfo().indexPosition() == ElementPosition::Keep && currentCuesPos == ElementPosition::Keep))) {
1347  // rewriting might be avoided by writing the cues at the end
1348  newCuesPos = ElementPosition::AfterData;
1349  rewriteRequired = false;
1350  }
1351  // do calculations again for rewriting / changed element order
1352  goto calculateSegmentData;
1353  }
1354  } else {
1355  // if rewrite is required, pretend writing the remaining elements to compute total segment size
1356 
1357  // pretend writing "Void"-element (only if there is at least one "Cluster"-element in the segment)
1358  if (!segmentIndex && rewriteRequired && (level1Element = level0Element->childById(MatroskaIds::Cluster, diag))) {
1359  // simply use the preferred padding
1360  segment.totalDataSize += (segment.newPadding = newPadding = fileInfo().preferredPadding());
1361  }
1362 
1363  // pretend writing "Cluster"-element
1364  segment.clusterSizes.clear();
1365  bool cuesInvalidated = false;
1366  for (index = 0; level1Element; level1Element = level1Element->siblingById(MatroskaIds::Cluster, diag), ++index) {
1367  // update offset of "Cluster"-element in "Cues"-element
1368  clusterReadOffset = level1Element->startOffset() - level0Element->dataOffset() + readOffset;
1369  if (segment.cuesElement && segment.cuesUpdater.updateOffsets(clusterReadOffset, currentPosition + segment.totalDataSize)
1370  && newCuesPos == ElementPosition::BeforeData) {
1371  cuesInvalidated = true;
1372  } else {
1373  if (index == 0 && segment.seekInfo.push(index, MatroskaIds::Cluster, currentPosition + segment.totalDataSize)) {
1374  goto calculateSegmentSize;
1375  } else {
1376  // add size of "Cluster"-element
1377  clusterSize = clusterReadSize = 0;
1378  for (level2Element = level1Element->firstChild(); level2Element; level2Element = level2Element->nextSibling()) {
1379  level2Element->parse(diag);
1380  if (segment.cuesElement
1381  && segment.cuesUpdater.updateRelativeOffsets(clusterReadOffset, clusterReadSize, clusterSize)
1382  && newCuesPos == ElementPosition::BeforeData) {
1383  cuesInvalidated = true;
1384  }
1385  switch (level2Element->id()) {
1386  case EbmlIds::Void:
1387  case EbmlIds::Crc32:
1388  break;
1389  case MatroskaIds::Position:
1390  clusterSize += 1u + 1u + EbmlElement::calculateUIntegerLength(currentPosition + segment.totalDataSize);
1391  break;
1392  default:
1393  clusterSize += level2Element->totalSize();
1394  }
1395  clusterReadSize += level2Element->totalSize();
1396  }
1397  segment.clusterSizes.push_back(clusterSize);
1398  segment.totalDataSize += 4u + EbmlElement::calculateSizeDenotationLength(clusterSize) + clusterSize;
1399  }
1400  }
1401  // check whether aborted (because this loop might take some seconds to process)
1402  progress.stopIfAborted();
1403  // update the progress percentage (using offset / file size should be accurate enough)
1404  if ((index % 50 == 0) && fileInfo().size()) {
1405  progress.updateStepPercentage(static_cast<std::uint8_t>(level1Element->dataOffset() * 100 / fileInfo().size()));
1406  }
1407  // TODO: reduce code duplication for aborting and progress updates
1408  }
1409  // check whether the total size of the "Cues"-element has been invalidated and recompute cluster if required
1410  if (cuesInvalidated) {
1411  // reset element size to previously saved offset of "Cues"-element
1412  segment.totalDataSize = offset;
1413  goto addCuesElementSize;
1414  }
1415 
1416  // pretend writing "Cues"-element
1417  progress.updateStep("Calculating offsets of elements after cluster ...");
1418  if (newCuesPos == ElementPosition::AfterData && segment.cuesElement) {
1419  // update offset of "Cues"-element in "SeekHead"-element
1420  if (segment.seekInfo.push(0, MatroskaIds::Cues, currentPosition + segment.totalDataSize)) {
1421  goto calculateSegmentSize;
1422  } else {
1423  // add size of "Cues"-element
1424  segment.totalDataSize += segment.cuesUpdater.totalSize();
1425  }
1426  }
1427 
1428  // "Tags"- and "Attachments"-element are written in either the first or the last segment
1429  // and either before "Cues"- and "Cluster"-elements or after these elements
1430  // depending on the desired tag position (at the front/at the end)
1431  if (newTagPos == ElementPosition::AfterData && segmentIndex == lastSegmentIndex) {
1432  // pretend writing "Tags"-element
1433  if (tagsSize) {
1434  // update offsets in "SeekHead"-element
1435  if (segment.seekInfo.push(0, MatroskaIds::Tags, currentPosition + segment.totalDataSize)) {
1436  goto calculateSegmentSize;
1437  } else {
1438  // add size of "Tags"-element
1439  segment.totalDataSize += tagsSize;
1440  }
1441  }
1442  // pretend writing "Attachments"-element
1443  if (attachmentsSize) {
1444  // update offsets in "SeekHead"-element
1445  if (segment.seekInfo.push(0, MatroskaIds::Attachments, currentPosition + segment.totalDataSize)) {
1446  goto calculateSegmentSize;
1447  } else {
1448  // add size of "Attachments"-element
1449  segment.totalDataSize += attachmentsSize;
1450  }
1451  }
1452  }
1453  }
1454 
1455  // increase the current segment index
1456  ++segmentIndex;
1457 
1458  // increase write offsets by the size of the segment which size has just been computed
1460  currentPosition += segment.totalSize;
1461  currentOffset += segment.totalSize;
1462 
1463  // increase the read offset by the size of the segment read from the original file
1464  readOffset += level0Element->totalSize();
1465 
1466  break;
1467  }
1468  default:
1469  // just copy any unknown top-level elements
1470  diag.emplace_back(DiagLevel::Warning,
1471  "The top-level element \"" % level0Element->idToString() + "\" of the original file is unknown and will just be copied.",
1472  context);
1473  currentOffset += level0Element->totalSize();
1474  readOffset += level0Element->totalSize();
1475  }
1476  }
1477 
1478  if (!rewriteRequired) {
1479  // check whether the new padding is ok according to specifications
1480  if ((rewriteRequired = (newPadding > fileInfo().maxPadding() || newPadding < fileInfo().minPadding()))) {
1481  // need to recalculate segment data for rewrite
1482  goto calculateSegmentData;
1483  }
1484  }
1485 
1486  } catch (const OperationAbortedException &) {
1487  diag.emplace_back(DiagLevel::Information, "Applying new tag information has been aborted.", context);
1488  throw;
1489  } catch (const Failure &) {
1490  diag.emplace_back(DiagLevel::Critical, "Parsing the original file failed.", context);
1491  throw;
1492  } catch (const std::ios_base::failure &failure) {
1493  diag.emplace_back(DiagLevel::Critical, argsToString("An IO error occurred when parsing the original file: ", failure.what()), context);
1494  throw;
1495  }
1496 
1497  // setup stream(s) for writing
1498  // -> update status
1499  progress.nextStepOrStop("Preparing streams ...");
1500 
1501  // -> define variables needed to handle output stream and backup stream (required when rewriting the file)
1502  string backupPath;
1503  NativeFileStream &outputStream = fileInfo().stream();
1504  NativeFileStream backupStream; // create a stream to open the backup/original file for the case rewriting the file is required
1505  BinaryWriter outputWriter(&outputStream);
1506  char buff[8]; // buffer used to make size denotations
1507 
1508  if (rewriteRequired) {
1509  if (fileInfo().saveFilePath().empty()) {
1510  // move current file to temp dir and reopen it as backupStream, recreate original file
1511  try {
1512  BackupHelper::createBackupFile(fileInfo().backupDirectory(), fileInfo().path(), backupPath, outputStream, backupStream);
1513  // recreate original file, define buffer variables
1514  outputStream.open(BasicFileInfo::pathForOpen(fileInfo().path()).data(), ios_base::out | ios_base::binary | ios_base::trunc);
1515  } catch (const std::ios_base::failure &failure) {
1516  diag.emplace_back(
1517  DiagLevel::Critical, argsToString("Creation of temporary file (to rewrite the original file) failed: ", failure.what()), context);
1518  throw;
1519  }
1520  } else {
1521  // open the current file as backupStream and create a new outputStream at the specified "save file path"
1522  try {
1523  backupStream.exceptions(ios_base::badbit | ios_base::failbit);
1524  backupStream.open(BasicFileInfo::pathForOpen(fileInfo().path()).data(), ios_base::in | ios_base::binary);
1525  fileInfo().close();
1526  outputStream.open(BasicFileInfo::pathForOpen(fileInfo().saveFilePath()).data(), ios_base::out | ios_base::binary | ios_base::trunc);
1527  } catch (const std::ios_base::failure &failure) {
1528  diag.emplace_back(DiagLevel::Critical, argsToString("Opening streams to write output file failed: ", failure.what()), context);
1529  throw;
1530  }
1531  }
1532 
1533  // set backup stream as associated input stream since we need the original elements to write the new file
1534  setStream(backupStream);
1535 
1536  // TODO: reduce code duplication
1537 
1538  } else { // !rewriteRequired
1539  // buffer currently assigned attachments
1540  for (auto &maker : attachmentMaker) {
1541  maker.bufferCurrentAttachments(diag);
1542  }
1543 
1544  // reopen original file to ensure it is opened for writing
1545  try {
1546  fileInfo().close();
1547  outputStream.open(fileInfo().path(), ios_base::in | ios_base::out | ios_base::binary);
1548  } catch (const std::ios_base::failure &failure) {
1549  diag.emplace_back(DiagLevel::Critical, argsToString("Opening the file with write permissions failed: ", failure.what()), context);
1550  throw;
1551  }
1552  }
1553 
1554  // start actual writing
1555  try {
1556  // write EBML header
1557  progress.nextStepOrStop("Writing EBML header ...");
1558  outputWriter.writeUInt32BE(EbmlIds::Header);
1559  sizeLength = EbmlElement::makeSizeDenotation(ebmlHeaderDataSize, buff);
1560  outputStream.write(buff, sizeLength);
1563  EbmlElement::makeSimpleElement(outputStream, EbmlIds::MaxIdLength, m_maxIdLength);
1564  EbmlElement::makeSimpleElement(outputStream, EbmlIds::MaxSizeLength, m_maxSizeLength);
1568 
1569  // iterates through all level 0 elements of the original file
1570  for (level0Element = firstElement(), segmentIndex = 0, currentPosition = 0; level0Element; level0Element = level0Element->nextSibling()) {
1571 
1572  // write all level 0 elements of the original file
1573  switch (level0Element->id()) {
1574  case EbmlIds::Header:
1575  // header has already been written -> skip it here
1576  break;
1577 
1578  case EbmlIds::Void:
1579  case EbmlIds::Crc32:
1580  // level 0 "Void"- and "Checksum"-elements are omitted
1581  break;
1582 
1583  case MatroskaIds::Segment: {
1584  // get reference to the current segment data instance
1585  SegmentData &segment = segmentData[segmentIndex];
1586 
1587  // write "Segment"-element actually
1588  progress.updateStep("Writing segment header ...");
1589  outputWriter.writeUInt32BE(MatroskaIds::Segment);
1590  sizeLength = EbmlElement::makeSizeDenotation(segment.totalDataSize, buff);
1591  outputStream.write(buff, sizeLength);
1592  segment.newDataOffset = offset = static_cast<std::uint64_t>(outputStream.tellp()); // store segment data offset here
1593 
1594  // write CRC-32 element ...
1595  if (segment.hasCrc32) {
1596  // ... if the original element had a CRC-32 element
1597  *buff = static_cast<char>(EbmlIds::Crc32);
1598  *(buff + 1) = static_cast<char>(0x84); // length denotation: 4 byte
1599  // set the value after writing the element
1600  crc32Offsets.emplace_back(outputStream.tellp(), segment.totalDataSize);
1601  outputStream.write(buff, 6);
1602  }
1603 
1604  // write "SeekHead"-element (except there is no seek information for the current segment)
1605  segment.seekInfo.make(outputStream, diag);
1606 
1607  // write "SegmentInfo"-element
1608  for (level1Element = level0Element->childById(MatroskaIds::SegmentInfo, diag); level1Element;
1609  level1Element = level1Element->siblingById(MatroskaIds::SegmentInfo, diag)) {
1610  // -> write ID and size
1611  outputWriter.writeUInt32BE(MatroskaIds::SegmentInfo);
1612  sizeLength = EbmlElement::makeSizeDenotation(segment.infoDataSize, buff);
1613  outputStream.write(buff, sizeLength);
1614  // -> write children
1615  for (level2Element = level1Element->firstChild(); level2Element; level2Element = level2Element->nextSibling()) {
1616  switch (level2Element->id()) {
1617  case EbmlIds::Void: // skipped
1618  case EbmlIds::Crc32: // skipped
1619  case MatroskaIds::Title: // written separately
1620  case MatroskaIds::MuxingApp: // written separately
1621  case MatroskaIds::WrittingApp: // written separately
1622  break;
1623  default:
1624  level2Element->copyBuffer(outputStream);
1625  level2Element->discardBuffer();
1626  }
1627  }
1628  // -> write "Title"-element
1629  if (segmentIndex < m_titles.size()) {
1630  const auto &title = m_titles[segmentIndex];
1631  if (!title.empty()) {
1633  }
1634  }
1635  // -> write "MuxingApp"- and "WritingApp"-element
1636  EbmlElement::makeSimpleElement(outputStream, MatroskaIds::MuxingApp, muxingAppName);
1638  fileInfo().writingApplication().empty() ? muxingAppName : fileInfo().writingApplication());
1639  }
1640 
1641  // write "Tracks"-element
1642  if (trackHeaderElementsSize) {
1643  outputWriter.writeUInt32BE(MatroskaIds::Tracks);
1644  sizeLength = EbmlElement::makeSizeDenotation(trackHeaderElementsSize, buff);
1645  outputStream.write(buff, sizeLength);
1646  for (auto &maker : trackHeaderMaker) {
1647  maker.make(outputStream);
1648  }
1649  }
1650 
1651  // write "Chapters"-element
1652  for (level1Element = level0Element->childById(MatroskaIds::Chapters, diag); level1Element;
1653  level1Element = level1Element->siblingById(MatroskaIds::Chapters, diag)) {
1654  level1Element->copyBuffer(outputStream);
1655  level1Element->discardBuffer();
1656  }
1657 
1658  if (newTagPos == ElementPosition::BeforeData && segmentIndex == 0) {
1659  // write "Tags"-element
1660  if (tagsSize) {
1661  outputWriter.writeUInt32BE(MatroskaIds::Tags);
1662  sizeLength = EbmlElement::makeSizeDenotation(tagElementsSize, buff);
1663  outputStream.write(buff, sizeLength);
1664  for (auto &maker : tagMaker) {
1665  maker.make(outputStream);
1666  }
1667  }
1668  // write "Attachments"-element
1669  if (attachmentsSize) {
1670  outputWriter.writeUInt32BE(MatroskaIds::Attachments);
1671  sizeLength = EbmlElement::makeSizeDenotation(attachedFileElementsSize, buff);
1672  outputStream.write(buff, sizeLength);
1673  for (auto &maker : attachmentMaker) {
1674  maker.make(outputStream, diag);
1675  }
1676  }
1677  }
1678 
1679  // write "Cues"-element
1680  if (newCuesPos == ElementPosition::BeforeData && segment.cuesElement) {
1681  segment.cuesUpdater.make(outputStream, diag);
1682  }
1683 
1684  // write padding / "Void"-element
1685  if (segment.newPadding) {
1686  // calculate length
1687  std::uint64_t voidLength;
1688  if (segment.newPadding < 64) {
1689  sizeLength = 1;
1690  *buff = static_cast<char>(voidLength = segment.newPadding - 2) | static_cast<char>(0x80);
1691  } else {
1692  sizeLength = 8;
1693  BE::getBytes(static_cast<std::uint64_t>((voidLength = segment.newPadding - 9) | 0x100000000000000), buff);
1694  }
1695  // write header
1696  outputWriter.writeByte(EbmlIds::Void);
1697  outputStream.write(buff, sizeLength);
1698  // write zeroes
1699  for (; voidLength; --voidLength) {
1700  outputStream.put(0);
1701  }
1702  }
1703 
1704  // write media data / "Cluster"-elements
1705  level1Element = level0Element->childById(MatroskaIds::Cluster, diag);
1706  if (rewriteRequired) {
1707  // update status, check whether the operation has been aborted
1708  progress.nextStepOrStop("Writing cluster ...",
1709  static_cast<std::uint8_t>((static_cast<std::uint64_t>(outputStream.tellp()) - offset) * 100 / segment.totalDataSize));
1710  // write "Cluster"-element
1711  auto clusterSizesIterator = segment.clusterSizes.cbegin();
1712  unsigned int index = 0;
1713  for (; level1Element; level1Element = level1Element->siblingById(MatroskaIds::Cluster, diag), ++clusterSizesIterator, ++index) {
1714  // calculate position of cluster in segment
1715  clusterSize = currentPosition + (static_cast<std::uint64_t>(outputStream.tellp()) - offset);
1716  // write header; checking whether clusterSizesIterator is valid shouldn't be necessary
1717  outputWriter.writeUInt32BE(MatroskaIds::Cluster);
1718  sizeLength = EbmlElement::makeSizeDenotation(*clusterSizesIterator, buff);
1719  outputStream.write(buff, sizeLength);
1720  // write children
1721  for (level2Element = level1Element->firstChild(); level2Element; level2Element = level2Element->nextSibling()) {
1722  switch (level2Element->id()) {
1723  case EbmlIds::Void:
1724  case EbmlIds::Crc32:
1725  break;
1726  case MatroskaIds::Position:
1727  EbmlElement::makeSimpleElement(outputStream, MatroskaIds::Position, clusterSize);
1728  break;
1729  default:
1730  level2Element->copyEntirely(outputStream, diag, nullptr);
1731  }
1732  }
1733  // update percentage, check whether the operation has been aborted
1734  progress.stopIfAborted();
1735  if (index % 50 == 0) {
1736  progress.updateStepPercentage(
1737  static_cast<std::uint8_t>((static_cast<std::uint64_t>(outputStream.tellp()) - offset) * 100 / segment.totalDataSize));
1738  }
1739  }
1740  } else {
1741  // can't just skip existing "Cluster"-elements: "Position"-elements must be updated
1742  progress.nextStepOrStop("Updating cluster ...",
1743  static_cast<std::uint8_t>((static_cast<std::uint64_t>(outputStream.tellp()) - offset) * 100 / segment.totalDataSize));
1744  for (; level1Element; level1Element = level1Element->nextSibling()) {
1745  for (level2Element = level1Element->firstChild(); level2Element; level2Element = level2Element->nextSibling()) {
1746  switch (level2Element->id()) {
1747  case MatroskaIds::Position:
1748  // calculate new position
1749  sizeLength = EbmlElement::makeUInteger(level1Element->startOffset() - segmentData.front().newDataOffset, buff,
1750  level2Element->dataSize() > 8 ? 8 : static_cast<std::uint8_t>(level2Element->dataSize()));
1751  // new position can only applied if it doesn't need more bytes than the previous position
1752  if (level2Element->dataSize() < sizeLength) {
1753  // can't update position -> void position elements ("Position"-elements seem a bit useless anyways)
1754  outputStream.seekp(static_cast<streamoff>(level2Element->startOffset()));
1755  outputStream.put(static_cast<char>(EbmlIds::Void));
1756  } else {
1757  // update position
1758  outputStream.seekp(static_cast<streamoff>(level2Element->dataOffset()));
1759  outputStream.write(buff, sizeLength);
1760  }
1761  break;
1762  default:;
1763  }
1764  }
1765  }
1766  // skip existing "Cluster"-elements
1767  outputStream.seekp(static_cast<streamoff>(segment.clusterEndOffset));
1768  }
1769 
1770  progress.updateStep("Writing segment tail ...");
1771 
1772  // write "Cues"-element
1773  if (newCuesPos == ElementPosition::AfterData && segment.cuesElement) {
1774  segment.cuesUpdater.make(outputStream, diag);
1775  }
1776 
1777  if (newTagPos == ElementPosition::AfterData && segmentIndex == lastSegmentIndex) {
1778  // write "Tags"-element
1779  if (tagsSize) {
1780  outputWriter.writeUInt32BE(MatroskaIds::Tags);
1781  sizeLength = EbmlElement::makeSizeDenotation(tagElementsSize, buff);
1782  outputStream.write(buff, sizeLength);
1783  for (auto &maker : tagMaker) {
1784  maker.make(outputStream);
1785  }
1786  }
1787  // write "Attachments"-element
1788  if (attachmentsSize) {
1789  outputWriter.writeUInt32BE(MatroskaIds::Attachments);
1790  sizeLength = EbmlElement::makeSizeDenotation(attachedFileElementsSize, buff);
1791  outputStream.write(buff, sizeLength);
1792  for (auto &maker : attachmentMaker) {
1793  maker.make(outputStream, diag);
1794  }
1795  }
1796  }
1797 
1798  // increase the current segment index
1799  ++segmentIndex;
1800 
1801  // increase write offsets by the size of the segment which has just been written
1802  currentPosition += segment.totalSize;
1803 
1804  break;
1805  }
1806  default:
1807  // just copy any unknown top-level elements
1808  level0Element->copyEntirely(outputStream, diag, nullptr);
1809  currentPosition += level0Element->totalSize();
1810  }
1811  }
1812 
1813  // reparse what is written so far
1814  progress.updateStep("Reparsing output file ...");
1815  if (rewriteRequired) {
1816  // report new size
1817  fileInfo().reportSizeChanged(static_cast<std::uint64_t>(outputStream.tellp()));
1818 
1819  // "save as path" is now the regular path
1820  if (!fileInfo().saveFilePath().empty()) {
1821  fileInfo().reportPathChanged(fileInfo().saveFilePath());
1822  fileInfo().setSaveFilePath(string());
1823  }
1824 
1825  // the outputStream needs to be reopened to be able to read again
1826  outputStream.close();
1827  outputStream.open(fileInfo().path(), ios_base::in | ios_base::out | ios_base::binary);
1828  setStream(outputStream);
1829  } else {
1830  const auto newSize = static_cast<std::uint64_t>(outputStream.tellp());
1831  if (newSize < fileInfo().size()) {
1832  // file is smaller after the modification -> truncate
1833  // -> close stream before truncating
1834  outputStream.close();
1835  // -> truncate file
1836  if (truncate(fileInfo().path().c_str(), static_cast<iostream::off_type>(newSize)) == 0) {
1837  fileInfo().reportSizeChanged(newSize);
1838  } else {
1839  diag.emplace_back(DiagLevel::Critical, "Unable to truncate the file.", context);
1840  }
1841  // -> reopen the stream again
1842  outputStream.open(fileInfo().path(), ios_base::in | ios_base::out | ios_base::binary);
1843  } else {
1844  // file is longer after the modification -> just report new size
1845  fileInfo().reportSizeChanged(newSize);
1846  }
1847  }
1848  reset();
1849  try {
1850  parseHeader(diag, progress);
1851  } catch (const OperationAbortedException &) {
1852  throw;
1853  } catch (const Failure &) {
1854  diag.emplace_back(DiagLevel::Critical, "Unable to reparse the header of the new file.", context);
1855  throw;
1856  }
1857 
1858  // update CRC-32 checksums
1859  if (!crc32Offsets.empty()) {
1860  progress.updateStep("Updating CRC-32 checksums ...");
1861  for (const auto &crc32Offset : crc32Offsets) {
1862  outputStream.seekg(static_cast<streamoff>(get<0>(crc32Offset) + 6));
1863  outputStream.seekp(static_cast<streamoff>(get<0>(crc32Offset) + 2));
1864  writer().writeUInt32LE(reader().readCrc32(get<1>(crc32Offset) - 6));
1865  }
1866  }
1867 
1868  // prevent deferring final write operations (to catch and handle possible errors here)
1869  outputStream.flush();
1870 
1871  // handle errors (which might have been occurred after renaming/creating backup file)
1872  } catch (...) {
1873  BackupHelper::handleFailureAfterFileModified(fileInfo(), backupPath, outputStream, backupStream, diag, context);
1874  }
1875 }
1876 
1877 } // namespace TagParser
The AbortableProgressFeedback class provides feedback about an ongoing operation via callbacks.
void stopIfAborted() const
Throws an OperationAbortedException if aborted.
void nextStepOrStop(const std::string &step, std::uint8_t stepPercentage=0)
Throws an OperationAbortedException if aborted; otherwise the data for the next step is set.
std::uint64_t id() const
Returns the ID of the attachment.
bool isIgnored() const
Returns whether the attachment is ignored/omitted when rewriting the container.
void setId(std::uint64_t id)
Sets the ID of the attachment.
std::uint64_t startOffset() const
Returns the start offset in the related stream.
std::vector< std::string > m_titles
bool isHeaderParsed() const
Returns an indication whether the header has been parsed yet.
void setStream(std::iostream &stream)
Sets the related stream.
std::uint32_t timeScale() const
Returns the time scale of the file if known; otherwise returns 0.
CppUtilities::BinaryWriter & writer()
Returns the related BinaryWriter.
void parseHeader(Diagnostics &diag, AbortableProgressFeedback &progress)
Parses the header if not parsed yet.
CppUtilities::BinaryReader & reader()
Returns the related BinaryReader.
CppUtilities::TimeSpan m_duration
void reportPathChanged(std::string_view newPath)
Call this function to report that the path changed.
std::uint64_t size() const
Returns size of the current file in bytes.
CppUtilities::NativeFileStream & stream()
Returns the std::fstream for the current instance.
Definition: basicfileinfo.h:85
void close()
A possibly opened std::fstream will be closed.
static std::string_view pathForOpen(std::string_view url)
Returns the url with the "file:/" prefix removed so that it can be passed to functions like open(),...
void reportSizeChanged(std::uint64_t newSize)
Call this function to report that the size changed.
void updateStep(const std::string &step, std::uint8_t stepPercentage=0)
Updates the current step and invokes the first callback specified on construction.
void updateStepPercentage(std::uint8_t stepPercentage)
Updates the current step percentage and invokes the second callback specified on construction (or the...
The Diagnostics class is a container for DiagMessage.
Definition: diagnostics.h:156
The EbmlElement class helps to parse EBML files such as Matroska files.
Definition: ebmlelement.h:32
static std::uint8_t makeUInteger(std::uint64_t value, char *buff)
Writes value to buff.
static void makeSimpleElement(std::ostream &stream, IdentifierType id, std::uint64_t content)
Makes a simple EBML element.
static std::uint8_t calculateSizeDenotationLength(std::uint64_t size)
Returns the length of the size denotation for the specified size in byte.
std::string idToString() const
Converts the specified EBML ID to a printable string.
Definition: ebmlelement.h:71
static std::uint8_t makeSizeDenotation(std::uint64_t size, char *buff)
Makes the size denotation for the specified size and stores it to buff.
static std::uint8_t calculateUIntegerLength(std::uint64_t integer)
Returns the length of the specified unsigned integer in byte.
The class inherits from std::exception and serves as base class for exceptions thrown by the elements...
Definition: exceptions.h:11
The GenericContainer class helps parsing header, track, tag and chapter information of a file.
const std::vector< std::unique_ptr< MatroskaTrack > > & tracks() const
Returns the tracks of the file.
const std::vector< std::unique_ptr< MatroskaTag > > & tags() const
Returns the tags of the file.
EbmlElement * firstElement() const
Returns the first element of the file if available; otherwise returns nullptr.
void reset() override
Discards all parsing results.
std::uint64_t startOffset() const
Returns the start offset in the related stream.
void discardBuffer()
Discards buffered data.
void copyEntirely(std::ostream &targetStream, Diagnostics &diag, AbortableProgressFeedback *progress)
Writes the entire element including all children to the specified targetStream.
std::uint32_t headerSize() const
Returns the header size of the element in byte.
std::uint64_t endOffset() const
Returns the offset of the first byte which doesn't belong to this element anymore.
const IdentifierType & id() const
Returns the element ID.
void copyBuffer(std::ostream &targetStream)
Copies buffered data to targetStream.
ImplementationType * childById(const IdentifierType &id, Diagnostics &diag)
Returns the first child with the specified id.
ImplementationType * nextSibling()
Returns the next sibling of the element.
ImplementationType * firstChild()
Returns the first child of the element.
static constexpr std::uint32_t maximumIdLengthSupported()
Returns the maximum id length supported by the class in byte.
DataSizeType dataSize() const
Returns the data size of the element in byte.
std::uint64_t totalSize() const
Returns the total size of the element.
void parse(Diagnostics &diag)
Parses the header information of the element which is read from the related stream at the start offse...
static constexpr std::uint32_t maximumSizeLengthSupported()
Returns the maximum size length supported by the class in byte.
std::uint64_t dataOffset() const
Returns the data offset of the element in the related stream.
void makeBuffer()
Buffers the element (header and data).
ImplementationType * siblingById(const IdentifierType &id, Diagnostics &diag)
Returns the first sibling with the specified id.
The exception that is thrown when the data to be parsed or to be made seems invalid and therefore can...
Definition: exceptions.h:25
Implementation of TagParser::AbstractAttachment for the Matroska container.
MatroskaAttachmentMaker prepareMaking(Diagnostics &diag)
Prepares making.
The MatroskaChapter class provides an implementation of AbstractChapter for Matroska files.
void internalParseChapters(Diagnostics &diag, AbortableProgressFeedback &progress) override
Internally called to parse the chapters.
void internalParseHeader(Diagnostics &diag, AbortableProgressFeedback &progress) override
Internally called to parse the header.
std::size_t chapterCount() const override
Returns the number of chapters the container holds.
void internalParseTracks(Diagnostics &diag, AbortableProgressFeedback &progress) override
Internally called to parse the tracks.
ElementPosition determineTagPosition(Diagnostics &diag) const override
Determines the position of the tags inside the file.
MatroskaAttachment * createAttachment() override
Creates and returns a new attachment.
void reset() override
Discards all parsing results.
MatroskaAttachment * attachment(std::size_t index) override
Returns the attachment with the specified index.
void internalMakeFile(Diagnostics &diag, AbortableProgressFeedback &progress) override
Internally called to make the file.
MatroskaChapter * chapter(std::size_t index) override
Returns the chapter with the specified index.
ElementPosition determineIndexPosition(Diagnostics &diag) const override
Determines the position of the index.
void internalParseAttachments(Diagnostics &diag, AbortableProgressFeedback &progress) override
Internally called to parse the attachments.
ElementPosition determineElementPosition(std::uint64_t elementId, Diagnostics &diag) const
Determines the position of the element with the specified elementId.
void validateIndex(Diagnostics &diag, AbortableProgressFeedback &progress)
Validates the file index (cue entries).
void internalParseTags(Diagnostics &diag, AbortableProgressFeedback &progress) override
Internally called to parse the tags.
The MatroskaCuePositionUpdater class helps to rewrite the "Cues"-element with shifted positions.
Definition: matroskacues.h:64
std::uint64_t totalSize() const
Returns how many bytes will be written when calling the make() method.
bool updateOffsets(std::uint64_t originalOffset, std::uint64_t newOffset)
Sets the offset of the entries with the specified originalOffset to newOffset.
bool updateRelativeOffsets(std::uint64_t referenceOffset, std::uint64_t originalRelativeOffset, std::uint64_t newRelativeOffset)
Sets the relative offset of the entries with the specified originalRelativeOffset and the specified r...
void make(std::ostream &stream, Diagnostics &diag)
Writes the previously parsed "Cues"-element with updated positions to the specified stream.
void parse(EbmlElement *cuesElement, Diagnostics &diag)
Parses the specified cuesElement.
The MatroskaSeekInfo class helps parsing and making "SeekHead"-elements.
bool push(unsigned int index, EbmlElement::IdentifierType id, std::uint64_t offset)
Pushes the specified offset of an element with the specified id to the info.
void make(std::ostream &stream, Diagnostics &diag)
Writes a "SeekHead" element for the current instance to the specified stream.
std::uint64_t actualSize() const
Returns the number of bytes which will be written when calling the make() method.
Implementation of TagParser::Tag for the Matroska container.
Definition: matroskatag.h:58
MatroskaTagMaker prepareMaking(Diagnostics &diag)
Prepares making.
Definition: matroskatag.h:121
Implementation of TagParser::AbstractTrack for the Matroska container.
Definition: matroskatrack.h:48
void readStatisticsFromTags(const std::vector< std::unique_ptr< MatroskaTag >> &tags, Diagnostics &diag)
Reads track-specific statistics from the specified tags.
MatroskaTrackHeaderMaker prepareMakingHeader(Diagnostics &diag) const
Prepares making header.
Definition: matroskatrack.h:86
The MediaFileInfo class allows to read and write tag information providing a container/tag format ind...
Definition: mediafileinfo.h:74
bool isForcingRewrite() const
Returns whether forcing rewriting (when applying changes) is enabled.
void setSaveFilePath(std::string_view saveFilePath)
Sets the "save file path".
bool forceIndexPosition() const
Returns whether indexPosition() is forced.
const std::string & saveFilePath() const
Returns the "save file path" which has been set using setSaveFilePath().
std::size_t preferredPadding() const
Returns the padding to be written before the data block when applying changes and the file needs to b...
const std::string & writingApplication() const
Returns the writing application as container-level meta-data.
ElementPosition tagPosition() const
Returns the position (in the output file) where the tag information is written when applying changes.
bool forceTagPosition() const
Returns whether tagPosition() is forced.
ElementPosition indexPosition() const
Returns the position (in the output file) where the index is written when applying changes.
The exception that is thrown when the data to be parsed holds no parsable information (e....
Definition: exceptions.h:18
The exception that is thrown when an operation has been stopped and thus not successfully completed b...
Definition: exceptions.h:46
TAG_PARSER_EXPORT void createBackupFile(const std::string &backupDir, const std::string &originalPath, std::string &backupPath, CppUtilities::NativeFileStream &originalStream, CppUtilities::NativeFileStream &backupStream)
TAG_PARSER_EXPORT void handleFailureAfterFileModified(MediaFileInfo &mediaFileInfo, const std::string &backupPath, CppUtilities::NativeFileStream &outputStream, CppUtilities::NativeFileStream &backupStream, Diagnostics &diag, const std::string &context="making file")
constexpr TAG_PARSER_EXPORT std::string_view title()
Definition: matroskatagid.h:38
Contains all classes and functions of the TagParser library.
Definition: aaccodebook.h:10
ElementPosition
Definition: settings.h:13
bool sameOffset(std::uint64_t offset, const EbmlElement *element)
Returns an indication whether offset equals the start offset of element.
bool excludesOffset(const vector< EbmlElement * > &elements, std::uint64_t offset)
Returns whether none of the specified elements have the specified offset.
The private SegmentData struct is used in MatroskaContainer::internalMakeFile() to store segment spec...
SegmentData()
Constructs a new segment data object.
bool hasCrc32
whether CRC-32 checksum is present
std::uint64_t newPadding
padding (in the new file)
std::uint64_t totalDataSize
total size of the segment data (in the new file, excluding header)
vector< std::uint64_t > clusterSizes
cluster sizes
std::uint8_t sizeDenotationLength
header size (in the new file)
std::uint64_t newDataOffset
data offset of the segment in the new file
std::uint64_t infoDataSize
size of the "SegmentInfo"-element
std::uint64_t startOffset
start offset (in the new file)
MatroskaSeekInfo seekInfo
used to make "SeekHead"-element
EbmlElement * firstClusterElement
first "Cluster"-element (original file)
std::uint64_t clusterEndOffset
end offset of last "Cluster"-element (original file)
MatroskaCuePositionUpdater cuesUpdater
used to make "Cues"-element
EbmlElement * cuesElement
"Cues"-element (original file)
std::uint64_t totalSize
total size of the segment data (in the new file, including header)