Tag Parser  8.2.0
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
mp4tagfield.cpp
Go to the documentation of this file.
1 #include "./mp4tagfield.h"
2 #include "./mp4atom.h"
3 #include "./mp4container.h"
4 #include "./mp4ids.h"
5 
6 #include "../exceptions.h"
7 
8 #include <c++utilities/conversion/stringbuilder.h>
9 #include <c++utilities/io/binaryreader.h>
10 #include <c++utilities/io/binarywriter.h>
11 
12 #include <algorithm>
13 #include <limits>
14 #include <memory>
15 
16 using namespace std;
17 using namespace IoUtilities;
18 using namespace ConversionUtilities;
19 
20 namespace TagParser {
21 
30 Mp4TagField::Mp4TagField()
31  : m_parsedRawDataType(RawDataType::Reserved)
32  , m_countryIndicator(0)
33  , m_langIndicator(0)
34 {
35 }
36 
40 Mp4TagField::Mp4TagField(IdentifierType id, const TagValue &value)
41  : TagField<Mp4TagField>(id, value)
42  , m_parsedRawDataType(RawDataType::Reserved)
43  , m_countryIndicator(0)
44  , m_langIndicator(0)
45 {
46 }
47 
57 Mp4TagField::Mp4TagField(const string &mean, const string &name, const TagValue &value)
58  : Mp4TagField(Mp4TagAtomIds::Extended, value)
59 {
60  m_name = name;
61  m_mean = mean;
62 }
63 
74 void Mp4TagField::reparse(Mp4Atom &ilstChild, Diagnostics &diag)
75 {
76  // prepare reparsing
77  using namespace Mp4AtomIds;
78  using namespace Mp4TagAtomIds;
79  string context("parsing MP4 tag field");
80  ilstChild.parse(diag); // ensure child has been parsed
81  setId(ilstChild.id());
82  context = "parsing MP4 tag field " + ilstChild.idToString();
83  iostream &stream = ilstChild.stream();
84  BinaryReader &reader = ilstChild.container().reader();
85  int dataAtomFound = 0, meanAtomFound = 0, nameAtomFound = 0;
86  for (Mp4Atom *dataAtom = ilstChild.firstChild(); dataAtom; dataAtom = dataAtom->nextSibling()) {
87  try {
88  dataAtom->parse(diag);
89  if (dataAtom->id() == Mp4AtomIds::Data) {
90  if (dataAtom->dataSize() < 8) {
91  diag.emplace_back(DiagLevel::Warning, "Truncated child atom \"data\" in tag atom (ilst child) found. (will be ignored)", context);
92  continue;
93  }
94  if (++dataAtomFound > 1) {
95  if (dataAtomFound == 2) {
96  diag.emplace_back(
97  DiagLevel::Warning, "Multiple \"data\" child atom in tag atom (ilst child) found. (will be ignored)", context);
98  }
99  continue;
100  }
101  stream.seekg(static_cast<streamoff>(dataAtom->dataOffset()));
102  if (reader.readByte() != 0) {
103  diag.emplace_back(DiagLevel::Warning,
104  "The version indicator byte is not zero, the tag atom might be unsupported and hence not be parsed correctly.", context);
105  }
106  setTypeInfo(m_parsedRawDataType = reader.readUInt24BE());
107  try { // try to show warning if parsed raw data type differs from expected raw data type for this atom id
108  const vector<uint32> expectedRawDataTypes = this->expectedRawDataTypes();
109  if (find(expectedRawDataTypes.cbegin(), expectedRawDataTypes.cend(), m_parsedRawDataType) == expectedRawDataTypes.cend()) {
110  diag.emplace_back(DiagLevel::Warning, "Unexpected data type indicator found.", context);
111  }
112  } catch (const Failure &) {
113  // tag id is unknown, it is not possible to validate parsed data type
114  }
115  m_countryIndicator = reader.readUInt16BE();
116  m_langIndicator = reader.readUInt16BE();
117  switch (m_parsedRawDataType) {
118  case RawDataType::Utf8:
119  case RawDataType::Utf16:
120  stream.seekg(static_cast<streamoff>(dataAtom->dataOffset() + 8));
121  value().assignText(reader.readString(dataAtom->dataSize() - 8),
123  break;
124  case RawDataType::Gif:
125  case RawDataType::Jpeg:
126  case RawDataType::Png:
127  case RawDataType::Bmp: {
128  switch (m_parsedRawDataType) {
129  case RawDataType::Gif:
130  value().setMimeType("image/gif");
131  break;
132  case RawDataType::Jpeg:
133  value().setMimeType("image/jpeg");
134  break;
135  case RawDataType::Png:
136  value().setMimeType("image/png");
137  break;
138  case RawDataType::Bmp:
139  value().setMimeType("image/bmp");
140  break;
141  default:;
142  }
143  const auto coverSize = static_cast<streamoff>(dataAtom->dataSize() - 8);
144  auto coverData = make_unique<char[]>(static_cast<size_t>(coverSize));
145  stream.read(coverData.get(), coverSize);
146  value().assignData(move(coverData), static_cast<size_t>(coverSize), TagDataType::Picture);
147  break;
148  }
150  int number = 0;
151  if (dataAtom->dataSize() > (8 + 4)) {
152  diag.emplace_back(DiagLevel::Warning, "Data atom stores integer of invalid size. Trying to read data anyways.", context);
153  }
154  if (dataAtom->dataSize() >= (8 + 4)) {
155  number = reader.readInt32BE();
156  } else if (dataAtom->dataSize() == (8 + 2)) {
157  number = reader.readInt16BE();
158  } else if (dataAtom->dataSize() == (8 + 1)) {
159  number = reader.readChar();
160  }
161  switch (ilstChild.id()) {
162  case PreDefinedGenre: // consider number as standard genre index
164  break;
165  default:
166  value().assignInteger(number);
167  }
168  break;
169  }
171  int number = 0;
172  if (dataAtom->dataSize() > (8 + 4)) {
173  diag.emplace_back(DiagLevel::Warning, "Data atom stores integer of invalid size. Trying to read data anyways.", context);
174  }
175  if (dataAtom->dataSize() >= (8 + 4)) {
176  number = static_cast<int>(reader.readUInt32BE());
177  } else if (dataAtom->dataSize() == (8 + 2)) {
178  number = static_cast<int>(reader.readUInt16BE());
179  } else if (dataAtom->dataSize() == (8 + 1)) {
180  number = static_cast<int>(reader.readByte());
181  }
182  switch (ilstChild.id()) {
183  case PreDefinedGenre: // consider number as standard genre index
184  value().assignStandardGenreIndex(number - 1);
185  break;
186  default:
187  value().assignInteger(number);
188  }
189  break;
190  }
191  default:
192  switch (ilstChild.id()) {
193  // track number, disk number and genre have no specific data type id
194  case TrackPosition:
195  case DiskPosition: {
196  if (dataAtom->dataSize() < (8 + 6)) {
197  diag.emplace_back(DiagLevel::Warning, "Track/disk position is truncated. Trying to read data anyways.", context);
198  }
199  uint16 pos = 0, total = 0;
200  if (dataAtom->dataSize() >= (8 + 4)) {
201  stream.seekg(2, ios_base::cur);
202  pos = reader.readUInt16BE();
203  }
204  if (dataAtom->dataSize() >= (8 + 6)) {
205  total = reader.readUInt16BE();
206  }
207  value().assignPosition(PositionInSet(pos, total));
208  break;
209  }
210  case PreDefinedGenre:
211  if (dataAtom->dataSize() < (8 + 2)) {
212  diag.emplace_back(DiagLevel::Warning, "Genre index is truncated.", context);
213  } else {
214  value().assignStandardGenreIndex(reader.readUInt16BE() - 1);
215  }
216  break;
217  default: // no supported data type, read raw data
218  const auto dataSize = static_cast<streamsize>(dataAtom->dataSize() - 8);
219  auto data = make_unique<char[]>(static_cast<size_t>(dataSize));
220  stream.read(data.get(), dataSize);
221  if (ilstChild.id() == Mp4TagAtomIds::Cover) {
222  value().assignData(move(data), static_cast<size_t>(dataSize), TagDataType::Picture);
223  } else {
224  value().assignData(move(data), static_cast<size_t>(dataSize), TagDataType::Undefined);
225  }
226  }
227  }
228  } else if (dataAtom->id() == Mp4AtomIds::Mean) {
229  if (dataAtom->dataSize() < 8) {
230  diag.emplace_back(DiagLevel::Warning, "Truncated child atom \"mean\" in tag atom (ilst child) found. (will be ignored)", context);
231  continue;
232  }
233  if (++meanAtomFound > 1) {
234  if (meanAtomFound == 2) {
235  diag.emplace_back(
236  DiagLevel::Warning, "Tag atom contains more than one mean atom. The addiational mean atoms will be ignored.", context);
237  }
238  continue;
239  }
240  stream.seekg(static_cast<streamoff>(dataAtom->dataOffset() + 4));
241  m_mean = reader.readString(dataAtom->dataSize() - 4);
242  } else if (dataAtom->id() == Mp4AtomIds::Name) {
243  if (dataAtom->dataSize() < 4) {
244  diag.emplace_back(DiagLevel::Warning, "Truncated child atom \"name\" in tag atom (ilst child) found. (will be ignored)", context);
245  continue;
246  }
247  if (++nameAtomFound > 1) {
248  if (nameAtomFound == 2) {
249  diag.emplace_back(
250  DiagLevel::Warning, "Tag atom contains more than one name atom. The addiational name atoms will be ignored.", context);
251  }
252  continue;
253  }
254  stream.seekg(static_cast<streamoff>(dataAtom->dataOffset() + 4));
255  m_name = reader.readString(dataAtom->dataSize() - 4);
256  } else {
257  diag.emplace_back(DiagLevel::Warning,
258  "Unkown child atom \"" % dataAtom->idToString() + "\" in tag atom (ilst child) found. (will be ignored)", context);
259  }
260  } catch (const Failure &) {
261  diag.emplace_back(DiagLevel::Warning, "Unable to parse all childs atom in tag atom (ilst child) found. (will be ignored)", context);
262  }
263  }
264  if (value().isEmpty()) {
265  diag.emplace_back(DiagLevel::Warning, "The field value is empty.", context);
266  }
267 }
268 
280 {
281  return Mp4TagFieldMaker(*this, diag);
282 }
283 
291 void Mp4TagField::make(ostream &stream, Diagnostics &diag)
292 {
293  prepareMaking(diag).make(stream);
294 }
295 
299 std::vector<uint32> Mp4TagField::expectedRawDataTypes() const
300 {
301  using namespace Mp4TagAtomIds;
302  std::vector<uint32> res;
303  switch (id()) {
304  case Album:
305  case Artist:
306  case Comment:
307  case Year:
308  case Title:
309  case Genre:
310  case Composer:
311  case Encoder:
312  case Grouping:
313  case Description:
314  case Lyrics:
315  case RecordLabel:
316  case Performers:
317  case Lyricist:
318  res.push_back(RawDataType::Utf8);
319  res.push_back(RawDataType::Utf16);
320  break;
321  case PreDefinedGenre:
322  case TrackPosition:
323  case DiskPosition:
324  res.push_back(RawDataType::Reserved);
325  break;
326  case Bpm:
327  case Rating:
328  res.push_back(RawDataType::BeSignedInt);
329  res.push_back(RawDataType::BeUnsignedInt);
330  break;
331  case Cover:
332  res.push_back(RawDataType::Gif);
333  res.push_back(RawDataType::Jpeg);
334  res.push_back(RawDataType::Png);
335  res.push_back(RawDataType::Bmp);
336  break;
337  case Extended:
339  throw Failure();
340  }
341  // assumption that extended "iTunes" tags always use Unicode correct?
342  res.push_back(RawDataType::Utf8);
343  res.push_back(RawDataType::Utf16);
344  break;
345  default:
346  throw Failure();
347  }
348  return res;
349 }
350 
359 {
360  using namespace Mp4TagAtomIds;
361  if (isTypeInfoAssigned()) {
362  // obtain raw data type from tag field if present
363  return typeInfo();
364  }
365 
366  // there is no raw data type assigned (tag field was not
367  // present in original file but rather was added manually)
368  // try to derive appropriate raw data type from atom id
369  switch (id()) {
370  case Album:
371  case Artist:
372  case Comment:
373  case Year:
374  case Title:
375  case Genre:
376  case Composer:
377  case Encoder:
378  case Grouping:
379  case Description:
380  case Lyrics:
381  case RecordLabel:
382  case Performers:
383  case Lyricist:
384  switch (value().dataEncoding()) {
386  return RawDataType::Utf8;
388  return RawDataType::Utf16;
389  default:;
390  }
391  break;
392  case TrackPosition:
393  case DiskPosition:
394  return RawDataType::Reserved;
395  case PreDefinedGenre:
396  case Bpm:
397  case Rating:
399  case Cover: {
400  const string &mimeType = value().mimeType();
401  if (mimeType == "image/jpg" || mimeType == "image/jpeg") { // "well-known" type
402  return RawDataType::Jpeg;
403  } else if (mimeType == "image/png") {
404  return RawDataType::Png;
405  } else if (mimeType == "image/bmp") {
406  return RawDataType::Bmp;
407  }
408  } break;
409  case Extended:
411  throw Failure();
412  }
413  switch (value().dataEncoding()) {
415  return RawDataType::Utf8;
417  return RawDataType::Utf16;
418  default:;
419  }
420  break;
421  default:;
422  }
423  throw Failure();
424 }
425 
429 void Mp4TagField::reset()
430 {
431  m_name.clear();
432  m_mean.clear();
433  m_parsedRawDataType = RawDataType::Reserved;
434  m_countryIndicator = 0;
435  m_langIndicator = 0;
436 }
437 
449 Mp4TagFieldMaker::Mp4TagFieldMaker(Mp4TagField &field, Diagnostics &diag)
450  : m_field(field)
451  , m_convertedData(stringstream::in | stringstream::out | stringstream::binary)
452  , m_writer(&m_convertedData)
453  , m_rawDataType(0)
454 {
455  if (!m_field.id()) {
456  diag.emplace_back(DiagLevel::Warning, "Invalid tag atom id.", "making MP4 tag field");
457  throw InvalidDataException();
458  }
459  const string context("making MP4 tag field " + Mp4TagField::fieldIdToString(m_field.id()));
460  if (m_field.value().isEmpty() && (!m_field.mean().empty() || !m_field.name().empty())) {
461  diag.emplace_back(DiagLevel::Critical, "No tag value assigned.", context);
462  throw InvalidDataException();
463  }
464 
465  try {
466  // try to use appropriate raw data type
467  m_rawDataType = m_field.appropriateRawDataType();
468  } catch (const Failure &) {
469  // unable to obtain appropriate raw data type
470  if (m_field.id() == Mp4TagAtomIds::Cover) {
471  // assume JPEG image
472  m_rawDataType = RawDataType::Utf8;
473  diag.emplace_back(
474  DiagLevel::Warning, "It was not possible to find an appropriate raw data type id. JPEG image will be assumed.", context);
475  } else {
476  // assume UTF-8 text
477  m_rawDataType = RawDataType::Utf8;
478  diag.emplace_back(DiagLevel::Warning, "It was not possible to find an appropriate raw data type id. UTF-8 will be assumed.", context);
479  }
480  }
481 
482  try {
483  if (!m_field.value().isEmpty()) { // there might be only mean and name info, but no data
484  m_convertedData.exceptions(std::stringstream::failbit | std::stringstream::badbit);
485  switch (m_rawDataType) {
486  case RawDataType::Utf8:
487  case RawDataType::Utf16:
488  m_writer.writeString(m_field.value().toString());
489  break;
491  int number = m_field.value().toInteger();
492  if (number <= numeric_limits<int16>::max() && number >= numeric_limits<int16>::min()) {
493  m_writer.writeInt16BE(static_cast<int16>(number));
494  } else {
495  m_writer.writeInt32BE(number);
496  }
497  break;
498  }
500  int number = m_field.value().toInteger();
501  if (number <= numeric_limits<uint16>::max() && number >= numeric_limits<uint16>::min()) {
502  m_writer.writeUInt16BE(static_cast<uint16>(number));
503  } else if (number > 0) {
504  m_writer.writeUInt32BE(static_cast<uint32>(number));
505  } else {
506  throw ConversionException(
507  "Negative integer can not be assigned to the field with the ID \"" % interpretIntegerAsString<uint32>(m_field.id()) + "\".");
508  }
509  break;
510  }
511  case RawDataType::Bmp:
512  case RawDataType::Jpeg:
513  case RawDataType::Png:
514  break; // leave converted data empty to write original data later
515  default:
516  switch (m_field.id()) {
517  // track number and disk number are exceptions
518  // raw data type 0 is used, information is stored as pair of unsigned integers
521  PositionInSet pos = m_field.value().toPositionInSet();
522  m_writer.writeInt32BE(pos.position());
523  if (pos.total() <= numeric_limits<int16>::max()) {
524  m_writer.writeInt16BE(static_cast<int16>(pos.total()));
525  } else {
526  throw ConversionException(
527  "Integer can not be assigned to the field with the id \"" % interpretIntegerAsString<uint32>(m_field.id())
528  + "\" because it is to big.");
529  }
530  m_writer.writeUInt16BE(0);
531  break;
532  }
534  m_writer.writeUInt16BE(static_cast<uint16>(m_field.value().toStandardGenreIndex()));
535  break;
536  default:; // leave converted data empty to write original data later
537  }
538  }
539  }
540  } catch (ConversionException &ex) {
541  // it was not possible to perform required conversions
542  if (char_traits<char>::length(ex.what())) {
543  diag.emplace_back(DiagLevel::Critical, ex.what(), context);
544  } else {
545  diag.emplace_back(DiagLevel::Critical, "The assigned tag value can not be converted to be written appropriately.", context);
546  }
547  throw InvalidDataException();
548  }
549 
550  // calculate data size
551  m_dataSize
552  = m_field.value().isEmpty() ? 0 : (m_convertedData.tellp() ? static_cast<size_t>(m_convertedData.tellp()) : m_field.value().dataSize());
553  m_totalSize = 8 // calculate entire size
554  + (m_field.name().empty() ? 0 : (12 + m_field.name().length())) + (m_field.mean().empty() ? 0 : (12 + m_field.mean().length()))
555  + (m_dataSize ? (16 + m_dataSize) : 0);
556  if (m_totalSize > numeric_limits<uint32>::max()) {
557  diag.emplace_back(DiagLevel::Critical, "Making a such big MP4 tag field is not supported.", context);
558  throw NotImplementedException();
559  }
560 }
561 
569 void Mp4TagFieldMaker::make(ostream &stream)
570 {
571  m_writer.setStream(&stream);
572  // size of entire tag atom
573  m_writer.writeUInt32BE(static_cast<uint32>(m_totalSize));
574  // id of tag atom
575  m_writer.writeUInt32BE(m_field.id());
576  if (!m_field.mean().empty()) {
577  // write "mean"
578  m_writer.writeUInt32BE(static_cast<uint32>(12 + m_field.mean().size()));
579  m_writer.writeUInt32BE(Mp4AtomIds::Mean);
580  m_writer.writeUInt32BE(0);
581  m_writer.writeString(m_field.mean());
582  }
583  if (!m_field.name().empty()) {
584  // write "name"
585  m_writer.writeUInt32BE(static_cast<uint32>(12 + m_field.name().length()));
586  m_writer.writeUInt32BE(Mp4AtomIds::Name);
587  m_writer.writeUInt32BE(0);
588  m_writer.writeString(m_field.name());
589  }
590  if (!m_field.value().isEmpty()) { // write data
591  m_writer.writeUInt32BE(static_cast<uint32>(16 + m_dataSize)); // size of data atom
592  m_writer.writeUInt32BE(Mp4AtomIds::Data); // id of data atom
593  m_writer.writeByte(0); // version
594  m_writer.writeUInt24BE(m_rawDataType);
595  m_writer.writeUInt16BE(m_field.countryIndicator());
596  m_writer.writeUInt16BE(m_field.languageIndicator());
597  if (m_convertedData.tellp()) {
598  // write converted data
599  stream << m_convertedData.rdbuf();
600  } else {
601  // no conversion was needed, write data directly from tag value
602  stream.write(m_field.value().dataPointer(), static_cast<streamoff>(m_field.value().dataSize()));
603  }
604  }
605 }
606 
607 } // namespace TagParser
Mp4TagField()
Constructs a new Mp4TagField.
Definition: mp4tagfield.cpp:30
void setTypeInfo(const TypeInfoType &typeInfo)
Sets the type info of the current TagField.
const std::string & name() const
Returns the "name" for "extended" fields.
Definition: mp4tagfield.h:144
uint16 languageIndicator() const
Returns the language indicator.
Definition: mp4tagfield.h:192
The Mp4TagField class is used by Mp4Tag to store the fields.
Definition: mp4tagfield.h:97
void setMimeType(const std::string &mimeType)
Sets the MIME type.
Definition: tagvalue.h:482
ImplementationType * firstChild()
Returns the first child of the element.
static std::string fieldIdToString(IdentifierType id)
Returns the string representation for the specified id.
Definition: mp4tagfield.h:226
uint16 countryIndicator() const
Returns the country indicator.
Definition: mp4tagfield.h:184
The Mp4TagFieldMaker class helps making tag fields.
Definition: mp4tagfield.h:62
The Mp4Atom class helps to parse MP4 files.
Definition: mp4atom.h:38
bool isEmpty() const
Returns an indication whether a value is assigned.
Definition: tagvalue.h:389
const std::string & mimeType() const
Returns the MIME type.
Definition: tagvalue.h:472
std::size_t dataSize() const
Returns the size of the assigned value in bytes.
Definition: tagvalue.h:421
Contains utility classes helping to read and write streams.
void parse(Diagnostics &diag)
Parses the header information of the element which is read from the related stream at the start offset.
ContainerType & container()
Returns the related container.
void assignPosition(PositionInSet value)
Assigns the given PositionInSet value.
Definition: tagvalue.h:313
void make(std::ostream &stream)
Saves the field (specified when constructing the object) to the specified stream.
void assignStandardGenreIndex(int index)
Assigns the given standard genre index to be assigned.
Definition: tagvalue.h:344
Mp4TagFieldMaker prepareMaking(Diagnostics &diag)
Prepares making.
const TypeInfoType & typeInfo() const
Returns the type info of the current TagField.
The TagField class is used by FieldMapBasedTag to store the fields.
char * dataPointer()
Returns a pointer to the raw data assigned to the current instance.
Definition: tagvalue.h:432
const IdentifierType & id() const
Returns the id of the current TagField.
uint32 appropriateRawDataType() const
Returns an appropriate raw data type.
void assignInteger(int value)
Assigns the given integer value.
Definition: tagvalue.cpp:639
void assignText(const char *text, std::size_t textSize, TagTextEncoding textEncoding=TagTextEncoding::Latin1, TagTextEncoding convertTo=TagTextEncoding::Unspecified)
Assigns a copy of the given text.
Definition: tagvalue.cpp:588
void reparse(Mp4Atom &ilstChild, Diagnostics &diag)
Parses field information from the specified Mp4Atom.
Definition: mp4tagfield.cpp:74
void assignData(const char *data, std::size_t length, TagDataType type=TagDataType::Binary, TagTextEncoding encoding=TagTextEncoding::Latin1)
std::iostream & stream()
Returns the related stream.
std::vector< uint32 > expectedRawDataTypes() const
Returns the expected raw data types for the ID of the field.
The TagValue class wraps values of different types.
Definition: tagvalue.h:65
bool isTypeInfoAssigned() const
Returns an indication whether a type info is assigned.
The class inherits from std::exception and serves as base class for exceptions thrown by the elements...
Definition: exceptions.h:11
const IdentifierType & id() const
Returns the element ID.
Contains all classes and functions of the Tag Parser library.
Definition: aaccodebook.h:9
std::string idToString() const
Converts the specified atom ID to a printable string.
Definition: mp4atom.h:67
void setId(const IdentifierType &id)
Sets the id of the current Tag Field.
The Diagnostics class is a container for DiagMessage.
Definition: diagnostics.h:156
void make(std::ostream &stream, Diagnostics &diag)
Saves the field to the specified stream.
TagValue & value()
Returns the value of the current TagField.
const std::string & mean() const
Returns the "mean" for "extended" fields.
Definition: mp4tagfield.h:160