Tag Parser  7.0.0
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
id3v2frame.cpp
Go to the documentation of this file.
1 #include "./id3v2frame.h"
2 #include "./id3genres.h"
3 #include "./id3v2frameids.h"
4 
5 #include "../diagnostics.h"
6 #include "../exceptions.h"
7 
8 #include <c++utilities/conversion/stringbuilder.h>
9 #include <c++utilities/conversion/stringconversion.h>
10 
11 #include <zlib.h>
12 
#include <algorithm>
#include <cstring>
#include <limits>
#include <memory>
16 
17 using namespace std;
18 using namespace ConversionUtilities;
19 using namespace ChronoUtilities;
20 using namespace IoUtilities;
21 
22 namespace TagParser {
23 
24 namespace Id3v2TextEncodingBytes {
26 }
27 
36 Id3v2Frame::Id3v2Frame()
37  : m_parsedVersion(0)
38  , m_dataSize(0)
39  , m_totalSize(0)
40  , m_flag(0)
41  , m_group(0)
42  , m_padding(false)
43 {
44 }
45 
49 Id3v2Frame::Id3v2Frame(const IdentifierType &id, const TagValue &value, byte group, uint16 flag)
50  : TagField<Id3v2Frame>(id, value)
51  , m_parsedVersion(0)
52  , m_dataSize(0)
53  , m_totalSize(0)
54  , m_flag(flag)
55  , m_group(group)
56  , m_padding(false)
57 {
58 }
59 
/*!
 * \brief Helper function to parse the genre index.
 * \param denotation Specifies the string to be parsed, e.g. "(12)", "12" or " 12".
 * \returns Returns the parsed genre index or -1 if \a denotation is not a (possibly
 *          parenthesized) decimal number.
 *
 * Leading spaces are skipped. An optional opening parenthesis may precede the digits.
 * Parsing stops at the closing parenthesis, at a null character or at the end of the string.
 * Any other character makes the denotation invalid.
 *
 * \remarks
 * - At least one digit is required, so "(" or "()" yield -1 instead of index 0.
 * - Values which do not fit into an int yield -1 instead of overflowing.
 */
template <class stringtype> int parseGenreIndex(const stringtype &denotation)
{
    constexpr int maxIndex = std::numeric_limits<int>::max();
    int index = 0;
    bool started = false; // whether '(' or the first digit has been consumed
    bool hasDigit = false; // whether at least one digit has been consumed
    for (const auto c : denotation) {
        if (c >= '0' && c <= '9') {
            const int digit = static_cast<int>(c - '0');
            // refuse to go beyond the range of int (the input comes straight from the file)
            if (index > (maxIndex - digit) / 10) {
                return -1;
            }
            index = index * 10 + digit;
            started = hasDigit = true;
            continue;
        }
        if (!started) {
            // only spaces and a single opening parenthesis may precede the number
            if (c == ' ') {
                continue;
            }
            if (c == '(') {
                started = true;
                continue;
            }
            return -1;
        }
        if (c == ')' || c == '\0') {
            break; // regular end of the denotation
        }
        return -1;
    }
    return hasDigit ? index : -1;
}
101 
/*!
 * \brief Parses a frame from the stream read using the specified \a reader.
 * \param reader Specifies the reader used to read the frame header and data.
 * \param version Specifies the ID3v2 version; values below 3 select the 6 byte header with
 *                24-bit ID and size, other values the 10 byte header with 32-bit ID and size
 *                (read as synchsafe integer if \a version is 4 or greater).
 * \param maximalSize Specifies the maximal number of bytes the frame may occupy.
 * \param diag Receives notifications about problems found while parsing.
 *
 * \throws Throws NoDataFoundException if the frame ID starts with a null byte (padding).
 * \throws Throws TruncatedDataException if the total frame size exceeds \a maximalSize.
 * \throws Throws InvalidDataException if the frame size is 0 or decompression fails.
 */
112 void Id3v2Frame::parse(BinaryReader &reader, uint32 version, uint32 maximalSize, Diagnostics &diag)
113 {
114  static const string defaultContext("parsing ID3v2 frame");
115  string context;
116 
117  // parse header
118  if (version < 3) {
119  // parse header for ID3v2.1 and ID3v2.2
120  // -> read ID
121  setId(reader.readUInt24BE());
122  if (id() & 0xFFFF0000u) {
123  m_padding = false;
124  } else {
125  // padding reached
126  m_padding = true;
127  diag.emplace_back(DiagLevel::Debug, "Frame ID starts with null-byte -> padding reached.", defaultContext);
128  throw NoDataFoundException();
129  }
130 
131  // -> update context
132  context = "parsing " % frameIdString() + " frame";
133 
134  // -> read size, check whether frame is truncated
135  m_dataSize = reader.readUInt24BE();
 // total size = data size + 6 byte header (3 byte ID + 3 byte size)
136  m_totalSize = m_dataSize + 6;
137  if (m_totalSize > maximalSize) {
138  diag.emplace_back(DiagLevel::Warning, "The frame is truncated and will be ignored.", context);
139  throw TruncatedDataException();
140  }
141 
142  // -> no flags/group in ID3v2.2
143  m_flag = 0;
144  m_group = 0;
145 
146  } else {
147  // parse header for ID3v2.3 and ID3v2.4
148  // -> read ID
149  setId(reader.readUInt32BE());
150  if (id() & 0xFF000000u) {
151  m_padding = false;
152  } else {
153  // padding reached
154  m_padding = true;
155  diag.emplace_back(DiagLevel::Debug, "Frame ID starts with null-byte -> padding reached.", defaultContext);
156  throw NoDataFoundException();
157  }
158 
159  // -> update context
160  context = "parsing " % frameIdString() + " frame";
161 
162  // -> read size, check whether frame is truncated
163  m_dataSize = version >= 4 ? reader.readSynchsafeUInt32BE() : reader.readUInt32BE();
 // total size = data size + 10 byte header (4 byte ID + 4 byte size + 2 byte flags)
 // NOTE(review): if m_totalSize is a 32-bit unsigned integer (as the reads above suggest), this
 // addition wraps for data sizes close to 2^32 and the truncation check below is bypassed - confirm.
164  m_totalSize = m_dataSize + 10;
165  if (m_totalSize > maximalSize) {
166  diag.emplace_back(DiagLevel::Warning, "The frame is truncated and will be ignored.", context);
167  throw TruncatedDataException();
168  }
169 
170  // -> read flags and group
171  m_flag = reader.readUInt16BE();
172  m_group = hasGroupInformation() ? reader.readByte() : 0;
173  if (isEncrypted()) {
174  // encryption is not implemented
175  diag.emplace_back(DiagLevel::Critical, "Encrypted frames aren't supported.", context);
177  }
178  }
179 
180  // frame size mustn't be 0
 // NOTE(review): m_dataSize is presumably unsigned, so "<= 0" is effectively "== 0" - confirm.
181  if (m_dataSize <= 0) {
182  diag.emplace_back(DiagLevel::Critical, "The frame size is 0.", context);
183  throw InvalidDataException();
184  }
185 
186  // parse the data
187  unique_ptr<char[]> buffer;
188 
189  // -> decompress data if compressed; otherwise just read it
190  if (isCompressed()) {
 // the decompressed size precedes the compressed data and is taken from the file as-is
191  uLongf decompressedSize = version >= 4 ? reader.readSynchsafeUInt32BE() : reader.readUInt32BE();
192  if (decompressedSize < m_dataSize) {
193  diag.emplace_back(DiagLevel::Critical, "The decompressed size is smaller than the compressed size.", context);
194  throw InvalidDataException();
195  }
196  const auto bufferCompressed = make_unique<char[]>(m_dataSize);
197  reader.read(bufferCompressed.get(), m_dataSize);
198  buffer = make_unique<char[]>(decompressedSize);
199  switch (
200  uncompress(reinterpret_cast<Bytef *>(buffer.get()), &decompressedSize, reinterpret_cast<Bytef *>(bufferCompressed.get()), m_dataSize)) {
 // NOTE(review): zlib documents Z_MEM_ERROR as "not enough memory"; the message below looks inaccurate.
201  case Z_MEM_ERROR:
202  diag.emplace_back(DiagLevel::Critical, "Decompressing failed. The source buffer was too small.", context);
203  throw InvalidDataException();
204  case Z_BUF_ERROR:
205  diag.emplace_back(DiagLevel::Critical, "Decompressing failed. The destination buffer was too small.", context);
206  throw InvalidDataException();
207  case Z_DATA_ERROR:
208  diag.emplace_back(DiagLevel::Critical, "Decompressing failed. The input data was corrupted or incomplete.", context);
209  throw InvalidDataException();
210  case Z_OK:
211  break;
212  default:
213  diag.emplace_back(DiagLevel::Critical, "Decompressing failed (unknown reason).", context);
214  throw InvalidDataException();
215  }
 // from here on m_dataSize is the size of the decompressed data as reported by uncompress()
216  m_dataSize = decompressedSize;
217  } else {
218  buffer = make_unique<char[]>(m_dataSize);
219  reader.read(buffer.get(), m_dataSize);
220  }
221 
222  // -> get tag value depending of field type
223  if (Id3v2FrameIds::isTextFrame(id())) {
224  // frame contains text
225  TagTextEncoding dataEncoding = parseTextEncodingByte(*buffer.get(), diag); // the first byte stores the encoding
226  if ((version >= 3 && (id() == Id3v2FrameIds::lTrackPosition || id() == Id3v2FrameIds::lDiskPosition))
227  || (version < 3 && id() == Id3v2FrameIds::sTrackPosition)) {
228  // the track number or the disk number frame
229  try {
230  if (characterSize(dataEncoding) > 1) {
231  value().assignPosition(PositionInSet(parseWideString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag)));
232  } else {
233  value().assignPosition(PositionInSet(parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag)));
234  }
235  } catch (const ConversionException &) {
236  diag.emplace_back(DiagLevel::Warning, "The value of track/disk position frame is not numeric and will be ignored.", context);
237  }
238 
239  } else if ((version >= 3 && id() == Id3v2FrameIds::lLength) || (version < 3 && id() == Id3v2FrameIds::sLength)) {
240  // frame contains length
241  try {
242  string milliseconds;
243  if (dataEncoding == TagTextEncoding::Utf16BigEndian || dataEncoding == TagTextEncoding::Utf16LittleEndian) {
 // UTF-16 text is converted to UTF-8 before it is parsed as number
244  const auto parsedStringRef = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
245  const auto convertedStringData = dataEncoding == TagTextEncoding::Utf16BigEndian
246  ? convertUtf16BEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef))
247  : convertUtf16LEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef));
248  milliseconds = string(convertedStringData.first.get(), convertedStringData.second);
249  } else { // Latin-1 or UTF-8
250  milliseconds = parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
251  }
252  value().assignTimeSpan(TimeSpan::fromMilliseconds(stringToNumber<double>(milliseconds)));
253  } catch (const ConversionException &) {
254  diag.emplace_back(DiagLevel::Warning, "The value of the length frame is not numeric and will be ignored.", context);
255  }
256 
257  } else if ((version >= 3 && id() == Id3v2FrameIds::lGenre) || (version < 3 && id() == Id3v2FrameIds::sGenre)) {
258  // genre/content type
259  int genreIndex;
260  if (characterSize(dataEncoding) > 1) {
261  const auto genreDenotation = parseWideString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
262  genreIndex = parseGenreIndex(genreDenotation);
263  } else {
264  const auto genreDenotation = parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
265  genreIndex = parseGenreIndex(genreDenotation);
266  }
267  if (genreIndex != -1) {
268  // genre is specified as ID3 genre number
269  value().assignStandardGenreIndex(genreIndex);
270  } else {
271  // genre is specified as string
272  // string might be null terminated
273  const auto substr = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
274  value().assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding);
275  }
276  } else {
277  // any other text frame
278  const auto substr = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
279  value().assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding);
280  }
281 
282  } else if (version >= 3 && id() == Id3v2FrameIds::lCover) {
283  // frame stores picture
284  byte type;
285  parsePicture(buffer.get(), m_dataSize, value(), type, diag);
286  setTypeInfo(type);
287 
288  } else if (version < 3 && id() == Id3v2FrameIds::sCover) {
289  // frame stores legacy picture
290  byte type;
291  parseLegacyPicture(buffer.get(), m_dataSize, value(), type, diag);
292  setTypeInfo(type);
293 
294  } else if (((version >= 3 && id() == Id3v2FrameIds::lComment) || (version < 3 && id() == Id3v2FrameIds::sComment))
296  // comment frame or unsynchronized lyrics frame (these two frame types have the same structure)
297  parseComment(buffer.get(), m_dataSize, value(), diag);
298 
299  } else {
300  // unknown frame
 // the raw frame data is stored without interpretation
301  value().assignData(buffer.get(), m_dataSize, TagDataType::Undefined);
302  }
303 }
304 
316 {
317  return Id3v2FrameMaker(*this, version, diag);
318 }
319 
328 void Id3v2Frame::make(BinaryWriter &writer, byte version, Diagnostics &diag)
329 {
330  prepareMaking(version, diag).make(writer);
331 }
332 
336 void Id3v2Frame::reset()
337 {
338  m_flag = 0;
339  m_group = 0;
340  m_parsedVersion = 0;
341  m_dataSize = 0;
342  m_totalSize = 0;
343  m_padding = false;
344 }
345 
357 Id3v2FrameMaker::Id3v2FrameMaker(Id3v2Frame &frame, byte version, Diagnostics &diag)
358  : m_frame(frame)
359  , m_frameId(m_frame.id())
360  , m_version(version)
361 {
362  const string context("making " % m_frame.frameIdString() + " frame");
363 
364  // validate assigned data
365  if (m_frame.value().isEmpty()) {
366  diag.emplace_back(DiagLevel::Critical, "Cannot make an empty frame.", context);
367  throw InvalidDataException();
368  }
369  if (m_frame.isEncrypted()) {
370  diag.emplace_back(DiagLevel::Critical, "Cannot make an encrypted frame (isn't supported by this tagging library).", context);
371  throw InvalidDataException();
372  }
373  if (m_frame.hasPaddingReached()) {
374  diag.emplace_back(DiagLevel::Critical, "Cannot make a frame which is marked as padding.", context);
375  throw InvalidDataException();
376  }
377  if (version < 3 && m_frame.isCompressed()) {
378  diag.emplace_back(DiagLevel::Warning, "Compression is not supported by the version of ID3v2 and won't be applied.", context);
379  }
380  if (version < 3 && (m_frame.flag() || m_frame.group())) {
381  diag.emplace_back(DiagLevel::Warning,
382  "The existing flag and group information is not supported by the version of ID3v2 and will be ignored/discarted.", context);
383  }
384 
385  // convert frame ID if necessary
386  if (version >= 3) {
387  if (Id3v2FrameIds::isShortId(m_frameId)) {
388  // try to convert the short frame ID to its long equivalent
389  if (!(m_frameId = Id3v2FrameIds::convertToLongId(m_frameId))) {
390  diag.emplace_back(DiagLevel::Critical,
391  "The short frame ID can't be converted to its long equivalent which is needed to use the frame in a newer version of ID3v2.",
392  context);
393  throw InvalidDataException();
394  }
395  }
396  } else {
397  if (Id3v2FrameIds::isLongId(m_frameId)) {
398  // try to convert the long frame ID to its short equivalent
399  if (!(m_frameId = Id3v2FrameIds::convertToShortId(m_frameId))) {
400  diag.emplace_back(DiagLevel::Critical,
401  "The long frame ID can't be converted to its short equivalent which is needed to use the frame in the old version of ID3v2.",
402  context);
403  throw InvalidDataException();
404  }
405  }
406  }
407 
408  // make actual data depending on the frame ID
409  try {
410  if (Id3v2FrameIds::isTextFrame(m_frameId)) {
411  // it is a text frame
412  if ((version >= 3 && (m_frameId == Id3v2FrameIds::lTrackPosition || m_frameId == Id3v2FrameIds::lDiskPosition))
413  || (version < 3 && m_frameId == Id3v2FrameIds::sTrackPosition)) {
414  // track number or the disk number frame
415  m_frame.makeString(m_data, m_decompressedSize, m_frame.value().toString(), TagTextEncoding::Latin1);
416  } else if ((version >= 3 && m_frameId == Id3v2FrameIds::lLength) || (version < 3 && m_frameId == Id3v2FrameIds::sLength)) {
417  // length frame
418  m_frame.makeString(m_data, m_decompressedSize, ConversionUtilities::numberToString(m_frame.value().toTimeSpan().totalMilliseconds()),
420  } else if (m_frame.value().type() == TagDataType::StandardGenreIndex
421  && ((version >= 3 && m_frameId == Id3v2FrameIds::lGenre) || (version < 3 && m_frameId == Id3v2FrameIds::sGenre))) {
422  // pre-defined genre frame
423  m_frame.makeString(
424  m_data, m_decompressedSize, ConversionUtilities::numberToString(m_frame.value().toStandardGenreIndex()), TagTextEncoding::Latin1);
425  } else {
426  // any other text frame
427  if (version <= 3 && m_frame.value().dataEncoding() == TagTextEncoding::Utf8) {
428  // UTF-8 is only supported by ID3v2.4, so convert back to UTF-16
429  m_frame.makeString(
430  m_data, m_decompressedSize, m_frame.value().toString(TagTextEncoding::Utf16LittleEndian), TagTextEncoding::Utf16LittleEndian);
431  } else {
432  // just keep encoding of the assigned value
433  m_frame.makeString(m_data, m_decompressedSize, m_frame.value().toString(), m_frame.value().dataEncoding());
434  }
435  }
436 
437  } else if ((version >= 3 && m_frameId == Id3v2FrameIds::lCover) || (version < 3 && m_frameId == Id3v2FrameIds::sCover)) {
438  // picture frame
439  m_frame.makePicture(m_data, m_decompressedSize, m_frame.value(), m_frame.isTypeInfoAssigned() ? m_frame.typeInfo() : 0, version);
440 
441  } else if (((version >= 3 && m_frameId == Id3v2FrameIds::lComment) || (version < 3 && m_frameId == Id3v2FrameIds::sComment))
442  || ((version >= 3 && m_frameId == Id3v2FrameIds::lUnsynchronizedLyrics)
443  || (version < 3 && m_frameId == Id3v2FrameIds::sUnsynchronizedLyrics))) {
444  // the comment frame or the unsynchronized lyrics frame
445  m_frame.makeComment(m_data, m_decompressedSize, m_frame.value(), version, diag);
446 
447  } else {
448  // an unknown frame
449  m_data = make_unique<char[]>(m_decompressedSize = m_frame.value().dataSize());
450  copy(m_frame.value().dataPointer(), m_frame.value().dataPointer() + m_decompressedSize, m_data.get());
451  }
452  } catch (const ConversionException &) {
453  diag.emplace_back(DiagLevel::Critical, "Assigned value can not be converted appropriately.", context);
454  throw InvalidDataException();
455  }
456 
457  // apply compression if frame should be compressed
458  if (version >= 3 && m_frame.isCompressed()) {
459  m_dataSize = compressBound(m_decompressedSize);
460  auto compressedData = make_unique<char[]>(m_decompressedSize);
461  switch (compress(reinterpret_cast<Bytef *>(compressedData.get()), reinterpret_cast<uLongf *>(&m_dataSize),
462  reinterpret_cast<Bytef *>(m_data.get()), m_decompressedSize)) {
463  case Z_MEM_ERROR:
464  diag.emplace_back(DiagLevel::Critical, "Decompressing failed. The source buffer was too small.", context);
465  throw InvalidDataException();
466  case Z_BUF_ERROR:
467  diag.emplace_back(DiagLevel::Critical, "Decompressing failed. The destination buffer was too small.", context);
468  throw InvalidDataException();
469  case Z_OK:;
470  }
471  m_data.swap(compressedData);
472  } else {
473  m_dataSize = m_decompressedSize;
474  }
475 
476  // calculate required size
477  // -> data size
478  m_requiredSize = m_dataSize;
479  if (version < 3) {
480  // -> header size
481  m_requiredSize += 6;
482  } else {
483  // -> header size
484  m_requiredSize += 10;
485  // -> group byte
486  if (m_frame.hasGroupInformation()) {
487  m_requiredSize += 1;
488  }
489  // -> decompressed size
490  if (version >= 3 && m_frame.isCompressed()) {
491  m_requiredSize += 4;
492  }
493  }
494 }
495 
503 void Id3v2FrameMaker::make(BinaryWriter &writer)
504 {
505  if (m_version < 3) {
506  writer.writeUInt24BE(m_frameId);
507  writer.writeUInt24BE(m_dataSize);
508  } else {
509  writer.writeUInt32BE(m_frameId);
510  if (m_version >= 4) {
511  writer.writeSynchsafeUInt32BE(m_dataSize);
512  } else {
513  writer.writeUInt32BE(m_dataSize);
514  }
515  writer.writeUInt16BE(m_frame.flag());
516  if (m_frame.hasGroupInformation()) {
517  writer.writeByte(m_frame.group());
518  }
519  if (m_version >= 3 && m_frame.isCompressed()) {
520  if (m_version >= 4) {
521  writer.writeSynchsafeUInt32BE(m_decompressedSize);
522  } else {
523  writer.writeUInt32BE(m_decompressedSize);
524  }
525  }
526  }
527  writer.write(m_data.get(), m_dataSize);
528 }
529 
537 {
538  switch (textEncodingByte) {
546  return TagTextEncoding::Utf8;
547  default:
548  diag.emplace_back(
549  DiagLevel::Warning, "The charset of the frame is invalid. Latin-1 will be used.", "parsing encoding of frame " + frameIdString());
551  }
552 }
553 
558 {
559  switch (textEncoding) {
568  default:
569  return 0;
570  }
571 }
572 
/*!
 * \brief Locates a (possibly terminated) string within the specified \a buffer.
 * \param buffer Specifies the start of the data to be scanned.
 * \param bufferSize Specifies the number of bytes available at \a buffer.
 * \param encoding Specifies the denoted encoding; it is updated if a byte order mark contradicts it.
 * \param addWarnings Specifies whether a warning is added to \a diag if no terminator was found.
 * \param diag Receives notifications about problems found while parsing.
 * \returns Returns a tuple consisting of the start of the string (behind a byte order mark, if
 *          present), the size of the string in bytes and a pointer to the position behind the
 *          element at which the scan stopped.
 */
587 tuple<const char *, size_t, const char *> Id3v2Frame::parseSubstring(
588  const char *buffer, std::size_t bufferSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag)
589 {
 // get<0>: start of the string, get<1>: size in bytes, get<2>: initially the end of the buffer
590  tuple<const char *, size_t, const char *> res(buffer, 0, buffer + bufferSize);
591  switch (encoding) {
594  case TagTextEncoding::Utf8: {
 // skip an UTF-8 byte order mark (EF BB BF)
595  if ((bufferSize >= 3) && (ConversionUtilities::BE::toUInt24(buffer) == 0x00EFBBBF)) {
596  if (encoding == TagTextEncoding::Latin1) {
597  diag.emplace_back(DiagLevel::Critical, "Denoted character set is Latin-1 but an UTF-8 BOM is present - assuming UTF-8.",
598  "parsing frame " + frameIdString());
599  encoding = TagTextEncoding::Utf8;
600  }
601  get<0>(res) += 3;
602  }
 // count bytes until the null byte or the end of the buffer
 // NOTE(review): the loop condition reads *pos before the bounds check in the body, so one byte
 // at the end of the buffer is read when the string is not terminated - confirm whether callers
 // guarantee that this byte is readable.
603  const char *pos = get<0>(res);
604  for (; *pos != 0x00; ++pos) {
605  if (pos < get<2>(res)) {
606  ++get<1>(res);
607  } else {
608  if (addWarnings) {
609  diag.emplace_back(
610  DiagLevel::Warning, "String in frame is not terminated properly.", "parsing termination of frame " + frameIdString());
611  }
612  break;
613  }
614  }
615  get<2>(res) = pos + 1;
616  break;
617  }
 // skip a 2 byte byte order mark (the buffer is interpreted as little endian 16-bit integer)
620  if (bufferSize >= 2) {
621  switch (ConversionUtilities::LE::toUInt16(buffer)) {
622  case 0xFEFF:
623  if (encoding == TagTextEncoding::Utf16BigEndian) {
624  diag.emplace_back(DiagLevel::Critical,
625  "Denoted character set is UTF-16 Big Endian but UTF-16 Little Endian BOM is present - assuming UTF-16 LE.",
626  "parsing frame " + frameIdString());
628  }
629  get<0>(res) += 2;
630  break;
631  case 0xFFFE:
633  get<0>(res) += 2;
634  }
635  }
 // count 2 byte units until the 16-bit null terminator or the end of the buffer
 // NOTE(review): as above, *pos is read before the bounds check; the access also presumes
 // that the data may be read as uint16 at this address - confirm.
636  const uint16 *pos = reinterpret_cast<const uint16 *>(get<0>(res));
637  for (; *pos != 0x0000; ++pos) {
638  if (pos < reinterpret_cast<const uint16 *>(get<2>(res))) {
639  get<1>(res) += 2;
640  } else {
641  if (addWarnings) {
642  diag.emplace_back(
643  DiagLevel::Warning, "Wide string in frame is not terminated properly.", "parsing termination of frame " + frameIdString());
644  }
645  break;
646  }
647  }
648  get<2>(res) = reinterpret_cast<const char *>(pos + 1);
649  break;
650  }
651  }
652  return res;
653 }
654 
660 string Id3v2Frame::parseString(const char *buffer, size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag)
661 {
662  const auto substr = parseSubstring(buffer, dataSize, encoding, addWarnings, diag);
663  return string(get<0>(substr), get<1>(substr));
664 }
665 
673 u16string Id3v2Frame::parseWideString(const char *buffer, size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag)
674 {
675  const auto substr = parseSubstring(buffer, dataSize, encoding, addWarnings, diag);
676  u16string res(reinterpret_cast<u16string::const_pointer>(get<0>(substr)), get<1>(substr) / 2);
677  TagValue::ensureHostByteOrder(res, encoding);
678  return res;
679 }
680 
/*!
 * \brief Checks the specified \a buffer for a byte order mark and adjusts the \a encoding accordingly.
 * \param buffer Specifies the data to be checked.
 * \param maxSize Specifies the number of bytes available at \a buffer.
 * \param encoding Specifies the denoted encoding; it might be updated.
 * \param diag Receives notifications about problems found while parsing.
 */
690 void Id3v2Frame::parseBom(const char *buffer, size_t maxSize, TagTextEncoding &encoding, Diagnostics &diag)
691 {
692  switch (encoding) {
 // a 2 byte mark is checked here (the buffer is interpreted as big endian 16-bit integer)
695  if ((maxSize >= 2) && (ConversionUtilities::BE::toUInt16(buffer) == 0xFFFE)) {
697  } else if ((maxSize >= 2) && (ConversionUtilities::BE::toUInt16(buffer) == 0xFEFF)) {
699  }
700  break;
701  default:
 // any other denoted encoding: an UTF-8 byte order mark (EF BB BF) switches the encoding to UTF-8
702  if ((maxSize >= 3) && (ConversionUtilities::BE::toUInt24(buffer) == 0x00EFBBBF)) {
703  encoding = TagTextEncoding::Utf8;
704  diag.emplace_back(DiagLevel::Warning, "UTF-8 byte order mark found in text frame.", "parsing byte oder mark of frame " + frameIdString());
705  }
706  }
707 }
708 
716 void Id3v2Frame::parseLegacyPicture(const char *buffer, std::size_t maxSize, TagValue &tagValue, byte &typeInfo, Diagnostics &diag)
717 {
718  static const string context("parsing ID3v2.2 picture frame");
719  if (maxSize < 6) {
720  diag.emplace_back(DiagLevel::Critical, "Picture frame is incomplete.", context);
721  throw TruncatedDataException();
722  }
723  const char *end = buffer + maxSize;
724  auto dataEncoding = parseTextEncodingByte(*buffer, diag); // the first byte stores the encoding
725  typeInfo = static_cast<unsigned char>(*(buffer + 4));
726  auto substr = parseSubstring(buffer + 5, end - 5 - buffer, dataEncoding, true, diag);
727  tagValue.setDescription(string(get<0>(substr), get<1>(substr)), dataEncoding);
728  if (get<2>(substr) >= end) {
729  diag.emplace_back(DiagLevel::Critical, "Picture frame is incomplete (actual data is missing).", context);
730  throw TruncatedDataException();
731  }
732  tagValue.assignData(get<2>(substr), end - get<2>(substr), TagDataType::Picture, dataEncoding);
733 }
734 
742 void Id3v2Frame::parsePicture(const char *buffer, std::size_t maxSize, TagValue &tagValue, byte &typeInfo, Diagnostics &diag)
743 {
744  static const string context("parsing ID3v2.3 picture frame");
745  const char *end = buffer + maxSize;
746  auto dataEncoding = parseTextEncodingByte(*buffer, diag); // the first byte stores the encoding
747  auto mimeTypeEncoding = TagTextEncoding::Latin1;
748  auto substr = parseSubstring(buffer + 1, maxSize - 1, mimeTypeEncoding, true, diag);
749  if (get<1>(substr)) {
750  tagValue.setMimeType(string(get<0>(substr), get<1>(substr)));
751  }
752  if (get<2>(substr) >= end) {
753  diag.emplace_back(DiagLevel::Critical, "Picture frame is incomplete (type info, description and actual data are missing).", context);
754  throw TruncatedDataException();
755  }
756  typeInfo = static_cast<unsigned char>(*get<2>(substr));
757  if (++get<2>(substr) >= end) {
758  diag.emplace_back(DiagLevel::Critical, "Picture frame is incomplete (description and actual data are missing).", context);
759  throw TruncatedDataException();
760  }
761  substr = parseSubstring(get<2>(substr), end - get<2>(substr), dataEncoding, true, diag);
762  tagValue.setDescription(string(get<0>(substr), get<1>(substr)), dataEncoding);
763  if (get<2>(substr) >= end) {
764  diag.emplace_back(DiagLevel::Critical, "Picture frame is incomplete (actual data is missing).", context);
765  throw TruncatedDataException();
766  }
767  tagValue.assignData(get<2>(substr), end - get<2>(substr), TagDataType::Picture, dataEncoding);
768 }
769 
776 void Id3v2Frame::parseComment(const char *buffer, std::size_t dataSize, TagValue &tagValue, Diagnostics &diag)
777 {
778  static const string context("parsing comment/unsynchronized lyrics frame");
779  const char *end = buffer + dataSize;
780  if (dataSize < 5) {
781  diag.emplace_back(DiagLevel::Critical, "Comment frame is incomplete.", context);
782  throw TruncatedDataException();
783  }
784  TagTextEncoding dataEncoding = parseTextEncodingByte(*buffer, diag);
785  if (*(++buffer)) {
786  tagValue.setLanguage(string(buffer, 3));
787  }
788  auto substr = parseSubstring(buffer += 3, dataSize -= 4, dataEncoding, true, diag);
789  tagValue.setDescription(string(get<0>(substr), get<1>(substr)), dataEncoding);
790  if (get<2>(substr) > end) {
791  diag.emplace_back(DiagLevel::Critical, "Comment frame is incomplete (description not terminated?).", context);
792  throw TruncatedDataException();
793  }
794  substr = parseSubstring(get<2>(substr), end - get<2>(substr), dataEncoding, false, diag);
795  tagValue.assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding);
796 }
797 
805 void Id3v2Frame::makeString(std::unique_ptr<char[]> &buffer, uint32 &bufferSize, const std::string &value, TagTextEncoding encoding)
806 {
807  makeEncodingAndData(buffer, bufferSize, encoding, value.data(), value.size());
808 }
809 
819  std::unique_ptr<char[]> &buffer, uint32 &bufferSize, TagTextEncoding encoding, const char *data, std::size_t dataSize)
820 {
821  // calculate buffer size and allocate buffer
822  if (!data) {
823  dataSize = 0;
824  }
825  char *bufferDataAddress;
826  switch (encoding) {
829  case TagTextEncoding::Unspecified: // assumption
830  // allocate buffer
831  buffer = make_unique<char[]>(bufferSize = 1 + dataSize + 1);
832  buffer[0] = makeTextEncodingByte(encoding); // set text encoding byte
833  bufferDataAddress = buffer.get() + 1;
834  break;
837  // allocate buffer
838  buffer = make_unique<char[]>(bufferSize = 1 + 2 + dataSize + 2);
839  buffer[0] = makeTextEncodingByte(encoding); // set text encoding byte
840  ConversionUtilities::LE::getBytes(
841  encoding == TagTextEncoding::Utf16LittleEndian ? static_cast<uint16>(0xFEFF) : static_cast<uint16>(0xFFFE), buffer.get() + 1);
842  bufferDataAddress = buffer.get() + 3;
843  break;
844  default:
845  return;
846  }
847 
848  // write string data
849  if (dataSize) {
850  copy(data, data + dataSize, bufferDataAddress);
851  }
852 }
853 
/*!
 * \brief Writes the byte order mark for the specified \a encoding to the specified \a buffer.
 * \returns Returns the number of bytes written: 2 if the value 0xFEFF has been written (in little
 *          or big endian byte order) and 0 for any encoding not handled by the switch.
 */
858 size_t Id3v2Frame::makeBom(char *buffer, TagTextEncoding encoding)
859 {
860  switch (encoding) {
 // 0xFEFF stored in little endian byte order (bytes FF FE)
862  ConversionUtilities::LE::getBytes(static_cast<uint16>(0xFEFF), buffer);
863  return 2;
 // 0xFEFF stored in big endian byte order (bytes FE FF)
865  ConversionUtilities::BE::getBytes(static_cast<uint16>(0xFEFF), buffer);
866  return 2;
867  default:
 // no byte order mark is written for other encodings
868  return 0;
869  }
870 }
871 
875 void Id3v2Frame::makeLegacyPicture(unique_ptr<char[]> &buffer, uint32 &bufferSize, const TagValue &picture, byte typeInfo)
876 {
877  // determine description
878  TagTextEncoding descriptionEncoding = picture.descriptionEncoding();
879  StringData convertedDescription;
880  string::size_type descriptionSize = picture.description().find(
881  "\0\0", 0, descriptionEncoding == TagTextEncoding::Utf16BigEndian || descriptionEncoding == TagTextEncoding::Utf16LittleEndian ? 2 : 1);
882  if (descriptionSize == string::npos) {
883  descriptionSize = picture.description().size();
884  }
885  if (descriptionEncoding == TagTextEncoding::Utf8) {
886  // UTF-8 is only supported by ID3v2.4, so convert back to UTF-16
887  descriptionEncoding = TagTextEncoding::Utf16LittleEndian;
888  convertedDescription = convertUtf8ToUtf16LE(picture.description().data(), descriptionSize);
889  descriptionSize = convertedDescription.second;
890  }
891  // calculate needed buffer size and create buffer
892  const uint32 dataSize = picture.dataSize();
893  buffer = make_unique<char[]>(bufferSize = 1 + 3 + 1 + descriptionSize
894  + (descriptionEncoding == TagTextEncoding::Utf16BigEndian || descriptionEncoding == TagTextEncoding::Utf16LittleEndian ? 4 : 1)
895  + dataSize);
896  // note: encoding byte + image format + picture type byte + description size + 1 or 2 null bytes (depends on encoding) + data size
897  char *offset = buffer.get();
898  // write encoding byte
899  *offset = makeTextEncodingByte(descriptionEncoding);
900  // write mime type
901  const char *imageFormat;
902  if (picture.mimeType() == "image/jpeg") {
903  imageFormat = "JPG";
904  } else if (picture.mimeType() == "image/png") {
905  imageFormat = "PNG";
906  } else if (picture.mimeType() == "image/gif") {
907  imageFormat = "GIF";
908  } else if (picture.mimeType() == "-->") {
909  imageFormat = picture.mimeType().data();
910  } else {
911  imageFormat = "UND";
912  }
913  strncpy(++offset, imageFormat, 3);
914  // write picture type
915  *(offset += 3) = typeInfo;
916  // write description
917  offset += makeBom(offset + 1, descriptionEncoding);
918  if (convertedDescription.first) {
919  copy(convertedDescription.first.get(), convertedDescription.first.get() + descriptionSize, ++offset);
920  } else {
921  picture.description().copy(++offset, descriptionSize);
922  }
923  *(offset += descriptionSize) = 0x00; // terminate description and increase data offset
924  if (descriptionEncoding == TagTextEncoding::Utf16BigEndian || descriptionEncoding == TagTextEncoding::Utf16LittleEndian) {
925  *(++offset) = 0x00;
926  }
927  // write actual data
928  copy(picture.dataPointer(), picture.dataPointer() + picture.dataSize(), ++offset);
929 }
930 
934 void Id3v2Frame::makePicture(std::unique_ptr<char[]> &buffer, uint32 &bufferSize, const TagValue &picture, byte typeInfo, byte version)
935 {
936  if (version < 3) {
937  makeLegacyPicture(buffer, bufferSize, picture, typeInfo);
938  return;
939  }
940 
941  // determine description
942  TagTextEncoding descriptionEncoding = picture.descriptionEncoding();
943  StringData convertedDescription;
944  string::size_type descriptionSize = picture.description().find(
945  "\0\0", 0, descriptionEncoding == TagTextEncoding::Utf16BigEndian || descriptionEncoding == TagTextEncoding::Utf16LittleEndian ? 2 : 1);
946  if (descriptionSize == string::npos) {
947  descriptionSize = picture.description().size();
948  }
949  if (version < 4 && descriptionEncoding == TagTextEncoding::Utf8) {
950  // UTF-8 is only supported by ID3v2.4, so convert back to UTF-16
951  descriptionEncoding = TagTextEncoding::Utf16LittleEndian;
952  convertedDescription = convertUtf8ToUtf16LE(picture.description().data(), descriptionSize);
953  descriptionSize = convertedDescription.second;
954  }
955  // determine mime-type
956  string::size_type mimeTypeSize = picture.mimeType().find('\0');
957  if (mimeTypeSize == string::npos) {
958  mimeTypeSize = picture.mimeType().length();
959  }
960  // calculate needed buffer size and create buffer
961  const uint32 dataSize = picture.dataSize();
962  buffer = make_unique<char[]>(bufferSize = 1 + mimeTypeSize + 1 + 1 + descriptionSize
963  + (descriptionEncoding == TagTextEncoding::Utf16BigEndian || descriptionEncoding == TagTextEncoding::Utf16LittleEndian ? 4 : 1)
964  + dataSize);
965  // note: encoding byte + mime type size + 0 byte + picture type byte + description size + 1 or 4 null bytes (depends on encoding) + data size
966  char *offset = buffer.get();
967  // write encoding byte
968  *offset = makeTextEncodingByte(descriptionEncoding);
969  // write mime type
970  picture.mimeType().copy(++offset, mimeTypeSize);
971  *(offset += mimeTypeSize) = 0x00; // terminate mime type
972  // write picture type
973  *(++offset) = typeInfo;
974  // write description
975  offset += makeBom(offset + 1, descriptionEncoding);
976  if (convertedDescription.first) {
977  copy(convertedDescription.first.get(), convertedDescription.first.get() + descriptionSize, ++offset);
978  } else {
979  picture.description().copy(++offset, descriptionSize);
980  }
981  *(offset += descriptionSize) = 0x00; // terminate description and increase data offset
982  if (descriptionEncoding == TagTextEncoding::Utf16BigEndian || descriptionEncoding == TagTextEncoding::Utf16LittleEndian) {
983  *(++offset) = 0x00;
984  }
985  // write actual data
986  copy(picture.dataPointer(), picture.dataPointer() + picture.dataSize(), ++offset);
987 }
988 
992 void Id3v2Frame::makeComment(unique_ptr<char[]> &buffer, uint32 &bufferSize, const TagValue &comment, byte version, Diagnostics &diag)
993 {
994  static const string context("making comment frame");
995  // check type and other values are valid
996  TagTextEncoding encoding = comment.dataEncoding();
997  if (!comment.description().empty() && encoding != comment.descriptionEncoding()) {
998  diag.emplace_back(DiagLevel::Critical, "Data enoding and description encoding aren't equal.", context);
999  throw InvalidDataException();
1000  }
1001  const string &lng = comment.language();
1002  if (lng.length() > 3) {
1003  diag.emplace_back(DiagLevel::Critical, "The language must be 3 bytes long (ISO-639-2).", context);
1004  throw InvalidDataException();
1005  }
1006  StringData convertedDescription;
1007  string::size_type descriptionSize = comment.description().find(
1008  "\0\0", 0, encoding == TagTextEncoding::Utf16BigEndian || encoding == TagTextEncoding::Utf16LittleEndian ? 2 : 1);
1009  if (descriptionSize == string::npos) {
1010  descriptionSize = comment.description().size();
1011  }
1012  if (version < 4 && encoding == TagTextEncoding::Utf8) {
1013  // UTF-8 is only supported by ID3v2.4, so convert back to UTF-16
1015  convertedDescription = convertUtf8ToUtf16LE(comment.description().data(), descriptionSize);
1016  descriptionSize = convertedDescription.second;
1017  }
1018  // calculate needed buffer size and create buffer
1019  const auto data = comment.toString(encoding);
1020  buffer = make_unique<char[]>(bufferSize = 1 + 3 + descriptionSize + data.size()
1021  + (encoding == TagTextEncoding::Utf16BigEndian || encoding == TagTextEncoding::Utf16LittleEndian ? 6 : 1) + data.size());
1022  // note: encoding byte + language + description size + actual data size + BOMs and termination
1023  char *offset = buffer.get();
1024  // write encoding
1025  *offset = makeTextEncodingByte(encoding);
1026  // write language
1027  for (unsigned int i = 0; i < 3; ++i) {
1028  *(++offset) = (lng.length() > i) ? lng[i] : 0x00;
1029  }
1030  // write description
1031  offset += makeBom(offset + 1, encoding);
1032  if (convertedDescription.first) {
1033  copy(convertedDescription.first.get(), convertedDescription.first.get() + descriptionSize, ++offset);
1034  } else {
1035  comment.description().copy(++offset, descriptionSize);
1036  }
1037  offset += descriptionSize;
1038  *offset = 0x00; // terminate description and increase data offset
1040  *(++offset) = 0x00;
1041  }
1042  // write actual data
1043  offset += makeBom(offset + 1, encoding);
1044  data.copy(++offset, data.size());
1045 }
1046 
1047 } // namespace TagParser
uint16 flag() const
Returns the flags.
Definition: id3v2frame.h:187
bool isShortId(uint32 id)
Returns an indication whether the specified id is a short frame id.
Definition: id3v2frameids.h:76
void setTypeInfo(const TypeInfoType &typeInfo)
Sets the type info of the current TagField.
void makeLegacyPicture(std::unique_ptr< char[]> &buffer, uint32 &bufferSize, const TagValue &picture, byte typeInfo)
Writes the specified picture to the specified buffer (ID3v2.2 compatible).
Definition: id3v2frame.cpp:875
void makeComment(std::unique_ptr< char[]> &buffer, uint32 &bufferSize, const TagValue &comment, byte version, Diagnostics &diag)
Writes the specified comment to the specified buffer.
Definition: id3v2frame.cpp:992
bool hasGroupInformation() const
Returns whether the frame contains group information.
Definition: id3v2frame.h:260
byte group() const
Returns the group.
Definition: id3v2frame.h:285
TagTextEncoding descriptionEncoding() const
Returns the description encoding.
Definition: tagvalue.h:528
const std::string & description() const
Returns the description.
Definition: tagvalue.h:426
TAG_PARSER_EXPORT const char * version()
void setMimeType(const std::string &mimeType)
Sets the MIME type.
Definition: tagvalue.h:461
byte makeTextEncodingByte(TagTextEncoding textEncoding)
Returns a text encoding byte for the specified textEncoding.
Definition: id3v2frame.cpp:557
TagTextEncoding parseTextEncodingByte(byte textEncodingByte, Diagnostics &diag)
Returns the text encoding for the specified textEncodingByte.
Definition: id3v2frame.cpp:536
void make(IoUtilities::BinaryWriter &writer, byte version, Diagnostics &diag)
Writes the frame to a stream using the specified writer and the specified ID3v2 version.
Definition: id3v2frame.cpp:328
std::u16string parseWideString(const char *buffer, std::size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag)
Parses a substring in the specified buffer.
Definition: id3v2frame.cpp:673
std::size_t dataSize() const
Returns the size of the assigned value in bytes.
Definition: tagvalue.h:399
STL namespace.
uint32 convertToLongId(uint32 id)
Converts the specified short frame ID to the equivalent long frame ID.
TAG_PARSER_EXPORT const char * comment()
std::string parseString(const char *buffer, std::size_t maxSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag)
Parses a substring in the specified buffer.
Definition: id3v2frame.cpp:660
bool isCompressed() const
Returns whether the frame is compressed.
Definition: id3v2frame.h:243
void makePicture(std::unique_ptr< char[]> &buffer, uint32 &bufferSize, const TagValue &picture, byte typeInfo, byte version)
Writes the specified picture to the specified buffer.
Definition: id3v2frame.cpp:934
const std::string & mimeType() const
Returns the MIME type.
Definition: tagvalue.h:450
void parseLegacyPicture(const char *buffer, std::size_t maxSize, TagValue &tagValue, byte &typeInfo, Diagnostics &diag)
Parses the ID3v2.2 picture from the specified buffer.
Definition: id3v2frame.cpp:716
void parseBom(const char *buffer, std::size_t maxSize, TagTextEncoding &encoding, Diagnostics &diag)
Parses a byte order mark from the specified buffer.
Definition: id3v2frame.cpp:690
void makeString(std::unique_ptr< char[]> &buffer, uint32 &bufferSize, const std::string &value, TagTextEncoding encoding)
Writes an encoding denotation and the specified string value to a buffer.
Definition: id3v2frame.cpp:805
uint32 convertToShortId(uint32 id)
Converts the specified long frame ID to the equivalent short frame ID.
void makeEncodingAndData(std::unique_ptr< char[]> &buffer, uint32 &bufferSize, TagTextEncoding encoding, const char *data, std::size_t m_dataSize)
Writes an encoding denotation and the specified data to a buffer.
Definition: id3v2frame.cpp:818
bool isLongId(uint32 id)
Returns an indication whether the specified id is a long frame id.
Definition: id3v2frameids.h:68
Contains utility classes helping to read and write streams.
bool isEncrypted() const
Returns whether the frame is encrypted.
Definition: id3v2frame.h:252
std::string frameIdString() const
Returns the frame ID as string.
Definition: id3v2frame.h:179
void assignPosition(PositionInSet value)
Assigns the given PositionInSet value.
Definition: tagvalue.h:291
void parseComment(const char *buffer, std::size_t maxSize, TagValue &tagValue, Diagnostics &diag)
Parses the comment/unsynchronized lyrics from the specified buffer.
Definition: id3v2frame.cpp:776
void assignStandardGenreIndex(int index)
Assigns the given standard genre index.
Definition: tagvalue.h:322
const TypeInfoType & typeInfo() const
Returns the type info of the current TagField.
void parsePicture(const char *buffer, std::size_t maxSize, TagValue &tagValue, byte &typeInfo, Diagnostics &diag)
Parses the ID3v2.3 picture from the specified buffer.
Definition: id3v2frame.cpp:742
std::tuple< const char *, size_t, const char * > parseSubstring(const char *buffer, std::size_t maxSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag)
Parses a substring in the specified buffer.
Definition: id3v2frame.cpp:587
The TagField class is used by FieldMapBasedTag to store the fields.
int parseGenreIndex(const stringtype &denotation)
Helper function to parse the genre index.
Definition: id3v2frame.cpp:64
void setLanguage(const std::string &language)
Sets the language.
Definition: tagvalue.h:482
char * dataPointer()
Returns a pointer to the raw data assigned to the current instance.
Definition: tagvalue.h:410
const IdentifierType & id() const
Returns the id of the current TagField.
std::size_t makeBom(char *buffer, TagTextEncoding encoding)
Writes the BOM for the specified encoding to the specified buffer.
Definition: id3v2frame.cpp:858
bool isTextFrame(uint32 id)
Returns an indication whether the specified id is a text frame id.
Definition: id3v2frameids.h:84
void make(IoUtilities::BinaryWriter &writer)
Saves the frame (specified when constructing the object) using the specified writer.
Definition: id3v2frame.cpp:503
Id3v2Frame()
Constructs a new Id3v2Frame.
Definition: id3v2frame.cpp:36
void setDescription(const std::string &value, TagTextEncoding encoding=TagTextEncoding::Latin1)
Sets the description.
Definition: tagvalue.h:439
void assignTimeSpan(ChronoUtilities::TimeSpan value)
Assigns the given TimeSpan value.
Definition: tagvalue.h:304
constexpr int characterSize(TagTextEncoding encoding)
Returns the size of one character for the specified encoding in bytes.
Definition: tagvalue.h:34
uint32 dataSize() const
Returns the size of the data stored in the frame in bytes.
Definition: id3v2frame.h:211
void assignData(const char *data, size_t length, TagDataType type=TagDataType::Binary, TagTextEncoding encoding=TagTextEncoding::Latin1)
Assigns a copy of the given data.
Definition: tagvalue.cpp:620
Id3v2FrameMaker prepareMaking(byte version, Diagnostics &diag)
Prepares making.
Definition: id3v2frame.cpp:315
TagTextEncoding
Specifies the text encoding.
Definition: tagvalue.h:22
void parse(IoUtilities::BinaryReader &reader, uint32 version, uint32 maximalSize, Diagnostics &diag)
Parses a frame from the stream read using the specified reader.
Definition: id3v2frame.cpp:112
void setId(const IdentifierType &id)
Sets the id of the current TagField.
static void ensureHostByteOrder(std::u16string &u16str, TagTextEncoding currentEncoding)
Ensures the byte-order of the specified UTF-16 string matches the byte-order of the machine...
Definition: tagvalue.cpp:690
TagValue & value()
Returns the value of the current TagField.