Tag Parser  10.0.1
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
aacframe.h
Go to the documentation of this file.
1 #ifndef TAG_PARSER_AACFRAME_H
2 #define TAG_PARSER_AACFRAME_H
3 
4 // NOTE: The AAC parser is still WIP. It does not work yet and its API/ABI may change even in patch releases.
5 
6 #include "../global.h"
7 
8 #include <c++utilities/io/bitreader.h>
9 
10 #include <cstdint>
11 #include <memory>
12 
13 namespace TagParser {
14 
16 
17 class AdtsFrame;
18 
// Compile-time limits used to size the fixed arrays in the AAC structures below.
// All of these are plain `int` constants.
// NOTE(review): the meaning of each limit is inferred from its name; confirm against the AAC specification.

// general element/channel limits
constexpr int aacMaxChannels = 64;
constexpr int aacMaxSyntaxElements = 48;
constexpr int aacMaxWindowGroups = 8;
constexpr int aacMaxSfb = 51;

// limits for the LTP structures
constexpr int aacMaxLtpSfb = 40;
constexpr int aacMaxltpSfbS = 8;

// SBR related values; aacInvalidSbrElement is the default argument of AacFrameElementParser::parseFillElement()
constexpr int aacInvalidSbrElement = 0xFF;
constexpr int aacNoTimeSlots960 = 15;
constexpr int aacNoTimeSlots = 16;
constexpr int aacSbrRate = 2;
constexpr int aacSbrM = 49;
constexpr int aacSbrMaxLe = 5;
constexpr int aacSbrMaxNtsrhfg = 40;

// Pointer to the first row of a table whose rows are two const std::int8_t values each;
// this is the parameter type of AacFrameElementParser::sbrHuffmanDec().
using SbrHuffTab = const std::int8_t (*)[2];
34 
// Kinds of syntax elements, numbered consecutively from 0.
// NOTE(review): presumably these match the element IDs of the AAC bitstream syntax; confirm against the specification.
namespace AacSyntaxElementTypes {
enum KnownTypes : std::uint8_t {
    SingleChannelElement = 0,
    ChannelPairElement = 1,
    ChannelCouplingElement = 2,
    LowFrequencyElement = 3,
    DataStreamElement = 4,
    ProgramConfigElement = 5,
    FillElement = 6,
    EndOfFrame = 7,
};
} // namespace AacSyntaxElementTypes
47 
// Window sequence kinds, numbered consecutively from 0.
// NOTE(review): presumably the values stored in AacIcsInfo::windowSequence; confirm against the parser implementation.
namespace AacIcsSequenceTypes {
enum KnownTypes : std::uint8_t {
    OnlyLongSequence = 0,
    LongStartSequence = 1,
    EightShortSequence = 2,
    LongStopSequence = 3,
};
} // namespace AacIcsSequenceTypes
51 
// Constants related to Huffman codebooks, listed in ascending numeric order.
// NOTE(review): judging by the names, the *Hcb entries are codebook numbers while PairLen/QuadLen
// are tuple lengths rather than codebook numbers; confirm against the parser implementation.
namespace AacScaleFactorTypes {
enum KnownTypes : std::uint8_t {
    ZeroHcb = 0,
    PairLen = 2,
    QuadLen = 4,
    FirstPairHcb = 5,
    EscHcb = 11,
    NoiseHcb = 13,
    IntensityHcb2 = 14,
    IntensityHcb = 15,
};
} // namespace AacScaleFactorTypes
64 
// Known extension payload types.
// NOTE(review): presumably the extension type read inside a fill element; confirm against the parser implementation.
namespace AacExtensionTypes {
enum KnownTypes : std::uint8_t {
    Fill = 0,
    FillData = 1,
    DataElement = 2,
    DynamicRange = 11,
    SacData = 12,
    SbrData = 13,
    SbrDataCrc = 14,
};
} // namespace AacExtensionTypes
68 
// Frame classes, numbered consecutively from 0.
// NOTE(review): presumably the values stored in AacSbrInfo::bsFrameClass; confirm against the parser implementation.
namespace BsFrameClasses {
enum BsFrameClass : std::uint8_t {
    FixFix = 0,
    FixVar = 1,
    VarFix = 2,
    VarVar = 3,
};
} // namespace BsFrameClasses
72 
// Known SBR extension IDs.
// NOTE(review): presumably compared against the extensionId passed to AacFrameElementParser::parseSbrExtension(); confirm.
namespace AacSbrExtensionIds {
enum KnownIds : std::uint8_t {
    DrmParametricStereo = 0,
    Ps = 2,
};
} // namespace AacSbrExtensionIds
76 
// Plain data record passed to AacFrameElementParser::parseLtpInfo(); AacIcsInfo holds two
// instances (ltp1, ltp2).
// NOTE(review): "Ltp" presumably stands for long-term prediction; field semantics are inferred
// from names only, confirm against the AAC specification.
struct TAG_PARSER_EXPORT AacLtpInfo {
    // defined outside this header
    AacLtpInfo();
    std::uint8_t lastBand;
    std::uint8_t dataPresent;
    std::uint16_t lag;
    std::uint8_t lagUpdate;
    std::uint8_t coef;
    // one entry per band, capacity aacMaxLtpSfb
    std::uint8_t longUsed[aacMaxLtpSfb];
    // the following arrays have a fixed capacity of 8 entries each
    std::uint8_t shortUsed[8];
    std::uint8_t shortLagPresent[8];
    std::uint8_t shortLag[8];
};
89 
// Plain data record stored as AacIcsInfo::predictor.
// NOTE(review): field semantics are inferred from names only; confirm against the AAC specification.
struct TAG_PARSER_EXPORT AacPredictorInfo {
    // defined outside this header
    AacPredictorInfo();
    std::uint8_t maxSfb;
    std::uint8_t reset;
    std::uint8_t resetGroupNumber;
    // one entry per band, capacity aacMaxSfb
    std::uint8_t predictionUsed[aacMaxSfb];
};
97 
// Plain data record stored as AacIcsInfo::pulse.
// NOTE(review): presumably filled by AacFrameElementParser::parsePulseData(); confirm in the implementation.
struct TAG_PARSER_EXPORT AacPulseInfo {
    // defined outside this header
    AacPulseInfo();
    std::uint8_t count;
    std::uint8_t startSfb;
    // room for up to 4 entries each; presumably `count` tells how many are used (TODO confirm)
    std::uint8_t offset[4];
    std::uint8_t amp[4];
};
105 
// Plain data record stored as AacIcsInfo::tns.
// NOTE(review): presumably filled by AacFrameElementParser::parseTnsData(); "Tns" likely stands for
// temporal noise shaping. The first array dimension (8) is presumably the window index and the
// second (4) the filter index; confirm in the implementation.
struct TAG_PARSER_EXPORT AacTnsInfo {
    // defined outside this header
    AacTnsInfo();
    std::uint8_t filt[8];
    std::uint8_t coefRes[8];
    std::uint8_t length[8][4];
    std::uint8_t order[8][4];
    std::uint8_t direction[8][4];
    std::uint8_t coefCompress[8][4];
    // up to 32 coefficients per [8][4] entry
    std::uint8_t coef[8][4][32];
};
116 
// Plain data record stored as AacIcsInfo::ssr.
// NOTE(review): presumably filled by AacFrameElementParser::parseGainControlData(); "Ssr" likely
// stands for scalable sample rate. Confirm in the implementation.
struct TAG_PARSER_EXPORT AacSsrInfo {
    // defined outside this header
    AacSsrInfo();
    std::uint8_t maxBand;
    std::uint8_t adjustNum[4][8];
    // same leading [4][8] dimensions as adjustNum, with room for 8 values per entry
    std::uint8_t alevcode[4][8][8];
    std::uint8_t aloccode[4][8][8];
};
124 
// Plain data record held by AacFrameElementParser as m_drc.
// NOTE(review): presumably filled by parseDynamicRange() and parseExcludedChannels(); "Drc" likely
// stands for dynamic range control. Confirm in the implementation.
struct TAG_PARSER_EXPORT AacDrcInfo {
    // defined outside this header
    AacDrcInfo();
    std::uint8_t present;
    std::uint8_t bandCount;
    std::uint8_t pceInstanceTag;
    std::uint8_t excludedChannelsPresent;
    // per-band arrays with a fixed capacity of 17 entries
    std::uint8_t bandTop[17];
    std::uint8_t progRefLevel;
    std::uint8_t dynamicRangeSign[17];
    std::uint8_t dynamicRangeControl[17];
    // per-channel arrays, capacity aacMaxChannels
    std::uint8_t excludeMask[aacMaxChannels];
    std::uint8_t additionalExcludedChannels[aacMaxChannels];
};
138 
// Incomplete (see TODO) plain data record referenced by AacSbrInfo::ps and passed to
// AacFrameElementParser::parsePsData(). It currently declares the same members as AacDrmPsInfo.
// NOTE(review): "Ps" presumably stands for parametric stereo; confirm.
struct TAG_PARSER_EXPORT AacPsInfo {
    // defined outside this header
    AacPsInfo();
    std::uint8_t headerRead;
    std::uint8_t use34HybridBands;
    std::uint8_t enableIID; // Inter-channel Intensity Difference
    std::uint8_t iidMode;
    std::uint8_t iidParCount;
    std::uint8_t iidopdParCount;
    // TODO
};
149 
// Incomplete (see TODO) plain data record referenced by AacSbrInfo::drmPs and passed to
// AacFrameElementParser::parseDrmPsData(). It currently declares the same members as AacPsInfo.
// NOTE(review): presumably the DRM flavour of parametric stereo data; confirm.
struct TAG_PARSER_EXPORT AacDrmPsInfo {
    // defined outside this header
    AacDrmPsInfo();
    std::uint8_t headerRead;
    std::uint8_t use34HybridBands;
    std::uint8_t enableIID; // Inter-channel Intensity Difference
    std::uint8_t iidMode;
    std::uint8_t iidParCount;
    std::uint8_t iidopdParCount;
    // TODO
};
160 
// State record for one SBR element. AacFrameElementParser keeps one shared pointer per syntax
// element (m_sbrElements), AacIcsInfo holds one as well (sbr), and the parseSbr*() methods take
// it by reference.
// Most two-element arrays are presumably indexed by channel, matching the `channel` parameter of
// the parseSbr*() methods (TODO confirm). The commented-out members belong to functionality that
// is not available in this header (real_t, qmfa_info, qmfs_info and qmf_t are not declared here).
// NOTE(review): field semantics are inferred from names only; confirm against the AAC/SBR specification.
struct TAG_PARSER_EXPORT AacSbrInfo {
    // defined outside this header
    AacSbrInfo(std::uint8_t sbrElementType, std::uint16_t samplingFrequency, std::uint16_t frameLength, bool isDrm);

    std::uint8_t aacElementId;
    std::uint16_t samplingFrequency;

    std::uint32_t maxAacLine;

    std::uint8_t rate;
    std::uint8_t justSeeked;
    std::uint8_t ret;

    std::uint8_t ampRes[2];

    std::uint8_t k0;
    std::uint8_t kx;
    std::uint8_t m;
    std::uint8_t nMaster;
    std::uint8_t nHigh;
    std::uint8_t nLow;
    std::uint8_t nq;
    std::uint8_t nl[4];
    std::uint8_t n[2];

    // tables with a fixed capacity of 64 entries per row
    std::uint8_t fMaster[64];
    std::uint8_t fTableRes[2][64];
    std::uint8_t fTableNoise[64];
    std::uint8_t fTableLim[4][64];
    std::uint8_t fGroup[5][64];
    std::uint8_t ng[5];

    std::uint8_t tableMapKToG[64];

    std::uint8_t absBordLead[2];
    std::uint8_t absBordTrail[2];
    std::uint8_t relLeadCount[2];
    std::uint8_t relTrailCount[2];

    std::uint8_t le[2];
    std::uint8_t lePrev[2];
    std::uint8_t lq[2];

    // note: te and f have one more entry (aacSbrMaxLe + 1) than the aacSbrMaxLe-sized arrays below
    std::uint8_t te[2][aacSbrMaxLe + 1];
    std::uint8_t tq[2][3];
    std::uint8_t f[2][aacSbrMaxLe + 1];
    std::uint8_t fPrev[2];

    //real_t *gTempPrev[2][5];
    //real_t *qTempPrev[2][5];
    //sbyte gqRingbufIndex[2];

    std::int16_t e[2][64][aacSbrMaxLe];
    std::int16_t ePrev[2][64];
    //real_t eOrig[2][64][aacSbrMaxLe];
    //real_t eCurr[2][64][aacSbrMaxLe];
    std::int32_t q[2][64][2];
    //real_t qDiv[2][64][2];
    //real_t qDiv2[2][64][2];
    std::int32_t qPrev[2][64];

    std::int8_t la[2];
    std::int8_t laPrev[2];

    std::uint8_t bsInvfMode[2][aacSbrMaxLe];
    std::uint8_t bsInvfModePrev[2][aacSbrMaxLe];
    //real_t bwArray[2][64];
    //real_t bwArrayPrev[2][64];

    std::uint8_t noPatches;
    std::uint8_t patchNoSubbands[64];
    std::uint8_t patchStartSubband[64];

    std::uint8_t bsAddHarmonic[2][64];
    std::uint8_t bsAddHarmonicPrev[2][64];

    std::uint16_t indexNoisePrev[2];
    std::uint8_t psiIsPrev[2];

    // "Prev" counterparts of the bs* header fields declared further below
    std::uint8_t bsStartFreqPrev;
    std::uint8_t bsStopFreqPrev;
    std::uint8_t bsXoverBandPrev;
    std::uint8_t bsFreqScalePrev;
    std::uint8_t bsAlterScalePrev;
    std::uint8_t bsNoiseBandsPrev;

    std::int8_t prevEnvIsShort[2];

    // note: signed, unlike kx which is unsigned
    std::int8_t kxPrev;
    std::uint8_t bsco;
    std::uint8_t bscoPrev;
    std::uint8_t mPrev;
    std::uint16_t frameLength;

    std::uint8_t reset;
    std::uint32_t frame;
    std::uint32_t headerCount;

    std::uint8_t idAac;
    //qmfa_info *qmfa[2];
    //qmfs_info *qmfs[2];

    //qmf_t Xsbr[2][aacSbrMaxNtsrhfg][64];

    std::uint8_t isDrmSbr;
    // passed to AacFrameElementParser::parseDrmPsData()
    std::shared_ptr<AacDrmPsInfo> drmPs;

    std::uint8_t timeSlotsRateCount;
    std::uint8_t timeSlotsCount;
    std::uint8_t tHfGen;
    std::uint8_t tHfAdj;

    // passed to AacFrameElementParser::parsePsData()
    std::shared_ptr<AacPsInfo> ps;
    std::uint8_t psUsed;
    std::uint8_t psResetFlag;

    // fields prefixed with "bs"; presumably values read directly from the bitstream (TODO confirm)
    std::uint8_t bsHeaderFlag;
    std::uint8_t bsCrcFlag;
    std::uint16_t bsSbrCrcBits;
    std::uint8_t bsProtocolVersion;
    std::uint8_t bsAmpRes;
    std::uint8_t bsStartFreq;
    std::uint8_t bsStopFreq;
    std::uint8_t bsXoverBand;
    std::uint8_t bsFreqScale;
    std::uint8_t bsAlterScale;
    std::uint8_t bsNoiseBands;
    std::uint8_t bsLimiterBands;
    std::uint8_t bsLimiterGains;
    std::uint8_t bsInterpolFreq;
    std::uint8_t bsSmoothingMode;
    std::uint8_t bsSamplerateMode;
    std::uint8_t bsAddHarmonicFlag[2];
    std::uint8_t bsAddHarmonicFlagPrev[2];
    std::uint8_t bsExtendedData;
    std::uint8_t bsExtensionId;
    std::uint8_t bsExtensionData;
    std::uint8_t bsCoupling;
    // presumably holds BsFrameClasses::BsFrameClass values (TODO confirm)
    std::uint8_t bsFrameClass[2];
    std::uint8_t bsRelBord[2][9];
    std::uint8_t bsRelBord0[2][9];
    std::uint8_t bsRelBord1[2][9];
    std::uint8_t bsPointer[2];
    std::uint8_t bsAbsBord0[2];
    std::uint8_t bsAbsBord1[2];
    std::uint8_t bsRelCount0[2];
    std::uint8_t bsRelCount1[2];
    std::uint8_t bsDfEnv[2][9];
    std::uint8_t bsDfNoise[2][3];
};
310 
// Plain data record held by AacFrameElementParser as m_pce.
// NOTE(review): presumably filled by AacFrameElementParser::parseProgramConfigElement(); field
// semantics are inferred from names only, confirm against the AAC specification.
struct TAG_PARSER_EXPORT AacProgramConfig {
    // defined outside this header
    AacProgramConfig();
    std::uint8_t elementInstanceTag;
    std::uint8_t objectType;
    std::uint8_t samplingFrequencyIndex;
    // element counts; presumably the number of used entries in the 16-entry arrays below (TODO confirm)
    std::uint8_t frontChannelElementCount;
    std::uint8_t sideChannelElementCount;
    std::uint8_t backChannelElementCount;
    std::uint8_t lfeChannelElementCount;
    std::uint8_t assocDataElementCount;
    std::uint8_t validCcElementCount;
    std::uint8_t monoMixdownPresent;
    std::uint8_t monoMixdownElementNumber;
    std::uint8_t stereoMixdownPresent;
    std::uint8_t stereoMixdownElementNumber;
    std::uint8_t matrixMixdownIdxPresent;
    std::uint8_t pseudoSurroundEnable;
    std::uint8_t matrixMixdownIdx;
    // per-element arrays with a fixed capacity of 16 entries
    std::uint8_t frontElementIsCpe[16];
    std::uint8_t frontElementTagSelect[16];
    std::uint8_t sideElementIsCpe[16];
    std::uint8_t sideElementTagSelect[16];
    std::uint8_t backElementIsCpe[16];
    std::uint8_t backElementTagSelect[16];
    std::uint8_t lfeElementTagSelect[16];
    std::uint8_t assocDataElementTagSelect[16];
    std::uint8_t ccElementIsIndSw[16];
    std::uint8_t validCcElementTagSelect[16];
    std::uint8_t channels;
    // commentFieldData has room for 257 bytes, i.e. more than the maximum value (255) of the 8-bit commentFieldBytes
    std::uint8_t commentFieldBytes;
    std::uint8_t commentFieldData[257];
    std::uint8_t frontChannelCount;
    std::uint8_t sideChannelCount;
    std::uint8_t backChannelCount;
    std::uint8_t lfeChannelCount;
    std::uint8_t sceChannel[16];
    std::uint8_t cpeChannel[16];
};
349 
// State record for one channel stream. AacFrameElementParser holds two instances (m_ics1, m_ics2)
// and passes them to most of its private parse*/decode* methods.
// NOTE(review): "Ics" presumably stands for individual channel stream (compare
// parseIndividualChannelStream()); field semantics are inferred from names only.
struct TAG_PARSER_EXPORT AacIcsInfo {
    // defined outside this header
    AacIcsInfo();

    std::uint8_t maxSfb;

    std::uint8_t swbCount;
    std::uint8_t windowGroupCount;
    std::uint8_t windowCount;
    // presumably holds AacIcsSequenceTypes::KnownTypes values (TODO confirm)
    std::uint8_t windowSequence;
    std::uint8_t windowGroupLengths[8];
    std::uint8_t windowShape;
    std::uint8_t scaleFactorGrouping;
    // section tables: 8 rows with 15 * 8 = 120 entries each
    std::uint16_t sectionSfbOffset[8][15 * 8];
    std::uint16_t swbOffset[52];
    std::uint16_t maxSwbOffset;

    std::uint8_t sectionCb[8][15 * 8];
    std::uint16_t sectionStart[8][15 * 8];
    std::uint16_t sectionEnd[8][15 * 8];
    std::uint8_t sfbCb[8][15 * 8];
    std::uint8_t sectionsPerGroup[8];

    std::uint8_t globalGain;
    // dimensions equal aacMaxWindowGroups (8) x aacMaxSfb (51), written as literals here
    std::uint16_t scaleFactors[8][51];

    std::uint8_t midSideCodingMaskPresent;
    std::uint8_t midSideCodingUsed[aacMaxWindowGroups][aacMaxSfb];

    std::uint8_t noiseUsed;
    std::uint8_t isUsed;

    // presence flags; presumably they indicate whether the sub-records below carry data (TODO confirm)
    std::uint8_t pulseDataPresent;
    std::uint8_t tnsDataPresent;
    std::uint8_t gainControlPresent;
    std::uint8_t predictorDataPresent;

    // nested records for the individual coding tools
    AacPulseInfo pulse;
    AacTnsInfo tns;
    AacPredictorInfo predictor;
    AacLtpInfo ltp1;
    AacLtpInfo ltp2;
    AacSsrInfo ssr;
    std::shared_ptr<AacSbrInfo> sbr;

    // error resilience
    std::uint16_t reorderedSpectralDataLength;
    std::uint8_t longestCodewordLength;
    std::uint8_t sfConcealment;
    std::uint8_t revGlobalGain;
    std::uint16_t rvlcSfLength;
    std::uint16_t dpcmNoiseNrg;
    std::uint8_t sfEscapesPresent;
    std::uint8_t rvlcEscapesLength;
    std::uint16_t dpcmNoiseLastPos;
};
405 
// Parser for the elements of an AAC frame.
// The file header notes that the AAC parser is still work in progress, does not work yet and may
// change its API/ABI even in patch releases.
//
// Usage as far as visible from this declaration: construct with the stream's setup information,
// then call one of the parse() overloads per frame. All parsed state ends up in private members;
// this header declares no accessors for it.
class TAG_PARSER_EXPORT AacFrameElementParser {
public:
    // Stores the setup information; frameLength defaults to 1024.
    AacFrameElementParser(std::uint8_t audioObjectId, std::uint8_t samplingFrequencyIndex, std::uint8_t extensionSamplingFrequencyIndex,
        std::uint8_t channelConfig, std::uint16_t frameLength = 1024);

    // Parse one frame described by adtsFrame, either from a buffer or from a stream; dataSize is
    // presumably the number of bytes to consume (TODO confirm in the implementation).
    void parse(const AdtsFrame &adtsFrame, std::unique_ptr<char[]> &data, std::size_t dataSize);
    void parse(const AdtsFrame &adtsFrame, std::istream &stream, std::size_t dataSize);

private:
    // parsing of the individual parts of a channel stream (operate on an AacIcsInfo)
    void parseLtpInfo(const AacIcsInfo &ics, AacLtpInfo &ltp);
    void parseIcsInfo(AacIcsInfo &ics);
    void parseSectionData(AacIcsInfo &ics);
    void decodeScaleFactorData(AacIcsInfo &ics);
    void decodeRvlcScaleFactorData(AacIcsInfo &ics);
    void parseScaleFactorData(AacIcsInfo &ics);
    void parsePulseData(AacIcsInfo &ics);
    void parseTnsData(AacIcsInfo &ics);
    void parseGainControlData(AacIcsInfo &ics);
    void parseSpectralData(AacIcsInfo &ics, std::int16_t *specData);
    void parseSideInfo(AacIcsInfo &ics, bool scaleFlag);
    std::uint8_t parseExcludedChannels();
    std::uint8_t parseDynamicRange();
    // SBR and PS related helpers (operate on AacSbrInfo, AacPsInfo or AacDrmPsInfo)
    static std::int8_t sbrLog2(const std::int8_t val);
    std::int16_t sbrHuffmanDec(SbrHuffTab table);
    void parseSbrGrid(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
    void parseSbrDtdf(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
    void parseInvfMode(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
    void parseSbrEnvelope(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
    void parseSbrNoise(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
    void parseSbrSinusoidalCoding(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
    std::uint16_t parseSbrExtension(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t extensionId, std::uint8_t bitsLeft);
    std::uint16_t parsePsData(std::shared_ptr<AacPsInfo> &ps, std::uint8_t &header);
    std::uint16_t parseDrmPsData(std::shared_ptr<AacDrmPsInfo> &drmPs);
    void parseSbrSingleChannelElement(std::shared_ptr<AacSbrInfo> &sbr);
    void parseSbrChannelPairElement(std::shared_ptr<AacSbrInfo> &sbr);
    std::shared_ptr<AacSbrInfo> makeSbrInfo(std::uint8_t sbrElement, bool isDrm = false);
    void parseSbrExtensionData(std::uint8_t sbrElement, std::uint16_t count, bool crcFlag);
    // Huffman decoding helpers; `cb` is presumably the codebook number and `sp` the output position (TODO confirm)
    std::uint8_t parseHuffmanScaleFactor();
    void parseHuffmanSpectralData(std::uint8_t cb, std::int16_t *sp);
    void huffmanSignBits(std::int16_t *sp, std::uint8_t len);
    void huffman2StepQuad(std::uint8_t cb, std::int16_t *sp);
    void huffmanBinaryQuadSign(std::uint8_t cb, std::int16_t *sp);
    void huffmanBinaryPair(std::uint8_t cb, std::int16_t *sp);
    void huffman2StepPair(std::uint8_t cb, std::int16_t *sp);
    void huffmanBinaryPairSign(std::uint8_t cb, std::int16_t *sp);
    void huffman2StepPairSign(std::uint8_t cb, std::int16_t *sp);
    std::int16_t huffmanGetEscape(std::int16_t sp);
    constexpr static std::int16_t huffmanCodebook(std::uint8_t i);
    static void vcb11CheckLav(std::uint8_t cb, std::int16_t *sp);
    // parsing of whole syntax elements (compare AacSyntaxElementTypes) and of the raw data block
    void calculateWindowGroupingInfo(AacIcsInfo &ics);
    void parseIndividualChannelStream(AacIcsInfo &ics, std::int16_t *specData, bool scaleFlag = false);
    void parseSingleChannelElement();
    void parseChannelPairElement();
    void parseCouplingChannelElement();
    void parseLowFrequencyElement();
    void parseDataStreamElement();
    void parseProgramConfigElement();
    void parseFillElement(std::uint8_t sbrElement = aacInvalidSbrElement);
    void parseRawDataBlock();

    // these fields contain setup information
    CppUtilities::BitReader m_reader;
    std::uint8_t m_mpeg4AudioObjectId;
    std::uint8_t m_mpeg4SamplingFrequencyIndex;
    std::uint8_t m_mpeg4ExtensionSamplingFrequencyIndex;
    std::uint8_t m_mpeg4ChannelConfig;
    std::uint16_t m_frameLength;
    std::uint8_t m_aacSectionDataResilienceFlag;
    std::uint8_t m_aacScalefactorDataResilienceFlag;
    std::uint8_t m_aacSpectralDataResilienceFlag;
    // these fields will be parsed
    std::uint8_t m_elementId[aacMaxChannels];
    std::uint8_t m_channelCount;
    std::uint8_t m_elementCount;
    std::uint8_t m_elementChannelCount[aacMaxSyntaxElements];
    //std::uint8_t m_channel;
    //std::int16_t m_pairedChannel;
    std::uint8_t m_elementInstanceTag[aacMaxSyntaxElements];
    std::uint8_t m_commonWindow;
    AacIcsInfo m_ics1;
    AacIcsInfo m_ics2;
    AacDrcInfo m_drc;
    AacProgramConfig m_pce;
    std::uint8_t m_sbrPresentFlag;
    //std::uint8_t m_forceUpSampling;
    //std::uint8_t m_downSampledSbr;
    // one (possibly null) SBR state per syntax element
    std::shared_ptr<AacSbrInfo> m_sbrElements[aacMaxSyntaxElements];
    std::uint8_t m_psUsed[aacMaxSyntaxElements];
    std::uint8_t m_psUsedGlobal;
    std::uint8_t m_psResetFlag;
};
497 
501 inline AacFrameElementParser::AacFrameElementParser(std::uint8_t audioObjectId, std::uint8_t samplingFrequencyIndex,
502  std::uint8_t extensionSamplingFrequencyIndex, std::uint8_t channelConfig, std::uint16_t frameLength)
503  : m_reader(nullptr, nullptr)
504  , m_mpeg4AudioObjectId(audioObjectId)
505  , m_mpeg4SamplingFrequencyIndex(samplingFrequencyIndex)
506  , m_mpeg4ExtensionSamplingFrequencyIndex(extensionSamplingFrequencyIndex)
507  , m_mpeg4ChannelConfig(channelConfig)
508  , m_frameLength(frameLength)
509  , m_aacSpectralDataResilienceFlag(0)
510  , m_elementId{ 0 }
511  , m_channelCount(0)
512  , m_elementCount(0)
513  , m_elementChannelCount{ 0 }
514  , m_elementInstanceTag{ 0 }
515  , m_commonWindow(0)
516  ,
517  //m_channel(0),
518  //m_pairedChannel(0),
519  m_sbrPresentFlag(0)
520  ,
521  //m_forceUpSampling(0),
522  //m_downSampledSbr(0),
523  m_sbrElements{ 0 }
524  , m_psUsed{ 0 }
525  , m_psUsedGlobal(0)
526  , m_psResetFlag(0)
527 {
528 }
529 
530 inline std::int8_t AacFrameElementParser::sbrLog2(const std::int8_t val)
531 {
532  static const std::int8_t log2tab[] = { 0, 0, 1, 2, 2, 3, 3, 3, 3, 4 };
533  return (val < 10 && val >= 0) ? log2tab[val] : 0;
534 }
535 
536 constexpr std::int16_t AacFrameElementParser::huffmanCodebook(std::uint8_t i)
537 {
538  return static_cast<std::int16_t>(i ? (16428320 & 0xFFFF) : ((16428320 >> 16) & 0xFFFF));
539 }
540 
542 
543 } // namespace TagParser
544 
545 #endif // TAG_PARSER_AACFRAME_H
#define TAG_PARSER_EXPORT
Marks the symbol to be exported by the tagparser library.
Contains all classes and functions of the Tag Parser library.
Definition: aaccodebook.h:10