tagparser/aac/aacframe.h

566 lines
14 KiB
C++

#ifndef AACFRAME_H
#define AACFRAME_H
#include <c++utilities/io/bitreader.h>
#include <memory>
namespace Media {
class AdtsFrame;
constexpr auto aacMaxChannels = 64;
constexpr auto aacMaxSyntaxElements = 48;
constexpr auto aacMaxWindowGroups = 8;
constexpr auto aacMaxSfb = 51;
constexpr auto aacMaxLtpSfb = 40;
constexpr auto aacMaxltpSfbS = 8;
constexpr auto aacInvalidSbrElement = 0xFF;
constexpr auto aacNoTimeSlots960 = 15;
constexpr auto aacNoTimeSlots = 16;
constexpr auto aacSbrRate = 2;
constexpr auto aacSbrM = 49;
constexpr auto aacSbrMaxLe = 5;
constexpr auto aacSbrMaxNtsrhfg = 40;
typedef const sbyte (*SbrHuffTab)[2];
namespace AacSyntaxElementTypes {
enum KnownTypes : byte
{
SingleChannelElement, /**< codes a single audio channel */
ChannelPairElement, /**< codes steroe signal */
ChannelCouplingElement, /**< something to do with channel coupling (not implemented in libfaad2) */
LowFrequencyElement, /**< low-frequency effects? referenced as "special effects" in RTP doc */
DataStreamElement, /**< user data */
ProgramConfigElement, /**< describes bitstream */
FillElement, /**< pad space/extension data */
EndOfFrame /**< marks the end of the frame */
};
}
namespace AacIcsSequenceTypes {
enum KnownTypes : byte {
OnlyLongSequence,
LongStartSequence,
EightShortSequence,
LongStopSequence
};
}
namespace AacScaleFactorTypes {
enum KnownTypes : byte {
ZeroHcb = 0,
FirstPairHcb = 5,
EscHcb = 11,
QuadLen = 4,
PairLen = 2,
NoiseHcb = 13,
IntensityHcb2 = 14,
IntensityHcb = 15
};
}
namespace AacExtensionTypes {
enum KnownTypes : byte {
Fill = 0,
FillData = 1,
DataElement = 2,
DynamicRange = 11,
SacData = 12,
SbrData = 13,
SbrDataCrc = 14
};
}
namespace BsFrameClasses {
enum BsFrameClass : byte {
FixFix,
FixVar,
VarFix,
VarVar
};
}
namespace AacSbrExtensionIds {
enum KnownIds : byte {
DrmParametricStereo = 0,
Ps = 2
};
}
struct LIB_EXPORT AacLtpInfo
{
AacLtpInfo();
byte lastBand;
byte dataPresent;
uint16 lag;
byte lagUpdate;
byte coef;
byte longUsed[aacMaxLtpSfb];
byte shortUsed[8];
byte shortLagPresent[8];
byte shortLag[8];
};
struct LIB_EXPORT AacPredictorInfo
{
AacPredictorInfo();
byte maxSfb;
byte reset;
byte resetGroupNumber;
byte predictionUsed[aacMaxSfb];
};
struct LIB_EXPORT AacPulseInfo
{
AacPulseInfo();
byte count;
byte startSfb;
byte offset[4];
byte amp[4];
};
struct LIB_EXPORT AacTnsInfo
{
AacTnsInfo();
byte filt[8];
byte coefRes[8];
byte length[8][4];
byte order[8][4];
byte direction[8][4];
byte coefCompress[8][4];
byte coef[8][4][32];
};
struct LIB_EXPORT AacSsrInfo
{
AacSsrInfo();
byte maxBand;
byte adjustNum[4][8];
byte alevcode[4][8][8];
byte aloccode[4][8][8];
};
struct LIB_EXPORT AacDrcInfo
{
AacDrcInfo();
byte present;
byte bandCount;
byte pceInstanceTag;
byte excludedChannelsPresent;
byte bandTop[17];
byte progRefLevel;
byte dynamicRangeSign[17];
byte dynamicRangeControl[17];
byte excludeMask[aacMaxChannels];
byte additionalExcludedChannels[aacMaxChannels];
};
struct LIB_EXPORT AacPsInfo
{
AacPsInfo();
byte headerRead;
byte use34HybridBands;
byte enableIID; // Inter-channel Intensity Difference
byte iidMode;
byte iidParCount;
byte iidopdParCount;
// TODO
};
struct LIB_EXPORT AacDrmPsInfo
{
AacDrmPsInfo();
byte headerRead;
byte use34HybridBands;
byte enableIID; // Inter-channel Intensity Difference
byte iidMode;
byte iidParCount;
byte iidopdParCount;
// TODO
};
struct LIB_EXPORT AacSbrInfo
{
AacSbrInfo(byte sbrElementType, uint16 samplingFrequency, uint16 frameLength, bool isDrm);
byte aacElementId;
byte samplingFrequency;
uint32 maxAacLine;
byte rate;
byte justSeeked;
byte ret;
byte ampRes[2];
byte k0;
byte kx;
byte m;
byte nMaster;
byte nHigh;
byte nLow;
byte nq;
byte nl[4];
byte n[2];
byte fMaster[64];
byte fTableRes[2][64];
byte fTableNoise[64];
byte fTableLim[4][64];
byte fGroup[5][64];
byte ng[5];
byte tableMapKToG[64];
byte absBordLead[2];
byte absBordTrail[2];
byte relLeadCount[2];
byte relTrailCount[2];
byte le[2];
byte lePrev[2];
byte lq[2];
byte te[2][aacSbrMaxLe + 1];
byte tq[2][3];
byte f[2][aacSbrMaxLe + 1];
byte fPrev[2];
//real_t *gTempPrev[2][5];
//real_t *qTempPrev[2][5];
//sbyte gqRingbufIndex[2];
int16 e[2][64][aacSbrMaxLe];
int16 ePrev[2][64];
//real_t eOrig[2][64][aacSbrMaxLe];
//real_t eCurr[2][64][aacSbrMaxLe];
int32 q[2][64][2];
//real_t qDiv[2][64][2];
//real_t qDiv2[2][64][2];
int32 qPrev[2][64];
sbyte la[2];
sbyte laPrev[2];
byte bsInvfMode[2][aacSbrMaxLe];
byte bsInvfModePrev[2][aacSbrMaxLe];
//real_t bwArray[2][64];
//real_t bwArrayPrev[2][64];
byte noPatches;
byte patchNoSubbands[64];
byte patchStartSubband[64];
byte bsAddHarmonic[2][64];
byte bsAddHarmonicPrev[2][64];
uint16 indexNoisePrev[2];
byte psiIsPrev[2];
byte bsStartFreqPrev;
byte bsStopFreqPrev;
byte bsXoverBandPrev;
byte bsFreqScalePrev;
byte bsAlterScalePrev;
byte bsNoiseBandsPrev;
sbyte prevEnvIsShort[2];
sbyte kxPrev;
byte bsco;
byte bscoPrev;
byte mPrev;
uint16 frameLength;
byte reset;
uint32 frame;
uint32 headerCount;
byte idAac;
//qmfa_info *qmfa[2];
//qmfs_info *qmfs[2];
//qmf_t Xsbr[2][aacSbrMaxNtsrhfg][64];
byte isDrmSbr;
std::shared_ptr<AacDrmPsInfo> drmPs;
byte timeSlotsRateCount;
byte timeSlotsCount;
byte tHfGen;
byte tHfAdj;
std::shared_ptr<AacPsInfo> ps;
byte psUsed;
byte psResetFlag;
byte bsHeaderFlag;
byte bsCrcFlag;
uint16 bsSbrCrcBits;
byte bsProtocolVersion;
byte bsAmpRes;
byte bsStartFreq;
byte bsStopFreq;
byte bsXoverBand;
byte bsFreqScale;
byte bsAlterScale;
byte bsNoiseBands;
byte bsLimiterBands;
byte bsLimiterGains;
byte bsInterpolFreq;
byte bsSmoothingMode;
byte bsSamplerateMode;
byte bsAddHarmonicFlag[2];
byte bsAddHarmonicFlagPrev[2];
byte bsExtendedData;
byte bsExtensionId;
byte bsExtensionData;
byte bsCoupling;
byte bsFrameClass[2];
byte bsRelBord[2][9];
byte bsRelBord0[2][9];
byte bsRelBord1[2][9];
byte bsPointer[2];
byte bsAbsBord0[2];
byte bsAbsBord1[2];
byte bsRelCount0[2];
byte bsRelCount1[2];
byte bsDfEnv[2][9];
byte bsDfNoise[2][3];
};
struct LIB_EXPORT AacProgramConfig
{
AacProgramConfig();
byte elementInstanceTag;
byte objectType;
byte samplingFrequencyIndex;
byte frontChannelElementCount;
byte sideChannelElementCount;
byte backChannelElementCount;
byte lfeChannelElementCount;
byte assocDataElementCount;
byte validCcElementCount;
byte monoMixdownPresent;
byte monoMixdownElementNumber;
byte stereoMixdownPresent;
byte stereoMixdownElementNumber;
byte matrixMixdownIdxPresent;
byte pseudoSurroundEnable;
byte matrixMixdownIdx;
byte frontElementIsCpe[16];
byte frontElementTagSelect[16];
byte sideElementIsCpe[16];
byte sideElementTagSelect[16];
byte backElementIsCpe[16];
byte backElementTagSelect[16];
byte lfeElementTagSelect[16];
byte assocDataElementTagSelect[16];
byte ccElementIsIndSw[16];
byte validCcElementTagSelect[16];
byte channels;
byte commentFieldBytes;
byte commentFieldData[257];
byte frontChannelCount;
byte sideChannelCount;
byte backChannelCount;
byte lfeChannelCount;
byte sceChannel[16];
byte cpeChannel[16];
};
struct LIB_EXPORT AacIcsInfo
{
AacIcsInfo();
byte maxSfb;
byte swbCount;
byte windowGroupCount;
byte windowCount;
byte windowSequence;
byte windowGroupLengths[8];
byte windowShape;
byte scaleFactorGrouping;
uint16 sectionSfbOffset[8][15 * 8];
uint16 swbOffset[52];
uint16 maxSwbOffset;
byte sectionCb[8][15 * 8];
uint16 sectionStart[8][15 * 8];
uint16 sectionEnd[8][15 * 8];
byte sfbCb[8][15 * 8];
byte sectionsPerGroup[8];
byte globalGain;
uint16 scaleFactors[8][51];
byte midSideCodingMaskPresent;
byte midSideCodingUsed[aacMaxWindowGroups][aacMaxSfb];
byte noiseUsed;
byte isUsed;
byte pulseDataPresent;
byte tnsDataPresent;
byte gainControlPresent;
byte predictorDataPresent;
AacPulseInfo pulse;
AacTnsInfo tns;
AacPredictorInfo predictor;
AacLtpInfo ltp1;
AacLtpInfo ltp2;
AacSsrInfo ssr;
std::shared_ptr<AacSbrInfo> sbr;
// error resilience
uint16 reorderedSpectralDataLength;
byte longestCodewordLength;
byte sfConcealment;
byte revGlobalGain;
uint16 rvlcSfLength;
uint16 dpcmNoiseNrg;
byte sfEscapesPresent;
byte rvlcEscapesLength;
uint16 dpcmNoiseLastPos;
};
class LIB_EXPORT AacFrameElementParser
{
public:
AacFrameElementParser(byte audioObjectId, byte samplingFrequencyIndex, byte extensionSamplingFrequencyIndex, byte channelConfig, uint16 frameLength = 1024);
void parse(const AdtsFrame &adtsFrame, std::unique_ptr<char []> &data, std::size_t dataSize);
void parse(const AdtsFrame &adtsFrame, std::istream &stream, std::size_t dataSize);
private:
void parseLtpInfo(const AacIcsInfo &ics, AacLtpInfo &ltp);
void parseIcsInfo(AacIcsInfo &ics);
void parseSectionData(AacIcsInfo &ics);
void decodeScaleFactorData(AacIcsInfo &ics);
void decodeRvlcScaleFactorData(AacIcsInfo &ics);
void parseScaleFactorData(AacIcsInfo &ics);
void parsePulseData(AacIcsInfo &ics);
void parseTnsData(AacIcsInfo &ics);
void parseGainControlData(AacIcsInfo &ics);
void parseSpectralData(AacIcsInfo &ics, int16 *specData);
void parseSideInfo(AacIcsInfo &ics, bool scaleFlag);
byte parseExcludedChannels();
byte parseDynamicRange();
static sbyte sbrLog2(const sbyte val);
int16 sbrHuffmanDec(SbrHuffTab table);
void parseSbrGrid(std::shared_ptr<AacSbrInfo> &sbr, byte channel);
void parseSbrDtdf(std::shared_ptr<AacSbrInfo> &sbr, byte channel);
void parseInvfMode(std::shared_ptr<AacSbrInfo> &sbr, byte channel);
void parseSbrEnvelope(std::shared_ptr<AacSbrInfo> &sbr, byte channel);
void parseSbrNoise(std::shared_ptr<AacSbrInfo> &sbr, byte channel);
void parseSbrSinusoidalCoding(std::shared_ptr<AacSbrInfo> &sbr, byte channel);
uint16 parseSbrExtension(std::shared_ptr<AacSbrInfo> &sbr, byte extensionId, byte bitsLeft);
uint16 parsePsData(std::shared_ptr<AacPsInfo> &ps, byte &header);
uint16 parseDrmPsData(std::shared_ptr<AacDrmPsInfo> &drmPs);
void parseSbrSingleChannelElement(std::shared_ptr<AacSbrInfo> &sbr);
void parseSbrChannelPairElement(std::shared_ptr<AacSbrInfo> &sbr);
std::shared_ptr<AacSbrInfo> makeSbrInfo(byte sbrElement, bool isDrm = false);
void parseSbrExtensionData(byte sbrElement, uint16 count, bool crcFlag);
byte parseHuffmanScaleFactor();
void parseHuffmanSpectralData(byte cb, int16 *sp);
void huffmanSignBits(int16 *sp, byte len);
void huffman2StepQuad(byte cb, int16 *sp);
void huffmanBinaryQuadSign(byte cb, int16 *sp);
void huffmanBinaryPair(byte cb, int16 *sp);
void huffman2StepPair(byte cb, int16 *sp);
void huffmanBinaryPairSign(byte cb, int16 *sp);
void huffman2StepPairSign(byte cb, int16 *sp);
int16 huffmanGetEscape(int16 sp);
constexpr static int16 huffmanCodebook(byte i);
static void vcb11CheckLav(byte cb, int16 *sp);
void calculateWindowGroupingInfo(AacIcsInfo &ics);
void parseIndividualChannelStream(AacIcsInfo &ics, int16 *specData, bool scaleFlag = false);
void parseSingleChannelElement();
void parseChannelPairElement();
void parseCouplingChannelElement();
void parseLowFrequencyElement();
void parseDataStreamElement();
void parseProgramConfigElement();
void parseFillElement(byte sbrElement = aacInvalidSbrElement);
void parseRawDataBlock();
// these fields contain setup information
IoUtilities::BitReader m_reader;
byte m_mpeg4AudioObjectId;
byte m_mpeg4SamplingFrequencyIndex;
byte m_mpeg4ExtensionSamplingFrequencyIndex;
byte m_mpeg4ChannelConfig;
uint16 m_frameLength;
byte m_aacSectionDataResilienceFlag;
byte m_aacScalefactorDataResilienceFlag;
byte m_aacSpectralDataResilienceFlag;
// these fields will be parsed
byte m_elementId[aacMaxChannels];
byte m_channelCount;
byte m_elementCount;
byte m_elementChannelCount[aacMaxSyntaxElements];
//byte m_channel;
//int16 m_pairedChannel;
byte m_elementInstanceTag[aacMaxSyntaxElements];
byte m_commonWindow;
AacIcsInfo m_ics1;
AacIcsInfo m_ics2;
AacDrcInfo m_drc;
AacProgramConfig m_pce;
byte m_sbrPresentFlag;
byte m_forceUpSampling;
byte m_downSampledSbr;
std::shared_ptr<AacSbrInfo> m_sbrElements[aacMaxSyntaxElements];
byte m_psUsed[aacMaxSyntaxElements];
byte m_psUsedGlobal;
byte m_psResetFlag;
};
/*!
* \brief Constructs a new parser with the specified setup information.
*/
inline AacFrameElementParser::AacFrameElementParser(byte audioObjectId, byte samplingFrequencyIndex, byte extensionSamplingFrequencyIndex, byte channelConfig, uint16 frameLength) :
m_reader(nullptr, nullptr),
m_mpeg4AudioObjectId(audioObjectId),
m_mpeg4SamplingFrequencyIndex(samplingFrequencyIndex),
m_mpeg4ExtensionSamplingFrequencyIndex(extensionSamplingFrequencyIndex),
m_mpeg4ChannelConfig(channelConfig),
m_frameLength(frameLength),
m_aacSpectralDataResilienceFlag(0),
m_elementId{0},
m_channelCount(0),
m_elementCount(0),
m_elementChannelCount{0},
m_elementInstanceTag{0},
m_commonWindow(0),
//m_channel(0),
//m_pairedChannel(0),
m_sbrPresentFlag(0),
m_forceUpSampling(0),
m_downSampledSbr(0),
m_sbrElements{0},
m_psUsed{0},
m_psUsedGlobal(0),
m_psResetFlag(0)
{}
inline sbyte AacFrameElementParser::sbrLog2(const sbyte val)
{
static const int log2tab[] = {0, 0, 1, 2, 2, 3, 3, 3, 3, 4};
return (val < 10 && val >= 0) ? log2tab[val] : 0;
}
constexpr int16 AacFrameElementParser::huffmanCodebook(byte i)
{
return static_cast<int16>(i ? (16428320 & 0xFFFF) : ((16428320 >> 16) & 0xFFFF));
}
}
#endif // AACFRAME_H