diff --git a/abstracttrack.cpp b/abstracttrack.cpp index f866cd9..2226ed7 100644 --- a/abstracttrack.cpp +++ b/abstracttrack.cpp @@ -39,7 +39,9 @@ AbstractTrack::AbstractTrack(istream &inputStream, ostream &outputStream, uint64 m_trackNumber(0), m_id(0), m_bitrate(0.0), - m_samplesPerSecond(0), + m_maxBitrate(0.0), + m_sampleRate(0), + m_extensionSampleRate(0), m_bitsPerSample(0), m_bytesPerSecond(0), m_channelCount(0), diff --git a/abstracttrack.h b/abstracttrack.h index 6dcd8fe..7de644a 100644 --- a/abstracttrack.h +++ b/abstracttrack.h @@ -33,8 +33,12 @@ enum class TrackType OggStream /**< The track is a Media::OggStream. */ }; +class MpegAudioFrameStream; + class LIB_EXPORT AbstractTrack : public StatusProvider { + friend class MpegAudioFrameStream; // this is a temporary solution, until I have a better design + public: virtual ~AbstractTrack(); @@ -58,10 +62,12 @@ public: const std::string name() const; const ChronoUtilities::TimeSpan &duration() const; double bitrate() const; + double maxBitrate() const; const ChronoUtilities::DateTime &creationTime() const; const ChronoUtilities::DateTime &modificationTime() const; const std::string &language() const; - uint32 samplesPerSecond() const; + uint32 sampleRate() const; + uint32 extensionSampleRate() const; uint16 bitsPerSample() const; uint16 channelCount() const; uint64 sampleCount() const; @@ -108,10 +114,12 @@ protected: std::string m_name; ChronoUtilities::TimeSpan m_duration; double m_bitrate; + double m_maxBitrate; ChronoUtilities::DateTime m_creationTime; ChronoUtilities::DateTime m_modificationTime; std::string m_language; - uint32 m_samplesPerSecond; + uint32 m_sampleRate; + uint32 m_extensionSampleRate; uint16 m_bitsPerSample; uint32 m_bytesPerSecond; uint16 m_channelCount; @@ -287,6 +295,14 @@ inline double AbstractTrack::bitrate() const return m_bitrate; } +/*! + * \brief Returns the maximum bitrate in kbit/s if known; otherwise returns zero. 
+ */ +inline double AbstractTrack::maxBitrate() const +{ + return m_maxBitrate; +} + /*! * \brief Returns the creation time if known; otherwise returns a DateTime of zero ticks. */ @@ -316,9 +332,18 @@ inline const std::string &AbstractTrack::language() const /*! * \brief Returns the number of samples per second if known; otherwise returns 0. */ -inline uint32 AbstractTrack::samplesPerSecond() const +inline uint32 AbstractTrack::sampleRate() const { - return m_samplesPerSecond; + return m_sampleRate; +} + +/*! + * \brief Returns the number of samples per second if known; otherwise returns 0. + * \remarks This sample rate value takes extensions like SBR into account. + */ +inline uint32 AbstractTrack::extensionSampleRate() const +{ + return m_extensionSampleRate; } /*! diff --git a/matroska/matroskatrack.cpp b/matroska/matroskatrack.cpp index ca30c25..aa8c3e5 100644 --- a/matroska/matroskatrack.cpp +++ b/matroska/matroskatrack.cpp @@ -128,6 +128,7 @@ MediaFormat MatroskaTrack::codecIdToMediaFormat(const string &codecId) fmt.sub = SubFormats::AacMpeg2LowComplexityProfile; } else if(part3 == "SBR") { fmt.sub = SubFormats::AacMpeg2SpectralBandReplicationProfile; + fmt.extension = ExtensionFormats::SpectralBandReplication; } else if(part3 == "SSR") { fmt.sub = SubFormats::AacMpeg2ScalableSamplingRateProfile; } @@ -141,7 +142,7 @@ MediaFormat MatroskaTrack::codecIdToMediaFormat(const string &codecId) } else if(part3 == "SSR") { fmt.sub = SubFormats::AacMpeg4ScalableSamplingRateProfile; } else if(part3 == "LTP") { - fmt.sub = SubFormats::AacMpeg4LongTermPredictionProfile; + fmt.sub = SubFormats::AacMpeg4LongTermPrediction; } } } else if(part1 == "A_QUICKTIME") { @@ -279,7 +280,7 @@ void MatroskaTrack::internalParseHeader() m_channelCount = subElement->readUInteger(); break; case MatroskaIds::SamplingFrequency: - m_samplesPerSecond = subElement->readFloat(); + m_sampleRate = subElement->readFloat(); break; default: ; } diff --git a/mediaformat.cpp b/mediaformat.cpp index 
1972f68..8db6d36 100644 --- a/mediaformat.cpp +++ b/mediaformat.cpp @@ -27,7 +27,12 @@ const char *MediaFormat::name() const case AacMpeg4LowComplexityProfile: return "Advanced Audio Coding Low Complexity Profile"; case AacMpeg4SpectralBandReplicationProfile: return "Advanced Audio Coding Low Complexity with Spectral Band Replication Profile"; case AacMpeg4ScalableSamplingRateProfile: return "Advanced Audio Coding Scaleable Sampling Rate Profile"; - case AacMpeg4LongTermPredictionProfile: return "Advanced Audio Coding Scalable Sampling Rate Profile"; + case AacMpeg4LongTermPrediction: return "Advanced Audio Coding Long Term Prediction"; + case AacMpeg4ERLowComplecityProfile: return "Advanced Audio Coding Error Resilient Low Complexity Profile"; + case AacMpeg4ERScalableSampingRateProfile: return "Advanced Audio Coding Error Resilient Scalable Sampling Rate Profile"; + case AacMpeg4ERLongTermPrediction: return "Advanced Audio Coding Error Resilient Long Term Prediction"; + case AacMpeg4ERLowDelay: return "Advanced Audio Coding Error Resilient Low Delay"; + case AacMpeg4EREnhancedLowDelay: return "Advanced Audio Coding Error Resilient Enhanced Low Delay"; default: return "Advanced Audio Coding"; } case GeneralMediaFormat::Ac3: return "Dolby Digital"; @@ -159,9 +164,14 @@ const char *MediaFormat::abbreviation() const case AacMpeg2ScalableSamplingRateProfile: return "MPEG-2 AAC-SSR"; case AacMpeg4MainProfile: return "MPEG-4 AAC Main"; case AacMpeg4LowComplexityProfile: return "MPEG-4 AAC-LC"; - case AacMpeg4SpectralBandReplicationProfile: return "MPEG-4 AAC-SBR"; + case AacMpeg4SpectralBandReplicationProfile: return "MPEG-4 HE-AAC"; case AacMpeg4ScalableSamplingRateProfile: return "MPEG-4 AAC-SSR"; - case AacMpeg4LongTermPredictionProfile: return "MPEG-4 AAC-LTP"; + case AacMpeg4LongTermPrediction: return "MPEG-4 AAC-LTP"; + case AacMpeg4ERLowComplecityProfile: return "MPEG-4 ER AAC-LC"; + case AacMpeg4ERScalableSampingRateProfile: return "MPEG-4 ER AAC-SSR"; + case 
AacMpeg4ERLongTermPrediction: return "MPEG-4 ER AAC-LTP"; + case AacMpeg4ERLowDelay: return "MPEG-4 ER AAC-LD"; + case AacMpeg4EREnhancedLowDelay: return "MPEG-4 ER AAC-ELD"; default: return "AAC"; } case GeneralMediaFormat::Ac3: return "AC-3"; diff --git a/mediaformat.h b/mediaformat.h index 4dc93aa..0362278 100644 --- a/mediaformat.h +++ b/mediaformat.h @@ -106,7 +106,12 @@ enum AacProfile : unsigned char { AacMpeg4LowComplexityProfile, AacMpeg4SpectralBandReplicationProfile, AacMpeg4ScalableSamplingRateProfile, - AacMpeg4LongTermPredictionProfile + AacMpeg4LongTermPrediction, + AacMpeg4ERLowComplecityProfile, + AacMpeg4ERScalableSampingRateProfile, + AacMpeg4ERLongTermPrediction, + AacMpeg4ERLowDelay, + AacMpeg4EREnhancedLowDelay }; enum Mpeg2VideoProfile : unsigned char { @@ -152,27 +157,57 @@ enum ImageSubtitle : unsigned char { } +/*! + * \brief Encapsulates extension formats. + */ +namespace ExtensionFormats { +enum AudioFormatExtensions : unsigned char { + SpectralBandReplication = 1, + ParametricStereo = 2 +}; +} + class LIB_EXPORT MediaFormat { public: - MediaFormat(GeneralMediaFormat general = GeneralMediaFormat::Unknown, unsigned char sub = 0); + MediaFormat(GeneralMediaFormat general = GeneralMediaFormat::Unknown, unsigned char sub = 0, unsigned char extension = 0); const char *name() const; const char *abbreviation() const; operator bool() const; + MediaFormat &operator+=(const MediaFormat &other); GeneralMediaFormat general; unsigned char sub; + unsigned char extension; }; /*! * \brief Constructs a new media format. */ -inline MediaFormat::MediaFormat(GeneralMediaFormat general, unsigned char sub) : +inline MediaFormat::MediaFormat(GeneralMediaFormat general, unsigned char sub, unsigned char extension) : general(general), - sub(sub) + sub(sub), + extension(extension) {} +/*! + * \brief "Adds" information from another instance to the object. 
+ */ +inline MediaFormat &MediaFormat::operator+=(const MediaFormat &other) +{ + if(other) { + general = other.general; + if(other.sub) { + sub = other.sub; + } + if(other.extension) { + extension = other.extension; + } + } + return *this; +} + /*! * \brief Returns whether the media format is known. */ diff --git a/mp4/mp4ids.cpp b/mp4/mp4ids.cpp index ce955da..ffbce7e 100644 --- a/mp4/mp4ids.cpp +++ b/mp4/mp4ids.cpp @@ -120,9 +120,9 @@ MediaFormat streamObjectTypeFormat(byte streamObjectTypeId) case Mpeg2VideoSpatialProfile: return MediaFormat(GeneralMediaFormat::Mpeg4Video, SubFormats::Mpeg2HighProfile); case Mpeg2VideoHighProfile: return MediaFormat(GeneralMediaFormat::Mpeg4Video, SubFormats::Mpeg2HighProfile); case Mpeg2Video422Profile: return MediaFormat(GeneralMediaFormat::Mpeg4Video, SubFormats::Mpeg2SimpleProfile); - case AacMainProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2MainProfile); - case AacLowComplexityProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2LowComplexityProfile); - case AacScaleableSamplingRateProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2ScalableSamplingRateProfile); + case Mpeg2AacMainProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2MainProfile); + case Mpeg2AacLowComplexityProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2LowComplexityProfile); + case Mpeg2AacScaleableSamplingRateProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2ScalableSamplingRateProfile); case Mpeg2Audio: return GeneralMediaFormat::Mpeg2Audio; case Mpeg1Video: return GeneralMediaFormat::Mpeg1Video; case Mpeg1Audio: return GeneralMediaFormat::Mpeg1Audio; @@ -188,6 +188,56 @@ const char *streamTypeName(byte streamTypeId) * \sa http://wiki.multimedia.cx/index.php?title=MPEG-4_Audio */ namespace Mpeg4AudioObjectIds { + +LIB_EXPORT MediaFormat idToMediaFormat(byte mpeg4AudioObjectId, bool sbrPresent, bool psPresent) +{ + 
MediaFormat fmt; + switch(mpeg4AudioObjectId) { + case AacMain: + fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4MainProfile); + break; + case AacLc: + fmt = MediaFormat(GeneralMediaFormat::Aac, sbrPresent ? SubFormats::AacMpeg4SpectralBandReplicationProfile : SubFormats::AacMpeg4LowComplexityProfile); + break; + case AacSsr: + fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ScalableSamplingRateProfile); + break; + case AacLtp: + fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4LongTermPrediction); + break; + case AacScalable: + fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ScalableSamplingRateProfile); + break; + case ErAacLc: + fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ERLowComplecityProfile); + break; + case ErAacLtp: + fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ERLongTermPrediction); + break; + case ErAacLd: + fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ERLowDelay); + break; + case Layer1: + fmt = MediaFormat(GeneralMediaFormat::Mpeg1Audio, SubFormats::Mpeg1Layer1); + break; + case Layer2: + fmt = MediaFormat(GeneralMediaFormat::Mpeg1Audio, SubFormats::Mpeg1Layer2); + break; + case Layer3: + fmt = MediaFormat(GeneralMediaFormat::Mpeg1Audio, SubFormats::Mpeg1Layer3); + break; + default: + ; + } + if(sbrPresent) { + fmt.extension |= ExtensionFormats::SpectralBandReplication; + } + if(psPresent) { + fmt.extension |= ExtensionFormats::ParametricStereo; + } + return fmt; +} + } } diff --git a/mp4/mp4ids.h b/mp4/mp4ids.h index 7931810..ce8d7cd 100644 --- a/mp4/mp4ids.h +++ b/mp4/mp4ids.h @@ -1,6 +1,7 @@ #ifndef MP4TAGATOMNAMES_H #define MP4TAGATOMNAMES_H +#include #include namespace Media @@ -166,7 +167,7 @@ enum KnownValue : uint32 { Mp3CbrOnly = 0x6D730055 /**< MPEG-1 Layer 3 (constant bitrate only) */ }; -MediaFormat fourccToMediaFormat(uint32 fourccId); +LIB_EXPORT MediaFormat fourccToMediaFormat(uint32 fourccId); } @@ -206,9 +207,9 @@ enum 
KnownValue : byte { Mpeg2VideoSpatialProfile, /**< MPEG-2 Video Spatial Profile */ Mpeg2VideoHighProfile, /**< MPEG-2 Video High Profile */ Mpeg2Video422Profile, /**< MPEG-2 Video 422 Profile */ - AacMainProfile, /**< Advanced Audio Coding Main Profile */ - AacLowComplexityProfile, /**< Advanced Audio Coding Low Complexity Profile */ - AacScaleableSamplingRateProfile, /**< Advanced Audio Coding Scaleable Sampling Rate Profile */ + Mpeg2AacMainProfile, /**< Advanced Audio Coding Main Profile */ + Mpeg2AacLowComplexityProfile, /**< Advanced Audio Coding Low Complexity Profile */ + Mpeg2AacScaleableSamplingRateProfile, /**< Advanced Audio Coding Scaleable Sampling Rate Profile */ Mpeg2Audio, /**< MPEG-2 Audio */ Mpeg1Video, /**< MPEG-1 Video */ Mpeg1Audio, /**< MPEG-1 Audio */ @@ -233,7 +234,7 @@ enum KnownValue : byte { PrivateQcelp = 0xE1 /**< QCELP */ }; -MediaFormat streamObjectTypeFormat(byte streamObjectTypeId); +LIB_EXPORT MediaFormat streamObjectTypeFormat(byte streamObjectTypeId); } @@ -254,7 +255,7 @@ enum KnownValue : byte { StreamingText }; -const char *streamTypeName(byte streamTypeId); +LIB_EXPORT const char *streamTypeName(byte streamTypeId); } @@ -350,6 +351,9 @@ enum KnownValue : byte { LdMpegSurround, Usac /**< unified speech and audio coding */ }; + +LIB_EXPORT MediaFormat idToMediaFormat(byte mpeg4AudioObjectId, bool sbrPresent = false, bool psPresent = false); + } /*! 
diff --git a/mp4/mp4track.cpp b/mp4/mp4track.cpp index 3c7022a..bcc4a2f 100644 --- a/mp4/mp4track.cpp +++ b/mp4/mp4track.cpp @@ -4,11 +4,15 @@ #include "mp4ids.h" #include "mpeg4descriptor.h" +#include "../mpegaudio/mpegaudioframe.h" +#include "../mpegaudio/mpegaudioframestream.h" + #include "../exceptions.h" #include "../mediaformat.h" #include #include +#include #include #include @@ -31,10 +35,32 @@ MediaFormat fmtTable[] = { MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4MainProfile), MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4LowComplexityProfile), MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ScalableSamplingRateProfile), - MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4LongTermPredictionProfile), + MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4LongTermPrediction), MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4SpectralBandReplicationProfile) }; +Mpeg4AudioSpecificConfig::Mpeg4AudioSpecificConfig() : + audioObjectType(0), + sampleFrequencyIndex(0xF), + sampleFrequency(0), + channelConfiguration(0), + extensionAudioObjectType(0), + sbrPresent(false), + psPresent(false), + extensionSampleFrequencyIndex(0xF), + extensionSampleFrequency(0), + extensionChannelConfiguration(0), + frameLengthFlag(false), + dependsOnCoreCoder(false), + coreCoderDelay(0), + extensionFlag(0), + layerNr(0), + numOfSubFrame(0), + layerLength(0), + resilienceFlags(0), + epConfig(0) +{} + /*! * \class Media::Mp4Track * \brief Implementation of Media::AbstractTrack for the MP4 container. @@ -59,8 +85,8 @@ Mp4Track::Mp4Track(Mp4Atom &trakAtom) : m_stscAtom(nullptr), m_stcoAtom(nullptr), m_stszAtom(nullptr), - m_codecConfigAtom(nullptr), - m_esDescAtom(nullptr), + //m_codecConfigAtom(nullptr), + //m_esDescAtom(nullptr), m_framesPerSample(1), m_chunkOffsetSize(4), m_chunkCount(0), @@ -413,15 +439,15 @@ vector Mp4Track::readChunkSizes() * - Returns an empty configuration for non-AVC tracks. * - Notifications might be added. 
*/ -AvcConfiguration Mp4Track::parseAvcConfiguration() +AvcConfiguration Mp4Track::parseAvcConfiguration(Mp4Atom *avcConfigAtom) { AvcConfiguration config; - if(m_codecConfigAtom) { + if(avcConfigAtom) { try { - auto configSize = m_codecConfigAtom->dataSize(); - if(m_codecConfigAtom && configSize >= 5) { + auto configSize = avcConfigAtom->dataSize(); + if(avcConfigAtom && configSize >= 5) { // skip first byte (is always 1) - m_istream->seekg(m_codecConfigAtom->dataOffset() + 1); + m_istream->seekg(avcConfigAtom->dataOffset() + 1); // read profile, IDC level, NALU size length config.profileIdc = m_reader.readByte(); config.profileCompat = m_reader.readByte(); @@ -471,73 +497,211 @@ AvcConfiguration Mp4Track::parseAvcConfiguration() * - Notifications might be added. * \sa mpeg4ElementaryStreamInfo() */ -void Mp4Track::parseMpeg4ElementaryStreamInfo() +std::unique_ptr Mp4Track::parseMpeg4ElementaryStreamInfo(Mp4Atom *esDescAtom) { static const string context("parsing MPEG-4 elementary stream descriptor"); - if(m_esDescAtom) { - if(m_esDescAtom->dataSize() >= 12) { - m_istream->seekg(m_esDescAtom->dataOffset()); - // read version/flags - if(m_reader.readUInt32BE() != 0) { - addNotification(NotificationType::Warning, "Unknown version/flags.", context); + using namespace Mpeg4ElementaryStreamObjectIds; + unique_ptr esInfo; + if(esDescAtom->dataSize() >= 12) { + m_istream->seekg(esDescAtom->dataOffset()); + // read version/flags + if(m_reader.readUInt32BE() != 0) { + addNotification(NotificationType::Warning, "Unknown version/flags.", context); + } + // read extended descriptor + Mpeg4Descriptor esDesc(esDescAtom->container(), m_istream->tellg(), esDescAtom->dataSize() - 4); + try { + esDesc.parse(); + // check ID + if(esDesc.id() != Mpeg4DescriptorIds::ElementaryStreamDescr) { + addNotification(NotificationType::Critical, "Invalid descriptor found.", context); + throw Failure(); } - // read extended descriptor - Mpeg4Descriptor esDesc(m_esDescAtom->container(), 
m_istream->tellg(), m_esDescAtom->dataSize() - 4); - try { - esDesc.parse(); - // check ID - if(esDesc.id() != Mpeg4DescriptorIds::ElementaryStreamDescr) { - addNotification(NotificationType::Critical, "Invalid descriptor found.", context); - throw Failure(); + // read stream info + m_istream->seekg(esDesc.dataOffset()); + esInfo = make_unique(); + esInfo->id = m_reader.readUInt16BE(); + esInfo->esDescFlags = m_reader.readByte(); + if(esInfo->dependencyFlag()) { + esInfo->dependsOnId = m_reader.readUInt16BE(); + } + if(esInfo->urlFlag()) { + esInfo->url = m_reader.readString(m_reader.readByte()); + } + if(esInfo->ocrFlag()) { + esInfo->ocrId = m_reader.readUInt16BE(); + } + for(Mpeg4Descriptor *esDescChild = esDesc.denoteFirstChild(static_cast(m_istream->tellg()) - esDesc.startOffset()); esDescChild; esDescChild = esDescChild->nextSibling()) { + esDescChild->parse(); + switch(esDescChild->id()) { + case Mpeg4DescriptorIds::DecoderConfigDescr: + // read decoder config descriptor + m_istream->seekg(esDescChild->dataOffset()); + esInfo->objectTypeId = m_reader.readByte(); + esInfo->decCfgDescFlags = m_reader.readByte(); + esInfo->bufferSize = m_reader.readUInt24BE(); + esInfo->maxBitrate = m_reader.readUInt32BE(); + esInfo->averageBitrate = m_reader.readUInt32BE(); + for(Mpeg4Descriptor *decCfgDescChild = esDescChild->denoteFirstChild(esDescChild->headerSize() + 13); decCfgDescChild; decCfgDescChild = decCfgDescChild->nextSibling()) { + decCfgDescChild->parse(); + switch(decCfgDescChild->id()) { + case Mpeg4DescriptorIds::DecoderSpecificInfo: + // read decoder specific info + switch(esInfo->objectTypeId) { + case Aac: case Mpeg2AacMainProfile: case Mpeg2AacLowComplexityProfile: + case Mpeg2AacScaleableSamplingRateProfile: case Mpeg2Audio: case Mpeg1Audio: + esInfo->audioSpecificConfig = parseAudioSpecificConfig(decCfgDescChild); + default: + ; // TODO: covering remaining object types + } + break; + } + } + break; + case Mpeg4DescriptorIds::SlConfigDescr: + // 
uninteresting + break; } - // read stream info - m_istream->seekg(esDesc.dataOffset()); - m_esInfo = make_unique(); - m_esInfo->id = m_reader.readUInt16BE(); - m_esInfo->esDescFlags = m_reader.readByte(); - if(m_esInfo->dependencyFlag()) { - m_esInfo->dependsOnId = m_reader.readUInt16BE(); - } - if(m_esInfo->urlFlag()) { - m_esInfo->url = m_reader.readString(m_reader.readByte()); - } - if(m_esInfo->ocrFlag()) { - m_esInfo->ocrId = m_reader.readUInt16BE(); - } - for(Mpeg4Descriptor *esDescChild = esDesc.denoteFirstChild(static_cast(m_istream->tellg()) - esDesc.startOffset()); esDescChild; esDescChild = esDescChild->nextSibling()) { - esDescChild->parse(); - switch(esDescChild->id()) { - case Mpeg4DescriptorIds::DecoderConfigDescr: - // read decoder config descriptor - m_istream->seekg(esDescChild->dataOffset()); - m_esInfo->objectTypeId = m_reader.readByte(); - m_esInfo->decCfgDescFlags = m_reader.readByte(); - m_esInfo->bufferSize = m_reader.readUInt24BE(); - m_esInfo->maxBitrate = m_reader.readUInt32BE(); - m_esInfo->averageBitrate = m_reader.readUInt32BE(); - for(Mpeg4Descriptor *decCfgDescChild = esDescChild->denoteFirstChild(13); decCfgDescChild; decCfgDescChild = decCfgDescChild->nextSibling()) { - decCfgDescChild->parse(); - switch(esDescChild->id()) { - case Mpeg4DescriptorIds::DecoderSpecificInfo: - // read decoder specific info + } + } catch (Failure &) { + addNotification(NotificationType::Critical, "The MPEG-4 descriptor element structure is invalid.", context); + } + } else { + addNotification(NotificationType::Warning, "Elementary stream descriptor atom (esds) is truncated.", context); + } + return esInfo; +} - break; +/*! + * \brief Reads the audio specific configuration for the track. + * \remarks + * - Notifications might be added. 
+ * \sa mpeg4ElementaryStreamInfo() + */ +unique_ptr Mp4Track::parseAudioSpecificConfig(Mpeg4Descriptor *decSpecInfoDesc) +{ + static const string context("parsing MPEG-4 audio specific config from elementary stream descriptor"); + using namespace Mpeg4AudioObjectIds; + // read config into buffer and construct BitReader for bitwise reading + m_istream->seekg(decSpecInfoDesc->dataOffset()); + auto buff = make_unique(decSpecInfoDesc->dataSize()); + m_istream->read(buff.get(), decSpecInfoDesc->dataSize()); + BitReader bitReader(buff.get(), decSpecInfoDesc->dataSize()); + auto audioCfg = make_unique(); + try { + // read audio object type + auto getAudioObjectType = [&bitReader] { + byte objType = bitReader.readBits(5); + if(objType == 31) { + objType = 32 + bitReader.readBits(6); + } + return objType; + }; + audioCfg->audioObjectType = getAudioObjectType(); + // read sampling frequency + if((audioCfg->sampleFrequencyIndex = bitReader.readBits(4)) == 0xF) { + audioCfg->sampleFrequency = bitReader.readBits(24); + } + // read channel config + audioCfg->channelConfiguration = bitReader.readBits(4); + // detect parametric stereo first; the extension header below replaces audioObjectType with the underlying type + switch(audioCfg->audioObjectType) { + case Ps: + audioCfg->psPresent = true; + break; + } + // read extension header + switch(audioCfg->audioObjectType) { + case Sbr: + case Ps: + audioCfg->extensionAudioObjectType = Sbr; + audioCfg->sbrPresent = true; + if((audioCfg->extensionSampleFrequencyIndex = bitReader.readBits(4)) == 0xF) { + audioCfg->extensionSampleFrequency = bitReader.readBits(24); + } + if((audioCfg->audioObjectType = getAudioObjectType()) == ErBsac) { + audioCfg->extensionChannelConfiguration = bitReader.readBits(4); + } + break; + } + // read GA specific config + switch(audioCfg->audioObjectType) { + case AacMain: case AacLc: case AacLtp: case AacScalable: + case TwinVq: case ErAacLc: case ErAacLtp: case ErAacScalable: + case ErTwinVq: case ErBsac: case ErAacLd: + 
audioCfg->frameLengthFlag = bitReader.readBits(1); + if((audioCfg->dependsOnCoreCoder = bitReader.readBits(1))) { + audioCfg->coreCoderDelay = bitReader.readBits(14); + } + audioCfg->extensionFlag = bitReader.readBits(1); + if(audioCfg->channelConfiguration == 0) { + throw NotImplementedException(); // TODO: parse program_config_element + } + switch(audioCfg->audioObjectType) { + case AacScalable: case ErAacScalable: + audioCfg->layerNr = bitReader.readBits(3); + break; + default: + ; + } + if(audioCfg->extensionFlag == 1) { + switch(audioCfg->audioObjectType) { + case ErBsac: + audioCfg->numOfSubFrame = bitReader.readBits(5); + audioCfg->layerLength = bitReader.readBits(11); + break; + case ErAacLc: case ErAacLtp: case ErAacScalable: case ErAacLd: + audioCfg->resilienceFlags = bitReader.readBits(3); + break; + default: + ; + } + if(bitReader.readBits(1) == 1) { // extension flag 3 + throw NotImplementedException(); // TODO + } + } + break; + default: + throw NotImplementedException(); // TODO: cover remaining object types + } + // read error specific config + switch(audioCfg->audioObjectType) { + case ErAacLc: case ErAacLtp: case ErAacScalable: case ErTwinVq: + case ErBsac: case ErAacLd: case ErCelp: case ErHvxc: case ErHiln: + case ErParametric: case ErAacEld: + switch(audioCfg->epConfig = bitReader.readBits(2)) { + default: + throw NotImplementedException(); // TODO + } + break; + } + if(audioCfg->extensionAudioObjectType != 5 && bitReader.bitsAvailable() >= 16) { + uint16 syncExtensionType = bitReader.readBits(11); + if(syncExtensionType == 0x2B7) { + if((audioCfg->extensionAudioObjectType = getAudioObjectType()) == Sbr) { + if((audioCfg->sbrPresent = bitReader.readBits(1))) { + if((audioCfg->extensionSampleFrequencyIndex = bitReader.readBits(4)) == 0xF) { + audioCfg->extensionSampleFrequency = bitReader.readBits(24); + } + if(bitReader.bitsAvailable() >= 12) { + if((syncExtensionType = bitReader.readBits(11)) == 0x548) { + audioCfg->psPresent = 
bitReader.readBits(1); } } - break; - case Mpeg4DescriptorIds::SlConfigDescr: - // uninteresting - break; } } - } catch (Failure &) { - // notifications will be added in any case } - addNotifications(esDesc); - } else { - addNotification(NotificationType::Warning, "Elementary stream descriptor atom (esds) is truncated.", context); } + } catch(ios_base::failure &) { + if(m_istream->fail()) { + throw; // IO error caused by input stream + } + // IO error caused by bitReader + addNotification(NotificationType::Critical, "Audio specific configuration is truncated.", context); + } catch(NotImplementedException &) { + addNotification(NotificationType::Information, "Not implemented for the format of audio track.", context); } + return audioCfg; } /*! @@ -563,8 +727,8 @@ void Mp4Track::updateChunkOffsets(const vector &oldMdatOffsets, const vec if(oldMdatOffsets.size() == 0 || oldMdatOffsets.size() != newMdatOffsets.size()) { throw InvalidDataException(); } - static const unsigned int stcoDataBegin = 16; - uint64 startPos = m_stcoAtom->startOffset() + stcoDataBegin; + static const unsigned int stcoDataBegin = 8; + uint64 startPos = m_stcoAtom->dataOffset() + stcoDataBegin; uint64 endPos = startPos + m_stcoAtom->totalSize() - stcoDataBegin; m_istream->seekg(startPos); m_ostream->seekp(startPos); @@ -1026,6 +1190,10 @@ void Mp4Track::internalParseHeader() m_istream->seekg(12, ios_base::cur); // skip reserved bytes //name = reader.readString(hdlrAtom->size - 16 - 4 - 12); m_name = reader.readTerminatedString(m_hdlrAtom->totalSize() - 12 - 4 - 12, 0); + // read stco atom (only chunk count) + m_chunkOffsetSize = (m_stcoAtom->id() == Mp4AtomIds::ChunkOffset64) ? 
8 : 4; + m_istream->seekg(m_stcoAtom->dataOffset() + 4); + m_chunkCount = reader.readUInt32BE(); // read stsd atom m_istream->seekg(m_stsdAtom->startOffset() + 12); // seek to beg, skip size, name, version and flags uint32 entryCount = reader.readUInt32BE(); @@ -1040,21 +1208,51 @@ void Mp4Track::internalParseHeader() m_formatId = interpretIntegerAsString(codecConfigContainerAtom->id()); m_format = Mp4FormatIds::fourccToMediaFormat(codecConfigContainerAtom->id()); // parse AVC configuration - m_codecConfigAtom = codecConfigContainerAtom->childById(Mp4AtomIds::AvcConfiguration); + //codecConfigContainerAtom->childById(Mp4AtomIds::AvcConfiguration); // parse MPEG-4 elementary stream descriptor - m_esDescAtom = codecConfigContainerAtom->childById(Mp4FormatExtensionIds::Mpeg4ElementaryStreamDescriptor); - if(!m_esDescAtom) { - m_esDescAtom = codecConfigContainerAtom->childById(Mp4FormatExtensionIds::Mpeg4ElementaryStreamDescriptor2); + Mp4Atom *esDescAtom = codecConfigContainerAtom->childById(Mp4FormatExtensionIds::Mpeg4ElementaryStreamDescriptor); + if(!esDescAtom) { + esDescAtom = codecConfigContainerAtom->childById(Mp4FormatExtensionIds::Mpeg4ElementaryStreamDescriptor2); } - try { - parseMpeg4ElementaryStreamInfo(); - if(m_esInfo) { - auto mediaFormat = Mpeg4ElementaryStreamObjectIds::streamObjectTypeFormat(m_esInfo->objectTypeId); - if(mediaFormat) { - m_format = mediaFormat; + if(esDescAtom) { + try { + if((m_esInfo = parseMpeg4ElementaryStreamInfo(esDescAtom))) { + m_format += Mpeg4ElementaryStreamObjectIds::streamObjectTypeFormat(m_esInfo->objectTypeId); + m_bitrate = static_cast(m_esInfo->averageBitrate) / 1000; + m_maxBitrate = static_cast(m_esInfo->maxBitrate) / 1000; + if(m_esInfo->audioSpecificConfig) { + // check the audio specific config for useful information + m_format += Mpeg4AudioObjectIds::idToMediaFormat(m_esInfo->audioSpecificConfig->audioObjectType, m_esInfo->audioSpecificConfig->sbrPresent, m_esInfo->audioSpecificConfig->psPresent); + 
if(m_esInfo->audioSpecificConfig->sampleFrequencyIndex == 0xF) { + m_sampleRate = m_esInfo->audioSpecificConfig->sampleFrequency; + } else if(m_esInfo->audioSpecificConfig->sampleFrequencyIndex < sizeof(sampleRateTable) / sizeof(sampleRateTable[0])) { // bound by element count, not byte size (assumes sampleRateTable is a built-in array - confirm) + m_sampleRate = sampleRateTable[m_esInfo->audioSpecificConfig->sampleFrequencyIndex]; + } else { + addNotification(NotificationType::Warning, "Audio specific config has invalid sample frequency index.", context); + } + if(m_esInfo->audioSpecificConfig->extensionSampleFrequencyIndex == 0xF) { + m_extensionSampleRate = m_esInfo->audioSpecificConfig->extensionSampleFrequency; + } else if(m_esInfo->audioSpecificConfig->extensionSampleFrequencyIndex < sizeof(sampleRateTable) / sizeof(sampleRateTable[0])) { // same element-count bound as for the core sample rate + m_extensionSampleRate = sampleRateTable[m_esInfo->audioSpecificConfig->extensionSampleFrequencyIndex]; + } else { + addNotification(NotificationType::Warning, "Audio specific config has invalid extension sample frequency index.", context); + } + } + // check the stream data for missing information + switch(m_format.general) { + case GeneralMediaFormat::Mpeg1Audio: case GeneralMediaFormat::Mpeg2Audio: { + MpegAudioFrame frame; + m_istream->seekg(m_stcoAtom->dataOffset() + 8); + m_istream->seekg(m_chunkOffsetSize == 8 ? 
reader.readUInt64BE() : reader.readUInt32BE()); + frame.parseHeader(*m_istream); + MpegAudioFrameStream::addInfo(frame, *this); + break; + } default: + ; + } } + } catch(Failure &) { } - } catch(Failure &) { } // seek to start offset of additional atom and skip reserved bytes and data reference index m_istream->seekg(codecConfigContainerAtom->startOffset() + 8 + 6 + 2); @@ -1064,7 +1262,11 @@ void Mp4Track::internalParseHeader() m_channelCount = reader.readUInt16BE(); m_bitsPerSample = reader.readUInt16BE(); m_istream->seekg(4, ios_base::cur); // skip reserved bytes - m_samplesPerSecond = reader.readUInt32BE() >> 16; + if(!m_sampleRate) { + m_sampleRate = reader.readUInt32BE() >> 16; + } else { + m_istream->seekg(4, ios_base::cur); + } break; case MediaType::Video: m_istream->seekg(16, ios_base::cur); // skip reserved bytes @@ -1333,11 +1535,9 @@ void Mp4Track::internalParseHeader() } } // caluculate average bitrate - m_bitrate = (static_cast(m_size) * 0.0078125) / m_duration.totalSeconds(); - // read stco atom (only chunk count) - m_chunkOffsetSize = (m_stcoAtom->id() == Mp4AtomIds::ChunkOffset64) ? 
8 : 4; - m_istream->seekg(m_stcoAtom->dataOffset() + 4); - m_chunkCount = reader.readUInt32BE(); + if(m_bitrate < 0.01 && m_bitrate > -0.01) { + m_bitrate = (static_cast(m_size) * 0.0078125) / m_duration.totalSeconds(); + } // read stsc atom (only number of entries) m_istream->seekg(m_stscAtom->dataOffset() + 4); m_sampleToChunkEntryCount = reader.readUInt32BE(); diff --git a/mp4/mp4track.h b/mp4/mp4track.h index 7123cd1..0c2cea9 100644 --- a/mp4/mp4track.h +++ b/mp4/mp4track.h @@ -12,12 +12,46 @@ namespace Media { class Mp4Atom; +class Mpeg4Descriptor; + +class LIB_EXPORT Mpeg4AudioSpecificConfig +{ +public: + Mpeg4AudioSpecificConfig(); + + byte audioObjectType; + byte sampleFrequencyIndex; + uint32 sampleFrequency; + byte channelConfiguration; + byte extensionAudioObjectType; + bool sbrPresent; + bool psPresent; + byte extensionSampleFrequencyIndex; + uint32 extensionSampleFrequency; + byte extensionChannelConfiguration; + bool frameLengthFlag; + bool dependsOnCoreCoder; + uint16 coreCoderDelay; + byte extensionFlag; + byte layerNr; + byte numOfSubFrame; + uint16 layerLength; + byte resilienceFlags; + byte epConfig; +}; class LIB_EXPORT Mpeg4ElementaryStreamInfo { public: Mpeg4ElementaryStreamInfo(); + bool dependencyFlag() const; + bool urlFlag() const; + bool ocrFlag() const; + byte priority() const; + byte streamTypeId() const; + bool upstream() const; + uint16 id; byte esDescFlags; uint16 dependsOnId; @@ -28,13 +62,7 @@ public: uint32 bufferSize; uint32 maxBitrate; uint32 averageBitrate; - - bool dependencyFlag() const; - bool urlFlag() const; - bool ocrFlag() const; - byte priority() const; - byte streamTypeId() const; - bool upstream() const; + std::unique_ptr audioSpecificConfig; }; inline Mpeg4ElementaryStreamInfo::Mpeg4ElementaryStreamInfo() : @@ -84,33 +112,42 @@ class LIB_EXPORT Mp4Track : public AbstractTrack public: Mp4Track(Mp4Atom &trakAtom); ~Mp4Track(); - TrackType type() const; - Mp4Atom &trakAtom(); + // getter methods specific for MP4 tracks 
+ Mp4Atom &trakAtom(); const std::vector &sampleSizes() const; unsigned int chunkOffsetSize() const; uint32 chunkCount() const; uint32 sampleToChunkEntryCount() const; const Mpeg4ElementaryStreamInfo *mpeg4ElementaryStreamInfo() const; + + // methods to parse configuration details from the track header + AvcConfiguration parseAvcConfiguration(Mp4Atom *avcConfigAtom); + std::unique_ptr parseMpeg4ElementaryStreamInfo(Mp4Atom *esDescAtom); + std::unique_ptr parseAudioSpecificConfig(Mpeg4Descriptor *decSpecInfoDesc); + + // methods to read the "index" (chunk offsets and sizes) std::vector readChunkOffsets(); std::vector > readSampleToChunkTable(); std::vector readChunkSizes(); - AvcConfiguration parseAvcConfiguration(); - bool hasMpeg4ElementaryStreamDesc() const; - void parseMpeg4ElementaryStreamInfo(); - void updateChunkOffsets(const std::vector &oldMdatOffsets, const std::vector &newMdatOffsets); - void updateChunkOffset(uint32 chunkIndex, uint64 offset); + + // methods to make the track header void makeTrack(); void makeTrackHeader(); void makeMedia(); void makeMediaInfo(); void makeSampleTable(); + // methods to update chunk offsets + void updateChunkOffsets(const std::vector &oldMdatOffsets, const std::vector &newMdatOffsets); + void updateChunkOffset(uint32 chunkIndex, uint64 offset); + protected: void internalParseHeader(); private: + // private helper methods uint64 accumulateSampleSizes(size_t &sampleIndex, size_t count); void addChunkSizeEntries(std::vector &chunkSizeTable, size_t count, size_t &sampleIndex, uint32 sampleCount); @@ -125,8 +162,8 @@ private: Mp4Atom *m_stscAtom; Mp4Atom *m_stcoAtom; Mp4Atom *m_stszAtom; - Mp4Atom *m_codecConfigAtom; - Mp4Atom *m_esDescAtom; + //Mp4Atom *m_codecConfigAtom; + //Mp4Atom *m_esDescAtom; uint16 m_framesPerSample; std::vector m_sampleSizes; unsigned int m_chunkOffsetSize; @@ -198,14 +235,6 @@ inline const Mpeg4ElementaryStreamInfo *Mp4Track::mpeg4ElementaryStreamInfo() co return m_esInfo.get(); } -/*! 
- * \brief Returns whether the track has an MPEG-4 elementary stream descriptor atom. - */ -inline bool Mp4Track::hasMpeg4ElementaryStreamDesc() const -{ - return m_esDescAtom != nullptr; -} - } #endif // MP4TRACK_H diff --git a/mp4/mpeg4descriptor.h b/mp4/mpeg4descriptor.h index bf2eb2f..79942c6 100644 --- a/mp4/mpeg4descriptor.h +++ b/mp4/mpeg4descriptor.h @@ -94,7 +94,11 @@ inline uint64 Mpeg4Descriptor::firstChildOffset() const */ inline Mpeg4Descriptor::implementationType *Mpeg4Descriptor::denoteFirstChild(uint32 relativeFirstChildOffset) { - m_firstChild.reset(new implementationType(static_cast(*this), startOffset() + relativeFirstChildOffset)); + if(relativeFirstChildOffset + 4 < dataSize()) { + m_firstChild.reset(new implementationType(static_cast(*this), startOffset() + relativeFirstChildOffset)); + } else { + m_firstChild.reset(); + } return m_firstChild.get(); } diff --git a/mpegaudio/mpegaudioframestream.cpp b/mpegaudio/mpegaudioframestream.cpp index 3f52979..dd5fd56 100644 --- a/mpegaudio/mpegaudioframestream.cpp +++ b/mpegaudio/mpegaudioframestream.cpp @@ -38,9 +38,20 @@ TrackType MpegAudioFrameStream::type() const return TrackType::MpegAudioFrameStream; } +/*! + * \brief Adds the information from the specified \a frame to the specified \a track. + */ +void MpegAudioFrameStream::addInfo(const MpegAudioFrame &frame, AbstractTrack &track) +{ + track.m_version = frame.mpegVersion(); + track.m_format = MediaFormat(GeneralMediaFormat::Mpeg1Audio, frame.layer()); + track.m_channelCount = frame.channelMode() == MpegChannelMode::SingleChannel ? 
1 : 2; + track.m_sampleRate = frame.samperate(); +} + void MpegAudioFrameStream::internalParseHeader() { - const string context("parsing MPEG audio frame header"); + static const string context("parsing MPEG audio frame header"); if(!m_istream) { throw NoDataFoundException(); } @@ -53,15 +64,13 @@ void MpegAudioFrameStream::internalParseHeader() } m_istream->seekg(m_startOffset, ios_base::beg); // parse frame header - MpegAudioFrame frame; + m_frames.emplace_back(); + MpegAudioFrame &frame = m_frames.back(); frame.parseHeader(*m_istream); - m_version = frame.mpegVersion(); - m_format = MediaFormat(GeneralMediaFormat::Mpeg1Audio, frame.layer()); - m_channelCount = frame.channelMode() == MpegChannelMode::SingleChannel ? 1 : 2; - m_samplesPerSecond = frame.samperate(); + addInfo(frame, *this); if(frame.isXingBytesfieldPresent()) { uint32 xingSize = frame.xingBytesfield(); - if(xingSize != m_size) { + if(m_size && xingSize != m_size) { addNotification(NotificationType::Warning, "Real length MPEG of audio frames is not equal with value provided by Xing header. 
The Xing header value will be used.", context); m_size = xingSize; } @@ -71,7 +80,6 @@ void MpegAudioFrameStream::internalParseHeader() : frame.bitrate(); m_bytesPerSecond = m_bitrate * 125; m_duration = TimeSpan::fromSeconds(static_cast(m_size) / (m_bitrate * 128.0)); - m_frames.push_back(frame); } } diff --git a/mpegaudio/mpegaudioframestream.h b/mpegaudio/mpegaudioframestream.h index 8d0e30e..6ab4f05 100644 --- a/mpegaudio/mpegaudioframestream.h +++ b/mpegaudio/mpegaudioframestream.h @@ -16,6 +16,9 @@ public: ~MpegAudioFrameStream(); TrackType type() const; + + static void addInfo(const MpegAudioFrame &frame, AbstractTrack &track); + protected: void internalParseHeader(); diff --git a/ogg/oggstream.cpp b/ogg/oggstream.cpp index ffeb641..27507d5 100644 --- a/ogg/oggstream.cpp +++ b/ogg/oggstream.cpp @@ -78,7 +78,7 @@ void OggStream::internalParseHeader() VorbisIdentificationHeader ind; ind.parseHeader(iterator); m_channelCount = ind.channels(); - m_samplesPerSecond = ind.sampleRate(); + m_sampleRate = ind.sampleRate(); if(ind.nominalBitrate()) { m_bitrate = ind.nominalBitrate(); } else if(ind.maxBitrate() == ind.minBitrate()) { @@ -96,7 +96,7 @@ void OggStream::internalParseHeader() auto lastPage = find_if(pages.crbegin(), pages.crend(), pred); if(firstPage != pages.cend() && lastPage != pages.crend()) { m_sampleCount = lastPage->absoluteGranulePosition() - firstPage->absoluteGranulePosition(); - m_duration = TimeSpan::fromSeconds(static_cast(m_sampleCount) / m_samplesPerSecond); + m_duration = TimeSpan::fromSeconds(static_cast(m_sampleCount) / m_sampleRate); } } hasIdentificationHeader = true; diff --git a/wav/waveaudiostream.cpp b/wav/waveaudiostream.cpp index ca7bc82..356e164 100644 --- a/wav/waveaudiostream.cpp +++ b/wav/waveaudiostream.cpp @@ -58,11 +58,11 @@ void WaveAudioStream::internalParseHeader() m_format = GeneralMediaFormat::Unknown; } m_channelCount = m_reader.readUInt16LE(); - m_samplesPerSecond = m_reader.readUInt32LE(); + m_sampleRate = 
m_reader.readUInt32LE(); m_bytesPerSecond = m_reader.readUInt32LE(); m_chunkSize = m_reader.readUInt16LE(); m_bitsPerSample = m_reader.readUInt16LE(); - m_bitrate = m_bitsPerSample * m_samplesPerSecond * m_channelCount; + m_bitrate = m_bitsPerSample * m_sampleRate * m_channelCount; } else { m_format = GeneralMediaFormat::Unknown; } @@ -72,7 +72,7 @@ void WaveAudioStream::internalParseHeader() if(m_reader.readUInt32BE() == 0x64617461u) { m_size = m_reader.readUInt32LE(); m_sampleCount = m_size / m_chunkSize; - m_duration = ChronoUtilities::TimeSpan::fromSeconds(static_cast(m_sampleCount) / static_cast(m_samplesPerSecond)); + m_duration = ChronoUtilities::TimeSpan::fromSeconds(static_cast(m_sampleCount) / static_cast(m_sampleRate)); } else { throw NoDataFoundException(); }