improved detection of format of MP4 audio tracks

This commit is contained in:
Martchus 2015-06-10 01:28:22 +02:00
parent 30f4ad1e9a
commit 43c33adedf
14 changed files with 514 additions and 143 deletions

View File

@ -39,7 +39,9 @@ AbstractTrack::AbstractTrack(istream &inputStream, ostream &outputStream, uint64
m_trackNumber(0),
m_id(0),
m_bitrate(0.0),
m_samplesPerSecond(0),
m_maxBitrate(0.0),
m_sampleRate(0),
m_extensionSampleRate(0),
m_bitsPerSample(0),
m_bytesPerSecond(0),
m_channelCount(0),

View File

@ -33,8 +33,12 @@ enum class TrackType
OggStream /**< The track is a Media::OggStream. */
};
class MpegAudioFrameStream;
class LIB_EXPORT AbstractTrack : public StatusProvider
{
friend class MpegAudioFrameStream; // this is a temporary solution, until I have a better design
public:
virtual ~AbstractTrack();
@ -58,10 +62,12 @@ public:
const std::string name() const;
const ChronoUtilities::TimeSpan &duration() const;
double bitrate() const;
double maxBitrate() const;
const ChronoUtilities::DateTime &creationTime() const;
const ChronoUtilities::DateTime &modificationTime() const;
const std::string &language() const;
uint32 samplesPerSecond() const;
uint32 sampleRate() const;
uint32 extensionSampleRate() const;
uint16 bitsPerSample() const;
uint16 channelCount() const;
uint64 sampleCount() const;
@ -108,10 +114,12 @@ protected:
std::string m_name;
ChronoUtilities::TimeSpan m_duration;
double m_bitrate;
double m_maxBitrate;
ChronoUtilities::DateTime m_creationTime;
ChronoUtilities::DateTime m_modificationTime;
std::string m_language;
uint32 m_samplesPerSecond;
uint32 m_sampleRate;
uint32 m_extensionSampleRate;
uint16 m_bitsPerSample;
uint32 m_bytesPerSecond;
uint16 m_channelCount;
@ -287,6 +295,14 @@ inline double AbstractTrack::bitrate() const
return m_bitrate;
}
/*!
* \brief Returns the maximum bitrate in kbit/s if known; otherwise returns zero.
* \remarks This is a plain accessor for m_maxBitrate; the value stays zero until a parser
*          of a concrete track type assigns it.
*/
inline double AbstractTrack::maxBitrate() const
{
return m_maxBitrate;
}
/*!
* \brief Returns the creation time if known; otherwise returns a DateTime of zero ticks.
*/
@ -316,9 +332,18 @@ inline const std::string &AbstractTrack::language() const
/*!
* \brief Returns the number of samples per second if known; otherwise returns 0.
*/
inline uint32 AbstractTrack::samplesPerSecond() const
inline uint32 AbstractTrack::sampleRate() const
{
return m_samplesPerSecond;
return m_sampleRate;
}
/*!
* \brief Returns the number of samples per second of the extension if known; otherwise returns 0.
* \remarks
* - This sample rate value takes extensions like SBR into account.
* - This is a plain accessor for m_extensionSampleRate; the value stays 0 until a parser
*   of a concrete track type assigns it.
*/
inline uint32 AbstractTrack::extensionSampleRate() const
{
return m_extensionSampleRate;
}
/*!

View File

@ -128,6 +128,7 @@ MediaFormat MatroskaTrack::codecIdToMediaFormat(const string &codecId)
fmt.sub = SubFormats::AacMpeg2LowComplexityProfile;
} else if(part3 == "SBR") {
fmt.sub = SubFormats::AacMpeg2SpectralBandReplicationProfile;
fmt.extension = ExtensionFormats::SpectralBandReplication;
} else if(part3 == "SSR") {
fmt.sub = SubFormats::AacMpeg2ScalableSamplingRateProfile;
}
@ -141,7 +142,7 @@ MediaFormat MatroskaTrack::codecIdToMediaFormat(const string &codecId)
} else if(part3 == "SSR") {
fmt.sub = SubFormats::AacMpeg4ScalableSamplingRateProfile;
} else if(part3 == "LTP") {
fmt.sub = SubFormats::AacMpeg4LongTermPredictionProfile;
fmt.sub = SubFormats::AacMpeg4LongTermPrediction;
}
}
} else if(part1 == "A_QUICKTIME") {
@ -279,7 +280,7 @@ void MatroskaTrack::internalParseHeader()
m_channelCount = subElement->readUInteger();
break;
case MatroskaIds::SamplingFrequency:
m_samplesPerSecond = subElement->readFloat();
m_sampleRate = subElement->readFloat();
break;
default: ;
}

View File

@ -27,7 +27,12 @@ const char *MediaFormat::name() const
case AacMpeg4LowComplexityProfile: return "Advanced Audio Coding Low Complexity Profile";
case AacMpeg4SpectralBandReplicationProfile: return "Advanced Audio Coding Low Complexity with Spectral Band Replication Profile";
case AacMpeg4ScalableSamplingRateProfile: return "Advanced Audio Coding Scaleable Sampling Rate Profile";
case AacMpeg4LongTermPredictionProfile: return "Advanced Audio Coding Scalable Sampling Rate Profile";
case AacMpeg4LongTermPrediction: return "Advanced Audio Coding Long Term Predicition";
case AacMpeg4ERLowComplecityProfile: return "Advanced Audio Coding Error Resilient Low Complexity Profile";
case AacMpeg4ERScalableSampingRateProfile: return "Advanced Audio Coding Error Resilient Scalable Sampling Rate Profile";
case AacMpeg4ERLongTermPrediction: return "Advanced Audio Coding Error Resilient Long Term Predicition";
case AacMpeg4ERLowDelay: return "Advanced Audio Coding Error Resilient Low Delay";
case AacMpeg4EREnhancedLowDelay: return "Advanced Audio Coding Error Resilient Enhanced Low Delay";
default: return "Advanced Audio Coding";
}
case GeneralMediaFormat::Ac3: return "Dolby Digital";
@ -159,9 +164,14 @@ const char *MediaFormat::abbreviation() const
case AacMpeg2ScalableSamplingRateProfile: return "MPEG-2 AAC-SSR";
case AacMpeg4MainProfile: return "MPEG-4 AAC Main";
case AacMpeg4LowComplexityProfile: return "MPEG-4 AAC-LC";
case AacMpeg4SpectralBandReplicationProfile: return "MPEG-4 AAC-SBR";
case AacMpeg4SpectralBandReplicationProfile: return "MPEG-4 HE-AAC";
case AacMpeg4ScalableSamplingRateProfile: return "MPEG-4 AAC-SSR";
case AacMpeg4LongTermPredictionProfile: return "MPEG-4 AAC-LTP";
case AacMpeg4LongTermPrediction: return "MPEG-4 AAC-LTP";
case AacMpeg4ERLowComplecityProfile: return "MPEG-4 ER AAC-LC";
case AacMpeg4ERScalableSampingRateProfile: return "MPEG-4 ER AAC-LC";
case AacMpeg4ERLongTermPrediction: return "MPEG-4 ER AAC-LTP";
case AacMpeg4ERLowDelay: return "MPEG-4 ER AAC-LD";
case AacMpeg4EREnhancedLowDelay: return "MPEG-4 ER AAC-ELD";
default: return "AAC";
}
case GeneralMediaFormat::Ac3: return "AC-3";

View File

@ -106,7 +106,12 @@ enum AacProfile : unsigned char {
AacMpeg4LowComplexityProfile,
AacMpeg4SpectralBandReplicationProfile,
AacMpeg4ScalableSamplingRateProfile,
AacMpeg4LongTermPredictionProfile
AacMpeg4LongTermPrediction,
AacMpeg4ERLowComplecityProfile,
AacMpeg4ERScalableSampingRateProfile,
AacMpeg4ERLongTermPrediction,
AacMpeg4ERLowDelay,
AacMpeg4EREnhancedLowDelay
};
enum Mpeg2VideoProfile : unsigned char {
@ -152,27 +157,57 @@ enum ImageSubtitle : unsigned char {
}
/*!
 * \brief Holds the extensions a media format can carry in addition to its sub format.
 */
namespace ExtensionFormats {
/*!
 * \brief Audio format extensions; every enumerator is a distinct bit so that several
 *        extensions can be combined within a single byte.
 */
enum AudioFormatExtensions : unsigned char {
    SpectralBandReplication = 1 << 0, /**< spectral band replication (SBR) */
    ParametricStereo = 1 << 1 /**< parametric stereo (PS) */
};
}
class LIB_EXPORT MediaFormat
{
public:
MediaFormat(GeneralMediaFormat general = GeneralMediaFormat::Unknown, unsigned char sub = 0);
MediaFormat(GeneralMediaFormat general = GeneralMediaFormat::Unknown, unsigned char sub = 0, unsigned char extension = 0);
const char *name() const;
const char *abbreviation() const;
operator bool() const;
MediaFormat &operator+=(const MediaFormat &other);
GeneralMediaFormat general;
unsigned char sub;
unsigned char extension;
};
/*!
* \brief Constructs a new media format.
*/
inline MediaFormat::MediaFormat(GeneralMediaFormat general, unsigned char sub) :
inline MediaFormat::MediaFormat(GeneralMediaFormat general, unsigned char sub, unsigned char extension) :
general(general),
sub(sub)
sub(sub),
extension(extension)
{}
/*!
 * \brief "Adds" information from another instance to the object.
 * \remarks
 * - Nothing is changed if \a other is not known (evaluates to false).
 * - If \a other denotes the same general format, its sub format and extension are only
 *   taken over when they are specified (non-zero); otherwise the present values are kept.
 * - If \a other denotes a different general format, sub format and extension are always
 *   taken over from \a other. The sub format enumerations are defined per general format,
 *   so keeping the previous values would let them be interpreted under the wrong general format.
 * \returns Returns a reference to the current instance.
 */
inline MediaFormat &MediaFormat::operator+=(const MediaFormat &other)
{
    if(!other) {
        return *this;
    }
    // values belonging to a different general format are stale and must not survive
    const bool sameGeneral = (general == other.general);
    general = other.general;
    if(other.sub || !sameGeneral) {
        sub = other.sub;
    }
    if(other.extension || !sameGeneral) {
        extension = other.extension;
    }
    return *this;
}
/*!
* \brief Returns whether the media format is known.
*/

View File

@ -120,9 +120,9 @@ MediaFormat streamObjectTypeFormat(byte streamObjectTypeId)
case Mpeg2VideoSpatialProfile: return MediaFormat(GeneralMediaFormat::Mpeg4Video, SubFormats::Mpeg2HighProfile);
case Mpeg2VideoHighProfile: return MediaFormat(GeneralMediaFormat::Mpeg4Video, SubFormats::Mpeg2HighProfile);
case Mpeg2Video422Profile: return MediaFormat(GeneralMediaFormat::Mpeg4Video, SubFormats::Mpeg2SimpleProfile);
case AacMainProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2MainProfile);
case AacLowComplexityProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2LowComplexityProfile);
case AacScaleableSamplingRateProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2ScalableSamplingRateProfile);
case Mpeg2AacMainProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2MainProfile);
case Mpeg2AacLowComplexityProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2LowComplexityProfile);
case Mpeg2AacScaleableSamplingRateProfile: return MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg2ScalableSamplingRateProfile);
case Mpeg2Audio: return GeneralMediaFormat::Mpeg2Audio;
case Mpeg1Video: return GeneralMediaFormat::Mpeg1Video;
case Mpeg1Audio: return GeneralMediaFormat::Mpeg1Audio;
@ -188,6 +188,56 @@ const char *streamTypeName(byte streamTypeId)
* \sa http://wiki.multimedia.cx/index.php?title=MPEG-4_Audio
*/
namespace Mpeg4AudioObjectIds {
/*!
 * \brief Returns the media format denoted by the specified MPEG-4 audio object type ID.
 * \param mpeg4AudioObjectId Specifies the audio object type ID (one of the IDs defined in this namespace).
 * \param sbrPresent Specifies whether spectral band replication is present. For AacLc this also
 *                   selects the spectral band replication profile as sub format.
 * \param psPresent Specifies whether parametric stereo is present.
 * \returns Returns the media format. The general format is left at its default value if the ID
 *          is not covered here; the extension flags are set from \a sbrPresent and \a psPresent
 *          in either case.
 */
LIB_EXPORT MediaFormat idToMediaFormat(byte mpeg4AudioObjectId, bool sbrPresent, bool psPresent)
{
    MediaFormat fmt;
    switch(mpeg4AudioObjectId) {
    case AacMain:
        fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4MainProfile);
        break;
    case AacLc:
        fmt = MediaFormat(GeneralMediaFormat::Aac, sbrPresent ? SubFormats::AacMpeg4SpectralBandReplicationProfile : SubFormats::AacMpeg4LowComplexityProfile);
        break;
    case AacSsr:
        fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ScalableSamplingRateProfile);
        break;
    case AacLtp:
        fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4LongTermPrediction);
        break;
    case AacScalable:
        fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ScalableSamplingRateProfile);
        break;
    case ErAacLc:
        fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ERLowComplecityProfile);
        break;
    case ErAacLtp:
        fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ERLongTermPrediction);
        break;
    case ErAacScalable:
        // mapped analogously to AacScalable, just to the error resilient counterpart
        fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ERScalableSampingRateProfile);
        break;
    case ErAacLd:
        fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ERLowDelay);
        break;
    case ErAacEld:
        fmt = MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4EREnhancedLowDelay);
        break;
    case Layer1:
        fmt = MediaFormat(GeneralMediaFormat::Mpeg1Audio, SubFormats::Mpeg1Layer1);
        break;
    case Layer2:
        fmt = MediaFormat(GeneralMediaFormat::Mpeg1Audio, SubFormats::Mpeg1Layer2);
        break;
    case Layer3:
        fmt = MediaFormat(GeneralMediaFormat::Mpeg1Audio, SubFormats::Mpeg1Layer3);
        break;
    default:
        ; // remaining object types are not covered yet
    }
    // the extensions are flags and hence combined bitwise
    if(sbrPresent) {
        fmt.extension |= ExtensionFormats::SpectralBandReplication;
    }
    if(psPresent) {
        fmt.extension |= ExtensionFormats::ParametricStereo;
    }
    return fmt;
}
}
}

View File

@ -1,6 +1,7 @@
#ifndef MP4TAGATOMNAMES_H
#define MP4TAGATOMNAMES_H
#include <c++utilities/application/global.h>
#include <c++utilities/conversion/types.h>
namespace Media
@ -166,7 +167,7 @@ enum KnownValue : uint32 {
Mp3CbrOnly = 0x6D730055 /**< MPEG-1 Layer 3 (constant bitrate only) */
};
MediaFormat fourccToMediaFormat(uint32 fourccId);
LIB_EXPORT MediaFormat fourccToMediaFormat(uint32 fourccId);
}
@ -206,9 +207,9 @@ enum KnownValue : byte {
Mpeg2VideoSpatialProfile, /**< MPEG-2 Video Spatial Profile */
Mpeg2VideoHighProfile, /**< MPEG-2 Video High Profile */
Mpeg2Video422Profile, /**< MPEG-2 Video 422 Profile */
AacMainProfile, /**< Advanced Audio Coding Main Profile */
AacLowComplexityProfile, /**< Advanced Audio Coding Low Complexity Profile */
AacScaleableSamplingRateProfile, /**< Advanced Audio Coding Scaleable Sampling Rate Profile */
Mpeg2AacMainProfile, /**< Advanced Audio Coding Main Profile */
Mpeg2AacLowComplexityProfile, /**< Advanced Audio Coding Low Complexity Profile */
Mpeg2AacScaleableSamplingRateProfile, /**< Advanced Audio Coding Scaleable Sampling Rate Profile */
Mpeg2Audio, /**< MPEG-2 Audio */
Mpeg1Video, /**< MPEG-1 Video */
Mpeg1Audio, /**< MPEG-1 Audio */
@ -233,7 +234,7 @@ enum KnownValue : byte {
PrivateQcelp = 0xE1 /**< QCELP */
};
MediaFormat streamObjectTypeFormat(byte streamObjectTypeId);
LIB_EXPORT MediaFormat streamObjectTypeFormat(byte streamObjectTypeId);
}
@ -254,7 +255,7 @@ enum KnownValue : byte {
StreamingText
};
const char *streamTypeName(byte streamTypeId);
LIB_EXPORT const char *streamTypeName(byte streamTypeId);
}
@ -350,6 +351,9 @@ enum KnownValue : byte {
LdMpegSurround,
Usac /**< unified speech and audio coding */
};
LIB_EXPORT MediaFormat idToMediaFormat(byte mpeg4AudioObjectId, bool sbrPresent = false, bool psPresent = false);
}
/*!

View File

@ -4,11 +4,15 @@
#include "mp4ids.h"
#include "mpeg4descriptor.h"
#include "../mpegaudio/mpegaudioframe.h"
#include "../mpegaudio/mpegaudioframestream.h"
#include "../exceptions.h"
#include "../mediaformat.h"
#include <c++utilities/io/binaryreader.h>
#include <c++utilities/io/binarywriter.h>
#include <c++utilities/io/bitreader.h>
#include <locale>
#include <cmath>
@ -31,10 +35,32 @@ MediaFormat fmtTable[] = {
MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4MainProfile),
MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4LowComplexityProfile),
MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4ScalableSamplingRateProfile),
MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4LongTermPredictionProfile),
MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4LongTermPrediction),
MediaFormat(GeneralMediaFormat::Aac, SubFormats::AacMpeg4SpectralBandReplicationProfile)
};
/*!
 * \brief Constructs an empty audio specific config.
 * \remarks All fields are zeroed/false except the two sample frequency indexes. These start
 *          as 0xF, the index value for which the explicitly stored frequency is used instead
 *          of a table lookup. Since the explicit frequencies start as 0, a config that was
 *          never filled yields a sample rate of 0.
 */
Mpeg4AudioSpecificConfig::Mpeg4AudioSpecificConfig() :
audioObjectType(0),
sampleFrequencyIndex(0xF), // 0xF: use sampleFrequency rather than a table entry
sampleFrequency(0),
channelConfiguration(0),
extensionAudioObjectType(0),
sbrPresent(false),
psPresent(false),
extensionSampleFrequencyIndex(0xF), // 0xF: use extensionSampleFrequency rather than a table entry
extensionSampleFrequency(0),
extensionChannelConfiguration(0),
frameLengthFlag(false),
dependsOnCoreCoder(false),
coreCoderDelay(0),
extensionFlag(0),
layerNr(0),
numOfSubFrame(0),
layerLength(0),
resilienceFlags(0),
epConfig(0)
{}
/*!
* \class Media::Mp4Track
* \brief Implementation of Media::AbstractTrack for the MP4 container.
@ -59,8 +85,8 @@ Mp4Track::Mp4Track(Mp4Atom &trakAtom) :
m_stscAtom(nullptr),
m_stcoAtom(nullptr),
m_stszAtom(nullptr),
m_codecConfigAtom(nullptr),
m_esDescAtom(nullptr),
//m_codecConfigAtom(nullptr),
//m_esDescAtom(nullptr),
m_framesPerSample(1),
m_chunkOffsetSize(4),
m_chunkCount(0),
@ -413,15 +439,15 @@ vector<uint64> Mp4Track::readChunkSizes()
* - Returns an empty configuration for non-AVC tracks.
* - Notifications might be added.
*/
AvcConfiguration Mp4Track::parseAvcConfiguration()
AvcConfiguration Mp4Track::parseAvcConfiguration(Mp4Atom *avcConfigAtom)
{
AvcConfiguration config;
if(m_codecConfigAtom) {
if(avcConfigAtom) {
try {
auto configSize = m_codecConfigAtom->dataSize();
if(m_codecConfigAtom && configSize >= 5) {
auto configSize = avcConfigAtom->dataSize();
if(avcConfigAtom && configSize >= 5) {
// skip first byte (is always 1)
m_istream->seekg(m_codecConfigAtom->dataOffset() + 1);
m_istream->seekg(avcConfigAtom->dataOffset() + 1);
// read profile, IDC level, NALU size length
config.profileIdc = m_reader.readByte();
config.profileCompat = m_reader.readByte();
@ -471,73 +497,211 @@ AvcConfiguration Mp4Track::parseAvcConfiguration()
* - Notifications might be added.
* \sa mpeg4ElementaryStreamInfo()
*/
void Mp4Track::parseMpeg4ElementaryStreamInfo()
std::unique_ptr<Mpeg4ElementaryStreamInfo> Mp4Track::parseMpeg4ElementaryStreamInfo(Mp4Atom *esDescAtom)
{
static const string context("parsing MPEG-4 elementary stream descriptor");
if(m_esDescAtom) {
if(m_esDescAtom->dataSize() >= 12) {
m_istream->seekg(m_esDescAtom->dataOffset());
// read version/flags
if(m_reader.readUInt32BE() != 0) {
addNotification(NotificationType::Warning, "Unknown version/flags.", context);
using namespace Mpeg4ElementaryStreamObjectIds;
unique_ptr<Mpeg4ElementaryStreamInfo> esInfo;
if(esDescAtom->dataSize() >= 12) {
m_istream->seekg(esDescAtom->dataOffset());
// read version/flags
if(m_reader.readUInt32BE() != 0) {
addNotification(NotificationType::Warning, "Unknown version/flags.", context);
}
// read extended descriptor
Mpeg4Descriptor esDesc(esDescAtom->container(), m_istream->tellg(), esDescAtom->dataSize() - 4);
try {
esDesc.parse();
// check ID
if(esDesc.id() != Mpeg4DescriptorIds::ElementaryStreamDescr) {
addNotification(NotificationType::Critical, "Invalid descriptor found.", context);
throw Failure();
}
// read extended descriptor
Mpeg4Descriptor esDesc(m_esDescAtom->container(), m_istream->tellg(), m_esDescAtom->dataSize() - 4);
try {
esDesc.parse();
// check ID
if(esDesc.id() != Mpeg4DescriptorIds::ElementaryStreamDescr) {
addNotification(NotificationType::Critical, "Invalid descriptor found.", context);
throw Failure();
// read stream info
m_istream->seekg(esDesc.dataOffset());
esInfo = make_unique<Mpeg4ElementaryStreamInfo>();
esInfo->id = m_reader.readUInt16BE();
esInfo->esDescFlags = m_reader.readByte();
if(esInfo->dependencyFlag()) {
esInfo->dependsOnId = m_reader.readUInt16BE();
}
if(esInfo->urlFlag()) {
esInfo->url = m_reader.readString(m_reader.readByte());
}
if(esInfo->ocrFlag()) {
esInfo->ocrId = m_reader.readUInt16BE();
}
for(Mpeg4Descriptor *esDescChild = esDesc.denoteFirstChild(static_cast<uint64>(m_istream->tellg()) - esDesc.startOffset()); esDescChild; esDescChild = esDescChild->nextSibling()) {
esDescChild->parse();
switch(esDescChild->id()) {
case Mpeg4DescriptorIds::DecoderConfigDescr:
// read decoder config descriptor
m_istream->seekg(esDescChild->dataOffset());
esInfo->objectTypeId = m_reader.readByte();
esInfo->decCfgDescFlags = m_reader.readByte();
esInfo->bufferSize = m_reader.readUInt24BE();
esInfo->maxBitrate = m_reader.readUInt32BE();
esInfo->averageBitrate = m_reader.readUInt32BE();
for(Mpeg4Descriptor *decCfgDescChild = esDescChild->denoteFirstChild(esDescChild->headerSize() + 13); decCfgDescChild; decCfgDescChild = decCfgDescChild->nextSibling()) {
decCfgDescChild->parse();
switch(decCfgDescChild->id()) {
case Mpeg4DescriptorIds::DecoderSpecificInfo:
// read decoder specific info
switch(esInfo->objectTypeId) {
case Aac: case Mpeg2AacMainProfile: case Mpeg2AacLowComplexityProfile:
case Mpeg2AacScaleableSamplingRateProfile: case Mpeg2Audio: case Mpeg1Audio:
esInfo->audioSpecificConfig = parseAudioSpecificConfig(decCfgDescChild);
default:
; // TODO: covering remaining object types
}
break;
}
}
break;
case Mpeg4DescriptorIds::SlConfigDescr:
// uninteresting
break;
}
// read stream info
m_istream->seekg(esDesc.dataOffset());
m_esInfo = make_unique<Mpeg4ElementaryStreamInfo>();
m_esInfo->id = m_reader.readUInt16BE();
m_esInfo->esDescFlags = m_reader.readByte();
if(m_esInfo->dependencyFlag()) {
m_esInfo->dependsOnId = m_reader.readUInt16BE();
}
if(m_esInfo->urlFlag()) {
m_esInfo->url = m_reader.readString(m_reader.readByte());
}
if(m_esInfo->ocrFlag()) {
m_esInfo->ocrId = m_reader.readUInt16BE();
}
for(Mpeg4Descriptor *esDescChild = esDesc.denoteFirstChild(static_cast<uint64>(m_istream->tellg()) - esDesc.startOffset()); esDescChild; esDescChild = esDescChild->nextSibling()) {
esDescChild->parse();
switch(esDescChild->id()) {
case Mpeg4DescriptorIds::DecoderConfigDescr:
// read decoder config descriptor
m_istream->seekg(esDescChild->dataOffset());
m_esInfo->objectTypeId = m_reader.readByte();
m_esInfo->decCfgDescFlags = m_reader.readByte();
m_esInfo->bufferSize = m_reader.readUInt24BE();
m_esInfo->maxBitrate = m_reader.readUInt32BE();
m_esInfo->averageBitrate = m_reader.readUInt32BE();
for(Mpeg4Descriptor *decCfgDescChild = esDescChild->denoteFirstChild(13); decCfgDescChild; decCfgDescChild = decCfgDescChild->nextSibling()) {
decCfgDescChild->parse();
switch(esDescChild->id()) {
case Mpeg4DescriptorIds::DecoderSpecificInfo:
// read decoder specific info
}
} catch (Failure &) {
addNotification(NotificationType::Critical, "The MPEG-4 descriptor element structure is invalid.", context);
}
} else {
addNotification(NotificationType::Warning, "Elementary stream descriptor atom (esds) is truncated.", context);
}
return esInfo;
}
break;
/*!
 * \brief Reads the MPEG-4 audio specific config stored in the specified decoder specific info descriptor.
 * \param decSpecInfoDesc Specifies the descriptor to read from. Must not be nullptr because it is
 *                        dereferenced unconditionally.
 * \returns Returns the config. Fields which could not be read (truncated data or a format which is
 *          not implemented yet) keep the value assigned by the default constructor.
 * \remarks
 * - Notifications might be added.
 * - An std::ios_base::failure is re-thrown if the input stream is in a failed state. Otherwise it is
 *   attributed to the bit reader and reported as a notification about truncated data.
 * \sa mpeg4ElementaryStreamInfo()
 */
unique_ptr<Mpeg4AudioSpecificConfig> Mp4Track::parseAudioSpecificConfig(Mpeg4Descriptor *decSpecInfoDesc)
{
    static const string context("parsing MPEG-4 audio specific config from elementary stream descriptor");
    using namespace Mpeg4AudioObjectIds;
    // read config into buffer and construct BitReader for bitwise reading
    m_istream->seekg(decSpecInfoDesc->dataOffset());
    auto buff = make_unique<char []>(decSpecInfoDesc->dataSize());
    m_istream->read(buff.get(), decSpecInfoDesc->dataSize());
    BitReader bitReader(buff.get(), decSpecInfoDesc->dataSize());
    auto audioCfg = make_unique<Mpeg4AudioSpecificConfig>();
    try {
        // an audio object type takes 5 bits; the value 31 is an escape for 32 + the following 6 bits
        auto getAudioObjectType = [&bitReader] {
            byte objType = bitReader.readBits<byte>(5);
            if(objType == 31) {
                objType = 32 + bitReader.readBits<byte>(6);
            }
            return objType;
        };
        audioCfg->audioObjectType = getAudioObjectType();
        // read sampling frequency (the index 0xF means the frequency is stored explicitly)
        if((audioCfg->sampleFrequencyIndex = bitReader.readBits<byte>(4)) == 0xF) {
            audioCfg->sampleFrequency = bitReader.readBits<uint32>(24);
        }
        // read channel config
        audioCfg->channelConfiguration = bitReader.readBits<byte>(4);
        // read extension header (SBR/PS signalled explicitly via the audio object type)
        switch(audioCfg->audioObjectType) {
        case Ps:
            // must be noted here: the audio object type is replaced by the underlying one below,
            // so a later check for Ps would never match
            audioCfg->psPresent = true;
            // fall through (PS is signalled on top of SBR)
        case Sbr:
            audioCfg->extensionAudioObjectType = Sbr;
            audioCfg->sbrPresent = true;
            if((audioCfg->extensionSampleFrequencyIndex = bitReader.readBits<byte>(4)) == 0xF) {
                audioCfg->extensionSampleFrequency = bitReader.readBits<uint32>(24);
            }
            if((audioCfg->audioObjectType = getAudioObjectType()) == ErBsac) {
                audioCfg->extensionChannelConfiguration = bitReader.readBits<byte>(4);
            }
            break;
        }
        // read GA specific config
        switch(audioCfg->audioObjectType) {
        case AacMain: case AacLc: case AacLtp: case AacScalable:
        case TwinVq: case ErAacLc: case ErAacLtp: case ErAacScalable:
        case ErTwinVq: case ErBsac: case ErAacLd:
            audioCfg->frameLengthFlag = bitReader.readBits<byte>(1);
            if((audioCfg->dependsOnCoreCoder = bitReader.readBits<byte>(1))) {
                // the delay takes 14 bits and hence does not fit into a single byte
                audioCfg->coreCoderDelay = bitReader.readBits<uint16>(14);
            }
            audioCfg->extensionFlag = bitReader.readBits<byte>(1);
            if(audioCfg->channelConfiguration == 0) {
                throw NotImplementedException(); // TODO: parse program_config_element
            }
            switch(audioCfg->audioObjectType) {
            case AacScalable: case ErAacScalable:
                audioCfg->layerNr = bitReader.readBits<byte>(3);
                break;
            default:
                ;
            }
            if(audioCfg->extensionFlag == 1) {
                switch(audioCfg->audioObjectType) {
                case ErBsac:
                    audioCfg->numOfSubFrame = bitReader.readBits<byte>(5);
                    audioCfg->layerLength = bitReader.readBits<uint16>(11);
                    break;
                case ErAacLc: case ErAacLtp: case ErAacScalable: case ErAacLd:
                    audioCfg->resilienceFlags = bitReader.readBits<byte>(3);
                    break;
                default:
                    ;
                }
                if(bitReader.readBits<byte>(1) == 1) { // extension flag 3
                    throw NotImplementedException(); // TODO
                }
            }
            break;
        default:
            throw NotImplementedException(); // TODO: cover remaining object types
        }
        // read error specific config
        switch(audioCfg->audioObjectType) {
        case ErAacLc: case ErAacLtp: case ErAacScalable: case ErTwinVq:
        case ErBsac: case ErAacLd: case ErCelp: case ErHvxc: case ErHiln:
        case ErParametric: case ErAacEld:
            audioCfg->epConfig = bitReader.readBits<byte>(2);
            throw NotImplementedException(); // TODO: none of the epConfig values is handled yet
        }
        // read sync extension (SBR/PS signalled behind the actual config); skipped if the
        // extension object type is already 5 (presumably Mpeg4AudioObjectIds::Sbr - TODO confirm)
        if(audioCfg->extensionAudioObjectType != 5 && bitReader.bitsAvailable() >= 16) {
            uint16 syncExtensionType = bitReader.readBits<uint16>(11);
            if(syncExtensionType == 0x2B7) {
                if((audioCfg->extensionAudioObjectType = getAudioObjectType()) == Sbr) {
                    if((audioCfg->sbrPresent = bitReader.readBits<byte>(1))) {
                        if((audioCfg->extensionSampleFrequencyIndex = bitReader.readBits<byte>(4)) == 0xF) {
                            audioCfg->extensionSampleFrequency = bitReader.readBits<uint32>(24);
                        }
                        if(bitReader.bitsAvailable() >= 12) {
                            if((syncExtensionType = bitReader.readBits<uint16>(11)) == 0x548) {
                                audioCfg->psPresent = bitReader.readBits<byte>(1);
                            }
                        }
                    }
                }
            }
        }
    } catch(ios_base::failure &) {
        if(m_istream->fail()) {
            throw; // IO error caused by input stream
        }
        // IO error caused by bitReader
        addNotification(NotificationType::Critical, "Audio specific configuration is truncated.", context);
    } catch(NotImplementedException &) {
        addNotification(NotificationType::Information, "Not implemented for the format of audio track.", context);
    }
    return audioCfg;
}
/*!
@ -563,8 +727,8 @@ void Mp4Track::updateChunkOffsets(const vector<int64> &oldMdatOffsets, const vec
if(oldMdatOffsets.size() == 0 || oldMdatOffsets.size() != newMdatOffsets.size()) {
throw InvalidDataException();
}
static const unsigned int stcoDataBegin = 16;
uint64 startPos = m_stcoAtom->startOffset() + stcoDataBegin;
static const unsigned int stcoDataBegin = 8;
uint64 startPos = m_stcoAtom->dataOffset() + stcoDataBegin;
uint64 endPos = startPos + m_stcoAtom->totalSize() - stcoDataBegin;
m_istream->seekg(startPos);
m_ostream->seekp(startPos);
@ -1026,6 +1190,10 @@ void Mp4Track::internalParseHeader()
m_istream->seekg(12, ios_base::cur); // skip reserved bytes
//name = reader.readString(hdlrAtom->size - 16 - 4 - 12);
m_name = reader.readTerminatedString(m_hdlrAtom->totalSize() - 12 - 4 - 12, 0);
// read stco atom (only chunk count)
m_chunkOffsetSize = (m_stcoAtom->id() == Mp4AtomIds::ChunkOffset64) ? 8 : 4;
m_istream->seekg(m_stcoAtom->dataOffset() + 4);
m_chunkCount = reader.readUInt32BE();
// read stsd atom
m_istream->seekg(m_stsdAtom->startOffset() + 12); // seek to beg, skip size, name, version and flags
uint32 entryCount = reader.readUInt32BE();
@ -1040,21 +1208,51 @@ void Mp4Track::internalParseHeader()
m_formatId = interpretIntegerAsString<uint32>(codecConfigContainerAtom->id());
m_format = Mp4FormatIds::fourccToMediaFormat(codecConfigContainerAtom->id());
// parse AVC configuration
m_codecConfigAtom = codecConfigContainerAtom->childById(Mp4AtomIds::AvcConfiguration);
//codecConfigContainerAtom->childById(Mp4AtomIds::AvcConfiguration);
// parse MPEG-4 elementary stream descriptor
m_esDescAtom = codecConfigContainerAtom->childById(Mp4FormatExtensionIds::Mpeg4ElementaryStreamDescriptor);
if(!m_esDescAtom) {
m_esDescAtom = codecConfigContainerAtom->childById(Mp4FormatExtensionIds::Mpeg4ElementaryStreamDescriptor2);
Mp4Atom *esDescAtom = codecConfigContainerAtom->childById(Mp4FormatExtensionIds::Mpeg4ElementaryStreamDescriptor);
if(!esDescAtom) {
esDescAtom = codecConfigContainerAtom->childById(Mp4FormatExtensionIds::Mpeg4ElementaryStreamDescriptor2);
}
try {
parseMpeg4ElementaryStreamInfo();
if(m_esInfo) {
auto mediaFormat = Mpeg4ElementaryStreamObjectIds::streamObjectTypeFormat(m_esInfo->objectTypeId);
if(mediaFormat) {
m_format = mediaFormat;
if(esDescAtom) {
try {
if((m_esInfo = parseMpeg4ElementaryStreamInfo(esDescAtom))) {
m_format += Mpeg4ElementaryStreamObjectIds::streamObjectTypeFormat(m_esInfo->objectTypeId);
m_bitrate = static_cast<double>(m_esInfo->averageBitrate) / 1000;
m_maxBitrate = static_cast<double>(m_esInfo->maxBitrate) / 1000;
if(m_esInfo->audioSpecificConfig) {
// check the audio specific config for useful information
m_format += Mpeg4AudioObjectIds::idToMediaFormat(m_esInfo->audioSpecificConfig->audioObjectType, m_esInfo->audioSpecificConfig->sbrPresent, m_esInfo->audioSpecificConfig->psPresent);
if(m_esInfo->audioSpecificConfig->sampleFrequencyIndex == 0xF) {
m_sampleRate = m_esInfo->audioSpecificConfig->sampleFrequency;
} else if(m_esInfo->audioSpecificConfig->sampleFrequencyIndex < sizeof(sampleRateTable)) {
m_sampleRate = sampleRateTable[m_esInfo->audioSpecificConfig->sampleFrequencyIndex];
} else {
addNotification(NotificationType::Warning, "Audio specific config has invalid sample frequency index.", context);
}
if(m_esInfo->audioSpecificConfig->extensionSampleFrequencyIndex == 0xF) {
m_extensionSampleRate = m_esInfo->audioSpecificConfig->extensionSampleFrequency;
} else if(m_esInfo->audioSpecificConfig->extensionSampleFrequencyIndex < sizeof(sampleRateTable)) {
m_extensionSampleRate = sampleRateTable[m_esInfo->audioSpecificConfig->extensionSampleFrequencyIndex];
} else {
addNotification(NotificationType::Warning, "Audio specific config has invalid extension sample frequency index.", context);
}
}
// check the stream data for missing information
switch(m_format.general) {
case GeneralMediaFormat::Mpeg1Audio: case GeneralMediaFormat::Mpeg2Audio: {
MpegAudioFrame frame;
m_istream->seekg(m_stcoAtom->dataOffset() + 8);
m_istream->seekg(m_chunkOffsetSize == 8 ? reader.readUInt64BE() : reader.readUInt32BE());
frame.parseHeader(*m_istream);
MpegAudioFrameStream::addInfo(frame, *this);
break;
} default:
;
}
}
} catch(Failure &) {
}
} catch(Failure &) {
}
// seek to start offset of additional atom and skip reserved bytes and data reference index
m_istream->seekg(codecConfigContainerAtom->startOffset() + 8 + 6 + 2);
@ -1064,7 +1262,11 @@ void Mp4Track::internalParseHeader()
m_channelCount = reader.readUInt16BE();
m_bitsPerSample = reader.readUInt16BE();
m_istream->seekg(4, ios_base::cur); // skip reserved bytes
m_samplesPerSecond = reader.readUInt32BE() >> 16;
if(!m_sampleRate) {
m_sampleRate = reader.readUInt32BE() >> 16;
} else {
m_istream->seekg(4, ios_base::cur);
}
break;
case MediaType::Video:
m_istream->seekg(16, ios_base::cur); // skip reserved bytes
@ -1333,11 +1535,9 @@ void Mp4Track::internalParseHeader()
}
}
// caluculate average bitrate
m_bitrate = (static_cast<double>(m_size) * 0.0078125) / m_duration.totalSeconds();
// read stco atom (only chunk count)
m_chunkOffsetSize = (m_stcoAtom->id() == Mp4AtomIds::ChunkOffset64) ? 8 : 4;
m_istream->seekg(m_stcoAtom->dataOffset() + 4);
m_chunkCount = reader.readUInt32BE();
if(m_bitrate < 0.01 && m_bitrate > -0.01) {
m_bitrate = (static_cast<double>(m_size) * 0.0078125) / m_duration.totalSeconds();
}
// read stsc atom (only number of entries)
m_istream->seekg(m_stscAtom->dataOffset() + 4);
m_sampleToChunkEntryCount = reader.readUInt32BE();

View File

@ -12,12 +12,46 @@ namespace Media
{
class Mp4Atom;
class Mpeg4Descriptor;
/*!
 * \brief Holds the fields of an MPEG-4 audio specific config as read by
 *        Mp4Track::parseAudioSpecificConfig().
 * \remarks The field widths mentioned below are the numbers of bits the parser reads for the field.
 */
class LIB_EXPORT Mpeg4AudioSpecificConfig
{
public:
Mpeg4AudioSpecificConfig();
byte audioObjectType; /**< audio object type (5 bits; the value 31 escapes to 32 + further 6 bits) */
byte sampleFrequencyIndex; /**< sample frequency index (4 bits); 0xF means sampleFrequency holds the frequency */
uint32 sampleFrequency; /**< explicit sample frequency (24 bits); only read if sampleFrequencyIndex is 0xF */
byte channelConfiguration; /**< channel configuration (4 bits) */
byte extensionAudioObjectType; /**< audio object type of the extension */
bool sbrPresent; /**< whether spectral band replication has been signalled */
bool psPresent; /**< whether parametric stereo has been signalled */
byte extensionSampleFrequencyIndex; /**< sample frequency index of the extension (4 bits); 0xF means extensionSampleFrequency holds the frequency */
uint32 extensionSampleFrequency; /**< explicit sample frequency of the extension (24 bits); only read if extensionSampleFrequencyIndex is 0xF */
byte extensionChannelConfiguration; /**< channel configuration of the extension (4 bits); only read for the object type ErBsac */
bool frameLengthFlag; /**< frame length flag of the GA specific config (1 bit) */
bool dependsOnCoreCoder; /**< "depends on core coder" flag of the GA specific config (1 bit) */
uint16 coreCoderDelay; /**< core coder delay (14 bits); only read if dependsOnCoreCoder is set */
byte extensionFlag; /**< extension flag of the GA specific config (1 bit) */
byte layerNr; /**< layer number (3 bits); only read for the object types AacScalable and ErAacScalable */
byte numOfSubFrame; /**< number of sub frames (5 bits); only read for ErBsac if extensionFlag is set */
uint16 layerLength; /**< layer length (11 bits); only read for ErBsac if extensionFlag is set */
byte resilienceFlags; /**< resilience flags (3 bits); only read for some error resilient object types if extensionFlag is set */
byte epConfig; /**< ep config (2 bits); only read for error resilient object types */
};
class LIB_EXPORT Mpeg4ElementaryStreamInfo
{
public:
Mpeg4ElementaryStreamInfo();
bool dependencyFlag() const;
bool urlFlag() const;
bool ocrFlag() const;
byte priority() const;
byte streamTypeId() const;
bool upstream() const;
uint16 id;
byte esDescFlags;
uint16 dependsOnId;
@ -28,13 +62,7 @@ public:
uint32 bufferSize;
uint32 maxBitrate;
uint32 averageBitrate;
bool dependencyFlag() const;
bool urlFlag() const;
bool ocrFlag() const;
byte priority() const;
byte streamTypeId() const;
bool upstream() const;
std::unique_ptr<Mpeg4AudioSpecificConfig> audioSpecificConfig;
};
inline Mpeg4ElementaryStreamInfo::Mpeg4ElementaryStreamInfo() :
@ -84,33 +112,42 @@ class LIB_EXPORT Mp4Track : public AbstractTrack
public:
Mp4Track(Mp4Atom &trakAtom);
~Mp4Track();
TrackType type() const;
Mp4Atom &trakAtom();
// getter methods specific for MP4 tracks
Mp4Atom &trakAtom();
const std::vector<uint32> &sampleSizes() const;
unsigned int chunkOffsetSize() const;
uint32 chunkCount() const;
uint32 sampleToChunkEntryCount() const;
const Mpeg4ElementaryStreamInfo *mpeg4ElementaryStreamInfo() const;
// methods to parse configuration details from the track header
AvcConfiguration parseAvcConfiguration(Mp4Atom *avcConfigAtom);
std::unique_ptr<Mpeg4ElementaryStreamInfo> parseMpeg4ElementaryStreamInfo(Mp4Atom *esDescAtom);
std::unique_ptr<Mpeg4AudioSpecificConfig> parseAudioSpecificConfig(Mpeg4Descriptor *decSpecInfoDesc);
// methods to read the "index" (chunk offsets and sizes)
std::vector<uint64> readChunkOffsets();
std::vector<std::tuple<uint32, uint32, uint32> > readSampleToChunkTable();
std::vector<uint64> readChunkSizes();
AvcConfiguration parseAvcConfiguration();
bool hasMpeg4ElementaryStreamDesc() const;
void parseMpeg4ElementaryStreamInfo();
void updateChunkOffsets(const std::vector<int64> &oldMdatOffsets, const std::vector<int64> &newMdatOffsets);
void updateChunkOffset(uint32 chunkIndex, uint64 offset);
// methods to make the track header
void makeTrack();
void makeTrackHeader();
void makeMedia();
void makeMediaInfo();
void makeSampleTable();
// methods to update chunk offsets
void updateChunkOffsets(const std::vector<int64> &oldMdatOffsets, const std::vector<int64> &newMdatOffsets);
void updateChunkOffset(uint32 chunkIndex, uint64 offset);
protected:
void internalParseHeader();
private:
// private helper methods
uint64 accumulateSampleSizes(size_t &sampleIndex, size_t count);
void addChunkSizeEntries(std::vector<uint64> &chunkSizeTable, size_t count, size_t &sampleIndex, uint32 sampleCount);
@ -125,8 +162,8 @@ private:
Mp4Atom *m_stscAtom;
Mp4Atom *m_stcoAtom;
Mp4Atom *m_stszAtom;
Mp4Atom *m_codecConfigAtom;
Mp4Atom *m_esDescAtom;
//Mp4Atom *m_codecConfigAtom;
//Mp4Atom *m_esDescAtom;
uint16 m_framesPerSample;
std::vector<uint32> m_sampleSizes;
unsigned int m_chunkOffsetSize;
@ -198,14 +235,6 @@ inline const Mpeg4ElementaryStreamInfo *Mp4Track::mpeg4ElementaryStreamInfo() co
return m_esInfo.get();
}
/*!
 * \brief Indicates whether an MPEG-4 elementary stream descriptor atom is known for the track.
 * \returns Returns true if the pointer to the descriptor atom is set; otherwise returns false.
 */
inline bool Mp4Track::hasMpeg4ElementaryStreamDesc() const
{
    const bool descriptorAtomKnown = (nullptr != m_esDescAtom);
    return descriptorAtomKnown;
}
}
#endif // MP4TRACK_H

View File

@ -94,7 +94,11 @@ inline uint64 Mpeg4Descriptor::firstChildOffset() const
*/
inline Mpeg4Descriptor::implementationType *Mpeg4Descriptor::denoteFirstChild(uint32 relativeFirstChildOffset)
{
    // Create the child only if more than four bytes lie between the relative offset and
    // the end of the data. The child must not be constructed before this check: doing so
    // would create a descriptor at an offset that may be out of range and would allocate
    // the child twice when the check passes.
    if(relativeFirstChildOffset + 4 < dataSize()) {
        m_firstChild.reset(new implementationType(static_cast<implementationType &>(*this), startOffset() + relativeFirstChildOffset));
    } else {
        // no room for a child: drop a previously denoted child so that nullptr is returned
        m_firstChild.reset();
    }
    return m_firstChild.get();
}

View File

@ -38,9 +38,20 @@ TrackType MpegAudioFrameStream::type() const
return TrackType::MpegAudioFrameStream;
}
/*!
 * \brief Transfers the properties of the specified \a frame to the specified \a track.
 *
 * The version, the format, the channel count and the sample rate of \a track are
 * overwritten with the values obtained from \a frame.
 */
void MpegAudioFrameStream::addInfo(const MpegAudioFrame &frame, AbstractTrack &track)
{
    track.m_version = frame.mpegVersion();
    track.m_format = MediaFormat(GeneralMediaFormat::Mpeg1Audio, frame.layer());
    // a single channel frame counts as one channel; every other channel mode counts as two
    if(frame.channelMode() == MpegChannelMode::SingleChannel) {
        track.m_channelCount = 1;
    } else {
        track.m_channelCount = 2;
    }
    track.m_sampleRate = frame.samperate();
}
void MpegAudioFrameStream::internalParseHeader()
{
const string context("parsing MPEG audio frame header");
static const string context("parsing MPEG audio frame header");
if(!m_istream) {
throw NoDataFoundException();
}
@ -53,15 +64,13 @@ void MpegAudioFrameStream::internalParseHeader()
}
m_istream->seekg(m_startOffset, ios_base::beg);
// parse frame header
MpegAudioFrame frame;
m_frames.emplace_back();
MpegAudioFrame &frame = m_frames.back();
frame.parseHeader(*m_istream);
m_version = frame.mpegVersion();
m_format = MediaFormat(GeneralMediaFormat::Mpeg1Audio, frame.layer());
m_channelCount = frame.channelMode() == MpegChannelMode::SingleChannel ? 1 : 2;
m_samplesPerSecond = frame.samperate();
addInfo(frame, *this);
if(frame.isXingBytesfieldPresent()) {
uint32 xingSize = frame.xingBytesfield();
if(xingSize != m_size) {
if(m_size && xingSize != m_size) {
addNotification(NotificationType::Warning, "Real length MPEG of audio frames is not equal with value provided by Xing header. The Xing header value will be used.", context);
m_size = xingSize;
}
@ -71,7 +80,6 @@ void MpegAudioFrameStream::internalParseHeader()
: frame.bitrate();
m_bytesPerSecond = m_bitrate * 125;
m_duration = TimeSpan::fromSeconds(static_cast<double>(m_size) / (m_bitrate * 128.0));
m_frames.push_back(frame);
}
}

View File

@ -16,6 +16,9 @@ public:
~MpegAudioFrameStream();
TrackType type() const;
static void addInfo(const MpegAudioFrame &frame, AbstractTrack &track);
protected:
void internalParseHeader();

View File

@ -78,7 +78,7 @@ void OggStream::internalParseHeader()
VorbisIdentificationHeader ind;
ind.parseHeader(iterator);
m_channelCount = ind.channels();
m_samplesPerSecond = ind.sampleRate();
m_sampleRate = ind.sampleRate();
if(ind.nominalBitrate()) {
m_bitrate = ind.nominalBitrate();
} else if(ind.maxBitrate() == ind.minBitrate()) {
@ -96,7 +96,7 @@ void OggStream::internalParseHeader()
auto lastPage = find_if(pages.crbegin(), pages.crend(), pred);
if(firstPage != pages.cend() && lastPage != pages.crend()) {
m_sampleCount = lastPage->absoluteGranulePosition() - firstPage->absoluteGranulePosition();
m_duration = TimeSpan::fromSeconds(static_cast<double>(m_sampleCount) / m_samplesPerSecond);
m_duration = TimeSpan::fromSeconds(static_cast<double>(m_sampleCount) / m_sampleRate);
}
}
hasIdentificationHeader = true;

View File

@ -58,11 +58,11 @@ void WaveAudioStream::internalParseHeader()
m_format = GeneralMediaFormat::Unknown;
}
m_channelCount = m_reader.readUInt16LE();
m_samplesPerSecond = m_reader.readUInt32LE();
m_sampleRate = m_reader.readUInt32LE();
m_bytesPerSecond = m_reader.readUInt32LE();
m_chunkSize = m_reader.readUInt16LE();
m_bitsPerSample = m_reader.readUInt16LE();
m_bitrate = m_bitsPerSample * m_samplesPerSecond * m_channelCount;
m_bitrate = m_bitsPerSample * m_sampleRate * m_channelCount;
} else {
m_format = GeneralMediaFormat::Unknown;
}
@ -72,7 +72,7 @@ void WaveAudioStream::internalParseHeader()
if(m_reader.readUInt32BE() == 0x64617461u) {
m_size = m_reader.readUInt32LE();
m_sampleCount = m_size / m_chunkSize;
m_duration = ChronoUtilities::TimeSpan::fromSeconds(static_cast<double>(m_sampleCount) / static_cast<double>(m_samplesPerSecond));
m_duration = ChronoUtilities::TimeSpan::fromSeconds(static_cast<double>(m_sampleCount) / static_cast<double>(m_sampleRate));
} else {
throw NoDataFoundException();
}