From c4adad4297efbb5176650e8ee774d29ee94b4a1c Mon Sep 17 00:00:00 2001 From: Martchus Date: Sat, 28 Jul 2018 14:56:00 +0200 Subject: [PATCH] Detect AV1 in IVF stream --- CMakeLists.txt | 4 ++ abstracttrack.h | 1 + ivf/ivfframe.cpp | 29 +++++++++++++ ivf/ivfframe.h | 37 +++++++++++++++++ ivf/ivfstream.cpp | 66 ++++++++++++++++++++++++++++++ ivf/ivfstream.h | 47 +++++++++++++++++++++ mediafileinfo.cpp | 5 +++ mp4/mp4ids.cpp | 10 ++++- mp4/mp4ids.h | 3 ++ mp4/mp4track.cpp | 2 +- mpegaudio/mpegaudioframe.cpp | 3 +- mpegaudio/mpegaudioframe.h | 4 +- mpegaudio/mpegaudioframestream.cpp | 2 +- scripts/download_testfiles.sh | 1 + signature.cpp | 7 ++++ signature.h | 1 + wav/waveaudiostream.cpp | 2 +- 17 files changed, 216 insertions(+), 8 deletions(-) create mode 100644 ivf/ivfframe.cpp create mode 100644 ivf/ivfframe.h create mode 100644 ivf/ivfstream.cpp create mode 100644 ivf/ivfstream.h diff --git a/CMakeLists.txt b/CMakeLists.txt index dc7299e..76e7f16 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,8 @@ set(HEADER_FILES id3/id3v2frame.h id3/id3v2frameids.h id3/id3v2tag.h + ivf/ivfframe.h + ivf/ivfstream.h localeawarestring.h margin.h matroska/ebmlelement.h @@ -102,6 +104,8 @@ set(SRC_FILES id3/id3v2frame.cpp id3/id3v2frameids.cpp id3/id3v2tag.cpp + ivf/ivfframe.cpp + ivf/ivfstream.cpp localeawarestring.cpp matroska/ebmlelement.cpp matroska/matroskaattachment.cpp diff --git a/abstracttrack.h b/abstracttrack.h index 191854e..86fdfc0 100644 --- a/abstracttrack.h +++ b/abstracttrack.h @@ -34,6 +34,7 @@ enum class TrackType { OggStream, /**< The track is a TagParser::OggStream. */ AdtsStream, /**< The track is a TagParser::AdtsStream. */ FlacStream, /**< The track is a TagParser::FlacStream. */ + IvfStream, /**< The track is a TagParser::IvfStream. 
*/ }; class TAG_PARSER_EXPORT AbstractTrack { diff --git a/ivf/ivfframe.cpp b/ivf/ivfframe.cpp new file mode 100644 index 0000000..a0e175e --- /dev/null +++ b/ivf/ivfframe.cpp @@ -0,0 +1,29 @@ +#include "./ivfframe.h" + +#include "../exceptions.h" + +#include + +using namespace std; +using namespace IoUtilities; + +namespace TagParser { + +/*! + * \class TagParser::IvfFrame + * \brief The IvfFrame class is used to parse IVF frames. + * \sa https://wiki.multimedia.cx/index.php/IVF + */ + +/*! + * \brief Parses the header read using the specified \a reader. + */ +void IvfFrame::parseHeader(IoUtilities::BinaryReader &reader, Diagnostics &diag) +{ + VAR_UNUSED(diag) + startOffset = static_cast(reader.stream()->tellg()); + size = reader.readUInt32BE(); + timestamp = reader.readUInt64BE(); +} + +} // namespace TagParser diff --git a/ivf/ivfframe.h b/ivf/ivfframe.h new file mode 100644 index 0000000..8a34b3c --- /dev/null +++ b/ivf/ivfframe.h @@ -0,0 +1,37 @@ +#ifndef TAG_PARSER_IVFRAME_H +#define TAG_PARSER_IVFRAME_H + +#include "../diagnostics.h" + +#include + +namespace IoUtilities { +class BinaryReader; +} + +namespace TagParser { + +class TAG_PARSER_EXPORT IvfFrame { +public: + constexpr IvfFrame(); + void parseHeader(IoUtilities::BinaryReader &reader, Diagnostics &diag); + +private: + uint64 startOffset; + uint64 timestamp; + uint32 size; +}; + +/*! + * \brief Constructs a new frame. + */ +constexpr IvfFrame::IvfFrame() + : startOffset(0) + , timestamp(0) + , size(0) +{ +} + +} // namespace TagParser + +#endif // TAG_PARSER_IVFRAME_H diff --git a/ivf/ivfstream.cpp b/ivf/ivfstream.cpp new file mode 100644 index 0000000..9ffcb07 --- /dev/null +++ b/ivf/ivfstream.cpp @@ -0,0 +1,66 @@ +#include "./ivfstream.h" + +#include "../mp4/mp4ids.h" + +#include "../exceptions.h" + +#include +#include + +#include + +using namespace std; +using namespace ChronoUtilities; +using namespace ConversionUtilities; + +namespace TagParser { + +/*! 
+ * \class TagParser::IvfStream + * \brief Implementation of TagParser::AbstractTrack for IVF streams. + * \sa https://wiki.multimedia.cx/index.php/IVF + */ + +void IvfStream::internalParseHeader(Diagnostics &diag) +{ + static const string context("parsing IVF header"); + if (!m_istream) { + throw NoDataFoundException(); + } + + // check signature and version + if (m_reader.readUInt32BE() != 0x444B4946u) { + diag.emplace_back(DiagLevel::Critical, "Signature not \"DKIF\".", context); + throw InvalidDataException(); + } + const auto version = m_reader.readUInt16LE(); + m_version = version; + if (version != 0) { + diag.emplace_back(DiagLevel::Warning, argsToString("Version ", version, " is not supported."), context); + } + + // read remaining header + m_headerLength = m_reader.readUInt16LE(); + const auto formatId = m_reader.readUInt32BE(); + m_formatId = interpretIntegerAsString(formatId); + m_pixelSize.setWidth(m_reader.readUInt16LE()); + m_pixelSize.setHeight(m_reader.readUInt16LE()); + m_fps = m_reader.readUInt32LE(); + m_timeScale = m_reader.readUInt32LE(); + m_sampleCount = m_reader.readUInt32LE(); + + // compute further values + m_format = FourccIds::fourccToMediaFormat(formatId); + m_duration = TimeSpan::fromSeconds(static_cast(m_sampleCount) / m_fps); + + // skip unused bytes + m_istream->seekg(4, ios_base::cur); +} + +void IvfStream::readFrame(Diagnostics &diag) +{ + m_frames.emplace_back(); + m_frames.back().parseHeader(m_reader, diag); +} + +} // namespace TagParser diff --git a/ivf/ivfstream.h b/ivf/ivfstream.h new file mode 100644 index 0000000..7aa1d74 --- /dev/null +++ b/ivf/ivfstream.h @@ -0,0 +1,47 @@ +#ifndef TAG_PARSER_IVFSTREAM_H +#define TAG_PARSER_IVFSTREAM_H + +#include "./ivfframe.h" + +#include "../abstracttrack.h" + +namespace TagParser { + +class TAG_PARSER_EXPORT IvfStream : public AbstractTrack { +public: + IvfStream(std::iostream &stream, uint64 startOffset); + ~IvfStream() override; + + TrackType type() const override; + + void 
readFrame(Diagnostics &diag); + +protected: + void internalParseHeader(Diagnostics &diag) override; + +private: + std::vector m_frames; + uint16 m_headerLength; +}; + +/*! + * \brief Constructs a new track for the \a stream at the specified \a startOffset. + */ +inline IvfStream::IvfStream(std::iostream &stream, uint64 startOffset) + : AbstractTrack(stream, startOffset) +{ + m_mediaType = MediaType::Video; +} + +inline IvfStream::~IvfStream() +{ +} + +inline TrackType IvfStream::type() const +{ + return TrackType::IvfStream; +} + +} // namespace TagParser + +#endif // TAG_PARSER_IVFSTREAM_H diff --git a/mediafileinfo.cpp b/mediafileinfo.cpp index a21ea81..15bb57c 100644 --- a/mediafileinfo.cpp +++ b/mediafileinfo.cpp @@ -16,6 +16,8 @@ #include "./adts/adtsstream.h" +#include "./ivf/ivfstream.h" + #include "./mp4/mp4atom.h" #include "./mp4/mp4container.h" #include "./mp4/mp4ids.h" @@ -316,6 +318,9 @@ void MediaFileInfo::parseTracks(Diagnostics &diag) case ContainerFormat::Flac: m_singleTrack = make_unique(*this, m_containerOffset); break; + case ContainerFormat::Ivf: + m_singleTrack = make_unique(stream(), m_containerOffset); + break; case ContainerFormat::MpegAudioFrames: m_singleTrack = make_unique(stream(), m_containerOffset); break; diff --git a/mp4/mp4ids.cpp b/mp4/mp4ids.cpp index ff7d908..d23e9bb 100644 --- a/mp4/mp4ids.cpp +++ b/mp4/mp4ids.cpp @@ -57,7 +57,7 @@ MediaFormat fourccToMediaFormat(uint32 fourccId) return GeneralMediaFormat::Mpeg4TimedText; case Hevc1: case Hevc2: - return MediaFormat(GeneralMediaFormat::Hevc); + return GeneralMediaFormat::Hevc; case Avc1: case Avc2: case Avc3: @@ -68,7 +68,9 @@ MediaFormat fourccToMediaFormat(uint32 fourccId) case H264Decoder4: case H264Decoder5: case H264Decoder6: - return MediaFormat(GeneralMediaFormat::Avc); + return GeneralMediaFormat::Avc; + case Av1: + return GeneralMediaFormat::Av1; case Divx4Decoder1: case Divx4Decoder2: case H263Quicktime: @@ -169,6 +171,10 @@ MediaFormat fourccToMediaFormat(uint32 
fourccId) case MsMpeg4V3Decoder1: case MsMpeg4V3Decoder2: return MediaFormat(GeneralMediaFormat::MicrosoftMpeg4, 3); + case Vp8: + return GeneralMediaFormat::Vp8; + case Vp9: + return GeneralMediaFormat::Vp9; case WavPack: return MediaFormat(GeneralMediaFormat::WavPack); case WindowsMediaVideoV17: diff --git a/mp4/mp4ids.h b/mp4/mp4ids.h index a509eb3..6b61772 100644 --- a/mp4/mp4ids.h +++ b/mp4/mp4ids.h @@ -217,6 +217,7 @@ enum KnownValue : uint32 { Avc2 = 0x61766332, /**< H.264/MPEG-4 AVC video */ Avc3 = 0x61766333, /**< H.264/MPEG-4 AVC video */ Avc4 = 0x61766334, /**< H.264/MPEG-4 AVC video */ + Av1 = 0x41563031, /**< AV1 video */ Blur = 0x626C7572, Bps8 = 0x38627073, BrightnessAndContrast = 0x6272636F, @@ -375,6 +376,8 @@ enum KnownValue : uint32 { Ulaw21 = 0x756C6177, VcmImageCodec = 0x4D6A7067, Vdva = 0x76647661, + Vp8 = 0x56503830, /**< VP8 video */ + Vp9 = 0x56503930, /**< VP9 video */ WavPack = 0x5756504B, WindowsMediaAudio = 0x6F776D61, /**< ? */ WindowsMediaAudio7 = 0x574D4131, diff --git a/mp4/mp4track.cpp b/mp4/mp4track.cpp index 1a88d5d..d5760e7 100644 --- a/mp4/mp4track.cpp +++ b/mp4/mp4track.cpp @@ -1766,7 +1766,7 @@ void Mp4Track::internalParseHeader(Diagnostics &diag) MpegAudioFrame frame; m_istream->seekg(m_stcoAtom->dataOffset() + 8); m_istream->seekg(m_chunkOffsetSize == 8 ? reader.readUInt64BE() : reader.readUInt32BE()); - frame.parseHeader(reader); + frame.parseHeader(reader, diag); MpegAudioFrameStream::addInfo(frame, *this); break; } diff --git a/mpegaudio/mpegaudioframe.cpp b/mpegaudio/mpegaudioframe.cpp index 1fbaa84..c1996bd 100644 --- a/mpegaudio/mpegaudioframe.cpp +++ b/mpegaudio/mpegaudioframe.cpp @@ -48,10 +48,11 @@ const uint32 MpegAudioFrame::m_sync = 0xFFE00000u; * \throws Throws InvalidDataException if the data read from the stream is * no valid frame header. 
*/ -void MpegAudioFrame::parseHeader(BinaryReader &reader) +void MpegAudioFrame::parseHeader(BinaryReader &reader, Diagnostics &diag) { m_header = reader.readUInt32BE(); if (!isValid()) { + diag.emplace_back(DiagLevel::Critical, "Header is invalid.", "parsing MPEG audio frame header"); throw InvalidDataException(); } reader.stream()->seekg(m_xingHeaderOffset - 4, ios_base::cur); diff --git a/mpegaudio/mpegaudioframe.h b/mpegaudio/mpegaudioframe.h index b310d4a..c283ab0 100644 --- a/mpegaudio/mpegaudioframe.h +++ b/mpegaudio/mpegaudioframe.h @@ -1,7 +1,7 @@ #ifndef TAG_PARSER_MP3FRAMEAUDIOSTREAM_H #define TAG_PARSER_MP3FRAMEAUDIOSTREAM_H -#include "../global.h" +#include "../diagnostics.h" #include @@ -38,7 +38,7 @@ class TAG_PARSER_EXPORT MpegAudioFrame { public: constexpr MpegAudioFrame(); - void parseHeader(IoUtilities::BinaryReader &reader); + void parseHeader(IoUtilities::BinaryReader &reader, Diagnostics &diag); constexpr bool isValid() const; double mpegVersion() const; diff --git a/mpegaudio/mpegaudioframestream.cpp b/mpegaudio/mpegaudioframestream.cpp index 07c19e7..157aba2 100644 --- a/mpegaudio/mpegaudioframestream.cpp +++ b/mpegaudio/mpegaudioframestream.cpp @@ -46,7 +46,7 @@ void MpegAudioFrameStream::internalParseHeader(Diagnostics &diag) // parse frame header m_frames.emplace_back(); MpegAudioFrame &frame = m_frames.back(); - frame.parseHeader(m_reader); + frame.parseHeader(m_reader, diag); addInfo(frame, *this); if (frame.isXingBytesfieldPresent()) { uint32 xingSize = frame.xingBytesfield(); diff --git a/scripts/download_testfiles.sh b/scripts/download_testfiles.sh index fefa033..ee38a98 100755 --- a/scripts/download_testfiles.sh +++ b/scripts/download_testfiles.sh @@ -116,6 +116,7 @@ convert() { convert flac/test.flac ffmpeg -i mtx-test-data/alac/othertest-itunes.m4a -c:a flac flac/test.flac convert flac/test.ogg ffmpeg -i flac/test.flac -vn -c:a copy flac/test.ogg convert mkv/av1_test.mkv ffmpeg -i matroska_wave1/test1.mkv -t 1 -c:v libaom-av1 -crf 
30 -cpu-used 5 -an -strict experimental mkv/av1_test.mkv +convert misc/av1.ivf ffmpeg -i mkv/av1_test.mkv -c copy misc/av1.ivf convert mkv/nested-tags.mkv \ mkvmerge --ui-language en_US \ --output 'mkv/nested-tags.mkv' \ diff --git a/signature.cpp b/signature.cpp index ee57401..7bfa474 100644 --- a/signature.cpp +++ b/signature.cpp @@ -43,6 +43,7 @@ enum Sig32 : uint32 { Dirac = 0x42424344u, Elf = 0x7F454C46u, Flac = 0x664C6143u, + Ivf = 0x444B4946u, JavaClassFile = 0xCAFEBABEu, Ebml = 0x1A45DFA3u, MonkeysAudio = 0x4D414320u, @@ -159,6 +160,8 @@ ContainerFormat parseSignature(const char *buffer, int bufferSize) return ContainerFormat::Elf; case Flac: return ContainerFormat::Flac; + case Ivf: + return ContainerFormat::Ivf; case JavaClassFile: return ContainerFormat::JavaClassFile; case Ebml: @@ -260,6 +263,8 @@ const char *containerFormatAbbreviation(ContainerFormat containerFormat, MediaTy case ContainerFormat::Gif87a: case ContainerFormat::Gif89a: return "gif"; + case ContainerFormat::Ivf: + return "ivf"; case ContainerFormat::JavaClassFile: return "class"; case ContainerFormat::Jpeg: @@ -380,6 +385,8 @@ const char *containerFormatName(ContainerFormat containerFormat) case ContainerFormat::Gif87a: case ContainerFormat::Gif89a: return "Graphics Interchange Format"; + case ContainerFormat::Ivf: + return "IVF"; case ContainerFormat::JavaClassFile: return "Java class file"; case ContainerFormat::Jpeg: diff --git a/signature.h b/signature.h index 9a08d13..dc02185 100644 --- a/signature.h +++ b/signature.h @@ -30,6 +30,7 @@ enum class ContainerFormat : unsigned int { Gif89a, /**< Graphics Interchange Format (1989) */ Gzip, /**< gzip compressed file */ Id2v2Tag, /**< file holding an ID2v2 tag only */ + Ivf, /**< IVF (simple file format that transports raw VP8/VP9/AV1 data) */ JavaClassFile, /**< Java class file */ Jpeg, /**< JPEG File Interchange Format */ Lha, /**< LHA */ diff --git a/wav/waveaudiostream.cpp b/wav/waveaudiostream.cpp index 8ef59eb..9466ca7 100644 --- 
a/wav/waveaudiostream.cpp +++ b/wav/waveaudiostream.cpp @@ -134,7 +134,7 @@ void WaveAudioStream::internalParseHeader(Diagnostics &diag) } m_istream->seekg(static_cast(m_dataOffset)); MpegAudioFrame frame; - frame.parseHeader(m_reader); + frame.parseHeader(m_reader, diag); MpegAudioFrameStream::addInfo(frame, *this); m_bitrate = frame.isXingFramefieldPresent() ? ((static_cast(m_size) * 8.0)