diff --git a/src/PyNvCodec/__init__.pyi b/src/PyNvCodec/__init__.pyi
index 13bac71b..9e4a28c5 100644
--- a/src/PyNvCodec/__init__.pyi
+++ b/src/PyNvCodec/__init__.pyi
@@ -351,6 +351,7 @@ class PyFFmpegDemuxer:
 
 class PyFfmpegDecoder:
     def __init__(self, input: str, opts: Dict[str,str], gpu_id: int = ...) -> None: ...
+    def AvgFramerate(self) -> float: ...
     def Codec(self) -> CudaVideoCodec: ...
     def ColorRange(self) -> ColorRange: ...
     def ColorSpace(self) -> ColorSpace: ...
@@ -360,6 +361,8 @@ class PyFfmpegDecoder:
     def Framerate(self) -> float: ...
     def GetMotionVectors(self) -> numpy.ndarray[MotionVector]: ...
     def Height(self) -> int: ...
+    def Numframes(self) -> int: ...
+    def Timebase(self) -> float: ...
     def Width(self) -> int: ...
 
 class PyFrameUploader:
diff --git a/src/PyNvCodec/inc/PyNvCodec.hpp b/src/PyNvCodec/inc/PyNvCodec.hpp
index 1fbaabd4..101e823c 100644
--- a/src/PyNvCodec/inc/PyNvCodec.hpp
+++ b/src/PyNvCodec/inc/PyNvCodec.hpp
@@ -295,6 +295,9 @@ class PyFfmpegDecoder {
   uint32_t Width() const;
   uint32_t Height() const;
   double Framerate() const;
+  double AvgFramerate() const;
+  double Timebase() const;
+  uint32_t Numframes() const;
   ColorSpace Color_Space() const;
   ColorRange Color_Range() const;
   cudaVideoCodec Codec() const;
diff --git a/src/PyNvCodec/src/PyFFMpegDecoder.cpp b/src/PyNvCodec/src/PyFFMpegDecoder.cpp
index 08043a2a..48514234 100644
--- a/src/PyNvCodec/src/PyFFMpegDecoder.cpp
+++ b/src/PyNvCodec/src/PyFFMpegDecoder.cpp
@@ -188,6 +188,27 @@ cudaVideoCodec PyFfmpegDecoder::Codec() const
   return params.videoContext.codec;
 };
 
+double PyFfmpegDecoder::AvgFramerate() const
+{
+  MuxingParams params;
+  upDecoder->GetParams(params);
+  return params.videoContext.avgFrameRate;
+};
+
+double PyFfmpegDecoder::Timebase() const
+{
+  MuxingParams params;
+  upDecoder->GetParams(params);
+  return params.videoContext.timeBase;
+};
+
+uint32_t PyFfmpegDecoder::Numframes() const
+{
+  MuxingParams params;
+  upDecoder->GetParams(params);
+  return params.videoContext.num_frames;
+};
+
 Pixel_Format PyFfmpegDecoder::PixelFormat() const
 {
   MuxingParams params;
@@ -246,6 +267,19 @@ void Init_PyFFMpegDecoder(py::module& m)
       .def("Framerate", &PyFfmpegDecoder::Framerate,
            R"pbdoc(
         Return encoded video file framerate.
+    )pbdoc")
+      .def("AvgFramerate", &PyFfmpegDecoder::AvgFramerate,
+           R"pbdoc(
+        Return encoded video file average framerate.
+    )pbdoc")
+      .def("Timebase", &PyFfmpegDecoder::Timebase,
+           R"pbdoc(
+        Return encoded video file time base.
+    )pbdoc")
+      .def("Numframes", &PyFfmpegDecoder::Numframes,
+           R"pbdoc(
+        Return number of video frames in encoded video file.
+        Please note that some video containers don't store this information.
)pbdoc") .def("ColorSpace", &PyFfmpegDecoder::Color_Space, R"pbdoc( diff --git a/src/TC/src/FFmpegDemuxer.cpp b/src/TC/src/FFmpegDemuxer.cpp index 878b5e2b..e33b5ee6 100644 --- a/src/TC/src/FFmpegDemuxer.cpp +++ b/src/TC/src/FFmpegDemuxer.cpp @@ -521,7 +521,6 @@ FFmpegDemuxer::FFmpegDemuxer(AVFormatContext *fmtcx) : fmtc(fmtcx) { throw runtime_error(ss.str()); } - //gop_size = fmtc->streams[videoStream]->codec->gop_size; eVideoCodec = fmtc->streams[videoStream]->codecpar->codec_id; width = fmtc->streams[videoStream]->codecpar->width; height = fmtc->streams[videoStream]->codecpar->height; diff --git a/src/TC/src/FfmpegSwDecoder.cpp b/src/TC/src/FfmpegSwDecoder.cpp index 8b6a3041..57f2d215 100644 --- a/src/TC/src/FfmpegSwDecoder.cpp +++ b/src/TC/src/FfmpegSwDecoder.cpp @@ -65,7 +65,8 @@ struct FfmpegDecodeFrame_Impl { map side_data; int video_stream_idx = -1; - bool end_encode = false; + bool end_decode = false; + bool eof = false; FfmpegDecodeFrame_Impl(const char* URL, AVDictionary* pOptions) { @@ -251,9 +252,9 @@ struct FfmpegDecodeFrame_Impl { return true; } - bool DecodeSingleFrame() +bool DecodeSingleFrame() { - if (end_encode) { + if (end_decode) { return false; } @@ -261,22 +262,31 @@ struct FfmpegDecodeFrame_Impl { do { // Read packets from stream until we find a video packet; do { + if (eof) { + break; + } + auto ret = av_read_frame(fmt_ctx, &pktSrc); - if (ret < 0) { - // Flush decoder; - end_encode = true; - return DecodeSinglePacket(nullptr); + + if (AVERROR_EOF == ret) { + eof = true; + break; + } else if (ret < 0) { + end_decode = true; + return false; } } while (pktSrc.stream_index != video_stream_idx); - auto status = DecodeSinglePacket(&pktSrc); + auto status = DecodeSinglePacket(eof ? nullptr : &pktSrc); switch (status) { case DEC_SUCCESS: return true; case DEC_ERROR: + end_decode = true; return false; case DEC_EOS: + end_decode = true; return false; case DEC_MORE: continue; @@ -332,7 +342,10 @@ struct FfmpegDecodeFrame_Impl { DECODE_STATUS DecodeSinglePacket(const AVPacket* pktSrc) { auto res = avcodec_send_packet(avctx, pktSrc); - if (res < 0) { + if (AVERROR_EOF == res) { + // Flush decoder; + res = 0; + } else if (res < 0) { cerr << "Error while sending a packet to the decoder" << endl; cerr << "Error description: " << AvErrorToString(res) << endl; return DEC_ERROR; @@ -341,7 +354,6 @@ struct FfmpegDecodeFrame_Impl { while (res >= 0) { res = avcodec_receive_frame(avctx, frame); if (res == AVERROR_EOF) { - cerr << "Input file is over" << endl; return DEC_EOS; } else if (res == AVERROR(EAGAIN)) { return DEC_MORE; @@ -394,13 +406,28 @@ TaskExecStatus FfmpegDecodeFrame::Run() void FfmpegDecodeFrame::GetParams(MuxingParams& params) { memset((void*)¶ms, 0, sizeof(params)); + auto fmtc = pImpl->fmt_ctx; + auto videoStream = + av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0); + if (videoStream < 0) { + stringstream ss; + ss << __FUNCTION__ << ": can't find video stream in input file." 
<< endl; + throw runtime_error(ss.str()); + } - params.videoContext.width = pImpl->avctx->width; - params.videoContext.height = pImpl->avctx->height; - params.videoContext.gop_size = pImpl->avctx->gop_size; + params.videoContext.width = fmtc->streams[videoStream]->codecpar->width; + params.videoContext.height = fmtc->streams[videoStream]->codecpar->height; params.videoContext.frameRate = - (1.0 * pImpl->avctx->framerate.num) / (1.0 * pImpl->avctx->framerate.den); + (double)fmtc->streams[videoStream]->r_frame_rate.num / + (double)fmtc->streams[videoStream]->r_frame_rate.den; + params.videoContext.avgFrameRate = + (double)fmtc->streams[videoStream]->avg_frame_rate.num / + (double)fmtc->streams[videoStream]->avg_frame_rate.den; + params.videoContext.timeBase = + (double)fmtc->streams[videoStream]->time_base.num / + (double)fmtc->streams[videoStream]->time_base.den; params.videoContext.codec = FFmpeg2NvCodecId(pImpl->avctx->codec_id); + params.videoContext.num_frames = fmtc->streams[videoStream]->nb_frames; switch (pImpl->avctx->pix_fmt) { case AV_PIX_FMT_YUVJ420P: @@ -432,7 +459,7 @@ void FfmpegDecodeFrame::GetParams(MuxingParams& params) break; } - switch (pImpl->avctx->colorspace) { + switch (fmtc->streams[videoStream]->codecpar->color_space) { case AVCOL_SPC_BT709: params.videoContext.color_space = BT_709; break; @@ -445,7 +472,7 @@ void FfmpegDecodeFrame::GetParams(MuxingParams& params) break; } - switch (pImpl->avctx->color_range) { + switch (fmtc->streams[videoStream]->codecpar->color_range) { case AVCOL_RANGE_MPEG: params.videoContext.color_range = MPEG; break; diff --git a/tests/gt_files.json b/tests/gt_files.json new file mode 100644 index 00000000..80ae608e --- /dev/null +++ b/tests/gt_files.json @@ -0,0 +1,27 @@ +{ + "basic": { + "uri": "test.mp4", + "width": 848, + "height": 464, + "is_vfr": false, + "pix_fmt": "PixelFormat.NV12", + "framerate": 30, + "num_frames": 96, + "timebase": 8.1380e-5, + "color_space": "ColorSpace.BT_709", + "color_range": "ColorRange.MPEG" + }, + "res_change": { + "uri": "test_res_change.h264", + "width": 848, + "height": 464, + "res_change_factor": 0.5, + "is_vfr": false, + "pix_fmt": "PixelFormat.NV12", + "framerate": 30, + "num_frames": 47, + "timebase": 8.1380e-5, + "color_space": "ColorSpace.BT_709", + "color_range": "ColorRange.MPEG" + } +} \ No newline at end of file diff --git a/tests/test_PyFFmpegDecoder.py b/tests/test_PyFFmpegDecoder.py new file mode 100644 index 00000000..86d816af --- /dev/null +++ b/tests/test_PyFFmpegDecoder.py @@ -0,0 +1,107 @@ +# +# Copyright 2023 Vision Labs LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Starting from Python 3.8 DLL search policy has changed. +# We need to add path to CUDA DLLs explicitly. 
+import sys
+import os
+from os.path import join, dirname
+
+
+if os.name == "nt":
+    # Add CUDA_PATH env variable
+    cuda_path = os.environ["CUDA_PATH"]
+    if cuda_path:
+        os.add_dll_directory(cuda_path)
+    else:
+        print("CUDA_PATH environment variable is not set.", file=sys.stderr)
+        print("Can't set CUDA DLLs search path.", file=sys.stderr)
+        exit(1)
+
+    # Add PATH as well for minor CUDA releases
+    sys_path = os.environ["PATH"]
+    if sys_path:
+        paths = sys_path.split(";")
+        for path in paths:
+            if os.path.isdir(path):
+                os.add_dll_directory(path)
+    else:
+        print("PATH environment variable is not set.", file=sys.stderr)
+        exit(1)
+
+import PyNvCodec as nvc
+import numpy as np
+import unittest
+import random
+import json
+from pydantic import BaseModel
+
+class GroundTruth(BaseModel):
+    uri: str
+    width: int
+    height: int
+    is_vfr: bool
+    pix_fmt: str
+    framerate: float
+    num_frames: int
+    timebase: float
+    color_space: str
+    color_range: str
+
+
+class TestDecoderBasic(unittest.TestCase):
+    def __init__(self, methodName):
+        super().__init__(methodName=methodName)
+
+        f = open("gt_files.json")
+        data = json.load(f)["basic"]
+        self.gtInfo = GroundTruth(**data)
+        self.ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {})
+
+    def test_width(self):
+        self.assertEqual(self.gtInfo.width, self.ffDec.Width())
+
+    def test_height(self):
+        self.assertEqual(self.gtInfo.height, self.ffDec.Height())
+
+    def test_color_space(self):
+        self.assertEqual(self.gtInfo.color_space, str(self.ffDec.ColorSpace()))
+
+    def test_color_range(self):
+        self.assertEqual(self.gtInfo.color_range, str(self.ffDec.ColorRange()))
+
+    def test_format(self):
+        self.assertEqual(self.gtInfo.pix_fmt, str(self.ffDec.Format()))
+
+    def test_framerate(self):
+        self.assertEqual(self.gtInfo.framerate, self.ffDec.Framerate())
+
+    def test_avgframerate(self):
+        self.assertEqual(self.gtInfo.framerate, self.ffDec.AvgFramerate())
+
+    def test_timebase(self):
+        epsilon = 1e-4
+        self.assertLessEqual(np.abs(self.gtInfo.timebase - self.ffDec.Timebase()), epsilon)
+
+    def test_decode_all_frames(self):
+        dec_frames = 0
+        frame = np.ndarray(dtype=np.uint8, shape=())
+        while self.ffDec.DecodeSingleFrame(frame):
+            dec_frames += 1
+        self.assertEqual(self.gtInfo.num_frames, dec_frames)
+
+if __name__ == "__main__":
+    unittest.main()
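
Note (not part of the patch): a minimal usage sketch of the new PyFfmpegDecoder getters added above, assuming a local clip like the "test.mp4" referenced in tests/gt_files.json; the file name and printed labels are illustrative only.

import numpy as np
import PyNvCodec as nvc

# Open the clip with the FFmpeg-based software decoder.
ffDec = nvc.PyFfmpegDecoder("test.mp4", {})

print("Framerate:      ", ffDec.Framerate())
print("Avg. framerate: ", ffDec.AvgFramerate())
print("Time base:      ", ffDec.Timebase())
# Numframes() may be 0 for containers that don't store the frame count.
print("Num frames:     ", ffDec.Numframes())

# Decode everything to cross-check the reported frame count, mirroring
# test_decode_all_frames in tests/test_PyFFmpegDecoder.py.
dec_frames = 0
frame = np.ndarray(dtype=np.uint8, shape=())
while ffDec.DecodeSingleFrame(frame):
    dec_frames += 1
print("Decoded frames: ", dec_frames)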