Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/streaming_video_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def process(
continue

assert isinstance(video_packets, VideoPackets)
if (frames := video_decoder.decode(video_packets)) is not None:
for frames in video_decoder.streaming_decode_packets(video_packets):
buffer = spdl.io.convert_frames(frames)
array = spdl.io.to_numpy(buffer)

Expand Down Expand Up @@ -194,7 +194,7 @@ def process(
# -------------------------------------------------------------

# Flush decoder
if (frames := video_decoder.flush()) is not None:
for frames in video_decoder.flush():
buffer = spdl.io.convert_frames(frames)
array = spdl.io.to_numpy(buffer)

Expand Down
30 changes: 11 additions & 19 deletions src/libspdl/core/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,7 @@

#include <libspdl/core/decoder.h>

#include "libspdl/core/detail/ffmpeg/ctx_utils.h"
#include "libspdl/core/detail/ffmpeg/decoder.h"
#include "libspdl/core/detail/ffmpeg/filter_graph.h"
#include "libspdl/core/detail/logging.h"
#include "libspdl/core/detail/tracing.h"

namespace spdl::core {
////////////////////////////////////////////////////////////////////////////////
Expand All @@ -31,29 +27,25 @@ Decoder<media>::~Decoder() {
}

template <MediaType media>
FramesPtr<media> Decoder<media>::decode_and_flush(
FramesPtr<media> Decoder<media>::decode_packets(
PacketsPtr<media> packets,
int num_frames) {
return pImpl_->decode_and_flush(std::move(packets), num_frames);
return pImpl_->decode_packets(std::move(packets), num_frames);
}

template <MediaType media>
std::optional<FramesPtr<media>> Decoder<media>::decode(
PacketsPtr<media> packets) {
auto frames = pImpl_->decode(std::move(packets));
if (frames->get_frames().size() == 0) {
return std::nullopt;
}
return frames;
Generator<FramesPtr<media>> Decoder<media>::streaming_decode_packets(
PacketsPtr<media> packets)
requires(media == MediaType::Video || media == MediaType::Audio)
{
return pImpl_->streaming_decode_packets(std::move(packets));
}

template <MediaType media>
std::optional<FramesPtr<media>> Decoder<media>::flush() {
auto frames = pImpl_->flush();
if (frames->get_frames().size() == 0) {
return std::nullopt;
}
return frames;
Generator<FramesPtr<media>> Decoder<media>::flush()
requires(media == MediaType::Video || media == MediaType::Audio)
{
return pImpl_->flush();
}

template class Decoder<MediaType::Audio>;
Expand Down
34 changes: 25 additions & 9 deletions src/libspdl/core/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <libspdl/core/codec.h>
#include <libspdl/core/frames.h>
#include <libspdl/core/generator.h>
#include <libspdl/core/packets.h>
#include <libspdl/core/types.h>

Expand Down Expand Up @@ -62,27 +63,42 @@ class Decoder {
/// Destructor.
~Decoder();

/// Decode packets and flush the decoder in one operation.
////////////////////////////////////////////////////////////////////////////
// One-off decoding
////////////////////////////////////////////////////////////////////////////

/// Decode all packets and flush the decoder in one operation.
///
/// @param packets Packets to decode.
/// @param num_frames Maximum number of frames to decode. Negative values
/// decode all frames.
/// @return Decoded frames.
FramesPtr<media> decode_and_flush(
FramesPtr<media> decode_packets(
PacketsPtr<media> packets,
int num_frames = -1);

/// Decode packets without flushing.
////////////////////////////////////////////////////////////////////////////
// Streaming decoding
////////////////////////////////////////////////////////////////////////////

/// Streaming decode packets and yield frames.
///
/// This method decodes packets and yields frames as they become ready.
///
/// @param packets Packets to decode.
/// @return Decoded frames, or std::nullopt if no frames are available yet.
std::optional<FramesPtr<media>> decode(PacketsPtr<media> packets);
/// @return Generator that yields decoded frames.
Generator<FramesPtr<media>> streaming_decode_packets(
PacketsPtr<media> packets)
requires(media == MediaType::Video || media == MediaType::Audio);

/// Flush the decoder to retrieve any remaining frames.
/// Flush the decoder and yield remaining frames.
///
/// Call this method at the end of stream to flush the decoder
/// and retrieve any remaining buffered frames.
///
/// @return Remaining decoded frames, or std::nullopt if no frames are
/// available.
std::optional<FramesPtr<media>> flush();
/// @return Generator that yields remaining decoded frames.
Generator<FramesPtr<media>> flush()
requires(media == MediaType::Video || media == MediaType::Audio);
};

/// Unique pointer to a Decoder instance.
Expand Down
109 changes: 63 additions & 46 deletions src/libspdl/core/detail/ffmpeg/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,13 @@ Rational DecoderImpl<media>::get_output_time_base() const {
// for handling timestamp.
// This is handled through high-level Python interface.
template <MediaType media>
FramesPtr<media> DecoderImpl<media>::decode_and_flush(
FramesPtr<media> DecoderImpl<media>::decode_packets(
PacketsPtr<media> packets,
int num_frames) {
auto ret =
std::make_unique<Frames<media>>(packets->id, get_output_time_base());
int num_yielded = 0;
for (auto&& frame : decode_packets(
for (auto&& frame : detail::decode_packets(
codec_ctx_, packets->pkts.get_packets(), filter_graph_, true)) {
ret->push_back(frame.release());
num_yielded += 1;
Expand All @@ -169,7 +169,7 @@ FramesPtr<media> DecoderImpl<media>::decode_and_flush(
// but this was not properly handling the half-open range, so we have
// specialization for video.
template <>
VideoFramesPtr DecoderImpl<MediaType::Video>::decode_and_flush(
VideoFramesPtr DecoderImpl<MediaType::Video>::decode_packets(
VideoPacketsPtr packets,
int num_frames) {
auto tb = get_output_time_base();
Expand All @@ -180,7 +180,7 @@ VideoFramesPtr DecoderImpl<MediaType::Video>::decode_and_flush(

auto ret = std::make_unique<VideoFrames>(packets->id, tb);
int num_yielded = 0;
for (auto&& frame : decode_packets(
for (auto&& frame : detail::decode_packets(
codec_ctx_, packets->pkts.get_packets(), filter_graph_, true)) {
// For video, we manually apply timestamps.
auto* raw_frame = frame.release();
Expand All @@ -199,59 +199,76 @@ VideoFramesPtr DecoderImpl<MediaType::Video>::decode_and_flush(
return ret;
}

// For audio and image.
// Note: when decoding audio with timestamp, we rely on `atrim` filter
// for handling timestamp.
// This is handled through high-level Python interface.
template <MediaType media>
FramesPtr<media> DecoderImpl<media>::decode(PacketsPtr<media> packets) {
auto ret =
std::make_unique<Frames<media>>(packets->id, get_output_time_base());
for (auto&& frame : decode_packets(
codec_ctx_, packets->pkts.get_packets(), filter_graph_, false)) {
ret->push_back(frame.release());
Generator<FramesPtr<media>> DecoderImpl<media>::flush()
requires(media == MediaType::Video || media == MediaType::Audio)
{
auto frames = std::make_unique<Frames<media>>(
reinterpret_cast<uintptr_t>(this), get_output_time_base());
std::vector<AVPacket*> dummy{};
for (auto&& frame :
detail::decode_packets(codec_ctx_, dummy, filter_graph_, true)) {
frames->push_back(frame.release());
}
if (frames->get_frames().size() > 0) {
co_yield std::move(frames);
}
return ret;
}

// Specialization for video.
// For video we want to ensure the half-open range.
// Originally we used `trim` filter like how audio is processed above,
// but this was not properly handling the half-open range, so we have
// specialization for video.
template <>
VideoFramesPtr DecoderImpl<MediaType::Video>::decode(VideoPacketsPtr packets) {
template <MediaType media>
Generator<FramesPtr<media>> DecoderImpl<media>::streaming_decode_packets(
PacketsPtr<media> packets)
requires(media == MediaType::Video || media == MediaType::Audio)
{
auto tb = get_output_time_base();
AVRational s, e;
if (packets->timestamp) {
std::tie(s, e) = *(packets->timestamp);
}
auto decoding = detail::decode_packets(
codec_ctx_, packets->pkts.get_packets(), filter_graph_, false);

auto ret = std::make_unique<VideoFrames>(packets->id, tb);
for (auto&& frame : decode_packets(
codec_ctx_, packets->pkts.get_packets(), filter_graph_, false)) {
auto* raw_frame = frame.release();
if (packets->timestamp && raw_frame) {
if (!is_within_window(to_rational(raw_frame->pts, tb), s, e)) {
av_frame_free(&raw_frame);
continue;
}
// Specialization for video.
// For video we want to ensure the half-open range.
// Originally we used `trim` filter like how audio is processed above,
// but this was not properly handling the half-open range, so we have
// specialization for video.
if constexpr (media == MediaType::Video) {
// Temporary solution to support Generator. Yield all the frames.
// TODO: support a predetermined number of frames here.
AVRational s, e;
if (packets->timestamp) {
std::tie(s, e) = *(packets->timestamp);
}

ret->push_back(raw_frame);
auto ret = std::make_unique<VideoFrames>(packets->id, tb);
for (auto&& frame : decoding) {
if (packets->timestamp && frame) {
if (!is_within_window(to_rational(frame->pts, tb), s, e)) {
auto* raw_frame = frame.release();
av_frame_free(&raw_frame);
continue;
}
}

ret->push_back(frame.release());
}
if (ret->get_frames().size() > 0) {
co_yield std::move(ret);
}
}
return ret;
}

template <MediaType media>
FramesPtr<media> DecoderImpl<media>::flush() {
auto ret = std::make_unique<Frames<media>>(
reinterpret_cast<uintptr_t>(this), get_output_time_base());
std::vector<AVPacket*> dummy{};
for (auto&& frame : decode_packets(codec_ctx_, dummy, filter_graph_, true)) {
ret->push_back(frame.release());
// For audio.
// Note: when decoding audio with timestamp, we rely on `atrim` filter
// for handling timestamp.
// This is handled through high-level Python interface.
if constexpr (media == MediaType::Audio) {
auto ret = std::make_unique<AudioFrames>(packets->id, tb);
// Temporary solution to support Generator. Yield all the frames.
// TODO: support a predetermined number of samples here.
for (auto&& frame : decoding) {
ret->push_back(frame.release());
}
if (ret->get_frames().size() > 0) {
co_yield std::move(ret);
}
}
return ret;
}

template class DecoderImpl<MediaType::Audio>;
Expand Down
9 changes: 6 additions & 3 deletions src/libspdl/core/detail/ffmpeg/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,14 @@ class DecoderImpl {

Rational get_output_time_base() const;

FramesPtr<media> decode_and_flush(
FramesPtr<media> decode_packets(
PacketsPtr<media> packets,
int num_frames = -1);
FramesPtr<media> decode(PacketsPtr<media> packets);
FramesPtr<media> flush();
Generator<FramesPtr<media>> streaming_decode_packets(
PacketsPtr<media> packets)
requires(media == MediaType::Video || media == MediaType::Audio);
Generator<FramesPtr<media>> flush()
requires(media == MediaType::Video || media == MediaType::Audio);
};

} // namespace spdl::core::detail
14 changes: 7 additions & 7 deletions src/libspdl/tests/smoke_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ TEST(SmokeTest, DecodeAudio) {
EXPECT_GT(packets->pkts.get_packets().size(), 0);

Decoder<MediaType::Audio> decoder(codec, std::nullopt, std::nullopt);
auto frames = decoder.decode_and_flush(std::move(packets));
auto frames = decoder.decode_packets(std::move(packets));
ASSERT_NE(frames, nullptr);
EXPECT_GT(frames->get_num_frames(), 0);

Expand All @@ -70,7 +70,7 @@ TEST(SmokeTest, DecodeImage) {
EXPECT_GT(packets->pkts.get_packets().size(), 0);

Decoder<MediaType::Image> decoder(codec, std::nullopt, std::nullopt);
auto frames = decoder.decode_and_flush(std::move(packets));
auto frames = decoder.decode_packets(std::move(packets));
ASSERT_NE(frames, nullptr);
EXPECT_GT(frames->get_num_frames(), 0);

Expand All @@ -92,7 +92,7 @@ TEST(SmokeTest, DecodeVideo) {
EXPECT_GT(packets->pkts.get_packets().size(), 0);

Decoder<MediaType::Video> decoder(codec, std::nullopt, std::nullopt);
auto frames = decoder.decode_and_flush(std::move(packets));
auto frames = decoder.decode_packets(std::move(packets));
ASSERT_NE(frames, nullptr);
EXPECT_GT(frames->get_num_frames(), 0);

Expand Down Expand Up @@ -126,7 +126,7 @@ TEST(SmokeTest, DecodeAudioWithFormatFilter) {
fmt::format("{},aformat=sample_fmts=fltp,abuffersink", abuffer);

Decoder<MediaType::Audio> decoder(codec, std::nullopt, filterDesc);
auto frames = decoder.decode_and_flush(std::move(packets));
auto frames = decoder.decode_packets(std::move(packets));
ASSERT_NE(frames, nullptr);
EXPECT_GT(frames->get_num_frames(), 0);

Expand Down Expand Up @@ -162,7 +162,7 @@ TEST(SmokeTest, DecodeVideoWithFormatFilter) {
fmt::format("{},format=pix_fmts=rgb24,buffersink", buffer);

Decoder<MediaType::Video> decoder(codec, std::nullopt, filterDesc);
auto frames = decoder.decode_and_flush(std::move(packets));
auto frames = decoder.decode_packets(std::move(packets));
ASSERT_NE(frames, nullptr);
EXPECT_GT(frames->get_num_frames(), 0);

Expand Down Expand Up @@ -198,7 +198,7 @@ TEST(SmokeTest, DecodeImageWithFormatFilter) {
fmt::format("{},format=pix_fmts=yuv420p,buffersink", buffer);

Decoder<MediaType::Image> decoder(codec, std::nullopt, filterDesc);
auto frames = decoder.decode_and_flush(std::move(packets));
auto frames = decoder.decode_packets(std::move(packets));
ASSERT_NE(frames, nullptr);
EXPECT_GT(frames->get_num_frames(), 0);

Expand Down Expand Up @@ -235,7 +235,7 @@ TEST(SmokeTest, DecodeVideoWithChainedFilters) {
fmt::format("{},format=pix_fmts=rgb24,hflip,buffersink", buffer);

Decoder<MediaType::Video> decoder(codec, std::nullopt, filterDesc);
auto frames = decoder.decode_and_flush(std::move(packets));
auto frames = decoder.decode_packets(std::move(packets));
ASSERT_NE(frames, nullptr);
EXPECT_GT(frames->get_num_frames(), 0);

Expand Down
2 changes: 0 additions & 2 deletions src/spdl/io/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ from spdl.io.lib._libspdl import (
DemuxConfig as DemuxConfig,
FilterGraph as FilterGraph,
ImageCodec as ImageCodec,
ImageDecoder as ImageDecoder,
ImageFrames as ImageFrames,
ImagePackets as ImagePackets,
VideoCodec as VideoCodec,
Expand Down Expand Up @@ -190,7 +189,6 @@ __all__ = [
"DecodeConfig",
"DemuxConfig",
"ImageCodec",
"ImageDecoder",
"ImageFrames",
"ImagePackets",
"VideoCodec",
Expand Down
Loading
Loading