video2x/src/encoder.cpp

#include "encoder.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <spdlog/spdlog.h>
#include "conversions.h"
static enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder) {
    const enum AVPixelFormat *p = encoder->pix_fmts;
    if (!p) {
        spdlog::error("No pixel formats supported by encoder");
        return AV_PIX_FMT_NONE;
    }
    return *p;
}
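
// Initialize the output muxer and video encoder: allocate the output format
// context, create the output video stream, configure and open the encoder,
// optionally map the remaining audio/subtitle streams for stream copying,
// and open the output file for writing.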
int init_encoder(
    AVBufferRef *hw_ctx,
    std::filesystem::path out_fpath,
    AVFormatContext *ifmt_ctx,
    AVFormatContext **ofmt_ctx,
    AVCodecContext **enc_ctx,
    AVCodecContext *dec_ctx,
    EncoderConfig *encoder_config,
    int vstream_idx,
    int **stream_map
) {
    AVFormatContext *fmt_ctx = NULL;
    AVCodecContext *codec_ctx = NULL;
    int stream_index = 0;
    int ret;

    // Allocate the output format context; the muxer is guessed from the file name
    avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str());
    if (!fmt_ctx) {
        spdlog::error("Could not create output context");
        return AVERROR_UNKNOWN;
    }

    // Find the requested video encoder
    const AVCodec *encoder = avcodec_find_encoder(encoder_config->codec);
    if (!encoder) {
        spdlog::error(
            "Required video encoder not found for vcodec {}",
            avcodec_get_name(encoder_config->codec)
        );
        return AVERROR_ENCODER_NOT_FOUND;
    }

    // Create a new video stream in the output file
    AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL);
    if (!out_stream) {
        spdlog::error("Failed to allocate the output video stream");
        return AVERROR_UNKNOWN;
    }

    codec_ctx = avcodec_alloc_context3(encoder);
    if (!codec_ctx) {
        spdlog::error("Failed to allocate the encoder context");
        return AVERROR(ENOMEM);
    }

    // Set hardware device context
    if (hw_ctx != nullptr) {
        codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx);
    }
    // Set encoding parameters
    codec_ctx->height = encoder_config->out_height;
    codec_ctx->width = encoder_config->out_width;
    codec_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
    codec_ctx->bit_rate = encoder_config->bit_rate;

    // Set the pixel format
    if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) {
        // Use the specified pixel format
        codec_ctx->pix_fmt = encoder_config->pix_fmt;
    } else {
        // Fall back to the encoder's default pixel format
        codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder);
        if (codec_ctx->pix_fmt == AV_PIX_FMT_NONE) {
            spdlog::error("Could not get the default pixel format for the encoder");
            return AVERROR(EINVAL);
        }
    }

    // Set the output video's time base
    if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) {
        codec_ctx->time_base = dec_ctx->time_base;
    } else {
        codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_stream, NULL));
    }

    // Set the output video's frame rate
    if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) {
        codec_ctx->framerate = dec_ctx->framerate;
    } else {
        codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, out_stream, NULL);
    }

    // Set the CRF and preset for any codecs that support them
    std::string crf_str = std::to_string(encoder_config->crf);
    av_opt_set(codec_ctx->priv_data, "crf", crf_str.c_str(), 0);
    av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0);

    // Some muxers require codec-level global headers instead of in-band headers
    if (fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) {
        codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }

    // Open the encoder
    if ((ret = avcodec_open2(codec_ctx, encoder, NULL)) < 0) {
        spdlog::error("Cannot open video encoder");
        return ret;
    }

    // Copy the encoder parameters to the output video stream
    ret = avcodec_parameters_from_context(out_stream->codecpar, codec_ctx);
    if (ret < 0) {
        spdlog::error("Failed to copy encoder parameters to output video stream");
        return ret;
    }

    out_stream->time_base = codec_ctx->time_base;
    out_stream->avg_frame_rate = codec_ctx->framerate;
    out_stream->r_frame_rate = codec_ctx->framerate;
    if (encoder_config->copy_streams) {
        // Allocate the stream map
        *stream_map =
            reinterpret_cast<int *>(av_malloc_array(ifmt_ctx->nb_streams, sizeof(**stream_map)));
        if (!*stream_map) {
            spdlog::error("Could not allocate stream mapping");
            return AVERROR(ENOMEM);
        }

        // Map the video stream
        (*stream_map)[vstream_idx] = stream_index++;

        // Loop through each stream in the input file
        for (int i = 0; i < static_cast<int>(ifmt_ctx->nb_streams); i++) {
            AVStream *in_stream = ifmt_ctx->streams[i];
            AVCodecParameters *in_codecpar = in_stream->codecpar;

            if (i == vstream_idx) {
                // The video stream is already handled above
                continue;
            }

            // Only audio and subtitle streams are copied; all others are dropped
            if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
                in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
                (*stream_map)[i] = -1;
                continue;
            }

            // Create the corresponding output stream
            AVStream *out_copied_stream = avformat_new_stream(fmt_ctx, NULL);
            if (!out_copied_stream) {
                spdlog::error("Failed allocating output stream");
                return AVERROR_UNKNOWN;
            }

            ret = avcodec_parameters_copy(out_copied_stream->codecpar, in_codecpar);
            if (ret < 0) {
                spdlog::error("Failed to copy codec parameters");
                return ret;
            }
            out_copied_stream->codecpar->codec_tag = 0;

            // Copy the time base
            out_copied_stream->time_base = in_stream->time_base;

            (*stream_map)[i] = stream_index++;
        }
    }

    // Open the output file
    if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        ret = avio_open(&fmt_ctx->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE);
        if (ret < 0) {
            spdlog::error("Could not open output file '{}'", out_fpath.u8string().c_str());
            return ret;
        }
    }

    *ofmt_ctx = fmt_ctx;
    *enc_ctx = codec_ctx;
    return 0;
}
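
// Encode a single frame and write the resulting packets to the output file.
// The frame is converted to the encoder's pixel format first if necessary.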
int write_frame(
    AVFrame *frame,
    AVCodecContext *enc_ctx,
    AVFormatContext *ofmt_ctx,
    int vstream_idx
) {
    AVFrame *converted_frame = nullptr;
    int ret;

    // Convert the frame to the encoder's pixel format if needed
    if (frame->format != enc_ctx->pix_fmt) {
        converted_frame = convert_avframe_pix_fmt(frame, enc_ctx->pix_fmt);
        if (!converted_frame) {
            spdlog::error("Error converting frame to encoder's pixel format");
            return AVERROR_EXTERNAL;
        }
        converted_frame->pts = frame->pts;
    }

    AVPacket *enc_pkt = av_packet_alloc();
    if (!enc_pkt) {
        spdlog::error("Could not allocate AVPacket");
        return AVERROR(ENOMEM);
    }

    // Send the frame to the encoder
    if (converted_frame != nullptr) {
        ret = avcodec_send_frame(enc_ctx, converted_frame);
        av_frame_free(&converted_frame);
    } else {
        ret = avcodec_send_frame(enc_ctx, frame);
    }
    if (ret < 0) {
        spdlog::error("Error sending frame to encoder");
        av_packet_free(&enc_pkt);
        return ret;
    }

    // Drain all packets the encoder has ready for this frame
    while (ret >= 0) {
        ret = avcodec_receive_packet(enc_ctx, enc_pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            av_packet_unref(enc_pkt);
            break;
        } else if (ret < 0) {
            spdlog::error("Error encoding frame");
            av_packet_free(&enc_pkt);
            return ret;
        }

        // Rescale packet timestamps from the encoder to the output stream time base
        av_packet_rescale_ts(
            enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[vstream_idx]->time_base
        );
        enc_pkt->stream_index = vstream_idx;

        // Write the packet
        ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
        av_packet_unref(enc_pkt);
        if (ret < 0) {
            spdlog::error("Error muxing packet");
            av_packet_free(&enc_pkt);
            return ret;
        }
    }

    av_packet_free(&enc_pkt);
    return 0;
}
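
// Flush the encoder by sending a NULL frame, then write out all remaining
// buffered packets to the output file.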
int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) {
    int ret;

    AVPacket *enc_pkt = av_packet_alloc();
    if (!enc_pkt) {
        spdlog::error("Could not allocate AVPacket");
        return AVERROR(ENOMEM);
    }

    // Enter draining mode by sending a NULL frame
    ret = avcodec_send_frame(enc_ctx, NULL);

    // Drain all remaining packets from the encoder
    while (ret >= 0) {
        ret = avcodec_receive_packet(enc_ctx, enc_pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            av_packet_unref(enc_pkt);
            break;
        } else if (ret < 0) {
            spdlog::error("Error encoding frame");
            av_packet_free(&enc_pkt);
            return ret;
        }

        // Rescale packet timestamps from the encoder to the output stream time base
        av_packet_rescale_ts(enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[0]->time_base);
        enc_pkt->stream_index = ofmt_ctx->streams[0]->index;

        // Write the packet
        ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
        av_packet_unref(enc_pkt);
        if (ret < 0) {
            spdlog::error("Error muxing packet");
            av_packet_free(&enc_pkt);
            return ret;
        }
    }

    av_packet_free(&enc_pkt);
    return 0;
}