359 lines
13 KiB
C
359 lines
13 KiB
C
|
|
/*
 * Copyright (c) 2022, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
|
||
|
|
|
||
|
|
#ifndef AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_
|
||
|
|
#define AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_
|
||
|
|
|
||
|
|
#include <array>
#include <cassert>
#include <string>
#include <utility>
#include <vector>

#include "aom/aom_codec.h"
#include "av1/encoder/firstpass.h"
|
||
|
|
|
||
|
|
namespace aom {
|
||
|
|
|
||
|
|
// Number of per-block reference entries; sizes the `mv` and `ref_frame_index`
// arrays in TplBlockStats.
constexpr int kBlockRefCount{ 2 };
|
||
|
|
|
||
|
|
// A motion vector expressed in sub-pixel units.
struct MotionVector {
  // Sub-pel row component.
  int row;
  // Sub-pel column component.
  int col;
  // Number of fractional bits used by `row` and `col`.
  // TODO(b/241589513): Move this to TplFrameStats; it's wasteful to code it
  // separately for each block.
  int subpel_bits;
};
|
||
|
|
|
||
|
|
// Total number of TPL passes to run. The underlying value of each enumerator
// equals the pass count it names.
enum class TplPassCount {
  kOneTplPass = 1,
  kTwoTplPasses = 2,
};
|
||
|
|
|
||
|
|
struct RateControlParam {
|
||
|
|
// Range of allowed GOP sizes (number of displayed frames).
|
||
|
|
int max_gop_show_frame_count;
|
||
|
|
int min_gop_show_frame_count;
|
||
|
|
// Number of reference frame buffers, i.e., size of the DPB.
|
||
|
|
int ref_frame_table_size;
|
||
|
|
// Maximum number of references a single frame may use.
|
||
|
|
int max_ref_frames;
|
||
|
|
|
||
|
|
int base_q_index;
|
||
|
|
|
||
|
|
// If greater than 1, enables per-superblock q_index, and limits the number of
|
||
|
|
// unique q_index values which may be used in a frame (each of which will have
|
||
|
|
// its own unique rdmult value).
|
||
|
|
int max_distinct_q_indices_per_frame;
|
||
|
|
|
||
|
|
// If per-superblock q_index is enabled and this is greater than 1, enables
|
||
|
|
// additional per-superblock scaling of lambda, and limits the number of
|
||
|
|
// unique lambda scale values which may be used in a frame.
|
||
|
|
int max_distinct_lambda_scales_per_frame;
|
||
|
|
|
||
|
|
int frame_width;
|
||
|
|
int frame_height;
|
||
|
|
|
||
|
|
// Total number of TPL passes.
|
||
|
|
TplPassCount tpl_pass_count = TplPassCount::kOneTplPass;
|
||
|
|
// Current TPL pass number, 0 or 1 (for GetTplPassGopEncodeInfo).
|
||
|
|
int tpl_pass_index = 0;
|
||
|
|
};
|
||
|
|
|
||
|
|
struct TplBlockStats {
|
||
|
|
int16_t height; // Pixel height.
|
||
|
|
int16_t width; // Pixel width.
|
||
|
|
int16_t row; // Pixel row of the top left corner.
|
||
|
|
int16_t col; // Pixel col of the top left corner.
|
||
|
|
int64_t intra_cost; // Rd cost of the best intra mode.
|
||
|
|
int64_t inter_cost; // Rd cost of the best inter mode.
|
||
|
|
|
||
|
|
// Valid only if TplFrameStats::rate_dist_present is true:
|
||
|
|
int64_t recrf_rate; // Bits when using recon as reference.
|
||
|
|
int64_t recrf_dist; // Distortion when using recon as reference.
|
||
|
|
int64_t intra_pred_err; // Prediction residual of the intra mode.
|
||
|
|
int64_t inter_pred_err; // Prediction residual of the inter mode.
|
||
|
|
|
||
|
|
std::array<MotionVector, kBlockRefCount> mv;
|
||
|
|
std::array<int, kBlockRefCount> ref_frame_index;
|
||
|
|
};
|
||
|
|
|
||
|
|
// GOP frame type, used to facilitate setting up a GopFrame.
// TODO(angiebird): Define names for forward key frame and
// key frame with overlay
enum class GopFrameType {
  // High quality key frame without overlay.
  kRegularKey,
  // Regular leaf frame.
  kRegularLeaf,
  // Regular golden frame.
  kRegularGolden,
  // High quality arf with strong filtering, followed by an overlay later.
  kRegularArf,
  // Overlay frame.
  kOverlay,
  // Intermediate overlay frame.
  kIntermediateOverlay,
  // Good quality arf with weak or no filtering, followed by a show_existing
  // later.
  kIntermediateArf,
};
|
||
|
|
|
||
|
|
// Stored in GopFrame::encode_ref_mode. GopFrame documents that
// colocated_ref_idx is -1 when the mode is kRegular.
// NOTE(review): the exact coding behavior of kOverlay and kShowExisting is not
// visible in this header -- confirm against the implementation.
enum class EncodeRefMode {
  kRegular,
  kOverlay,
  kShowExisting,
};
|
||
|
|
|
||
|
|
// Names for reference frames. The numeric values are explicit and must not be
// reordered.
// NOTE(review): the values look like they mirror the encoder's reference frame
// numbering -- confirm before relying on that.
enum class ReferenceName {
  kNoneFrame = -1,
  kIntraFrame = 0,
  kLastFrame = 1,
  kLast2Frame = 2,
  kLast3Frame = 3,
  kGoldenFrame = 4,
  kBwdrefFrame = 5,
  kAltref2Frame = 6,
  kAltrefFrame = 7,
};
|
||
|
|
|
||
|
|
struct Status {
|
||
|
|
aom_codec_err_t code;
|
||
|
|
std::string message; // Empty if code == AOM_CODEC_OK.
|
||
|
|
bool ok() const { return code == AOM_CODEC_OK; }
|
||
|
|
};
|
||
|
|
|
||
|
|
// A very simple imitation of absl::StatusOr, this is conceptually a union of a
|
||
|
|
// Status struct and an object of type T. It models an object that is either a
|
||
|
|
// usable object, or an error explaining why such an object is not present. A
|
||
|
|
// StatusOr<T> may never hold a status with a code of AOM_CODEC_OK.
|
||
|
|
template <typename T>
|
||
|
|
class StatusOr {
|
||
|
|
public:
|
||
|
|
StatusOr(const T &value) : value_(value) {}
|
||
|
|
StatusOr(T &&value) : value_(std::move(value)) {}
|
||
|
|
StatusOr(Status status) : status_(std::move(status)) {
|
||
|
|
assert(status_.code != AOM_CODEC_OK);
|
||
|
|
}
|
||
|
|
|
||
|
|
const Status &status() const { return status_; }
|
||
|
|
bool ok() const { return status().ok(); }
|
||
|
|
|
||
|
|
// operator* returns the value; it should only be called after checking that
|
||
|
|
// ok() returns true.
|
||
|
|
const T &operator*() const & { return value_; }
|
||
|
|
T &operator*() & { return value_; }
|
||
|
|
const T &&operator*() const && { return value_; }
|
||
|
|
T &&operator*() && { return std::move(value_); }
|
||
|
|
|
||
|
|
// sor->field is equivalent to (*sor).field.
|
||
|
|
const T *operator->() const & { return &value_; }
|
||
|
|
T *operator->() & { return &value_; }
|
||
|
|
|
||
|
|
// value() is equivalent to operator*, but asserts that ok() is true.
|
||
|
|
const T &value() const & {
|
||
|
|
assert(ok());
|
||
|
|
return value_;
|
||
|
|
}
|
||
|
|
T &value() & {
|
||
|
|
assert(ok());
|
||
|
|
return value_;
|
||
|
|
}
|
||
|
|
const T &&value() const && {
|
||
|
|
assert(ok());
|
||
|
|
return value_;
|
||
|
|
}
|
||
|
|
T &&value() && {
|
||
|
|
assert(ok());
|
||
|
|
return std::move(value_);
|
||
|
|
}
|
||
|
|
|
||
|
|
private:
|
||
|
|
T value_; // This could be std::optional<T> if it were available.
|
||
|
|
Status status_ = { AOM_CODEC_OK, "" };
|
||
|
|
};
|
||
|
|
|
||
|
|
struct ReferenceFrame {
|
||
|
|
int index; // Index of reference slot containing the reference frame
|
||
|
|
ReferenceName name;
|
||
|
|
};
|
||
|
|
|
||
|
|
struct GopFrame {
|
||
|
|
// basic info
|
||
|
|
bool is_valid;
|
||
|
|
int order_idx; // Index in display order in a GOP
|
||
|
|
int coding_idx; // Index in coding order in a GOP
|
||
|
|
int display_idx; // The number of displayed frames preceding this frame in
|
||
|
|
// a GOP
|
||
|
|
|
||
|
|
int global_order_idx; // Index in display order in the whole video chunk
|
||
|
|
int global_coding_idx; // Index in coding order in the whole video chunk
|
||
|
|
|
||
|
|
bool is_key_frame; // If this is key frame, reset reference buffers are
|
||
|
|
// required
|
||
|
|
bool is_arf_frame; // Is this a forward frame, a frame with order_idx
|
||
|
|
// higher than the current display order
|
||
|
|
bool is_show_frame; // Is this frame a show frame after coding
|
||
|
|
bool is_golden_frame; // Is this a high quality frame
|
||
|
|
|
||
|
|
GopFrameType update_type; // This is a redundant field. It is only used for
|
||
|
|
// easy conversion in SW integration.
|
||
|
|
|
||
|
|
// reference frame info
|
||
|
|
EncodeRefMode encode_ref_mode;
|
||
|
|
int colocated_ref_idx; // colocated_ref_idx == -1 when encode_ref_mode ==
|
||
|
|
// EncodeRefMode::kRegular
|
||
|
|
int update_ref_idx; // The reference index that this frame should be
|
||
|
|
// updated to. update_ref_idx == -1 when this frame
|
||
|
|
// will not serve as a reference frame
|
||
|
|
std::vector<ReferenceFrame>
|
||
|
|
ref_frame_list; // A list of available reference frames in priority order
|
||
|
|
// for the current to-be-coded frame. The list size
|
||
|
|
// should be less or equal to ref_frame_table_size. The
|
||
|
|
// reference frames with smaller indices are more likely
|
||
|
|
// to be a good reference frame. Therefore, they should
|
||
|
|
// be prioritized when the reference frame count is
|
||
|
|
// limited. For example, if we plan to use 3 reference
|
||
|
|
// frames, we should choose ref_frame_list[0],
|
||
|
|
// ref_frame_list[1] and ref_frame_list[2].
|
||
|
|
int layer_depth; // Layer depth in the GOP structure
|
||
|
|
ReferenceFrame primary_ref_frame; // We will use the primary reference frame
|
||
|
|
// to update current frame's initial
|
||
|
|
// probability model
|
||
|
|
};
|
||
|
|
|
||
|
|
struct GopStruct {
|
||
|
|
int show_frame_count;
|
||
|
|
int global_coding_idx_offset;
|
||
|
|
int global_order_idx_offset;
|
||
|
|
// TODO(jingning): This can be removed once the framework is up running.
|
||
|
|
int display_tracker; // Track the number of frames displayed proceeding a
|
||
|
|
// current coding frame.
|
||
|
|
std::vector<GopFrame> gop_frame_list;
|
||
|
|
};
|
||
|
|
|
||
|
|
// A sequence of GOP structures; the result type of
// AV1RateControlQModeInterface::DetermineGopInfo.
using GopStructList = std::vector<GopStruct>;
|
||
|
|
|
||
|
|
// Per-superblock encode parameters; stored in
// FrameEncodeParameters::superblock_encode_params.
struct SuperblockEncodeParameters {
  int q_index;
  int rdmult;
};
|
||
|
|
|
||
|
|
struct FrameEncodeParameters {
|
||
|
|
// Base q_index for the frame.
|
||
|
|
int q_index;
|
||
|
|
|
||
|
|
// Frame level Lagrangian multiplier.
|
||
|
|
int rdmult;
|
||
|
|
|
||
|
|
// If max_distinct_q_indices_per_frame <= 1, this will be empty.
|
||
|
|
// Otherwise:
|
||
|
|
// - There must be one entry per 64x64 superblock, in row-major order
|
||
|
|
// - There may be no more than max_distinct_q_indices_per_frame unique q_index
|
||
|
|
// values
|
||
|
|
// - All entries with the same q_index must have the same rdmult
|
||
|
|
// (If it's desired to use different rdmult values with the same q_index, this
|
||
|
|
// must be done with superblock_lambda_scales.)
|
||
|
|
std::vector<SuperblockEncodeParameters> superblock_encode_params;
|
||
|
|
|
||
|
|
// If max_distinct_q_indices_per_frame <= 1 or
|
||
|
|
// max_distinct_lambda_scales_per_frame <= 1, this will be empty. Otherwise,
|
||
|
|
// it will have one entry per 64x64 superblock, in row-major order, with no
|
||
|
|
// more than max_distinct_lambda_scales_per_frame unique values. Each entry
|
||
|
|
// should be multiplied by the rdmult in the corresponding superblock's entry
|
||
|
|
// in superblock_encode_params.
|
||
|
|
std::vector<float> superblock_lambda_scales;
|
||
|
|
};
|
||
|
|
|
||
|
|
struct FirstpassInfo {
|
||
|
|
int num_mbs_16x16; // Count of 16x16 unit blocks in each frame.
|
||
|
|
// FIRSTPASS_STATS's unit block size is 16x16
|
||
|
|
std::vector<FIRSTPASS_STATS> stats_list;
|
||
|
|
};
|
||
|
|
|
||
|
|
// In general, the number of elements in RefFrameTable must always equal
|
||
|
|
// ref_frame_table_size (as specified in RateControlParam), but see
|
||
|
|
// GetGopEncodeInfo for the one exception.
|
||
|
|
using RefFrameTable = std::vector<GopFrame>;
|
||
|
|
|
||
|
|
struct GopEncodeInfo {
|
||
|
|
std::vector<FrameEncodeParameters> param_list;
|
||
|
|
RefFrameTable final_snapshot; // RefFrameTable snapshot after coding this GOP
|
||
|
|
};
|
||
|
|
|
||
|
|
struct TplFrameStats {
|
||
|
|
int min_block_size;
|
||
|
|
int frame_width;
|
||
|
|
int frame_height;
|
||
|
|
bool rate_dist_present; // True if recrf_rate and recrf_dist are populated.
|
||
|
|
std::vector<TplBlockStats> block_stats_list;
|
||
|
|
// Optional stats computed with different settings, should be empty unless
|
||
|
|
// tpl_pass_count == kTwoTplPasses.
|
||
|
|
std::vector<TplBlockStats> alternate_block_stats_list;
|
||
|
|
};
|
||
|
|
|
||
|
|
struct TplGopStats {
|
||
|
|
std::vector<TplFrameStats> frame_stats_list;
|
||
|
|
};
|
||
|
|
|
||
|
|
// Structure and TPL stats for a single GOP, to be used for lookahead.
|
||
|
|
struct LookaheadStats {
|
||
|
|
const GopStruct *gop_struct; // Not owned, may not be nullptr.
|
||
|
|
const TplGopStats *tpl_gop_stats; // Not owned, may not be nullptr.
|
||
|
|
};
|
||
|
|
|
||
|
|
class AV1RateControlQModeInterface {
|
||
|
|
public:
|
||
|
|
AV1RateControlQModeInterface();
|
||
|
|
virtual ~AV1RateControlQModeInterface();
|
||
|
|
|
||
|
|
virtual Status SetRcParam(const RateControlParam &rc_param) = 0;
|
||
|
|
virtual StatusOr<GopStructList> DetermineGopInfo(
|
||
|
|
const FirstpassInfo &firstpass_info) = 0;
|
||
|
|
|
||
|
|
// Accepts GOP structure and TPL info from the encoder and returns q index and
|
||
|
|
// rdmult for each frame. This should be called with consecutive GOPs as
|
||
|
|
// returned by DetermineGopInfo.
|
||
|
|
//
|
||
|
|
// GOP structure and TPL info from zero or more subsequent GOPs may optionally
|
||
|
|
// be passed in lookahead_stats.
|
||
|
|
//
|
||
|
|
// For the first GOP, a default-constructed RefFrameTable may be passed in as
|
||
|
|
// ref_frame_table_snapshot_init; for subsequent GOPs, it should be the
|
||
|
|
// final_snapshot returned on the previous call.
|
||
|
|
//
|
||
|
|
// TODO(b/260859962): Remove these once all callers and overrides are gone.
|
||
|
|
virtual StatusOr<GopEncodeInfo> GetGopEncodeInfo(
|
||
|
|
const GopStruct &gop_struct AOM_UNUSED,
|
||
|
|
const TplGopStats &tpl_gop_stats AOM_UNUSED,
|
||
|
|
const std::vector<LookaheadStats> &lookahead_stats AOM_UNUSED,
|
||
|
|
const RefFrameTable &ref_frame_table_snapshot AOM_UNUSED) {
|
||
|
|
return Status{ AOM_CODEC_UNSUP_FEATURE, "Deprecated" };
|
||
|
|
}
|
||
|
|
virtual StatusOr<GopEncodeInfo> GetTplPassGopEncodeInfo(
|
||
|
|
const GopStruct &gop_struct AOM_UNUSED) {
|
||
|
|
return Status{ AOM_CODEC_UNSUP_FEATURE, "Deprecated" };
|
||
|
|
}
|
||
|
|
|
||
|
|
// Extensions to the API to pass in the first pass info. There should be stats
|
||
|
|
// for all frames starting from the first frame of the GOP and continuing to
|
||
|
|
// the end of the sequence.
|
||
|
|
// TODO(b/260859962): Make pure virtual once all derived classes implement it.
|
||
|
|
virtual StatusOr<GopEncodeInfo> GetGopEncodeInfo(
|
||
|
|
const GopStruct &gop_struct AOM_UNUSED,
|
||
|
|
const TplGopStats &tpl_gop_stats AOM_UNUSED,
|
||
|
|
const std::vector<LookaheadStats> &lookahead_stats AOM_UNUSED,
|
||
|
|
const FirstpassInfo &firstpass_info AOM_UNUSED,
|
||
|
|
const RefFrameTable &ref_frame_table_snapshot AOM_UNUSED) {
|
||
|
|
return Status{ AOM_CODEC_UNSUP_FEATURE, "Not yet implemented" };
|
||
|
|
}
|
||
|
|
virtual StatusOr<GopEncodeInfo> GetTplPassGopEncodeInfo(
|
||
|
|
const GopStruct &gop_struct AOM_UNUSED,
|
||
|
|
const FirstpassInfo &firstpass_info AOM_UNUSED) {
|
||
|
|
return Status{ AOM_CODEC_UNSUP_FEATURE, "Not yet implemented" };
|
||
|
|
}
|
||
|
|
};
|
||
|
|
} // namespace aom
|
||
|
|
|
||
|
|
#endif // AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_
|