2335 lines
87 KiB
C
2335 lines
87 KiB
C
/******************************************************************************
|
|
*
|
|
* Copyright (C) 2022 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at:
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
*****************************************************************************
|
|
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
|
*/
|
|
|
|
/**
|
|
*******************************************************************************
|
|
* @file
|
|
* isvce_intra_modes_eval.c
|
|
*
|
|
* @brief
|
|
* This file contains definitions of routines that perform rate distortion
|
|
* analysis on a macroblock if they are to be coded as intra.
|
|
*
|
|
* @author
|
|
* ittiam
|
|
*
|
|
* @par List of Functions:
|
|
* - isvce_derive_neighbor_availability_of_mbs()
|
|
* - isvce_derive_ngbr_avbl_of_mb_partitions()
|
|
* - isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff()
|
|
* - isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff()
|
|
* - isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff()
|
|
* - isvce_evaluate_intra4x4_modes_for_least_cost_rdopton()
|
|
* - isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff()
|
|
* - isvce_evaluate_intra16x16_modes()
|
|
* - isvce_evaluate_intra4x4_modes()
|
|
* - isvce_evaluate_intra_chroma_modes()
|
|
*
|
|
* @remarks
|
|
* None
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
/*****************************************************************************/
|
|
/* File Includes */
|
|
/*****************************************************************************/
|
|
|
|
/* System include files */
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <limits.h>
|
|
#include <assert.h>
|
|
|
|
/* User include files */
|
|
#include "ih264e_config.h"
|
|
#include "ih264_typedefs.h"
|
|
#include "iv2.h"
|
|
#include "ive2.h"
|
|
#include "ih264_debug.h"
|
|
#include "isvc_defs.h"
|
|
#include "isvc_macros.h"
|
|
#include "ih264_intra_pred_filters.h"
|
|
#include "isvc_structs.h"
|
|
#include "isvc_common_tables.h"
|
|
#include "isvc_trans_quant_itrans_iquant.h"
|
|
#include "isvc_inter_pred_filters.h"
|
|
#include "isvc_mem_fns.h"
|
|
#include "ih264_padding.h"
|
|
#include "ih264_size_defs.h"
|
|
#include "ih264_deblk_edge_filters.h"
|
|
#include "isvc_cabac_tables.h"
|
|
#include "isvce_defs.h"
|
|
#include "ime_distortion_metrics.h"
|
|
#include "ih264e_error.h"
|
|
#include "ih264e_bitstream.h"
|
|
#include "ime_defs.h"
|
|
#include "ime_structs.h"
|
|
#include "irc_cntrl_param.h"
|
|
#include "irc_frame_info_collector.h"
|
|
#include "isvce_rate_control.h"
|
|
#include "isvce_cabac_structs.h"
|
|
#include "isvce_structs.h"
|
|
#include "ih264e_intra_modes_eval.h"
|
|
#include "isvce_globals.h"
|
|
#include "ime_platform_macros.h"
|
|
|
|
/*****************************************************************************/
|
|
/* Function Definitions */
|
|
/*****************************************************************************/
|
|
|
|
/**
|
|
******************************************************************************
|
|
*
|
|
* @brief
|
|
* derivation process for subblock/partition availability
|
|
*
|
|
* @par Description
|
|
* Calculates the availability of the left, top, topright and topleft subblock
|
|
* or partitions.
|
|
*
|
|
* @param[in] ps_proc_ctxt
|
|
* pointer to macroblock context (handle)
|
|
*
|
|
* @param[in] i1_pel_pos_x
|
|
* column position of the pel wrt the current block
|
|
*
|
|
* @param[in] i1_pel_pos_y
|
|
* row position of the pel in wrt current block
|
|
*
|
|
* @remarks Assumptions: before calling this function it is assumed that
|
|
* the neighbor availability of the current macroblock is already derived.
|
|
* Based on table 6-3 of H264 specification
|
|
*
|
|
* @return availability status (yes or no)
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
UWORD8 isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl, WORD8 i1_pel_pos_x,
|
|
WORD8 i1_pel_pos_y)
|
|
{
|
|
UWORD8 u1_neighbor_avail = 0;
|
|
|
|
/**********************************************************************/
|
|
/* values of i1_pel_pos_x in the range 0-15 inclusive correspond to */
|
|
/* various columns of a macroblock */
|
|
/* */
|
|
/* values of i1_pel_pos_y in the range 0-15 inclusive correspond to */
|
|
/* various rows of a macroblock */
|
|
/* */
|
|
/* other values of i1_pel_pos_x & i1_pel_pos_y represents elements */
|
|
/* outside the bound of an mb ie., represents its neighbors. */
|
|
/**********************************************************************/
|
|
if(i1_pel_pos_x < 0)
|
|
{ /* column(-1) */
|
|
if(i1_pel_pos_y < 0)
|
|
{ /* row(-1) */
|
|
u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */
|
|
}
|
|
else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
|
|
{ /* all rows of a macroblock */
|
|
u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */
|
|
}
|
|
else /* if (i1_pel_pos_y >= 16) */
|
|
{ /* rows(+16) */
|
|
u1_neighbor_avail = 0; /* current mb bottom left availability */
|
|
}
|
|
}
|
|
else if(i1_pel_pos_x >= 0 && i1_pel_pos_x < 16)
|
|
{ /* all columns of a macroblock */
|
|
if(i1_pel_pos_y < 0)
|
|
{ /* row(-1) */
|
|
u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */
|
|
}
|
|
else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
|
|
{ /* all rows of a macroblock */
|
|
u1_neighbor_avail = 1; /* current mb availability */
|
|
/* availability of the partition is dependent on the position of the
|
|
* partition inside the mb */
|
|
/* although the availability is declared as 1 in all cases these needs to
|
|
* be corrected somewhere else and this is not done in here */
|
|
}
|
|
else /* if (i1_pel_pos_y >= 16) */
|
|
{ /* rows(+16) */
|
|
u1_neighbor_avail = 0; /* current mb bottom availability */
|
|
}
|
|
}
|
|
else if(i1_pel_pos_x >= 16)
|
|
{ /* column(+16) */
|
|
if(i1_pel_pos_y < 0)
|
|
{ /* row(-1) */
|
|
u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */
|
|
}
|
|
else /* if (i1_pel_pos_y >= 0) */
|
|
{ /* all other rows */
|
|
u1_neighbor_avail = 0; /* current mb right & bottom right availability */
|
|
}
|
|
}
|
|
|
|
return u1_neighbor_avail;
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
*
|
|
* @brief
|
|
* evaluate best intra 16x16 mode (rate distortion opt off)
|
|
*
|
|
* @par Description
|
|
* This function evaluates all the possible intra 16x16 modes and finds the mode
|
|
* that best represents the macro-block (least distortion) and occupies fewer
|
|
* bits in the bit-stream.
|
|
*
|
|
* @param[in] ps_proc_ctxt
|
|
* pointer to process context (handle)
|
|
*
|
|
* @remarks
|
|
* Ideally the cost of encoding a macroblock is calculated as
|
|
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
|
|
* input block and the reconstructed block and rate is the number of bits taken
|
|
* to place the macroblock in the bit-stream. In this routine the rate does not
|
|
* exactly point to the total number of bits it takes, rather it points to
|
|
*header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
|
|
*bits and residual bits fall in to texture bits the number of bits taken to
|
|
*encoding mbtype is considered as rate, we compute cost. Further we will
|
|
*approximate the distortion as the deviation b/w input and the predicted block
|
|
*as opposed to input and reconstructed block.
|
|
*
|
|
* NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
|
|
* the SAD and cost are one and the same.
|
|
*
|
|
* @return none
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
|
|
void isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
|
|
{
|
|
/* Codec Context */
|
|
isvce_codec_t *ps_codec = ps_proc->ps_codec;
|
|
isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
|
|
mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
|
|
|
|
/* SAD(distortion metric) of an 8x8 block */
|
|
WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX;
|
|
|
|
/* lambda */
|
|
UWORD32 u4_lambda = ps_proc->u4_lambda;
|
|
|
|
/* cost = distortion + lambda*rate */
|
|
WORD32 i4_mb_cost = INT_MAX, i4_mb_cost_least = INT_MAX;
|
|
|
|
/* intra mode */
|
|
UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16;
|
|
|
|
/* neighbor pels for intra prediction */
|
|
UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels;
|
|
|
|
/* neighbor availability */
|
|
WORD32 i4_ngbr_avbl;
|
|
|
|
/* pointer to src macro block */
|
|
UWORD8 *pu1_curr_mb = ps_proc->s_src_buf_props.as_component_bufs[0].pv_data;
|
|
UWORD8 *pu1_ref_mb = ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data;
|
|
|
|
/* pointer to prediction macro block */
|
|
UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16;
|
|
UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;
|
|
|
|
/* strides */
|
|
WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
|
|
WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
|
|
WORD32 i4_rec_strd = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
|
|
|
|
/* pointer to neighbors left, top, topleft */
|
|
UWORD8 *pu1_mb_a = pu1_ref_mb - 1;
|
|
UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd;
|
|
UWORD8 *pu1_mb_d = pu1_mb_b - 1;
|
|
UWORD8 u1_mb_a, u1_mb_b, u1_mb_d;
|
|
/* valid intra modes map */
|
|
UWORD32 u4_valid_intra_modes;
|
|
|
|
/* lut for valid intra modes */
|
|
const UWORD8 u1_valid_intra_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15};
|
|
|
|
UWORD32 i, u4_enable_fast_sad = 0, offset = 0;
|
|
isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
|
|
UWORD32 u4_constrained_intra_pred =
|
|
ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
|
|
|
|
if(ps_proc->i4_slice_type != ISLICE)
|
|
{
|
|
/* Offset for MBtype */
|
|
offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23;
|
|
u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
|
|
}
|
|
|
|
/* locating neighbors that are available for prediction */
|
|
|
|
/* gather prediction pels from the neighbors, if particular set is not
|
|
* available it is set to zero*/
|
|
/* left pels */
|
|
u1_mb_a =
|
|
((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
|
|
(u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
|
|
!ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
|
|
: 1));
|
|
if(u1_mb_a)
|
|
{
|
|
for(i = 0; i < 16; i++) pu1_ngbr_pels_i16[16 - 1 - i] = pu1_mb_a[i * i4_rec_strd];
|
|
}
|
|
else
|
|
{
|
|
ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_i16, 0, MB_SIZE);
|
|
}
|
|
/* top pels */
|
|
u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
|
|
(u4_constrained_intra_pred
|
|
? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag)
|
|
: 1));
|
|
if(u1_mb_b)
|
|
{
|
|
ps_mem_fxns->pf_mem_cpy_mul8(pu1_ngbr_pels_i16 + 16 + 1, pu1_mb_b, 16);
|
|
}
|
|
else
|
|
{
|
|
ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_i16 + 16 + 1, 0, MB_SIZE);
|
|
}
|
|
/* topleft pels */
|
|
u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
|
|
(u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra &&
|
|
!ps_top_mb_syn_ele[-1].u1_base_mode_flag)
|
|
: 1));
|
|
if(u1_mb_d)
|
|
{
|
|
pu1_ngbr_pels_i16[16] = *pu1_mb_d;
|
|
}
|
|
else
|
|
{
|
|
pu1_ngbr_pels_i16[16] = 0;
|
|
}
|
|
|
|
i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1);
|
|
ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl;
|
|
|
|
/* set valid intra modes for evaluation */
|
|
u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
|
|
|
|
if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
|
|
ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
|
|
u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
|
|
|
|
/* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
|
|
ps_codec->pf_ih264e_evaluate_intra16x16_modes(
|
|
pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16, i4_src_strd, i4_pred_strd,
|
|
i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least, u4_valid_intra_modes);
|
|
|
|
/* cost = distortion + lambda*rate */
|
|
i4_mb_cost_least = i4_mb_distortion_least;
|
|
|
|
if(((u4_valid_intra_modes >> 3) & 1) != 0)
|
|
{
|
|
/* intra prediction for PLANE mode*/
|
|
(ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](
|
|
pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl);
|
|
|
|
/* evaluate distortion between the actual blk and the estimated blk for the
|
|
* given mode */
|
|
ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](
|
|
pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least,
|
|
&i4_mb_distortion);
|
|
|
|
/* cost = distortion + lambda*rate */
|
|
i4_mb_cost = i4_mb_distortion;
|
|
|
|
/* update the least cost information if necessary */
|
|
if(i4_mb_cost < i4_mb_distortion_least)
|
|
{
|
|
u4_intra_mode = PLANE_I16x16;
|
|
|
|
i4_mb_cost_least = i4_mb_cost;
|
|
i4_mb_distortion_least = i4_mb_distortion;
|
|
}
|
|
}
|
|
|
|
u4_best_intra_16x16_mode = u4_intra_mode;
|
|
|
|
DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode);
|
|
|
|
ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode;
|
|
|
|
/* cost = distortion + lambda*rate */
|
|
i4_mb_cost_least =
|
|
i4_mb_distortion_least + u4_lambda * u1_uev_codelength[offset + u4_best_intra_16x16_mode];
|
|
|
|
/* update the type of the mb if necessary */
|
|
if(i4_mb_cost_least < ps_proc->i4_mb_cost)
|
|
{
|
|
ps_proc->i4_mb_cost = i4_mb_cost_least;
|
|
ps_proc->i4_mb_distortion = i4_mb_distortion_least;
|
|
ps_proc->ps_mb_info->u2_mb_type = I16x16;
|
|
}
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
*
|
|
* @brief
|
|
* evaluate best intra 8x8 mode (rate distortion opt on)
|
|
*
|
|
* @par Description
|
|
* This function evaluates all the possible intra 8x8 modes and finds the mode
|
|
* that best represents the macro-block (least distortion) and occupies fewer
|
|
* bits in the bit-stream.
|
|
*
|
|
* @param[in] ps_proc_ctxt
|
|
* pointer to proc ctxt
|
|
*
|
|
* @remarks Ideally the cost of encoding a macroblock is calculated as
|
|
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
|
|
* input block and the reconstructed block and rate is the number of bits taken
|
|
* to place the macroblock in the bit-stream. In this routine the rate does not
|
|
* exactly point to the total number of bits it takes, rather it points to
|
|
*header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
|
|
*bits and residual bits fall in to texture bits the number of bits taken to
|
|
*encoding mbtype is considered as rate, we compute cost. Further we will
|
|
*approximate the distortion as the deviation b/w input and the predicted block
|
|
*as opposed to input and reconstructed block.
|
|
*
|
|
* NOTE: TODO: This function needs to be tested
|
|
*
|
|
* @return none
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
void isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
|
|
{
|
|
/* Codec Context */
|
|
isvce_codec_t *ps_codec = ps_proc->ps_codec;
|
|
|
|
/* SAD(distortion metric) of an 4x4 block */
|
|
WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX,
|
|
i4_total_distortion = 0;
|
|
|
|
/* lambda */
|
|
UWORD32 u4_lambda = ps_proc->u4_lambda;
|
|
|
|
/* cost = distortion + lambda*rate */
|
|
WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda;
|
|
|
|
/* cost due to mbtype */
|
|
UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
|
|
|
|
/* intra mode */
|
|
UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode;
|
|
|
|
/* neighbor pels for intra prediction */
|
|
UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels;
|
|
|
|
/* pointer to curr partition */
|
|
UWORD8 *pu1_mb_curr;
|
|
|
|
/* pointer to prediction macro block */
|
|
UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
|
|
|
|
/* strides */
|
|
WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
|
|
WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
|
|
|
|
/* neighbors left, top, top right, top left */
|
|
UWORD8 *pu1_mb_a;
|
|
UWORD8 *pu1_mb_b;
|
|
UWORD8 *pu1_mb_d;
|
|
|
|
/* neighbor availability */
|
|
WORD32 i4_ngbr_avbl;
|
|
block_neighbors_t s_ngbr_avbl;
|
|
|
|
/* temp vars */
|
|
UWORD32 b8, u4_pix_x, u4_pix_y;
|
|
UWORD32 u4_constrained_intra_pred =
|
|
ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
|
|
block_neighbors_t s_ngbr_avbl_MB;
|
|
|
|
/* ngbr mb syntax information */
|
|
UWORD8 *pu1_top_mb_intra_modes =
|
|
ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
|
|
isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
|
|
isvce_mb_info_t *ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1;
|
|
/* valid intra modes map */
|
|
UWORD32 u4_valid_intra_modes;
|
|
|
|
if(ps_proc->ps_ngbr_avbl->u1_mb_c)
|
|
{
|
|
ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1;
|
|
}
|
|
/* left pels */
|
|
s_ngbr_avbl_MB.u1_mb_a =
|
|
((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
|
|
(u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
|
|
!ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
|
|
: 1));
|
|
|
|
/* top pels */
|
|
s_ngbr_avbl_MB.u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
|
|
(u4_constrained_intra_pred ? (ps_top_mb_syn_ele->u1_is_intra &&
|
|
!ps_top_mb_syn_ele->u1_base_mode_flag)
|
|
: 1));
|
|
|
|
/* topleft pels */
|
|
s_ngbr_avbl_MB.u1_mb_d =
|
|
((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
|
|
(u4_constrained_intra_pred
|
|
? (ps_top_mb_syn_ele[-1].u1_is_intra && !ps_top_mb_syn_ele[-1].u1_base_mode_flag)
|
|
: 1));
|
|
|
|
/* top right */
|
|
s_ngbr_avbl_MB.u1_mb_c =
|
|
((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
|
|
(u4_constrained_intra_pred ? (ps_top_right_mb_syn_ele->u1_is_intra &&
|
|
!ps_top_right_mb_syn_ele->u1_base_mode_flag)
|
|
: 1));
|
|
|
|
for(b8 = 0; b8 < 4; b8++)
|
|
{
|
|
u4_pix_x = (b8 & 0x01) << 3;
|
|
u4_pix_y = (b8 >> 1) << 3;
|
|
|
|
pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) +
|
|
u4_pix_x + (u4_pix_y * i4_src_strd);
|
|
/* when rdopt is off, we use the input as reference for constructing
|
|
* prediction buffer */
|
|
/* as opposed to using the recon pels. (open loop intra prediction) */
|
|
pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
|
|
pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
|
|
pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
|
|
|
|
/* locating neighbors that are available for prediction */
|
|
/* TODO : update the neighbor availability information basing on constrained
|
|
* intra pred information */
|
|
/* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be
|
|
* split in to distinct routines */
|
|
/* basing on neighbors available and hence evade the computation of neighbor
|
|
* availability totally. */
|
|
s_ngbr_avbl.u1_mb_a = isvce_derive_ngbr_avbl_of_mb_partitions(
|
|
&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */
|
|
s_ngbr_avbl.u1_mb_b = isvce_derive_ngbr_avbl_of_mb_partitions(
|
|
&s_ngbr_avbl_MB, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */
|
|
s_ngbr_avbl.u1_mb_c = isvce_derive_ngbr_avbl_of_mb_partitions(
|
|
&s_ngbr_avbl_MB, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */
|
|
s_ngbr_avbl.u1_mb_d = isvce_derive_ngbr_avbl_of_mb_partitions(
|
|
&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */
|
|
|
|
/* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b *
|
|
* TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d *
|
|
* TOP_LEFT_MB_AVAILABLE_MASK */
|
|
i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
|
|
(s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) +
|
|
(s_ngbr_avbl.u1_mb_a << 4);
|
|
/* if top partition is available and top right is not available for intra
|
|
* prediction, then */
|
|
/* padd top right samples using top sample and make top right also available
|
|
*/
|
|
/* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
|
|
* (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b |
|
|
* s_ngbr_avbl.u1_mb_c) << 3); */
|
|
ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl;
|
|
|
|
ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8,
|
|
i4_src_strd, i4_ngbr_avbl);
|
|
|
|
i4_partition_cost_least = INT_MAX;
|
|
/* set valid intra modes for evaluation */
|
|
u4_valid_intra_modes = 0x1ff;
|
|
|
|
if(!s_ngbr_avbl.u1_mb_b)
|
|
{
|
|
u4_valid_intra_modes &= ~(1 << VERT_I4x4);
|
|
u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4);
|
|
u4_valid_intra_modes &= ~(1 << VERT_L_I4x4);
|
|
}
|
|
if(!s_ngbr_avbl.u1_mb_a)
|
|
{
|
|
u4_valid_intra_modes &= ~(1 << HORZ_I4x4);
|
|
u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4);
|
|
}
|
|
if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d)
|
|
{
|
|
u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4);
|
|
u4_valid_intra_modes &= ~(1 << VERT_R_I4x4);
|
|
u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4);
|
|
}
|
|
|
|
/* estimate the intra 8x8 mode for the current partition (for evaluating
|
|
* cost) */
|
|
if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
|
|
{
|
|
u4_estimated_intra_8x8_mode = DC_I8x8;
|
|
}
|
|
else
|
|
{
|
|
UWORD32 u4_left_intra_8x8_mode = DC_I8x8;
|
|
UWORD32 u4_top_intra_8x8_mode = DC_I8x8;
|
|
|
|
if(u4_pix_x == 0)
|
|
{
|
|
if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
|
|
{
|
|
u4_left_intra_8x8_mode =
|
|
ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1];
|
|
}
|
|
else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
|
|
{
|
|
u4_left_intra_8x8_mode = ps_proc->s_nbr_info.ps_left_mb_intra_modes
|
|
->au1_intra_modes[(b8 + 1) * 4 + 2];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8 - 1];
|
|
}
|
|
|
|
if(u4_pix_y == 0)
|
|
{
|
|
if(ps_top_mb_syn_ele->u2_mb_type == I8x8)
|
|
{
|
|
u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8 + 2];
|
|
}
|
|
else if(ps_top_mb_syn_ele->u2_mb_type == I4x4)
|
|
{
|
|
u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8 + 2) * 4 + 2];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8 - 2];
|
|
}
|
|
|
|
u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode);
|
|
}
|
|
|
|
/* perform intra mode 8x8 evaluation */
|
|
for(u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0;
|
|
u4_intra_mode++, u4_valid_intra_modes >>= 1)
|
|
{
|
|
if((u4_valid_intra_modes & 1) == 0) continue;
|
|
|
|
/* intra prediction */
|
|
(ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0,
|
|
i4_pred_strd, i4_ngbr_avbl);
|
|
|
|
/* evaluate distortion between the actual blk and the estimated blk for
|
|
* the given mode */
|
|
ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd,
|
|
i4_partition_cost_least, &i4_partition_distortion);
|
|
|
|
i4_partition_cost =
|
|
i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)
|
|
? u4_cost_one_bit
|
|
: u4_cost_four_bits);
|
|
|
|
/* update the least cost information if necessary */
|
|
if(i4_partition_cost < i4_partition_cost_least)
|
|
{
|
|
i4_partition_cost_least = i4_partition_cost;
|
|
i4_partition_distortion_least = i4_partition_distortion;
|
|
u4_best_intra_8x8_mode = u4_intra_mode;
|
|
}
|
|
}
|
|
/* macroblock distortion */
|
|
i4_total_cost += i4_partition_cost_least;
|
|
i4_total_distortion += i4_partition_distortion_least;
|
|
/* mb partition mode */
|
|
ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode;
|
|
}
|
|
|
|
/* update the type of the mb if necessary */
|
|
if(i4_total_cost < ps_proc->i4_mb_cost)
|
|
{
|
|
ps_proc->i4_mb_cost = i4_total_cost;
|
|
ps_proc->i4_mb_distortion = i4_total_distortion;
|
|
ps_proc->ps_mb_info->u2_mb_type = I8x8;
|
|
}
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
*
|
|
* @brief
|
|
* evaluate best intra 4x4 mode (rate distortion opt off)
|
|
*
|
|
* @par Description
|
|
* This function evaluates all the possible intra 4x4 modes and finds the mode
|
|
* that best represents the macro-block (least distortion) and occupies fewer
|
|
* bits in the bit-stream.
|
|
*
|
|
* @param[in] ps_proc_ctxt
|
|
* pointer to proc ctxt
|
|
*
|
|
* @remarks
|
|
* Ideally the cost of encoding a macroblock is calculated as
|
|
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
|
|
* input block and the reconstructed block and rate is the number of bits taken
|
|
* to place the macroblock in the bit-stream. In this routine the rate does not
|
|
* exactly point to the total number of bits it takes, rather it points to
|
|
*header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
|
|
*bits and residual bits fall in to texture bits the number of bits taken to
|
|
*encoding mbtype is considered as rate, we compute cost. Further we will
|
|
*approximate the distortion as the deviation b/w input and the predicted block
|
|
*as opposed to input and reconstructed block.
|
|
*
|
|
* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
|
|
* 24*lambda is added to the SAD before comparison with the best SAD for
|
|
* inter prediction. This is an empirical value to prevent using too many intra
|
|
* blocks.
|
|
*
|
|
* @return none
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
void isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
|
|
{
|
|
/* Codec Context */
|
|
isvce_codec_t *ps_codec = ps_proc->ps_codec;
|
|
|
|
/* SAD(distortion metric) of an 4x4 block */
|
|
WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
|
|
|
|
/* lambda */
|
|
UWORD32 u4_lambda = ps_proc->u4_lambda;
|
|
|
|
/* cost = distortion + lambda*rate */
|
|
WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
|
|
|
|
/* cost due to mbtype */
|
|
UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
|
|
|
|
/* intra mode */
|
|
UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
|
|
|
|
/* neighbor pels for intra prediction */
|
|
UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
|
|
|
|
/* pointer to curr partition */
|
|
UWORD8 *pu1_mb_curr;
|
|
|
|
/* pointer to prediction macro block */
|
|
UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
|
|
|
|
/* strides */
|
|
WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
|
|
WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
|
|
|
|
/* neighbors left, top, top right, top left */
|
|
UWORD8 *pu1_mb_a;
|
|
UWORD8 *pu1_mb_b;
|
|
UWORD8 *pu1_mb_c;
|
|
UWORD8 *pu1_mb_d;
|
|
|
|
/* neighbor availability */
|
|
WORD32 i4_ngbr_avbl;
|
|
block_neighbors_t s_ngbr_avbl;
|
|
|
|
/* temp vars */
|
|
UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
|
|
|
|
/* ngbr sub mb modes */
|
|
UWORD8 *pu1_top_mb_intra_modes =
|
|
ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
|
|
isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
|
|
isvce_mb_info_t *ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1;
|
|
|
|
/* valid intra modes map */
|
|
UWORD32 u4_valid_intra_modes;
|
|
UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
|
|
|
|
UWORD32 u4_constrained_intra_pred =
|
|
ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
|
|
UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
|
|
if(ps_proc->ps_ngbr_avbl->u1_mb_c)
|
|
{
|
|
ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1;
|
|
}
|
|
/* left pels */
|
|
u1_mb_a =
|
|
((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
|
|
(u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
|
|
!ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
|
|
: 1));
|
|
|
|
/* top pels */
|
|
u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
|
|
(u4_constrained_intra_pred
|
|
? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag)
|
|
: 1));
|
|
|
|
/* topleft pels */
|
|
u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
|
|
(u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra &&
|
|
!ps_top_mb_syn_ele[-1].u1_base_mode_flag)
|
|
: 1));
|
|
|
|
/* top right */
|
|
u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
|
|
(u4_constrained_intra_pred ? (ps_top_right_mb_syn_ele->u1_is_intra &&
|
|
!ps_top_right_mb_syn_ele->u1_base_mode_flag)
|
|
: 1));
|
|
|
|
i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
|
|
memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
|
|
|
|
for(b8 = 0; b8 < 4; b8++)
|
|
{
|
|
u4_blk_x = (b8 & 0x01) << 3;
|
|
u4_blk_y = (b8 >> 1) << 3;
|
|
for(b4 = 0; b4 < 4; b4++)
|
|
{
|
|
u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
|
|
u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
|
|
|
|
pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) +
|
|
u4_pix_x + (u4_pix_y * i4_src_strd);
|
|
/* when rdopt is off, we use the input as reference for constructing
|
|
* prediction buffer */
|
|
/* as opposed to using the recon pels. (open loop intra prediction) */
|
|
pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
|
|
pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
|
|
pu1_mb_c = pu1_mb_b + 4; /* pointer to top macro block */
|
|
pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
|
|
|
|
/* locating neighbors that are available for prediction */
|
|
/* TODO : update the neighbor availability information basing on
|
|
* constrained intra pred information */
|
|
/* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be
|
|
* split in to distinct routines */
|
|
/* basing on neighbors available and hence evade the computation of
|
|
* neighbor availability totally. */
|
|
|
|
i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
|
|
s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
|
|
s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
|
|
s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
|
|
s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
|
|
/* set valid intra modes for evaluation */
|
|
u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
|
|
|
|
/* if top partition is available and top right is not available for intra
|
|
* prediction, then */
|
|
/* padd top right samples using top sample and make top right also
|
|
* available */
|
|
/* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
|
|
* (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b |
|
|
* s_ngbr_avbl.u1_mb_c) << 3); */
|
|
|
|
/* gather prediction pels from the neighbors */
|
|
if(s_ngbr_avbl.u1_mb_a)
|
|
{
|
|
for(i = 0; i < 4; i++) pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_src_strd];
|
|
}
|
|
else
|
|
{
|
|
memset(pu1_ngbr_pels_i4, 0, 4);
|
|
}
|
|
|
|
if(s_ngbr_avbl.u1_mb_b)
|
|
{
|
|
memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
|
|
}
|
|
else
|
|
{
|
|
memset(pu1_ngbr_pels_i4 + 5, 0, 4);
|
|
}
|
|
|
|
if(s_ngbr_avbl.u1_mb_d)
|
|
pu1_ngbr_pels_i4[4] = *pu1_mb_d;
|
|
else
|
|
pu1_ngbr_pels_i4[4] = 0;
|
|
|
|
if(s_ngbr_avbl.u1_mb_c)
|
|
{
|
|
memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
|
|
}
|
|
else if(s_ngbr_avbl.u1_mb_b)
|
|
{
|
|
memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
|
|
s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
|
|
}
|
|
|
|
i4_partition_cost_least = INT_MAX;
|
|
|
|
/* predict the intra 4x4 mode for the current partition (for evaluating
|
|
* cost) */
|
|
if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
|
|
{
|
|
u4_estimated_intra_4x4_mode = DC_I4x4;
|
|
}
|
|
else
|
|
{
|
|
UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
|
|
UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
|
|
|
|
if(u4_pix_x == 0)
|
|
{
|
|
if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
|
|
{
|
|
u4_left_intra_4x4_mode =
|
|
ps_proc->s_nbr_info.ps_left_mb_intra_modes
|
|
->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]];
|
|
}
|
|
else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
|
|
{
|
|
u4_left_intra_4x4_mode =
|
|
ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
u4_left_intra_4x4_mode =
|
|
ps_proc->au1_intra_luma_mb_4x4_modes
|
|
[gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]];
|
|
}
|
|
|
|
if(u4_pix_y == 0)
|
|
{
|
|
if(ps_top_mb_syn_ele->u2_mb_type == I4x4)
|
|
{
|
|
u4_top_intra_4x4_mode =
|
|
pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]];
|
|
}
|
|
else if(ps_top_mb_syn_ele->u2_mb_type == I8x8)
|
|
{
|
|
u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
u4_top_intra_4x4_mode =
|
|
ps_proc->au1_intra_luma_mb_4x4_modes
|
|
[gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]];
|
|
}
|
|
|
|
u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
|
|
}
|
|
|
|
ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] =
|
|
u4_estimated_intra_4x4_mode;
|
|
|
|
/* mode evaluation and prediction */
|
|
ps_codec->pf_ih264e_evaluate_intra_4x4_modes(
|
|
pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl,
|
|
&u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda,
|
|
u4_estimated_intra_4x4_mode);
|
|
|
|
i4_partition_distortion_least =
|
|
i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)
|
|
? u4_cost_one_bit
|
|
: u4_cost_four_bits);
|
|
|
|
DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least,
|
|
u4_best_intra_4x4_mode);
|
|
/* macroblock distortion */
|
|
i4_total_distortion += i4_partition_distortion_least;
|
|
i4_total_cost += i4_partition_cost_least;
|
|
/* mb partition mode */
|
|
ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
|
|
}
|
|
}
|
|
|
|
/* update the type of the mb if necessary */
|
|
if(i4_total_cost < ps_proc->i4_mb_cost)
|
|
{
|
|
ps_proc->i4_mb_cost = i4_total_cost;
|
|
ps_proc->i4_mb_distortion = i4_total_distortion;
|
|
ps_proc->ps_mb_info->u2_mb_type = I4x4;
|
|
}
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
*
|
|
* @brief evaluate best intra 4x4 mode (rate distortion opt on)
|
|
*
|
|
* @par Description
|
|
* This function evaluates all the possible intra 4x4 modes and finds the mode
|
|
* that best represents the macro-block (least distortion) and occupies fewer
|
|
* bits in the bit-stream.
|
|
*
|
|
* @param[in] ps_proc_ctxt
|
|
* pointer to proc ctxt
|
|
*
|
|
* @remarks
|
|
* Ideally the cost of encoding a macroblock is calculated as
|
|
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
|
|
* input block and the reconstructed block and rate is the number of bits taken
|
|
* to place the macroblock in the bit-stream. In this routine the rate does not
|
|
* exactly point to the total number of bits it takes, rather it points to
|
|
*header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
|
|
*bits and residual bits fall in to texture bits the number of bits taken to
|
|
*encoding mbtype is considered as rate, we compute cost. Further we will
|
|
*approximate the distortion as the deviation b/w input and the predicted block
|
|
*as opposed to input and reconstructed block.
|
|
*
|
|
* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
|
|
* 24*lambda is added to the SAD before comparison with the best SAD for
|
|
* inter prediction. This is an empirical value to prevent using too many intra
|
|
* blocks.
|
|
*
|
|
* @return none
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
void isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t *ps_proc)
|
|
{
|
|
block_neighbors_t s_ngbr_avbl;
|
|
buffer_container_t s_src;
|
|
buffer_container_t s_pred;
|
|
buffer_container_t s_recon;
|
|
buffer_container_t s_quant_coeffs;
|
|
buffer_container_t s_res_pred;
|
|
|
|
/* neighbors left, top, top right, top left */
|
|
UWORD8 *pu1_mb_a;
|
|
UWORD8 *pu1_mb_b;
|
|
UWORD8 *pu1_mb_c;
|
|
UWORD8 *pu1_mb_d;
|
|
UWORD8 *pu1_mb_curr;
|
|
UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top;
|
|
UWORD8 *pu1_ref_mb_intra_4x4;
|
|
WORD32 i4_ref_strd_left, i4_ref_strd_top;
|
|
WORD32 i4_ngbr_avbl;
|
|
UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
|
|
/* valid intra modes map */
|
|
UWORD32 u4_valid_intra_modes;
|
|
/* Dummy variable for 4x4 trans function */
|
|
WORD16 i2_dc_dummy;
|
|
UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
|
|
|
|
isvce_codec_t *ps_codec = ps_proc->ps_codec;
|
|
quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
|
|
isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
|
|
isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
|
|
isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1;
|
|
isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
|
|
enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
|
|
resi_trans_quant_constants_t s_resi_trans_quant_constants = {
|
|
.pu2_scale_matrix = ps_qp_params->pu2_scale_mat,
|
|
.pu2_threshold_matrix = ps_qp_params->pu2_thres_mat,
|
|
.u4_qbits = ps_qp_params->u1_qbits,
|
|
.u4_round_factor = ps_qp_params->u4_dead_zone};
|
|
iq_it_res_rec_constants_t s_iq_it_res_rec_constants = {
|
|
.pu2_iscal_mat = ps_qp_params->pu2_iscale_mat,
|
|
.pu2_weigh_mat = ps_qp_params->pu2_weigh_mat,
|
|
.u4_qp_div_6 = ps_qp_params->u1_qp_div};
|
|
|
|
const UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
|
|
WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
|
|
UWORD32 u4_lambda = ps_proc->u4_lambda;
|
|
WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
|
|
/* cost due to mbtype */
|
|
UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
|
|
UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
|
|
UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
|
|
WORD16 *pi2_quant_coeffs = ps_proc->pi2_res_buf_intra_4x4;
|
|
UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
|
|
WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
|
|
WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
|
|
UWORD8 *pu1_nnz = (UWORD8 *) ps_proc->au4_nnz_intra_4x4;
|
|
UWORD8 *pu1_top_mb_intra_modes =
|
|
ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
|
|
UWORD32 u4_constrained_intra_pred =
|
|
ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
|
|
UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(0);
|
|
UWORD8 u1_iq_it_recon_fxn_idx = isvc_get_iq_it_recon_variant_idx(1, 0);
|
|
|
|
s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[Y];
|
|
|
|
/* compute ngbr availability for sub blks */
|
|
if(ps_proc->ps_ngbr_avbl->u1_mb_c)
|
|
{
|
|
ps_top_right_mb = ps_top_mb + 1;
|
|
}
|
|
|
|
/* left pels */
|
|
u1_mb_a =
|
|
((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
|
|
(u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
|
|
!ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
|
|
: 1));
|
|
|
|
/* top pels */
|
|
u1_mb_b =
|
|
((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
|
|
(u4_constrained_intra_pred ? (ps_top_mb->u1_is_intra && !ps_top_mb->u1_base_mode_flag)
|
|
: 1));
|
|
|
|
/* topleft pels */
|
|
u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
|
|
(u4_constrained_intra_pred
|
|
? (ps_top_left_mb->u1_is_intra && !ps_top_left_mb->u1_base_mode_flag)
|
|
: 1));
|
|
|
|
/* top right pels */
|
|
u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
|
|
(u4_constrained_intra_pred
|
|
? (ps_top_right_mb->u1_is_intra && !ps_top_right_mb->u1_base_mode_flag)
|
|
: 1));
|
|
|
|
i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
|
|
memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
|
|
|
|
for(b8 = 0; b8 < 4; b8++)
|
|
{
|
|
u4_blk_x = (b8 & 0x01) << 3;
|
|
u4_blk_y = (b8 >> 1) << 3;
|
|
for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_quant_coeffs += MB_SIZE)
|
|
{
|
|
u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
|
|
u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
|
|
|
|
pu1_ref_mb_intra_4x4 =
|
|
ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd);
|
|
pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) +
|
|
u4_pix_x + (u4_pix_y * i4_src_strd);
|
|
pu1_pred_mb = ps_proc->pu1_pred_mb + u4_pix_x + (u4_pix_y * i4_pred_strd);
|
|
if(u4_pix_x == 0)
|
|
{
|
|
i4_ref_strd_left = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
|
|
pu1_mb_ref_left =
|
|
((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x +
|
|
(u4_pix_y * i4_ref_strd_left);
|
|
}
|
|
else
|
|
{
|
|
i4_ref_strd_left = i4_pred_strd;
|
|
pu1_mb_ref_left = pu1_ref_mb_intra_4x4;
|
|
}
|
|
if(u4_pix_y == 0)
|
|
{
|
|
i4_ref_strd_top = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
|
|
pu1_mb_ref_top =
|
|
((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x +
|
|
(u4_pix_y * i4_ref_strd_top);
|
|
}
|
|
else
|
|
{
|
|
i4_ref_strd_top = i4_pred_strd;
|
|
pu1_mb_ref_top = pu1_ref_mb_intra_4x4;
|
|
}
|
|
|
|
pu1_mb_a = pu1_mb_ref_left - 1; /* pointer to left macro block */
|
|
pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top macro block */
|
|
pu1_mb_c = pu1_mb_b + 4; /* pointer to top right macro block */
|
|
if(u4_pix_y == 0)
|
|
pu1_mb_d = pu1_mb_b - 1;
|
|
else
|
|
pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left macro block */
|
|
|
|
/* locating neighbors that are available for prediction */
|
|
/* TODO : update the neighbor availability information basing on
|
|
* constrained intra pred information */
|
|
/* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be
|
|
* split in to distinct routines */
|
|
/* basing on neighbors available and hence evade the computation of
|
|
* neighbor availability totally. */
|
|
|
|
i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
|
|
s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
|
|
s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
|
|
s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
|
|
s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
|
|
/* set valid intra modes for evaluation */
|
|
u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
|
|
|
|
/* if top partition is available and top right is not available for intra
|
|
* prediction, then */
|
|
/* padd top right samples using top sample and make top right also
|
|
* available */
|
|
/* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
|
|
* (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b |
|
|
* s_ngbr_avbl.u1_mb_c) << 3); */
|
|
|
|
/* gather prediction pels from the neighbors */
|
|
if(s_ngbr_avbl.u1_mb_a)
|
|
{
|
|
for(i = 0; i < 4; i++) pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_ref_strd_left];
|
|
}
|
|
else
|
|
{
|
|
memset(pu1_ngbr_pels_i4, 0, 4);
|
|
}
|
|
if(s_ngbr_avbl.u1_mb_b)
|
|
{
|
|
memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
|
|
}
|
|
else
|
|
{
|
|
memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4);
|
|
}
|
|
if(s_ngbr_avbl.u1_mb_d)
|
|
pu1_ngbr_pels_i4[4] = *pu1_mb_d;
|
|
else
|
|
pu1_ngbr_pels_i4[4] = 0;
|
|
if(s_ngbr_avbl.u1_mb_c)
|
|
{
|
|
memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
|
|
}
|
|
else if(s_ngbr_avbl.u1_mb_b)
|
|
{
|
|
memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
|
|
s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
|
|
}
|
|
|
|
i4_partition_cost_least = INT_MAX;
|
|
|
|
/* predict the intra 4x4 mode for the current partition (for evaluating
|
|
* cost) */
|
|
if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
|
|
{
|
|
u4_estimated_intra_4x4_mode = DC_I4x4;
|
|
}
|
|
else
|
|
{
|
|
UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
|
|
UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
|
|
|
|
if(u4_pix_x == 0)
|
|
{
|
|
if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
|
|
{
|
|
u4_left_intra_4x4_mode =
|
|
ps_proc->s_nbr_info.ps_left_mb_intra_modes
|
|
->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]];
|
|
}
|
|
else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
|
|
{
|
|
u4_left_intra_4x4_mode =
|
|
ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
u4_left_intra_4x4_mode =
|
|
ps_proc->au1_intra_luma_mb_4x4_modes
|
|
[gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]];
|
|
}
|
|
|
|
if(u4_pix_y == 0)
|
|
{
|
|
if(ps_top_mb->u2_mb_type == I4x4)
|
|
{
|
|
u4_top_intra_4x4_mode =
|
|
pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]];
|
|
}
|
|
else if(ps_top_mb->u2_mb_type == I8x8)
|
|
{
|
|
u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
u4_top_intra_4x4_mode =
|
|
ps_proc->au1_intra_luma_mb_4x4_modes
|
|
[gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]];
|
|
}
|
|
|
|
u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
|
|
}
|
|
|
|
ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] =
|
|
u4_estimated_intra_4x4_mode;
|
|
|
|
/*mode evaluation and prediction*/
|
|
ps_codec->pf_ih264e_evaluate_intra_4x4_modes(
|
|
pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl,
|
|
&u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda,
|
|
u4_estimated_intra_4x4_mode);
|
|
|
|
i4_partition_distortion_least =
|
|
i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)
|
|
? u4_cost_one_bit
|
|
: u4_cost_four_bits);
|
|
|
|
DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least,
|
|
u4_best_intra_4x4_mode);
|
|
|
|
/* macroblock distortion */
|
|
i4_total_distortion += i4_partition_distortion_least;
|
|
i4_total_cost += i4_partition_cost_least;
|
|
|
|
/* mb partition mode */
|
|
ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
|
|
|
|
/********************************************************/
|
|
/* error estimation, */
|
|
/* transform */
|
|
/* quantization */
|
|
/********************************************************/
|
|
s_src.pv_data = pu1_mb_curr;
|
|
s_src.i4_data_stride = i4_src_strd;
|
|
|
|
s_pred.pv_data = pu1_pred_mb;
|
|
s_pred.i4_data_stride = i4_pred_strd;
|
|
|
|
s_quant_coeffs.pv_data = pi2_quant_coeffs;
|
|
s_quant_coeffs.i4_data_stride = 4;
|
|
|
|
ps_enc_loop_fxns->apf_resi_trans_quant_4x4[u1_resi_trans_fxn_idx](
|
|
&s_src, &s_pred, &s_quant_coeffs, &s_res_pred,
|
|
/* No op stride, this implies a buff of lenght 1x16 */
|
|
&s_resi_trans_quant_constants, pu1_nnz, &i2_dc_dummy, 0);
|
|
|
|
/********************************************************/
|
|
/* ierror estimation, */
|
|
/* itransform */
|
|
/* iquantization */
|
|
/********************************************************/
|
|
|
|
/* Tx blk coeffs are stored blk by blk */
|
|
/* Hence, in order to access rows of each Tx blk, one needs to stride of
|
|
* TxxSize */
|
|
s_quant_coeffs.i4_data_stride = 4;
|
|
|
|
s_recon.pv_data = pu1_ref_mb_intra_4x4;
|
|
s_recon.i4_data_stride = i4_pred_strd;
|
|
|
|
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[u1_iq_it_recon_fxn_idx](
|
|
&s_quant_coeffs, &s_pred, &s_res_pred, &s_res_pred, &s_recon,
|
|
&s_iq_it_res_rec_constants, ps_proc->pv_scratch_buff, s_quant_coeffs.pv_data, 0, 0);
|
|
}
|
|
}
|
|
|
|
/* update the type of the mb if necessary */
|
|
if(i4_total_cost < ps_proc->i4_mb_cost)
|
|
{
|
|
ps_proc->i4_mb_cost = i4_total_cost;
|
|
ps_proc->i4_mb_distortion = i4_total_distortion;
|
|
ps_proc->ps_mb_info->u2_mb_type = I4x4;
|
|
}
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
*
|
|
* @brief
|
|
* evaluate best chroma intra 8x8 mode (rate distortion opt off)
|
|
*
|
|
* @par Description
|
|
* This function evaluates all the possible chroma intra 8x8 modes and finds
|
|
* the mode that best represents the macroblock (least distortion) and occupies
|
|
* fewer bits in the bitstream.
|
|
*
|
|
* @param[in] ps_proc_ctxt
|
|
* pointer to macroblock context (handle)
|
|
*
|
|
* @remarks
|
|
* For chroma best intra pred mode is calculated based only on SAD
|
|
*
|
|
* @returns none
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
|
|
void isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
|
|
{
|
|
/* Codec Context */
|
|
isvce_codec_t *ps_codec = ps_proc->ps_codec;
|
|
isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
|
|
mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
|
|
|
|
/* SAD(distortion metric) of an 8x8 block */
|
|
WORD32 i4_mb_distortion, i4_chroma_mb_distortion;
|
|
|
|
/* intra mode */
|
|
UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8;
|
|
|
|
/* neighbor pels for intra prediction */
|
|
UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels;
|
|
|
|
/* pointer to curr macro block */
|
|
UWORD8 *pu1_curr_mb = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data);
|
|
UWORD8 *pu1_ref_mb = ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data);
|
|
|
|
/* pointer to prediction macro block */
|
|
UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
|
|
UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
|
|
|
|
/* strides */
|
|
WORD32 i4_src_strd_c = ps_proc->s_src_buf_props.as_component_bufs[1].i4_data_stride;
|
|
WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
|
|
WORD32 i4_rec_strd_c = ps_proc->s_rec_buf_props.as_component_bufs[1].i4_data_stride;
|
|
|
|
/* neighbors left, top, top left */
|
|
UWORD8 *pu1_mb_a = pu1_ref_mb - 2;
|
|
UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c;
|
|
UWORD8 *pu1_mb_d = pu1_mb_b - 2;
|
|
|
|
/* neighbor availability */
|
|
const UWORD8 u1_valid_intra_modes[8] = {1, 3, 1, 3, 5, 7, 5, 15};
|
|
WORD32 i4_ngbr_avbl;
|
|
|
|
/* valid intra modes map */
|
|
UWORD32 u4_valid_intra_modes;
|
|
isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
|
|
|
|
/* temp var */
|
|
UWORD8 i;
|
|
UWORD32 u4_constrained_intra_pred =
|
|
ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
|
|
UWORD8 u1_mb_a, u1_mb_b, u1_mb_d;
|
|
/* locating neighbors that are available for prediction */
|
|
|
|
/* gather prediction pels from the neighbors */
|
|
/* left pels */
|
|
u1_mb_a =
|
|
((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
|
|
(u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
|
|
!ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
|
|
: 1));
|
|
if(u1_mb_a)
|
|
{
|
|
for(i = 0; i < 16; i += 2)
|
|
{
|
|
pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c];
|
|
pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE);
|
|
}
|
|
|
|
/* top pels */
|
|
u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
|
|
(u4_constrained_intra_pred
|
|
? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag)
|
|
: 1));
|
|
if(u1_mb_b)
|
|
{
|
|
ps_mem_fxns->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16);
|
|
}
|
|
else
|
|
{
|
|
ps_mem_fxns->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE);
|
|
}
|
|
|
|
/* top left pels */
|
|
u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
|
|
(u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra &&
|
|
!ps_top_mb_syn_ele[-1].u1_base_mode_flag)
|
|
: 1));
|
|
if(u1_mb_d)
|
|
{
|
|
pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d;
|
|
pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1);
|
|
}
|
|
i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1);
|
|
ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl;
|
|
|
|
u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
|
|
|
|
if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
|
|
ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
|
|
u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8);
|
|
|
|
i4_chroma_mb_distortion = INT_MAX;
|
|
|
|
/* perform intra mode chroma 8x8 evaluation */
|
|
/* intra prediction */
|
|
ps_codec->pf_ih264e_evaluate_intra_chroma_modes(
|
|
pu1_curr_mb, pu1_ngbr_pels_c_i8x8, pu1_pred_mb, i4_src_strd_c, i4_pred_strd, i4_ngbr_avbl,
|
|
&u4_best_chroma_intra_8x8_mode, &i4_chroma_mb_distortion, u4_valid_intra_modes);
|
|
|
|
if(u4_valid_intra_modes & 8) /* if Chroma PLANE is valid*/
|
|
{
|
|
(ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0,
|
|
i4_pred_strd, i4_ngbr_avbl);
|
|
|
|
/* evaluate distortion(sad) */
|
|
ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd,
|
|
i4_chroma_mb_distortion, &i4_mb_distortion);
|
|
|
|
/* update the least distortion information if necessary */
|
|
if(i4_mb_distortion < i4_chroma_mb_distortion)
|
|
{
|
|
i4_chroma_mb_distortion = i4_mb_distortion;
|
|
u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8;
|
|
}
|
|
}
|
|
|
|
DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion,
|
|
u4_best_chroma_intra_8x8_mode);
|
|
|
|
ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode;
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
*
|
|
* @brief
|
|
* Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
|
|
* prediction.
|
|
*
|
|
* @par Description
|
|
* This function evaluates first three 16x16 modes and compute corresponding sad
|
|
* and return the buffer predicted with best mode.
|
|
*
|
|
* @param[in] pu1_src
|
|
* UWORD8 pointer to the source
|
|
*
|
|
* @param[in] pu1_ngbr_pels_i16
|
|
* UWORD8 pointer to neighbouring pels
|
|
*
|
|
* @param[out] pu1_dst
|
|
* UWORD8 pointer to the destination
|
|
*
|
|
* @param[in] src_strd
|
|
* integer source stride
|
|
*
|
|
* @param[in] dst_strd
|
|
* integer destination stride
|
|
*
|
|
* @param[in] u4_n_avblty
|
|
* availability of neighbouring pixels
|
|
*
|
|
* @param[in] u4_intra_mode
|
|
* Pointer to the variable in which best mode is returned
|
|
*
|
|
* @param[in] pu4_sadmin
|
|
* Pointer to the variable in which minimum sad is returned
|
|
*
|
|
* @param[in] u4_valid_intra_modes
|
|
* Says what all modes are valid
|
|
*
|
|
* @returns none
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
void isvce_evaluate_intra16x16_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16, UWORD8 *pu1_dst,
|
|
UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
|
|
UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
|
|
UWORD32 u4_valid_intra_modes)
|
|
{
|
|
UWORD8 *pu1_neighbour;
|
|
UWORD8 *pu1_src_temp = pu1_src;
|
|
UWORD8 left = 0, top = 0;
|
|
WORD32 u4_dcval = 0;
|
|
WORD32 i, j;
|
|
WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX;
|
|
UWORD8 val;
|
|
|
|
left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
|
|
top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
|
|
|
|
/* left available */
|
|
if(left)
|
|
{
|
|
i4_sad_horz = 0;
|
|
|
|
for(i = 0; i < 16; i++)
|
|
{
|
|
val = pu1_ngbr_pels_i16[15 - i];
|
|
|
|
u4_dcval += val;
|
|
|
|
for(j = 0; j < 16; j++)
|
|
{
|
|
i4_sad_horz += ABS(val - pu1_src_temp[j]);
|
|
}
|
|
|
|
pu1_src_temp += src_strd;
|
|
}
|
|
u4_dcval += 8;
|
|
}
|
|
|
|
pu1_src_temp = pu1_src;
|
|
/* top available */
|
|
if(top)
|
|
{
|
|
i4_sad_vert = 0;
|
|
|
|
for(i = 0; i < 16; i++)
|
|
{
|
|
u4_dcval += pu1_ngbr_pels_i16[17 + i];
|
|
|
|
for(j = 0; j < 16; j++)
|
|
{
|
|
i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]);
|
|
}
|
|
pu1_src_temp += src_strd;
|
|
}
|
|
u4_dcval += 8;
|
|
}
|
|
|
|
u4_dcval = (u4_dcval) >> (3 + left + top);
|
|
|
|
pu1_src_temp = pu1_src;
|
|
|
|
/* none available */
|
|
u4_dcval += (left == 0) * (top == 0) * 128;
|
|
|
|
i4_sad_dc = 0;
|
|
|
|
for(i = 0; i < 16; i++)
|
|
{
|
|
for(j = 0; j < 16; j++)
|
|
{
|
|
i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]);
|
|
}
|
|
pu1_src_temp += src_strd;
|
|
}
|
|
|
|
if((u4_valid_intra_modes & 04) == 0) /* If DC is disabled */
|
|
i4_sad_dc = INT_MAX;
|
|
|
|
if((u4_valid_intra_modes & 01) == 0) /* If VERT is disabled */
|
|
i4_sad_vert = INT_MAX;
|
|
|
|
if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled */
|
|
i4_sad_horz = INT_MAX;
|
|
|
|
i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
|
|
|
|
/* Finding Minimum sad and doing corresponding prediction */
|
|
if(i4_min_sad < *pu4_sadmin)
|
|
{
|
|
*pu4_sadmin = i4_min_sad;
|
|
if(i4_min_sad == i4_sad_vert)
|
|
{
|
|
*u4_intra_mode = VERT_I16x16;
|
|
pu1_neighbour = pu1_ngbr_pels_i16 + 17;
|
|
for(j = 0; j < 16; j++)
|
|
{
|
|
memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
|
|
pu1_dst += dst_strd;
|
|
}
|
|
}
|
|
else if(i4_min_sad == i4_sad_horz)
|
|
{
|
|
*u4_intra_mode = HORZ_I16x16;
|
|
for(j = 0; j < 16; j++)
|
|
{
|
|
val = pu1_ngbr_pels_i16[15 - j];
|
|
memset(pu1_dst, val, MB_SIZE);
|
|
pu1_dst += dst_strd;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*u4_intra_mode = DC_I16x16;
|
|
for(j = 0; j < 16; j++)
|
|
{
|
|
memset(pu1_dst, u4_dcval, MB_SIZE);
|
|
pu1_dst += dst_strd;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
*
|
|
* @brief
|
|
* Evaluate best intra 4x4 mode and perform prediction.
|
|
*
|
|
* @par Description
|
|
* This function evaluates 4x4 modes and compute corresponding sad
|
|
* and return the buffer predicted with best mode.
|
|
*
|
|
* @param[in] pu1_src
|
|
* UWORD8 pointer to the source
|
|
*
|
|
* @param[in] pu1_ngbr_pels
|
|
* UWORD8 pointer to neighbouring pels
|
|
*
|
|
* @param[out] pu1_dst
|
|
* UWORD8 pointer to the destination
|
|
*
|
|
* @param[in] src_strd
|
|
* integer source stride
|
|
*
|
|
* @param[in] dst_strd
|
|
* integer destination stride
|
|
*
|
|
* @param[in] u4_n_avblty
|
|
* availability of neighbouring pixels
|
|
*
|
|
* @param[in] u4_intra_mode
|
|
* Pointer to the variable in which best mode is returned
|
|
*
|
|
* @param[in] pu4_sadmin
|
|
* Pointer to the variable in which minimum cost is returned
|
|
*
|
|
* @param[in] u4_valid_intra_modes
|
|
* Says what all modes are valid
|
|
*
|
|
* @param[in] u4_lambda
|
|
* Lamda value for computing cost from SAD
|
|
*
|
|
* @param[in] u4_predictd_mode
|
|
* Predicted mode for cost computation
|
|
*
|
|
* @returns none
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
void isvce_evaluate_intra_4x4_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst,
|
|
UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
|
|
UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
|
|
UWORD32 u4_valid_intra_modes, UWORD32 u4_lambda,
|
|
UWORD32 u4_predictd_mode)
|
|
{
|
|
UWORD8 *pu1_src_temp = pu1_src;
|
|
UWORD8 *pu1_pred = pu1_ngbr_pels;
|
|
UWORD8 left = 0, top = 0;
|
|
UWORD8 u1_pred_val = 0;
|
|
UWORD8 u1_pred_vals[4] = {0};
|
|
UWORD8 *pu1_pred_val = NULL;
|
|
/* To store FILT121 operated values*/
|
|
UWORD8 u1_pred_vals_diag_121[15] = {0};
|
|
/* To store FILT11 operated values*/
|
|
UWORD8 u1_pred_vals_diag_11[15] = {0};
|
|
UWORD8 u1_pred_vals_vert_r[8] = {0};
|
|
UWORD8 u1_pred_vals_horz_d[10] = {0};
|
|
UWORD8 u1_pred_vals_horz_u[10] = {0};
|
|
WORD32 u4_dcval = 0;
|
|
WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
|
|
INT_MAX, INT_MAX, INT_MAX, INT_MAX};
|
|
|
|
WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
|
|
INT_MAX, INT_MAX, INT_MAX, INT_MAX};
|
|
WORD32 i, i4_min_cost = INT_MAX;
|
|
|
|
left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
|
|
top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
|
|
|
|
/* Computing SAD */
|
|
|
|
/* VERT mode valid */
|
|
if(u4_valid_intra_modes & 1)
|
|
{
|
|
pu1_pred = pu1_ngbr_pels + 5;
|
|
i4_sad[VERT_I4x4] = 0;
|
|
i4_cost[VERT_I4x4] = 0;
|
|
|
|
USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
|
|
|
|
i4_cost[VERT_I4x4] =
|
|
i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? u4_lambda : 4 * u4_lambda);
|
|
}
|
|
|
|
/* HORZ mode valid */
|
|
if(u4_valid_intra_modes & 2)
|
|
{
|
|
i4_sad[HORZ_I4x4] = 0;
|
|
i4_cost[HORZ_I4x4] = 0;
|
|
pu1_src_temp = pu1_src;
|
|
|
|
u1_pred_val = pu1_ngbr_pels[3];
|
|
|
|
i4_sad[HORZ_I4x4] +=
|
|
ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
|
|
ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
|
|
pu1_src_temp += src_strd;
|
|
|
|
u1_pred_val = pu1_ngbr_pels[2];
|
|
|
|
i4_sad[HORZ_I4x4] +=
|
|
ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
|
|
ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
|
|
pu1_src_temp += src_strd;
|
|
|
|
u1_pred_val = pu1_ngbr_pels[1];
|
|
|
|
i4_sad[HORZ_I4x4] +=
|
|
ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
|
|
ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
|
|
pu1_src_temp += src_strd;
|
|
|
|
u1_pred_val = pu1_ngbr_pels[0];
|
|
|
|
i4_sad[HORZ_I4x4] +=
|
|
ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
|
|
ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
|
|
|
|
i4_cost[HORZ_I4x4] =
|
|
i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? u4_lambda : 4 * u4_lambda);
|
|
}
|
|
|
|
/* DC mode valid */
|
|
if(u4_valid_intra_modes & 4)
|
|
{
|
|
i4_sad[DC_I4x4] = 0;
|
|
i4_cost[DC_I4x4] = 0;
|
|
pu1_src_temp = pu1_src;
|
|
|
|
if(left)
|
|
u4_dcval =
|
|
pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2] + pu1_ngbr_pels[3] + 2;
|
|
if(top)
|
|
u4_dcval +=
|
|
pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7] + pu1_ngbr_pels[8] + 2;
|
|
|
|
u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
|
|
|
|
/* none available */
|
|
memset(u1_pred_vals, u4_dcval, 4);
|
|
USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
|
|
i4_cost[DC_I4x4] =
|
|
i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? u4_lambda : 4 * u4_lambda);
|
|
}
|
|
|
|
/* if modes other than VERT, HORZ and DC are valid */
|
|
if(u4_valid_intra_modes > 7)
|
|
{
|
|
pu1_pred = pu1_ngbr_pels;
|
|
pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
|
|
|
|
/* Performing FILT121 and FILT11 operation for all neighbour values*/
|
|
for(i = 0; i < 13; i++)
|
|
{
|
|
u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
|
|
u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
|
|
|
|
pu1_pred++;
|
|
}
|
|
|
|
if(u4_valid_intra_modes & 8) /* DIAG_DL */
|
|
{
|
|
i4_sad[DIAG_DL_I4x4] = 0;
|
|
i4_cost[DIAG_DL_I4x4] = 0;
|
|
pu1_src_temp = pu1_src;
|
|
pu1_pred_val = u1_pred_vals_diag_121 + 5;
|
|
|
|
USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
i4_cost[DIAG_DL_I4x4] =
|
|
i4_sad[DIAG_DL_I4x4] +
|
|
((u4_predictd_mode == DIAG_DL_I4x4) ? u4_lambda : 4 * u4_lambda);
|
|
}
|
|
|
|
if(u4_valid_intra_modes & 16) /* DIAG_DR */
|
|
{
|
|
i4_sad[DIAG_DR_I4x4] = 0;
|
|
i4_cost[DIAG_DR_I4x4] = 0;
|
|
pu1_src_temp = pu1_src;
|
|
pu1_pred_val = u1_pred_vals_diag_121 + 3;
|
|
|
|
USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
i4_cost[DIAG_DR_I4x4] =
|
|
i4_sad[DIAG_DR_I4x4] +
|
|
((u4_predictd_mode == DIAG_DR_I4x4) ? u4_lambda : 4 * u4_lambda);
|
|
}
|
|
|
|
if(u4_valid_intra_modes & 32) /* VERT_R mode valid ????*/
|
|
{
|
|
i4_sad[VERT_R_I4x4] = 0;
|
|
|
|
pu1_src_temp = pu1_src;
|
|
u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
|
|
memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
|
|
u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
|
|
memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
|
|
|
|
pu1_pred_val = u1_pred_vals_diag_11 + 4;
|
|
USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
|
|
pu1_pred_val = u1_pred_vals_diag_121 + 3;
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4), i4_sad[VERT_R_I4x4]);
|
|
|
|
i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] +
|
|
((u4_predictd_mode == VERT_R_I4x4) ? u4_lambda : 4 * u4_lambda);
|
|
}
|
|
|
|
if(u4_valid_intra_modes & 64) /* HORZ_D mode valid ????*/
|
|
{
|
|
i4_sad[HORZ_D_I4x4] = 0;
|
|
|
|
pu1_src_temp = pu1_src;
|
|
u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
|
|
memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
|
|
u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
|
|
u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
|
|
u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
|
|
u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
|
|
u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
|
|
u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
|
|
|
|
pu1_pred_val = u1_pred_vals_horz_d;
|
|
USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
|
|
|
|
i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] +
|
|
((u4_predictd_mode == HORZ_D_I4x4) ? u4_lambda : 4 * u4_lambda);
|
|
}
|
|
|
|
if(u4_valid_intra_modes & 128) /* VERT_L mode valid ????*/
|
|
{
|
|
i4_sad[VERT_L_I4x4] = 0;
|
|
pu1_src_temp = pu1_src;
|
|
pu1_pred_val = u1_pred_vals_diag_11 + 5;
|
|
USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
pu1_pred_val = u1_pred_vals_diag_121 + 5;
|
|
USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
pu1_pred_val = u1_pred_vals_diag_11 + 6;
|
|
USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
pu1_pred_val = u1_pred_vals_diag_121 + 6;
|
|
USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
|
|
|
|
i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] +
|
|
((u4_predictd_mode == VERT_L_I4x4) ? u4_lambda : 4 * u4_lambda);
|
|
}
|
|
|
|
if(u4_valid_intra_modes & 256) /* HORZ_U mode valid ????*/
|
|
{
|
|
i4_sad[HORZ_U_I4x4] = 0;
|
|
pu1_src_temp = pu1_src;
|
|
u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
|
|
u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
|
|
u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
|
|
u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
|
|
u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
|
|
u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
|
|
|
|
memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
|
|
|
|
pu1_pred_val = u1_pred_vals_horz_u;
|
|
USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
|
|
pu1_src_temp += src_strd;
|
|
USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
|
|
|
|
i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] +
|
|
((u4_predictd_mode == HORZ_U_I4x4) ? u4_lambda : 4 * u4_lambda);
|
|
}
|
|
|
|
i4_min_cost =
|
|
MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]), MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
|
|
MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
|
|
}
|
|
else
|
|
{
|
|
/* Only first three modes valid */
|
|
i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
|
|
}
|
|
|
|
*pu4_sadmin = i4_min_cost;
|
|
|
|
if(i4_min_cost == i4_cost[0])
|
|
{
|
|
*u4_intra_mode = VERT_I4x4;
|
|
pu1_pred_val = pu1_ngbr_pels + 5;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
}
|
|
else if(i4_min_cost == i4_cost[1])
|
|
{
|
|
*u4_intra_mode = HORZ_I4x4;
|
|
memset(pu1_dst, pu1_ngbr_pels[3], 4);
|
|
pu1_dst += dst_strd;
|
|
memset(pu1_dst, pu1_ngbr_pels[2], 4);
|
|
pu1_dst += dst_strd;
|
|
memset(pu1_dst, pu1_ngbr_pels[1], 4);
|
|
pu1_dst += dst_strd;
|
|
memset(pu1_dst, pu1_ngbr_pels[0], 4);
|
|
}
|
|
else if(i4_min_cost == i4_cost[2])
|
|
{
|
|
*u4_intra_mode = DC_I4x4;
|
|
memset(pu1_dst, u4_dcval, 4);
|
|
pu1_dst += dst_strd;
|
|
memset(pu1_dst, u4_dcval, 4);
|
|
pu1_dst += dst_strd;
|
|
memset(pu1_dst, u4_dcval, 4);
|
|
pu1_dst += dst_strd;
|
|
memset(pu1_dst, u4_dcval, 4);
|
|
}
|
|
|
|
else if(i4_min_cost == i4_cost[3])
|
|
{
|
|
*u4_intra_mode = DIAG_DL_I4x4;
|
|
pu1_pred_val = u1_pred_vals_diag_121 + 5;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val + 1), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val + 2), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val + 3), 4);
|
|
}
|
|
else if(i4_min_cost == i4_cost[4])
|
|
{
|
|
*u4_intra_mode = DIAG_DR_I4x4;
|
|
pu1_pred_val = u1_pred_vals_diag_121 + 3;
|
|
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val - 1), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val - 2), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val - 3), 4);
|
|
}
|
|
|
|
else if(i4_min_cost == i4_cost[5])
|
|
{
|
|
*u4_intra_mode = VERT_R_I4x4;
|
|
pu1_pred_val = u1_pred_vals_diag_11 + 4;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
pu1_pred_val = u1_pred_vals_diag_121 + 3;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
|
|
}
|
|
else if(i4_min_cost == i4_cost[6])
|
|
{
|
|
*u4_intra_mode = HORZ_D_I4x4;
|
|
pu1_pred_val = u1_pred_vals_horz_d;
|
|
memcpy(pu1_dst, (pu1_pred_val + 6), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val + 4), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val + 2), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
}
|
|
else if(i4_min_cost == i4_cost[7])
|
|
{
|
|
*u4_intra_mode = VERT_L_I4x4;
|
|
pu1_pred_val = u1_pred_vals_diag_11 + 5;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
pu1_pred_val = u1_pred_vals_diag_121 + 5;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
pu1_pred_val = u1_pred_vals_diag_11 + 6;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
pu1_pred_val = u1_pred_vals_diag_121 + 6;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
}
|
|
else if(i4_min_cost == i4_cost[8])
|
|
{
|
|
*u4_intra_mode = HORZ_U_I4x4;
|
|
pu1_pred_val = u1_pred_vals_horz_u;
|
|
memcpy(pu1_dst, (pu1_pred_val), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val + 2), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val + 4), 4);
|
|
pu1_dst += dst_strd;
|
|
memcpy(pu1_dst, (pu1_pred_val + 6), 4);
|
|
pu1_dst += dst_strd;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
*
|
|
* @brief:
|
|
* Evaluate best intr chroma mode (among VERT, HORZ and DC ) and do the
|
|
*prediction.
|
|
*
|
|
* @par Description
|
|
* This function evaluates first three intra chroma modes and compute
|
|
*corresponding sad and return the buffer predicted with best mode.
|
|
*
|
|
* @param[in] pu1_src
|
|
* UWORD8 pointer to the source
|
|
*
|
|
* @param[in] pu1_ngbr_pels
|
|
* UWORD8 pointer to neighbouring pels
|
|
*
|
|
* @param[out] pu1_dst
|
|
* UWORD8 pointer to the destination
|
|
*
|
|
* @param[in] src_strd
|
|
* integer source stride
|
|
*
|
|
* @param[in] dst_strd
|
|
* integer destination stride
|
|
*
|
|
* @param[in] u4_n_avblty
|
|
* availability of neighbouring pixels
|
|
*
|
|
* @param[in] u4_intra_mode
|
|
* Pointer to the variable in which best mode is returned
|
|
*
|
|
* @param[in] pu4_sadmin
|
|
* Pointer to the variable in which minimum sad is returned
|
|
*
|
|
* @param[in] u4_valid_intra_modes
|
|
* Says what all modes are valid
|
|
*
|
|
* @return none
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
void isvce_evaluate_intra_chroma_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst,
|
|
UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
|
|
UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
|
|
UWORD32 u4_valid_intra_modes)
|
|
{
|
|
UWORD8 *pu1_neighbour;
|
|
UWORD8 *pu1_src_temp = pu1_src;
|
|
UWORD8 left = 0, top = 0;
|
|
WORD32 u4_dcval_u_l[2] = {0, 0}, /*sum left neighbours for 'U' ,two separate sets - sum of
|
|
first four from top,and sum of four values from bottom */
|
|
u4_dcval_u_t[2] = {0, 0}; /*sum top neighbours for 'U'*/
|
|
|
|
WORD32 u4_dcval_v_l[2] = {0, 0}, /*sum left neighbours for 'V'*/
|
|
u4_dcval_v_t[2] = {0, 0}; /*sum top neighbours for 'V'*/
|
|
|
|
WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX,
|
|
i4_min_sad = INT_MAX;
|
|
UWORD8 val_u, val_v;
|
|
|
|
WORD32 u4_dc_val[2][2][2]; /* -----------
|
|
| | | Chroma can have four
|
|
| 00 | 01 | separate dc value...
|
|
----------- u4_dc_val corresponds to this dc
|
|
values | | | with u4_dc_val[2][2][U] and
|
|
u4_dc_val[2][2][V] | 10 | 11 |
|
|
----------- */
|
|
left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
|
|
top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
|
|
|
|
/*Evaluating HORZ*/
|
|
if(left) /* Ifleft available*/
|
|
{
|
|
i4_sad_horz = 0;
|
|
|
|
for(i = 0; i < 8; i++)
|
|
{
|
|
val_v = pu1_ngbr_pels[15 - 2 * i];
|
|
val_u = pu1_ngbr_pels[15 - 2 * i - 1];
|
|
row = i / 4;
|
|
u4_dcval_u_l[row] += val_u;
|
|
u4_dcval_v_l[row] += val_v;
|
|
for(j = 0; j < 8; j++)
|
|
{
|
|
i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for HORZ mode*/
|
|
i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]);
|
|
}
|
|
|
|
pu1_src_temp += src_strd;
|
|
}
|
|
u4_dcval_u_l[0] += 2;
|
|
u4_dcval_u_l[1] += 2;
|
|
u4_dcval_v_l[0] += 2;
|
|
u4_dcval_v_l[1] += 2;
|
|
}
|
|
|
|
/*Evaluating VERT**/
|
|
pu1_src_temp = pu1_src;
|
|
if(top) /* top available*/
|
|
{
|
|
i4_sad_vert = 0;
|
|
|
|
for(i = 0; i < 8; i++)
|
|
{
|
|
col = i / 4;
|
|
|
|
val_u = pu1_ngbr_pels[18 + i * 2];
|
|
val_v = pu1_ngbr_pels[18 + i * 2 + 1];
|
|
u4_dcval_u_t[col] += val_u;
|
|
u4_dcval_v_t[col] += val_v;
|
|
|
|
for(j = 0; j < 16; j++)
|
|
{
|
|
i4_sad_vert +=
|
|
ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]); /* Finding SAD for VERT mode*/
|
|
}
|
|
pu1_src_temp += src_strd;
|
|
}
|
|
u4_dcval_u_t[0] += 2;
|
|
u4_dcval_u_t[1] += 2;
|
|
u4_dcval_v_t[0] += 2;
|
|
u4_dcval_v_t[1] += 2;
|
|
}
|
|
|
|
/* computing DC value*/
|
|
/* Equation 8-128 in spec*/
|
|
u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top);
|
|
u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top);
|
|
u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top);
|
|
u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top);
|
|
|
|
if(top)
|
|
{
|
|
/* Equation 8-132 in spec*/
|
|
u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top);
|
|
u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top);
|
|
}
|
|
else
|
|
{
|
|
u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left);
|
|
u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left);
|
|
}
|
|
|
|
if(left)
|
|
{
|
|
u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left);
|
|
u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left);
|
|
}
|
|
else
|
|
{
|
|
u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top);
|
|
u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top);
|
|
}
|
|
|
|
if(!(left || top))
|
|
{
|
|
/*none available*/
|
|
u4_dc_val[0][0][0] = u4_dc_val[0][0][1] = u4_dc_val[0][1][0] = u4_dc_val[0][1][1] =
|
|
u4_dc_val[1][0][0] = u4_dc_val[1][0][1] = u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128;
|
|
}
|
|
|
|
/* Evaluating DC */
|
|
pu1_src_temp = pu1_src;
|
|
i4_sad_dc = 0;
|
|
for(i = 0; i < 8; i++)
|
|
{
|
|
for(j = 0; j < 8; j++)
|
|
{
|
|
col = j / 4;
|
|
row = i / 4;
|
|
val_u = u4_dc_val[row][col][0];
|
|
val_v = u4_dc_val[row][col][1];
|
|
|
|
i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for DC mode*/
|
|
i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]);
|
|
}
|
|
pu1_src_temp += src_strd;
|
|
}
|
|
|
|
if((u4_valid_intra_modes & 01) == 0) /* If DC is disabled*/
|
|
i4_sad_dc = INT_MAX;
|
|
if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled*/
|
|
i4_sad_horz = INT_MAX;
|
|
if((u4_valid_intra_modes & 04) == 0) /* If VERT is disabled*/
|
|
i4_sad_vert = INT_MAX;
|
|
|
|
i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
|
|
|
|
/* Finding Minimum sad and doing corresponding prediction*/
|
|
if(i4_min_sad < *pu4_sadmin)
|
|
{
|
|
*pu4_sadmin = i4_min_sad;
|
|
|
|
if(i4_min_sad == i4_sad_dc)
|
|
{
|
|
*u4_intra_mode = DC_CH_I8x8;
|
|
for(i = 0; i < 8; i++)
|
|
{
|
|
for(j = 0; j < 8; j++)
|
|
{
|
|
col = j / 4;
|
|
row = i / 4;
|
|
|
|
pu1_dst[2 * j] = u4_dc_val[row][col][0];
|
|
pu1_dst[2 * j + 1] = u4_dc_val[row][col][1];
|
|
}
|
|
pu1_dst += dst_strd;
|
|
}
|
|
}
|
|
else if(i4_min_sad == i4_sad_horz)
|
|
{
|
|
*u4_intra_mode = HORZ_CH_I8x8;
|
|
for(j = 0; j < 8; j++)
|
|
{
|
|
val_v = pu1_ngbr_pels[15 - 2 * j];
|
|
val_u = pu1_ngbr_pels[15 - 2 * j - 1];
|
|
|
|
for(i = 0; i < 8; i++)
|
|
{
|
|
pu1_dst[2 * i] = val_u;
|
|
pu1_dst[2 * i + 1] = val_v;
|
|
}
|
|
pu1_dst += dst_strd;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*u4_intra_mode = VERT_CH_I8x8;
|
|
pu1_neighbour = pu1_ngbr_pels + 18;
|
|
for(j = 0; j < 8; j++)
|
|
{
|
|
memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
|
|
pu1_dst += dst_strd;
|
|
}
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|