unplugged-system/external/libavc/encoder/svc/isvce_residual_pred.c

1951 lines
73 KiB
C

/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_residual_pred.c
*
* @brief
* Contains functions used for SVC residual prediction
*
*******************************************************************************
*/
#include <stdint.h>
#include <math.h>
#include "ih264_typedefs.h"
#include "iv2.h"
#include "isvc_macros.h"
#include "ih264_debug.h"
#include "isvc_defs.h"
#include "isvc_structs.h"
#include "isvce_defs.h"
#include "isvce_structs.h"
#include "isvce_res_pred_private_defs.h"
#include "isvce_residual_pred.h"
#include "isvce_utils.h"
#include "isvc_defs.h"
/**
*******************************************************************************
*
* @brief
*  2x residual upsampler for chroma (C reference implementation)
*
* @par Description
*  Bilinearly interpolates a BLK_SIZE x BLK_SIZE block of reference layer
*  residual into a BLK8x8SIZE x BLK8x8SIZE block. Interpolation is separable:
*  a horizontal pass writes WORD32 intermediates (scaled by 16) into the
*  scratch buffer, then a vertical pass rounds and writes WORD16 outputs.
*  Only every second WORD16 of an input row is read and output columns are
*  written two WORD16 apart (presumably interleaved UV storage - confirm
*  against the caller).
*
* @param[in] ps_ref_array_positions
*  Unused
*
* @param[in] ps_ref_array_phases
*  Phases in 1/16th sample units. Entries [0] and [1] supply the horizontal
*  phases (i4_abscissa); entries [0] and [2] supply the vertical phases
*  (i4_ordinate)
*
* @param[in] ps_inp
*  Reference layer residual (WORD16 samples) and its stride
*
* @param[out] ps_out
*  Upsampled residual (WORD16 samples) and its stride
*
* @param[in] ps_scratch
*  Scratch memory; used as BLK_SIZE rows of BLK8x8SIZE WORD32 intermediates
*
* @param[in] u4_ref_nnz
*  Unused
*
* @param[in] u1_ref_tx_size
*  Unused
*
*******************************************************************************
*/
void isvce_chroma_residual_sampler_2x(coordinates_t *ps_ref_array_positions,
                                      coordinates_t *ps_ref_array_phases,
                                      buffer_container_t *ps_inp, buffer_container_t *ps_out,
                                      buffer_container_t *ps_scratch, UWORD32 u4_ref_nnz,
                                      UWORD8 u1_ref_tx_size)
{
    WORD32 i4_i, i4_j;
    WORD32 i4_phase1, i4_phase2;
    WORD32 *pi4_ref_array;
    WORD16 *pi2_inp_data = ps_inp->pv_data;
    WORD16 *pi2_out_res = ps_out->pv_data;
    WORD32 i4_inp_data_stride = ps_inp->i4_data_stride;
    WORD32 i4_out_res_stride = ps_out->i4_data_stride;

    UNUSED(u4_ref_nnz);
    UNUSED(ps_ref_array_positions);
    UNUSED(u1_ref_tx_size);

    /* For 2x scaling, offsets always point to TL pixel outside MB */
    /* Hence, refTransBlkIdc will be different and since phase     */
    /* for first refArray pos for horiz filtering samples > 8,     */
    /* first row and first column from the refArray is never used  */
    pi2_inp_data += 2 + i4_inp_data_stride;

    /* ----------- Horizontal Interpolation ---------------- */
    i4_phase1 = ps_ref_array_phases[0].i4_abscissa;
    i4_phase2 = ps_ref_array_phases[1].i4_abscissa;

    ASSERT(i4_phase1 >= 8);

    pi4_ref_array = (WORD32 *) ps_scratch->pv_data;

    for(i4_i = 0; i4_i < BLK_SIZE; i4_i++)
    {
        const WORD16 *pi2_ref_data_byte = pi2_inp_data + (i4_i * i4_inp_data_stride);
        WORD32 i4_coeff_prev = pi2_ref_data_byte[0];

        /* First output of the row is a scaled copy of the first input. */
        /* Multiplication is used instead of '<< 4' since left shifting */
        /* a negative residual is undefined behaviour in C              */
        *pi4_ref_array++ = i4_coeff_prev * 16;

        /* Every adjacent pair of inputs yields two interpolated outputs */
        for(i4_j = 1; i4_j < BLK_SIZE; i4_j++)
        {
            /* Input samples of this plane are two WORD16 apart */
            WORD32 i4_coeff_cur = pi2_ref_data_byte[2 * i4_j];

            *pi4_ref_array++ = ((16 - i4_phase2) * i4_coeff_prev + i4_phase2 * i4_coeff_cur);
            *pi4_ref_array++ = ((16 - i4_phase1) * i4_coeff_prev + i4_phase1 * i4_coeff_cur);

            i4_coeff_prev = i4_coeff_cur;
        }

        /* Last output of the row is a scaled copy of the last input */
        *pi4_ref_array++ = i4_coeff_prev * 16;
    }

    /* ----------- Vertical Interpolation ---------------- */
    pi4_ref_array = (WORD32 *) ps_scratch->pv_data;

    i4_phase1 = ps_ref_array_phases[0].i4_ordinate;
    i4_phase2 = ps_ref_array_phases[2].i4_ordinate;

    for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++)
    {
        /* Column i4_i of the BLK_SIZE x BLK8x8SIZE intermediate array */
        const WORD32 *pi4_ref_array_temp = pi4_ref_array + i4_i;
        /* Output columns of this plane are two WORD16 apart */
        WORD16 *pi2_out = pi2_out_res + (2 * i4_i);
        WORD32 i4_horz_samp_prev = pi4_ref_array_temp[0];

        /* First output of the column only undoes the horizontal scaling */
        *pi2_out = (i4_horz_samp_prev + 8) >> 4;
        pi2_out += i4_out_res_stride;

        for(i4_j = 1; i4_j < BLK_SIZE; i4_j++)
        {
            WORD32 i4_horz_samp_cur = pi4_ref_array_temp[i4_j * BLK8x8SIZE];

            /* Both passes contribute a factor of 16, hence round and shift by 8 */
            *pi2_out =
                ((16 - i4_phase2) * i4_horz_samp_prev + i4_phase2 * i4_horz_samp_cur + 128) >> 8;
            pi2_out += i4_out_res_stride;

            *pi2_out =
                ((16 - i4_phase1) * i4_horz_samp_prev + i4_phase1 * i4_horz_samp_cur + 128) >> 8;
            pi2_out += i4_out_res_stride;

            i4_horz_samp_prev = i4_horz_samp_cur;
        }

        /* Last output of the column only undoes the horizontal scaling */
        *pi2_out = (i4_horz_samp_prev + 8) >> 4;
    }
}
void isvce_luma_residual_sampler_2x(coordinates_t *ps_ref_array_positions,
coordinates_t *ps_ref_array_phases, buffer_container_t *ps_inp,
buffer_container_t *ps_out, buffer_container_t *ps_scratch,
UWORD32 u4_ref_nnz, UWORD8 u1_ref_tx_size)
{
WORD16 *pi2_inp_data = ps_inp->pv_data;
WORD16 *pi2_out_res = ps_out->pv_data;
WORD32 i4_inp_data_stride = ps_inp->i4_data_stride;
WORD32 i4_out_res_stride = ps_out->i4_data_stride;
WORD16 *pi2_refarray_buffer = ps_scratch->pv_data;
WORD32 i4_blk_ctr;
UNUSED(ps_ref_array_positions);
UNUSED(ps_ref_array_phases);
/* For 2x scaling, offsets always point to TL pixel outside MB */
/* Hence, refTransBlkIdc will be different and since phase */
/* for first refArray pos for horiz filtering samples > 8, */
/* first row and first column from the refArray is never used */
pi2_inp_data += 1 + i4_inp_data_stride;
if((u1_ref_tx_size) && (0 != u4_ref_nnz))
{
WORD16 *pi2_ref_data_byte;
WORD32 *pi4_ref_array;
WORD32 i4_i, i4_j;
pi2_ref_data_byte = pi2_inp_data;
/* ----------- Horizontal Interpolation ---------------- */
pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++)
{
WORD16 i2_coeff1, i2_coeff2;
i2_coeff1 = (WORD16) (*pi2_ref_data_byte++);
/* populate the first inter sample */
*pi4_ref_array++ = i2_coeff1 << 2;
for(i4_j = 0; i4_j < 14; i4_j += 2)
{
i2_coeff2 = (WORD16) (*pi2_ref_data_byte++);
/* populate 2 samples based on current coeffs */
*pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
*pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
/* store the coeff 2 to coeff 1 */
/* (used in next iteration) */
i2_coeff1 = i2_coeff2;
}
/* populate the last inter sample */
*pi4_ref_array++ = i2_coeff1 << 2;
/* vertical loop uopdates */
pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride);
}
/* ----------- Vertical Interpolation ---------------- */
pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
for(i4_i = 0; i4_i < MB_SIZE; i4_i++)
{
WORD32 *pi4_ref_array_temp;
WORD16 *pi2_out;
WORD32 i4_horz_samp_1, i4_horz_samp_2;
pi4_ref_array_temp = pi4_ref_array;
pi2_out = pi2_out_res;
i4_horz_samp_1 = *pi4_ref_array_temp;
/* populate the first inter sample */
*pi2_out = (i4_horz_samp_1 + 2) >> 2;
pi2_out += i4_out_res_stride;
for(i4_j = 0; i4_j < 14; i4_j += 2)
{
pi4_ref_array_temp += MB_SIZE;
i4_horz_samp_2 = *pi4_ref_array_temp;
/* populate 2 samples based on current coeffs */
*pi2_out = ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
pi2_out += i4_out_res_stride;
*pi2_out = ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
pi2_out += i4_out_res_stride;
/* store the coeff 2 to coeff 1 */
/* (used in next iteration) */
i4_horz_samp_1 = i4_horz_samp_2;
}
/* populate the first inter sample */
*pi2_out = (i4_horz_samp_1 + 2) >> 2;
/* horizontal loop updates */
pi4_ref_array++;
pi2_out_res++;
}
}
else
{
/* ----------------------------------------------------------------- */
/* LOOP over number of blocks */
/* ----------------------------------------------------------------- */
for(i4_blk_ctr = 0; i4_blk_ctr < BLK_SIZE; i4_blk_ctr++)
{
WORD16 *pi2_ref_data_byte;
WORD32 *pi4_ref_array;
WORD32 i4_i;
/* if reference layer is not coded then no processing */
if(0 != (u4_ref_nnz & 0x1))
{
pi2_ref_data_byte = pi2_inp_data;
/* ----------- Horizontal Interpolation ---------------- */
pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
for(i4_i = 0; i4_i < BLK_SIZE; i4_i++)
{
WORD16 i2_coeff1, i2_coeff2;
i2_coeff1 = (WORD16) (*pi2_ref_data_byte++);
/* populate the first inter sample */
*pi4_ref_array++ = i2_coeff1 << 2;
{
i2_coeff2 = (WORD16) (*pi2_ref_data_byte++);
/* populate 2 samples based on current coeffs */
*pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
*pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
i2_coeff1 = (WORD16) (*pi2_ref_data_byte++);
/* populate 2 samples based on current coeffs */
*pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
*pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
i2_coeff2 = (WORD16) (*pi2_ref_data_byte++);
/* populate 2 samples based on current coeffs */
*pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
*pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
}
/* populate the last inter sample */
*pi4_ref_array++ = i2_coeff2 << 2;
/* vertical loop uopdates */
pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride);
}
/* ----------- Vertical Interpolation ---------------- */
pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++)
{
WORD32 *pi4_ref_array_temp;
WORD16 *pi2_out;
WORD32 i4_horz_samp_1, i4_horz_samp_2;
pi4_ref_array_temp = pi4_ref_array;
pi2_out = pi2_out_res;
i4_horz_samp_1 = *pi4_ref_array_temp;
/* populate the first inter sample */
*pi2_out = (i4_horz_samp_1 + 2) >> 2;
pi2_out += i4_out_res_stride;
{
/* unroll loop count 1 */
pi4_ref_array_temp += BLK8x8SIZE;
i4_horz_samp_2 = *pi4_ref_array_temp;
/* populate 2 samples based on current coeffs */
*pi2_out =
((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
pi2_out += i4_out_res_stride;
*pi2_out =
((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
pi2_out += i4_out_res_stride;
/* unroll loop count 2 */
pi4_ref_array_temp += BLK8x8SIZE;
i4_horz_samp_1 = *pi4_ref_array_temp;
/* populate 2 samples based on current coeffs */
*pi2_out =
((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
pi2_out += i4_out_res_stride;
*pi2_out =
((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
pi2_out += i4_out_res_stride;
/* unroll loop count 3 */
pi4_ref_array_temp += BLK8x8SIZE;
i4_horz_samp_2 = *pi4_ref_array_temp;
/* populate 2 samples based on current coeffs */
*pi2_out =
((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
pi2_out += i4_out_res_stride;
*pi2_out =
((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
pi2_out += i4_out_res_stride;
}
/* populate the last inter sample */
*pi2_out = (i4_horz_samp_2 + 2) >> 2;
/* horizontal loop updates */
pi4_ref_array++;
pi2_out_res++;
}
}
else
{
pi2_out_res += BLK8x8SIZE;
}
if(1 == i4_blk_ctr)
{
pi2_inp_data -= BLK_SIZE;
pi2_inp_data += (i4_inp_data_stride * BLK_SIZE);
pi2_out_res -= MB_SIZE;
pi2_out_res += (i4_out_res_stride * BLK8x8SIZE);
u4_ref_nnz >>= 2;
}
else
{
pi2_inp_data += BLK_SIZE;
}
u4_ref_nnz >>= 1;
}
}
}
/**
*******************************************************************************
*
* @brief
*  Returns size of buffers for storing residual pred ctxt
*
* @par Description
*  With more than one spatial layer the total covers, per process context,
*  the residual pred context and state structs, the per-layer state array,
*  the MB residual buffers, the scratch array and the two ref pointer
*  increment arrays; plus, once, the per-layer MB state arrays, the ref array
*  position/phase tables and the MB mode maps (padded by one MB on each side).
*  With a single layer only a 'yuv_buf_props_t' and the MB residual buffers
*  are accounted for per process context.
*  Layer dimensions are derived as 'dim / ratio^k + 0.99' truncated to an
*  integer, identically for width and height.
*
* @param[in] u1_num_spatial_layers
*  Num Spatial Layers
*
* @param[in] d_spatial_res_ratio
*  Resolution Ratio b/w spatial layers
*
* @param[in] u4_wd
*  Input Width
*
* @param[in] u4_ht
*  Input Height
*
* @returns Size of buffers
*
*******************************************************************************
*/
UWORD32 isvce_get_svc_res_pred_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio,
                                         UWORD32 u4_wd, UWORD32 u4_ht)
{
    UWORD32 u4_size = 0;

    if(u1_num_spatial_layers > 1)
    {
        WORD32 i;

        u4_size += MAX_PROCESS_CTXT * sizeof(svc_res_pred_ctxt_t);
        u4_size += MAX_PROCESS_CTXT * sizeof(res_pred_state_t);

        /* Mem for storing pred */
        u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(WORD16);
        u4_size += MAX_PROCESS_CTXT * MB_SIZE * (MB_SIZE / 2) * sizeof(WORD16);

        /* Mem for storing intermediates */
        u4_size += MAX_PROCESS_CTXT * REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16);

        /* Mem for pu1_ref_x_ptr_incr and pu1_ref_y_ptr_incr*/
        u4_size +=
            2 * MAX_PROCESS_CTXT * REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(UWORD8);

        u4_size += MAX_PROCESS_CTXT * u1_num_spatial_layers * sizeof(res_pred_layer_state_t);

        /* Per-layer MB state; layer 0 has no reference layer and needs none */
        for(i = u1_num_spatial_layers - 1; i >= 1; i--)
        {
            /* The '+ 0.99' must be applied before the conversion to WORD32. */
            /* Truncating first would under-count MBs relative to the height */
            /* computation below                                             */
            WORD32 i4_layer_luma_wd =
                ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
            WORD32 i4_layer_luma_ht =
                ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
            WORD32 i4_layer_luma_mbs = (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE);
            WORD32 i4_layer_u_wd = i4_layer_luma_wd / 2.0 + 0.99;
            WORD32 i4_layer_u_ht = i4_layer_luma_ht / 2.0 + 0.99;
            WORD32 i4_layer_u_mbs =
                (i4_layer_u_wd / (MB_SIZE / 2)) * (i4_layer_u_ht / (MB_SIZE / 2));

            /* ps_luma_mb_states */
            {
                u4_size += i4_layer_luma_mbs * sizeof(res_pred_mb_state_t);

                /* ps_ref_array_positions */
                u4_size +=
                    ((1.5 == d_spatial_res_ratio) ? (i4_layer_luma_mbs * MB_SIZE * MB_SIZE) : 0) *
                    sizeof(coordinates_t);

                /* ps_ref_array_phases */
                u4_size += ((1.5 == d_spatial_res_ratio) ? (i4_layer_luma_mbs * 5) : 0) *
                           sizeof(coordinates_t);
            }

            /* ps_chroma_mb_states */
            {
                u4_size += i4_layer_u_mbs * sizeof(res_pred_mb_state_t);

                /* ps_ref_array_positions */
                u4_size +=
                    ((1.5 == d_spatial_res_ratio) ? (i4_layer_u_mbs * (MB_SIZE / 2) * (MB_SIZE / 2))
                                                  : 0) *
                    sizeof(coordinates_t);

                /* ps_ref_array_phases */
                /* For ratios other than 1.5 a single table of 3 phases is reserved */
                u4_size += ((1.5 == d_spatial_res_ratio) ? (i4_layer_u_mbs * 5) : 3) *
                           sizeof(coordinates_t);
            }
        }

        /* MB mode maps for every layer, padded by one MB on each side */
        for(i = u1_num_spatial_layers - 1; i >= 0; i--)
        {
            WORD32 i4_layer_luma_wd =
                ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
            WORD32 i4_layer_luma_ht =
                ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
            WORD32 i4_layer_luma_mbs =
                ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2);

            /* pi1_mb_mode */
            u4_size += i4_layer_luma_mbs * sizeof(WORD8);
        }
    }
    else
    {
        u4_size += MAX_PROCESS_CTXT * sizeof(yuv_buf_props_t);

        /* Mem for storing pred */
        u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(WORD16);
        u4_size += MAX_PROCESS_CTXT * MB_SIZE * (MB_SIZE / 2) * sizeof(WORD16);
    }

    return u4_size;
}
/**
*******************************************************************************
*
* @brief
*  Maps a pixel coordinate of the current layer to the reference layer
*
* @par Description
*  Evaluates '(((pos - offset) * scale + add) >> (shift - 4)) - delta' with
*  a 64-bit intermediate, using the resampler properties of the requested
*  dimension. The shift is reduced by 4, so callers treat the low 4 bits of
*  the result as a phase and the remaining bits as a sample position.
*
* @param[in] ps_layer_props
*  Resampler properties of the layer
*
* @param[in] i4_pixel_pos
*  Pixel coordinate in the current layer
*
* @param[in] u1_dim_id
*  1 selects the vertical (y) properties; any other value selects the
*  horizontal (x) properties
*
* @returns Scaled position
*
*******************************************************************************
*/
static FORCEINLINE WORD32 isvce_get_scaled_pixel_pos(layer_resampler_props_t *ps_layer_props,
                                                     WORD32 i4_pixel_pos, UWORD8 u1_dim_id)
{
    WORD64 i8_scaled_pos;

    if(1 != u1_dim_id)
    {
        /* Horizontal */
        i8_scaled_pos =
            (i4_pixel_pos - ps_layer_props->i4_offset_x) * ((WORD64) ps_layer_props->u4_scale_x) +
            ps_layer_props->i4_add_x;
        i8_scaled_pos >>= (ps_layer_props->u4_shift_x - 4);

        return (WORD32) (i8_scaled_pos - ps_layer_props->i4_delta_x);
    }

    /* Vertical */
    i8_scaled_pos =
        (i4_pixel_pos - ps_layer_props->i4_offset_y) * ((WORD64) ps_layer_props->u4_scale_y) +
        ps_layer_props->i4_add_y;
    i8_scaled_pos >>= (ps_layer_props->u4_shift_y - 4);

    return (WORD32) (i8_scaled_pos - ps_layer_props->i4_delta_y);
}
/**
*******************************************************************************
*
* @brief
*  Fills the three phase entries of an MB state
*
* @par Description
*  Writes 'ps_ref_array_phases' entries [0], [1] and [2] of the MB state.
*  They hold the phases (low 4 bits of the scaled position relative to the
*  MB's ref array offset) of the MB's samples at (x, y) = (0, 0), (1, 0) and
*  (0, 1) respectively. Ref array positions are not written.
*
* @param[in] ps_layer_props
*  Resampler properties of the layer
*
* @param[in,out] ps_mb_state
*  MB state; 's_offsets' is read, 'ps_ref_array_phases' is written
*
* @param[in] ps_mb_pos
*  MB coordinates, in MB units
*
* @param[in] u1_frame_mbs_only_flag
*  If zero, the vertical coordinate is shifted right by
*  (1 - u1_field_mb_flag) before scaling
*
* @param[in] u1_field_mb_flag
*  See u1_frame_mbs_only_flag
*
* @param[in] u1_ref_layer_frame_mbs_only_flag
*  Has the same effect as u1_frame_mbs_only_flag
*
*******************************************************************************
*/
static FORCEINLINE void isvce_ref_array_pos_and_phase_init_dyadic(
    layer_resampler_props_t *ps_layer_props, res_pred_mb_state_t *ps_mb_state,
    coordinates_t *ps_mb_pos, UWORD8 u1_frame_mbs_only_flag, UWORD8 u1_field_mb_flag,
    UWORD8 u1_ref_layer_frame_mbs_only_flag)
{
    UWORD32 u4_row, u4_col;

    coordinates_t *ps_phases = ps_mb_state->ps_ref_array_phases;
    WORD32 i4_offset_x = ps_mb_state->s_offsets.i4_abscissa;
    WORD32 i4_offset_y = ps_mb_state->s_offsets.i4_ordinate;

    for(u4_row = 0; u4_row < 2; u4_row++)
    {
        /* Two columns are visited on the first row, one on the second */
        const UWORD32 u4_num_cols = (0 == u4_row) ? 2 : 1;
        WORD32 i4_cur_y = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + u4_row;
        WORD32 i4_ref_y16;

        if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag))
        {
            i4_cur_y >>= (1 - u1_field_mb_flag);
        }

        i4_ref_y16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_cur_y, 1);

        for(u4_col = 0; u4_col < u4_num_cols; u4_col++)
        {
            coordinates_t *ps_phase = &ps_phases[u4_col + u4_row * 2];
            WORD32 i4_cur_x = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + u4_col;
            WORD32 i4_ref_x16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_cur_x, 0);

            ps_phase->i4_abscissa = (i4_ref_x16 - (16 * i4_offset_x)) & 15;
            ps_phase->i4_ordinate = (i4_ref_y16 - (16 * i4_offset_y)) & 15;
        }
    }
}
/**
*******************************************************************************
*
* @brief
*  Fills the ref array position and phase tables of an MB state
*
* @par Description
*  For every sample of the MB (u4_mb_wd x u4_mb_ht, raster order) the scaled
*  reference position is computed and its integer part, relative to the MB's
*  ref array offset, is stored in 'ps_ref_array_positions'.
*  Phases (low 4 bits of the scaled position relative to the offset) are
*  stored, in raster visiting order, only for samples with (row 0, col < 3)
*  or (col 0, row < 3), i.e. at most 5 entries of 'ps_ref_array_phases'.
*
* @param[in] ps_layer_props
*  Resampler properties of the layer
*
* @param[in,out] ps_mb_state
*  MB state; 's_offsets' is read, both tables are written
*
* @param[in] ps_mb_pos
*  MB coordinates, in MB units
*
* @param[in] u1_frame_mbs_only_flag
*  If zero, the vertical coordinate is shifted right by
*  (1 - u1_field_mb_flag) before scaling
*
* @param[in] u1_field_mb_flag
*  See u1_frame_mbs_only_flag
*
* @param[in] u1_ref_layer_frame_mbs_only_flag
*  Has the same effect as u1_frame_mbs_only_flag
*
*******************************************************************************
*/
static FORCEINLINE void isvce_ref_array_pos_and_phase_init(layer_resampler_props_t *ps_layer_props,
                                                           res_pred_mb_state_t *ps_mb_state,
                                                           coordinates_t *ps_mb_pos,
                                                           UWORD8 u1_frame_mbs_only_flag,
                                                           UWORD8 u1_field_mb_flag,
                                                           UWORD8 u1_ref_layer_frame_mbs_only_flag)
{
    UWORD32 u4_row, u4_col;

    coordinates_t *ps_positions = ps_mb_state->ps_ref_array_positions;
    coordinates_t *ps_next_phase = ps_mb_state->ps_ref_array_phases;
    WORD32 i4_offset_x = ps_mb_state->s_offsets.i4_abscissa;
    WORD32 i4_offset_y = ps_mb_state->s_offsets.i4_ordinate;

    for(u4_row = 0; u4_row < ps_layer_props->u4_mb_ht; u4_row++)
    {
        WORD32 i4_cur_y = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + u4_row;
        WORD32 i4_ref_y16;

        if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag))
        {
            i4_cur_y >>= (1 - u1_field_mb_flag);
        }

        i4_ref_y16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_cur_y, 1);

        for(u4_col = 0; u4_col < ps_layer_props->u4_mb_wd; u4_col++)
        {
            coordinates_t *ps_position = &ps_positions[u4_col + u4_row * ps_layer_props->u4_mb_wd];
            WORD32 i4_cur_x = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + u4_col;
            WORD32 i4_ref_x16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_cur_x, 0);

            ps_position->i4_abscissa = (i4_ref_x16 >> 4) - i4_offset_x;
            ps_position->i4_ordinate = (i4_ref_y16 >> 4) - i4_offset_y;

            /* Phases are only recorded along the first row and first column */
            if(((0 == u4_row) && (u4_col < 3)) || ((0 == u4_col) && (u4_row < 3)))
            {
                ps_next_phase->i4_abscissa = (i4_ref_x16 - (16 * i4_offset_x)) & 15;
                ps_next_phase->i4_ordinate = (i4_ref_y16 - (16 * i4_offset_y)) & 15;
                ps_next_phase++;
            }
        }
    }
}
/**
*******************************************************************************
*
* @brief
*  Initialises the per-MB residual prediction state of one spatial layer
*
* @par Description
*  For luma and then chroma, walks every MB of the layer and stores the ref
*  array offset and ref array dimensions derived from the scaled positions of
*  the MB's first and last samples. Additionally,
*  - for a resolution ratio of 2, the phases of the first chroma MB only are
*    initialised;
*  - for a resolution ratio of 1.5, positions and phases of every MB are
*    initialised.
*  'ps_luma_props', 'ps_chroma_props' and both MB state arrays of the layer
*  state must already be set.
*
* @param[in,out] ps_layer_state
*  Layer state to initialise
*
* @param[in] d_spatial_res_ratio
*  Resolution Ratio b/w spatial layers
*
* @param[in] u4_wd
*  Luma width of the layer
*
* @param[in] u4_ht
*  Luma height of the layer
*
* @param[in] e_color_format
*  Colour format; only checked by an assertion
*
*******************************************************************************
*/
static void isvce_res_pred_layer_state_init(res_pred_layer_state_t *ps_layer_state,
                                            DOUBLE d_spatial_res_ratio, UWORD32 u4_wd,
                                            UWORD32 u4_ht, IV_COLOR_FORMAT_T e_color_format)
{
    UWORD32 i, j, k;

    const UWORD8 u1_field_pic_flag = 0;
    const UWORD8 u1_frame_mbs_only_flag = 1;
    const UWORD8 u1_ref_layer_frame_mbs_only_flag = 1;
    const UWORD8 u1_field_mb_flag = 0;

    ASSERT((IV_YUV_420P == e_color_format) || (IV_YUV_420SP_UV == e_color_format));
    UNUSED(e_color_format);

    /* i == 0 handles luma, i == 1 handles chroma */
    for(i = 0; i < 2; i++)
    {
        res_pred_mb_state_t *ps_mb_states;
        layer_resampler_props_t *ps_layer_props;

        UWORD32 u4_wd_in_mbs;
        UWORD32 u4_ht_in_mbs;

        UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i));
        UWORD32 u4_scaled_wd = u4_wd;
        UWORD32 u4_scaled_ht = u4_ht * (1 + u1_field_pic_flag);

        ps_mb_states =
            u1_is_chroma ? ps_layer_state->ps_chroma_mb_states : ps_layer_state->ps_luma_mb_states;
        ps_layer_props =
            u1_is_chroma ? ps_layer_state->ps_chroma_props : ps_layer_state->ps_luma_props;

        /* Chroma planes are half the luma size in both directions */
        u4_scaled_wd = u4_scaled_wd >> u1_is_chroma;
        u4_scaled_ht = u4_scaled_ht >> u1_is_chroma;

        u4_wd_in_mbs = u4_scaled_wd / ps_layer_props->u4_mb_wd;
        u4_ht_in_mbs = u4_scaled_ht / ps_layer_props->u4_mb_ht;

        for(j = 0; j < u4_ht_in_mbs; j++)
        {
            WORD32 i4_y_refmin16;
            WORD32 i4_y_refmax16;
            WORD32 i4_y_offset;

            /* Scaled positions of the first and last rows of the MB */
            i4_y_refmin16 =
                isvce_get_scaled_pixel_pos(ps_layer_props, j * ps_layer_props->u4_mb_ht, 1);
            i4_y_refmax16 = isvce_get_scaled_pixel_pos(
                ps_layer_props, j * ps_layer_props->u4_mb_ht + ps_layer_props->u4_mb_ht - 1, 1);

            i4_y_offset = i4_y_refmin16 >> 4;

            for(k = 0; k < u4_wd_in_mbs; k++)
            {
                WORD32 i4_x_refmin16;
                WORD32 i4_x_refmax16;
                WORD32 i4_x_offset;

                res_pred_mb_state_t *ps_mb_state = &ps_mb_states[k + j * u4_wd_in_mbs];
                coordinates_t s_mb_pos = {k, j};

                /* Scaled positions of the first and last columns of the MB */
                i4_x_refmin16 =
                    isvce_get_scaled_pixel_pos(ps_layer_props, k * ps_layer_props->u4_mb_wd, 0);
                i4_x_refmax16 = isvce_get_scaled_pixel_pos(
                    ps_layer_props, k * ps_layer_props->u4_mb_wd + ps_layer_props->u4_mb_wd - 1, 0);

                i4_x_offset = i4_x_refmin16 >> 4;

                ps_mb_state->s_offsets.i4_abscissa = i4_x_offset;
                ps_mb_state->s_offsets.i4_ordinate = i4_y_offset;
                ps_mb_state->s_ref_array_dims.i4_abscissa = (i4_x_refmax16 >> 4) - i4_x_offset + 2;
                ps_mb_state->s_ref_array_dims.i4_ordinate = (i4_y_refmax16 >> 4) - i4_y_offset + 2;

                if((0 == k) && (0 == j) && (2 == d_spatial_res_ratio) && u1_is_chroma)
                {
                    /* Phases are initialised for the first chroma MB only */
                    isvce_ref_array_pos_and_phase_init_dyadic(
                        ps_layer_props, ps_mb_state, &s_mb_pos, u1_frame_mbs_only_flag,
                        u1_field_mb_flag, u1_ref_layer_frame_mbs_only_flag);
                }
                else if(1.5 == d_spatial_res_ratio)
                {
                    isvce_ref_array_pos_and_phase_init(
                        ps_layer_props, ps_mb_state, &s_mb_pos, u1_frame_mbs_only_flag,
                        u1_field_mb_flag, u1_ref_layer_frame_mbs_only_flag);
                }
            }
        }
    }
}
/**
*******************************************************************************
*
* @brief
*  Selects residual sampling and SAD kernels for the given architecture
*
* @par Description
*  'pf_get_sad_with_residual_pred' is always set: to the SIMD variant when
*  the build target and e_arch match, to the C implementation otherwise.
*  'apf_residual_samplers' is set only when the resolution ratio is 2; chroma
*  always uses the C sampler, luma uses a SIMD variant when available.
*
* @param[out] ps_res_pred_state
*  State whose function pointers are set
*
* @param[in] d_spatial_res_ratio
*  Resolution Ratio b/w spatial layers
*
* @param[in] e_arch
*  Target architecture
*
*******************************************************************************
*/
void isvce_svc_residual_sampling_function_selector(res_pred_state_t *ps_res_pred_state,
                                                   DOUBLE d_spatial_res_ratio, IV_ARCH_T e_arch)
{
    /* SAD kernel: start with the C implementation and override for SIMD */
    ps_res_pred_state->pf_get_sad_with_residual_pred = isvce_get_sad_with_residual_pred;

    switch(e_arch)
    {
#if defined(X86)
        case ARCH_X86_SSE42:
            ps_res_pred_state->pf_get_sad_with_residual_pred =
                isvce_get_sad_with_residual_pred_sse42;
            break;
#elif defined(ARMV8)
        case ARCH_ARM_A53:
        case ARCH_ARM_A57:
        case ARCH_ARM_V8_NEON:
            ps_res_pred_state->pf_get_sad_with_residual_pred =
                isvce_get_sad_with_residual_pred_neon;
            break;
#elif defined(ARM) && !defined(DISABLE_NEON)
        case ARCH_ARM_A9Q:
        case ARCH_ARM_A9A:
        case ARCH_ARM_A9:
        case ARCH_ARM_A7:
        case ARCH_ARM_A5:
        case ARCH_ARM_A15:
            ps_res_pred_state->pf_get_sad_with_residual_pred =
                isvce_get_sad_with_residual_pred_neon;
            break;
#endif
        default:
            break;
    }

    /* Samplers are only selected for a resolution ratio of 2 */
    if(2. != d_spatial_res_ratio)
    {
        return;
    }

    ps_res_pred_state->apf_residual_samplers[U] = isvce_chroma_residual_sampler_2x;
    ps_res_pred_state->apf_residual_samplers[V] = isvce_chroma_residual_sampler_2x;

    /* Luma sampler: start with the C implementation and override for SIMD */
    ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x;

    switch(e_arch)
    {
#if defined(X86)
        case ARCH_X86_SSE42:
            ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_sse42;
            break;
#elif defined(ARMV8)
        case ARCH_ARM_A53:
        case ARCH_ARM_A57:
        case ARCH_ARM_V8_NEON:
            ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_neon;
            break;
#elif defined(ARM) && !defined(DISABLE_NEON)
        case ARCH_ARM_A9Q:
        case ARCH_ARM_A9A:
        case ARCH_ARM_A9:
        case ARCH_ARM_A7:
        case ARCH_ARM_A5:
        case ARCH_ARM_A15:
            ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_neon;
            break;
#endif
        default:
            break;
    }
}
/**
*******************************************************************************
*
* @brief
*  Function to initialize residual pred ctxt of every process context
*
* @par Description
*  Carves the memory record into the structures and buffers accounted for by
*  'isvce_get_svc_res_pred_ctxt_size'. With more than one spatial layer, each
*  process context gets its own residual pred context, state, layer state
*  array, ref pointer increment arrays, MB residual buffers and scratch.
*  Per-layer MB states, position/phase tables and MB mode maps are carved and
*  initialised once (for process context 0) and shared by pointer with the
*  remaining process contexts.
*  With a single layer only the MB residual buffers are set up and
*  'ps_res_pred_ctxt' is set to NULL.
*  'i8_alloc_mem_size' tracks the bytes left in the record and is debited for
*  every carve, so that the assertions catch any overrun of the record.
*
* @param[in] ps_codec
*  Pointer to codec context
*
* @param[in] ps_mem_rec
*  Pointer to memory allocated for residual pred buffers
*
*******************************************************************************
*/
void isvce_svc_res_pred_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec)
{
    WORD32 i, j, k;

    const WORD32 i4_num_proc_ctxts = sizeof(ps_codec->as_process) / sizeof(ps_codec->as_process[0]);
    UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;
    DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio;
    UWORD32 u4_wd = ps_codec->s_cfg.u4_wd;
    UWORD32 u4_ht = ps_codec->s_cfg.u4_ht;
    UWORD8 *pu1_buf = ps_mem_rec->pv_base;
    WORD64 i8_alloc_mem_size =
        isvce_get_svc_res_pred_ctxt_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht);

    if(u1_num_spatial_layers > 1)
    {
        /* Pointers carved for process context 0, shared with the others */
        res_pred_mb_state_t *aps_luma_mb_states[MAX_NUM_SPATIAL_LAYERS];
        res_pred_mb_state_t *aps_chroma_mb_states[MAX_NUM_SPATIAL_LAYERS];
        WORD8 *api1_mb_mode[MAX_NUM_SPATIAL_LAYERS];
        WORD32 ai4_mb_mode_stride[MAX_NUM_SPATIAL_LAYERS];
        WORD32 i4_size;

        for(i = 0; i < i4_num_proc_ctxts; i++)
        {
            res_pred_state_t *ps_res_pred_state;
            svc_res_pred_ctxt_t *ps_res_pred_ctxt;
            yuv_buf_props_t *ps_mb_res_buf;
            res_pred_mem_store_t *ps_mem_store;

            isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i;

            ps_res_pred_ctxt = ps_proc->ps_res_pred_ctxt = (svc_res_pred_ctxt_t *) pu1_buf;
            pu1_buf += sizeof(svc_res_pred_ctxt_t);
            i8_alloc_mem_size -= sizeof(svc_res_pred_ctxt_t);

            ps_res_pred_ctxt->s_res_pred_constants.pv_state = pu1_buf;
            ps_res_pred_state = (res_pred_state_t *) pu1_buf;
            pu1_buf += sizeof(res_pred_state_t);
            i8_alloc_mem_size -= sizeof(res_pred_state_t);

            ps_res_pred_state->ps_layer_state = (res_pred_layer_state_t *) pu1_buf;
            pu1_buf += u1_num_spatial_layers * sizeof(ps_res_pred_state->ps_layer_state[0]);
            i8_alloc_mem_size -=
                u1_num_spatial_layers * sizeof(ps_res_pred_state->ps_layer_state[0]);

            i4_size = REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(UWORD8);

            ps_res_pred_state->pu1_ref_x_ptr_incr = (UWORD8 *) pu1_buf;
            pu1_buf += i4_size;
            i8_alloc_mem_size -= i4_size;

            ps_res_pred_state->pu1_ref_y_ptr_incr = (UWORD8 *) pu1_buf;
            pu1_buf += i4_size;
            i8_alloc_mem_size -= i4_size;

            ASSERT(i8_alloc_mem_size >= 0);

            if(0 == i)
            {
                UWORD32 au4_ref_pos_array_size[NUM_SP_COMPONENTS];
                UWORD32 au4_ref_phase_array_size[NUM_SP_COMPONENTS];

                if(1.5 == d_spatial_res_ratio)
                {
                    /* One position per sample and 5 phases, for every MB */
                    au4_ref_pos_array_size[Y] = MB_SIZE * MB_SIZE;
                    au4_ref_phase_array_size[Y] = 5;
                    au4_ref_pos_array_size[U] = (MB_SIZE / 2) * (MB_SIZE / 2);
                    au4_ref_phase_array_size[U] = 5;
                }
                else
                {
                    /* Only a single table of 3 chroma phases per layer */
                    au4_ref_pos_array_size[Y] = au4_ref_pos_array_size[U] = 0;
                    au4_ref_phase_array_size[Y] = 0;
                    au4_ref_phase_array_size[U] = 3;
                }

                for(j = u1_num_spatial_layers - 1; j >= 1; j--)
                {
                    res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j];

                    WORD32 i4_layer_luma_wd =
                        ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
                        0.99;
                    WORD32 i4_layer_luma_ht =
                        ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
                        0.99;
                    WORD32 i4_layer_luma_mbs =
                        (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE);
                    WORD32 i4_layer_u_wd = i4_layer_luma_wd / 2.0 + 0.99;
                    WORD32 i4_layer_u_ht = i4_layer_luma_ht / 2.0 + 0.99;
                    WORD32 i4_layer_u_mbs =
                        (i4_layer_u_wd / (MB_SIZE / 2)) * (i4_layer_u_ht / (MB_SIZE / 2));

                    ps_layer->ps_luma_mb_states = (res_pred_mb_state_t *) pu1_buf;
                    aps_luma_mb_states[j] = ps_layer->ps_luma_mb_states;
                    pu1_buf += i4_layer_luma_mbs * sizeof(ps_layer->ps_luma_mb_states[0]);
                    /* Debit exactly what was carved: one state per luma MB */
                    i8_alloc_mem_size -= i4_layer_luma_mbs * sizeof(ps_layer->ps_luma_mb_states[0]);

                    ps_layer->ps_chroma_mb_states = (res_pred_mb_state_t *) pu1_buf;
                    aps_chroma_mb_states[j] = ps_layer->ps_chroma_mb_states;
                    pu1_buf += i4_layer_u_mbs * sizeof(ps_layer->ps_chroma_mb_states[0]);
                    i8_alloc_mem_size -= i4_layer_u_mbs * sizeof(ps_layer->ps_chroma_mb_states[0]);

                    if(1.5 == d_spatial_res_ratio)
                    {
                        /* Layout: all luma positions, then all luma phases, */
                        /* then all chroma positions, then all chroma phases */
                        coordinates_t *ps_ref_array_pos = (coordinates_t *) pu1_buf;
                        coordinates_t *ps_ref_array_phases =
                            ps_ref_array_pos + i4_layer_luma_mbs * au4_ref_pos_array_size[Y];

                        for(k = 0; k < i4_layer_luma_mbs; k++)
                        {
                            ps_layer->ps_luma_mb_states[k].ps_ref_array_positions =
                                ps_ref_array_pos + k * au4_ref_pos_array_size[Y];
                            ps_layer->ps_luma_mb_states[k].ps_ref_array_phases =
                                ps_ref_array_phases + k * au4_ref_phase_array_size[Y];

                            pu1_buf += au4_ref_pos_array_size[Y] * sizeof(ps_ref_array_pos[0]);
                            i8_alloc_mem_size -=
                                au4_ref_pos_array_size[Y] * sizeof(ps_ref_array_pos[0]);
                            pu1_buf += au4_ref_phase_array_size[Y] * sizeof(ps_ref_array_phases[0]);
                            i8_alloc_mem_size -=
                                au4_ref_phase_array_size[Y] * sizeof(ps_ref_array_phases[0]);
                        }

                        ps_ref_array_pos = (coordinates_t *) pu1_buf;
                        ps_ref_array_phases =
                            ps_ref_array_pos + i4_layer_u_mbs * au4_ref_pos_array_size[U];

                        for(k = 0; k < i4_layer_u_mbs; k++)
                        {
                            ps_layer->ps_chroma_mb_states[k].ps_ref_array_positions =
                                ps_ref_array_pos + k * au4_ref_pos_array_size[U];
                            ps_layer->ps_chroma_mb_states[k].ps_ref_array_phases =
                                ps_ref_array_phases + k * au4_ref_phase_array_size[U];

                            pu1_buf += au4_ref_pos_array_size[U] * sizeof(ps_ref_array_pos[0]);
                            i8_alloc_mem_size -=
                                au4_ref_pos_array_size[U] * sizeof(ps_ref_array_pos[0]);
                            pu1_buf += au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]);
                            i8_alloc_mem_size -=
                                au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]);
                        }
                    }
                    else
                    {
                        coordinates_t *ps_ref_array_pos = NULL;
                        coordinates_t *ps_ref_array_phases = NULL;

                        /* Luma needs neither positions nor phases */
                        for(k = 0; k < i4_layer_luma_mbs; k++)
                        {
                            ps_layer->ps_luma_mb_states[k].ps_ref_array_positions =
                                ps_ref_array_pos;
                            ps_layer->ps_luma_mb_states[k].ps_ref_array_phases =
                                ps_ref_array_phases;
                        }

                        /* All chroma MBs of the layer share one phase table */
                        ps_ref_array_pos = NULL;
                        ps_ref_array_phases = (coordinates_t *) pu1_buf;

                        for(k = 0; k < i4_layer_u_mbs; k++)
                        {
                            ps_layer->ps_chroma_mb_states[k].ps_ref_array_positions =
                                ps_ref_array_pos;
                            ps_layer->ps_chroma_mb_states[k].ps_ref_array_phases =
                                ps_ref_array_phases;
                        }

                        pu1_buf += au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]);
                        i8_alloc_mem_size -=
                            au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]);
                    }

                    ASSERT(i8_alloc_mem_size >= 0);

                    /* Asserts below verify that
                     * 'ps_codec->s_svc_ilp_data.aps_layer_resampler_props' is initialised
                     */
                    ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j].u4_mb_wd ==
                           MB_SIZE);
                    ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j].u4_mb_wd ==
                           (MB_SIZE / 2));

                    ps_layer->ps_luma_props =
                        &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j];
                    ps_layer->ps_chroma_props =
                        &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j];

                    isvce_res_pred_layer_state_init(ps_layer, d_spatial_res_ratio, i4_layer_luma_wd,
                                                    i4_layer_luma_ht,
                                                    ps_codec->s_cfg.e_inp_color_fmt);
                }

                for(j = u1_num_spatial_layers - 1; j >= 0; j--)
                {
                    res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j];

                    WORD32 i4_layer_luma_wd =
                        ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
                        0.99;
                    WORD32 i4_layer_luma_ht =
                        ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
                        0.99;
                    /* MB mode map is padded by one MB on every side */
                    WORD32 i4_layer_luma_mbs =
                        ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2);

                    ps_layer->pi1_mb_mode = (WORD8 *) pu1_buf;
                    pu1_buf += i4_layer_luma_mbs * sizeof(WORD8);
                    i8_alloc_mem_size -= i4_layer_luma_mbs * sizeof(WORD8);
                    memset(ps_layer->pi1_mb_mode, -1, i4_layer_luma_mbs);

                    ps_layer->i4_mb_mode_stride = ai4_mb_mode_stride[j] =
                        (i4_layer_luma_wd / MB_SIZE) + 2;

                    /* Point at MB (0, 0), skipping the top padding row and left column */
                    ps_layer->pi1_mb_mode += 1 + ps_layer->i4_mb_mode_stride;
                    api1_mb_mode[j] = ps_layer->pi1_mb_mode;
                }

                ASSERT(i8_alloc_mem_size >= 0);
            }
            else
            {
                /* Share the tables carved for process context 0 */
                for(j = u1_num_spatial_layers - 1; j >= 1; j--)
                {
                    res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j];

                    ps_layer->ps_luma_mb_states = aps_luma_mb_states[j];
                    ps_layer->ps_chroma_mb_states = aps_chroma_mb_states[j];

                    ps_layer->ps_luma_props =
                        &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j];
                    ps_layer->ps_chroma_props =
                        &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j];
                }

                for(j = u1_num_spatial_layers - 1; j >= 0; j--)
                {
                    res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j];

                    ps_layer->pi1_mb_mode = api1_mb_mode[j];
                    ps_layer->i4_mb_mode_stride = ai4_mb_mode_stride[j];
                }
            }

            ps_mb_res_buf = &ps_res_pred_ctxt->s_res_pred_outputs.s_res_pred;
            ps_mem_store = &ps_res_pred_state->s_mem_store;

            ps_proc->ps_mb_res_buf = ps_mb_res_buf;

            for(j = 0; j < NUM_SP_COMPONENTS; j++)
            {
                buffer_container_t *ps_comp_buf = &ps_mb_res_buf->as_component_bufs[j];

                UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) j));

                ps_comp_buf->pv_data = pu1_buf;
                ps_comp_buf->i4_data_stride = MB_SIZE;
                pu1_buf += MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
                i8_alloc_mem_size -= MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
            }

            ps_mem_store->s_scratch.pv_data = pu1_buf;
            ps_mem_store->s_scratch.i4_data_stride = REF_ARRAY_MAX_WIDTH;
            pu1_buf += REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16);
            i8_alloc_mem_size -= REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16);

            ASSERT(i8_alloc_mem_size >= 0);

            ps_mb_res_buf->as_component_bufs[V].pv_data = NULL;
            ps_mb_res_buf->e_color_format = IV_YUV_420SP_UV;
            ps_mb_res_buf->u1_bit_depth = 10;
            ps_mb_res_buf->u4_width = MB_SIZE;
            ps_mb_res_buf->u4_height = MB_SIZE;

            isvce_svc_residual_sampling_function_selector(ps_res_pred_state, d_spatial_res_ratio,
                                                          ps_codec->s_cfg.e_arch);
        }
    }
    else
    {
        /* Single layer: no residual prediction, only MB residual buffers */
        for(i = 0; i < i4_num_proc_ctxts; i++)
        {
            isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i;

            ps_proc->ps_res_pred_ctxt = NULL;

            ps_proc->ps_mb_res_buf = (yuv_buf_props_t *) pu1_buf;
            pu1_buf += sizeof(yuv_buf_props_t);
            i8_alloc_mem_size -= sizeof(yuv_buf_props_t);

            for(j = 0; j < NUM_SP_COMPONENTS; j++)
            {
                buffer_container_t *ps_comp_buf = &ps_proc->ps_mb_res_buf->as_component_bufs[j];

                UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) j));

                ps_comp_buf->pv_data = pu1_buf;
                ps_comp_buf->i4_data_stride = MB_SIZE;
                pu1_buf += MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
                i8_alloc_mem_size -= MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
            }

            ASSERT(i8_alloc_mem_size >= 0);
        }
    }
}
/**
*******************************************************************************
*
* @brief
*  Produces the residual prediction of the current MB by invoking, for every
*  component, the residual sampler registered in the residual prediction state
*
* @param[in] ps_res_pred_ctxt
*  Residual prediction context. The MB position and spatial layer id are read
*  from its 'variables', the result is written through the component buffers
*  of its 'outputs'
*
* @notes
*  1. The spatial layer id must be greater than 0; the residual buffer and MB
*     info of layer (id - 1) are used as the reference
*  2. The input pointer is moved by the MB's reference array offsets without
*     clamping; presence of appropriate padding is assumed
*  3. The reference MB is located using half of the current layer's width in
*     MBs as the row pitch
*
*******************************************************************************
*/
void isvce_get_mb_residual_pred(svc_res_pred_ctxt_t *ps_res_pred_ctxt)
{
    res_pred_constants_t *ps_consts = &ps_res_pred_ctxt->s_res_pred_constants;
    res_pred_variables_t *ps_vars = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_outputs_t *ps_outputs = &ps_res_pred_ctxt->s_res_pred_outputs;
    res_pred_state_t *ps_state = (res_pred_state_t *) ps_consts->pv_state;
    res_pred_mem_store_t *ps_store = &ps_state->s_mem_store;
    svc_ilp_data_t *ps_ilp = ps_vars->ps_svc_ilp_data;
    coordinates_t *ps_cur_mb = &ps_vars->s_mb_pos;
    UWORD8 u1_spatial_layer_id = ps_vars->u1_spatial_layer_id;

    svc_layer_data_t *ps_base_layer_data;
    res_pred_layer_state_t *ps_cur_layer;
    yuv_buf_props_t *ps_base_residual;
    WORD32 i4_pic_wd_in_mbs;
    WORD32 i4_mb_idx;
    WORD32 i4_comp;

    ASSERT(u1_spatial_layer_id > 0);

    /* Raster index of the current MB inside the current layer */
    i4_pic_wd_in_mbs =
        ((WORD32) ps_ilp->ps_residual_bufs[u1_spatial_layer_id].u4_width) / MB_SIZE;
    i4_mb_idx = ps_cur_mb->i4_abscissa + ps_cur_mb->i4_ordinate * i4_pic_wd_in_mbs;

    ps_base_layer_data = &ps_ilp->ps_svc_au_data->ps_svc_layer_data[u1_spatial_layer_id - 1];
    ps_base_residual = &ps_ilp->ps_residual_bufs[u1_spatial_layer_id - 1];
    ps_cur_layer = &ps_state->ps_layer_state[u1_spatial_layer_id];

    for(i4_comp = 0; i4_comp < NUM_COMPONENTS; i4_comp++)
    {
        buffer_container_t s_src;
        buffer_container_t s_dst;
        coordinates_t s_offsets;
        res_pred_mb_state_t *ps_mb;
        layer_resampler_props_t *ps_props;
        isvce_mb_info_t *ps_base_mb;
        WORD16 *pi2_src;

        UWORD8 u1_chroma = (Y != ((COMPONENT_TYPE) i4_comp));
        UWORD8 u1_is_v = (V == ((COMPONENT_TYPE) i4_comp));
        WORD32 i4_plane = u1_chroma ? UV : Y;

        if(u1_chroma)
        {
            ps_mb = &ps_cur_layer->ps_chroma_mb_states[i4_mb_idx];
            ps_props = ps_cur_layer->ps_chroma_props;
        }
        else
        {
            ps_mb = &ps_cur_layer->ps_luma_mb_states[i4_mb_idx];
            ps_props = ps_cur_layer->ps_luma_props;
        }

        /* Presence of appropriate padding is assumed */
        s_offsets = ps_mb->s_offsets;

        /* U and V share the interleaved UV buffer; V starts one sample in */
        s_src = ps_base_residual->as_component_bufs[i4_plane];
        pi2_src = (WORD16 *) s_src.pv_data;
        pi2_src += u1_is_v;
        pi2_src += (s_offsets.i4_abscissa << u1_chroma);
        pi2_src += s_offsets.i4_ordinate * s_src.i4_data_stride;
        s_src.pv_data = pi2_src;

        s_dst = ps_outputs->s_res_pred.as_component_bufs[i4_plane];
        s_dst.pv_data = ((WORD16 *) s_dst.pv_data) + u1_is_v;

        /* Reference MB that contains the centre of the reference array */
        ps_base_mb =
            ps_base_layer_data->ps_mb_info +
            ((s_offsets.i4_abscissa + (ps_mb->s_ref_array_dims.i4_abscissa / 2)) /
             ps_props->u4_mb_wd) +
            ((s_offsets.i4_ordinate + (ps_mb->s_ref_array_dims.i4_ordinate / 2)) /
             ps_props->u4_mb_ht) *
                (i4_pic_wd_in_mbs / 2);

        ps_state->apf_residual_samplers[i4_comp](
            ps_mb->ps_ref_array_positions, ps_mb->ps_ref_array_phases, &s_src, &s_dst,
            &ps_store->s_scratch, UINT32_MAX, ps_base_mb->u1_tx_size == 8);
    }
}
/**
*******************************************************************************
*
* @brief
*  Looks up the MB mode entry covering a reference layer sample and reports
*  the MB type and transform size derived from it
*
* @param[in] pi1_ref_mb_modes
*  Base of the reference layer MB mode map
*
* @param[in] i4_ref_mode_stride
*  Stride of the MB mode map, in elements
*
* @param[in] i4_element_size
*  Size of one MB mode map element, in bytes
*
* @param[in] i4_x_ref
*  Sample abscissa in the reference layer; negative values are treated as 0
*
* @param[in] i4_y_ref
*  Sample ordinate in the reference layer; negative values are treated as 0
*
* @param[out] pi4_mb_type
*  Set to SVC_INTER_MB or SVC_INTRA_MB
*
* @param[out] pi4_tx_size
*  Set to GET_BIT_TX_SIZE(entry, 1) for inter MBs and to 1 otherwise
*
* @param[in] i4_chroma_flag
*  Subtracted from the MB width/height shifts before converting the sample
*  position to MB units
*
*******************************************************************************
*/
void isvce_get_ref_layer_mbtype_tx_size(WORD8 *pi1_ref_mb_modes, WORD32 i4_ref_mode_stride,
                                        WORD32 i4_element_size, WORD32 i4_x_ref, WORD32 i4_y_ref,
                                        WORD32 *pi4_mb_type, WORD32 *pi4_tx_size,
                                        WORD32 i4_chroma_flag)
{
    WORD32 i4_col_shift = (MB_WIDTH_SHIFT - i4_chroma_flag);
    WORD32 i4_row_shift = (MB_HEIGHT_SHIFT - i4_chroma_flag);
    WORD32 i4_clamped_x = (i4_x_ref < 0) ? 0 : i4_x_ref;
    WORD32 i4_clamped_y = (i4_y_ref < 0) ? 0 : i4_y_ref;
    WORD32 i4_mb_col = (i4_clamped_x >> i4_col_shift);
    WORD32 i4_mb_row = (i4_clamped_y >> i4_row_shift);
    WORD8 *pi1_entry;
    WORD8 i1_mode;

    pi1_entry = pi1_ref_mb_modes;
    pi1_entry += (i4_mb_row * i4_ref_mode_stride * i4_element_size);
    pi1_entry += (i4_mb_col * i4_element_size);

    /* Negative entries are used as is; others go through the mode extractor */
    i1_mode = *pi1_entry;
    if(i1_mode >= 0)
    {
        i1_mode = SVC_EXTRACT_MB_MODE(*pi1_entry);
    }

    if(i1_mode > SVC_INTER_MB)
    {
        *pi4_mb_type = SVC_INTRA_MB;
        *pi4_tx_size = 1;
        return;
    }

    *pi4_mb_type = SVC_INTER_MB;
    *pi4_tx_size = GET_BIT_TX_SIZE(*pi1_entry, 1);
}
/**
*******************************************************************************
*
* @brief
*  Builds the per-sample x and y pointer-increment maps of a reference array.
*  Both maps are first filled with 1. Entries on the last column of every
*  transform block are then cleared in the x map, and entries on the last row
*  of every transform block are cleared in the y map
*
* @param[in] pi1_ref_mb_modes
*  Reference layer MB mode map; only consulted in the luma path
*
* @param[in] i4_ref_mode_stride
*  Stride of the MB mode map (forwarded to the mode lookup)
*
* @param[in] i4_element_size
*  Element size of the MB mode map (forwarded to the mode lookup)
*
* @param[in] i4_x_offset
*  Abscissa of the reference array's first sample in the reference layer
*
* @param[in] i4_y_offset
*  Ordinate of the reference array's first sample in the reference layer
*
* @param[in] i4_refary_wd
*  Width of the reference array; also used as the stride of both maps
*
* @param[in] i4_refary_ht
*  Height of the reference array
*
* @param[out] pu1_ref_x_ptr_incr
*  X increment map; (i4_refary_wd * i4_refary_ht) bytes are written
*
* @param[out] pu1_ref_y_ptr_incr
*  Y increment map; (i4_refary_wd * i4_refary_ht) bytes are written
*
* @param[in] i4_chroma_flag
*  0 selects the luma path (16x16 MBs, transform size derived from the MB mode
*  map); any other value selects the chroma path (8x8 MBs, fixed 4x4
*  transform)
*
* @notes
*  The reference array is walked one reference MB at a time: left to right
*  within a row of MBs, then on to the next row of MBs
*
*******************************************************************************
*/
void isvce_ref_layer_ptr_incr(WORD8 *pi1_ref_mb_modes, WORD32 i4_ref_mode_stride,
WORD32 i4_element_size, WORD32 i4_x_offset, WORD32 i4_y_offset,
WORD32 i4_refary_wd, WORD32 i4_refary_ht, UWORD8 *pu1_ref_x_ptr_incr,
UWORD8 *pu1_ref_y_ptr_incr, WORD32 i4_chroma_flag)
{
WORD32 i4_x, i4_y;
WORD32 i4_x_idx, i4_y_idx;
WORD32 i4_prev_x, i4_prev_y;
WORD32 i4_const_val;
WORD32 i4_pos_x, i4_pos_y;
WORD32 i4_trans_size;
WORD32 i4_mb_type, i4_tx_size;
WORD32 i4_act_ary_wd, i4_act_ary_ht;
WORD32 i4_and_const;
UWORD8 *pu1_incr_x, *pu1_incr_y;
/* Default increment of 1 everywhere; block edges are cleared below */
memset(pu1_ref_x_ptr_incr, 1, (i4_refary_wd * i4_refary_ht));
memset(pu1_ref_y_ptr_incr, 1, (i4_refary_wd * i4_refary_ht));
i4_act_ary_wd = i4_refary_wd;
i4_act_ary_ht = i4_refary_ht;
/* (i4_x, i4_y) is the top-left corner, within the reference array, of the */
/* reference MB portion handled in the current iteration */
i4_x = 0;
i4_y = 0;
/* i4_prev_y remembers the top of the current row of MBs */
i4_prev_y = 0;
if(0 == i4_chroma_flag)
{
do
{
WORD32 i4_x_ref, i4_y_ref;
WORD32 i4_idx;
WORD32 i4_wd, i4_ht;
WORD32 i4_max_pos_x, i4_max_pos_y;
i4_prev_x = i4_x;
/* Position of the current corner in reference layer coordinates */
i4_x_ref = i4_x_offset + i4_x;
i4_y_ref = i4_y_offset + i4_y;
/* Only the transform size is used below; i4_mb_type is not read */
isvce_get_ref_layer_mbtype_tx_size(pi1_ref_mb_modes, i4_ref_mode_stride,
i4_element_size, i4_x_ref, i4_y_ref, &i4_mb_type,
&i4_tx_size, i4_chroma_flag);
/* tx_size 0 gives a 4 sample transform, tx_size 1 gives 8 */
i4_trans_size = ((i4_tx_size + 1) << 2);
i4_const_val = i4_trans_size - 1;
i4_and_const = i4_const_val;
/* Fill horizontal tx block edges of current reference mb with 0 */
pu1_incr_x = pu1_ref_x_ptr_incr + i4_x;
pu1_incr_x += (i4_y * i4_refary_wd);
/* Rows left in this MB, limited to the rows left in the array */
i4_ht = (16 - (i4_y_ref & 0xF));
i4_ht = MIN((i4_act_ary_ht - i4_y), i4_ht);
i4_x_idx = i4_x;
i4_pos_x = i4_x_ref & 0xF;
i4_max_pos_x = 16;
/* Advance i4_x to the left edge of the next MB */
i4_x += (16 - i4_pos_x);
/* Get the transform block edge pos */
i4_idx = (i4_const_val - (i4_pos_x & i4_and_const));
i4_x_idx += i4_idx;
/* One column per transform block, until the MB or the array ends */
while((i4_pos_x < i4_max_pos_x) && (i4_x_idx < i4_act_ary_wd))
{
WORD32 i4_i;
UWORD8 *pu1_incr;
pu1_incr = pu1_incr_x + i4_idx;
for(i4_i = 0; i4_i < i4_ht; i4_i++)
{ /* Fill the block edge with 0s */
*pu1_incr = 0;
pu1_incr += i4_refary_wd;
}
i4_pos_x += i4_trans_size;
pu1_incr_x += i4_trans_size;
i4_x_idx += MIN(i4_trans_size, (i4_act_ary_wd - i4_x_idx));
}
/* Fill vertical tx block edges of current reference mb with 0 */
pu1_incr_y = pu1_ref_y_ptr_incr + i4_prev_x;
pu1_incr_y += (i4_y * i4_refary_wd);
/* Columns left in this MB, limited to the columns left in the array */
i4_wd = (16 - (i4_x_ref & 0xF));
i4_wd = MIN((i4_act_ary_wd - i4_prev_x), i4_wd);
i4_y_idx = i4_y;
i4_pos_y = i4_y_ref & 0xF;
i4_max_pos_y = 16;
/* Advance i4_y to the top edge of the next row of MBs */
i4_y += (16 - i4_pos_y);
/* Get the transform block edge pos */
i4_idx = (i4_const_val - (i4_pos_y & i4_and_const));
i4_y_idx += i4_idx;
/* One row per transform block, until the MB or the array ends */
while((i4_pos_y < i4_max_pos_y) && (i4_y_idx < i4_act_ary_ht))
{
WORD32 i4_i;
UWORD8 *pu1_incr;
pu1_incr = pu1_incr_y + i4_idx * i4_refary_wd;
for(i4_i = 0; i4_i < i4_wd; i4_i++)
{ /* Fill the block edge with 0s */
*pu1_incr = 0;
pu1_incr++;
}
i4_pos_y += i4_trans_size;
pu1_incr_y += i4_trans_size * i4_refary_wd;
i4_y_idx += MIN(i4_trans_size, (i4_act_ary_ht - i4_y_idx));
}
if(i4_x < i4_act_ary_wd)
{
/* More MBs in this row: go back to the top of the row */
i4_y = i4_prev_y;
}
else if(i4_y < i4_act_ary_ht)
{
/* Row of MBs finished: start the next one from the left */
i4_prev_y = i4_y;
i4_x = 0;
}
} while((i4_y < i4_act_ary_ht) || (i4_x < i4_act_ary_wd));
}
else
{
/* Chroma path: the MB mode map is not consulted, transform is 4x4 */
i4_trans_size = 4;
i4_const_val = 3;
do
{
WORD32 i4_x_ref, i4_y_ref;
WORD32 i4_idx;
WORD32 i4_wd, i4_ht;
WORD32 i4_max_pos_x, i4_max_pos_y;
i4_prev_x = i4_x;
/* Position of the current corner in reference layer coordinates */
i4_x_ref = i4_x_offset + i4_x;
i4_y_ref = i4_y_offset + i4_y;
/* Fill horizontal tx block edges of current reference mb with 0 */
pu1_incr_x = pu1_ref_x_ptr_incr + i4_x;
pu1_incr_x += (i4_y * i4_refary_wd);
/* Rows left in this MB, limited to the rows left in the array */
i4_ht = (8 - (i4_y_ref & 0x7));
i4_ht = MIN((i4_act_ary_ht - i4_y), i4_ht);
i4_x_idx = i4_x;
i4_pos_x = i4_x_ref & 0x7;
i4_max_pos_x = 8;
/* Advance i4_x to the left edge of the next MB */
i4_x += (8 - i4_pos_x);
/* Get the transform block edge pos */
i4_idx = (i4_const_val - (i4_pos_x & 0x3));
i4_x_idx += i4_idx;
/* One column per transform block, until the MB or the array ends */
while((i4_pos_x < i4_max_pos_x) && (i4_x_idx < i4_act_ary_wd))
{
WORD32 i4_i;
UWORD8 *pu1_incr;
pu1_incr = pu1_incr_x + i4_idx;
for(i4_i = 0; i4_i < i4_ht; i4_i++)
{ /* Fill the block edge with 0s */
*pu1_incr = 0;
pu1_incr += i4_refary_wd;
}
i4_pos_x += i4_trans_size;
pu1_incr_x += i4_trans_size;
i4_x_idx += MIN(i4_trans_size, (i4_act_ary_wd - i4_x_idx));
}
/* Fill vertical tx block edges of current reference mb with 0 */
pu1_incr_y = pu1_ref_y_ptr_incr + i4_prev_x;
pu1_incr_y += (i4_y * i4_refary_wd);
/* Columns left in this MB, limited to the columns left in the array */
i4_wd = (8 - (i4_x_ref & 0x7));
i4_wd = MIN((i4_act_ary_wd - i4_prev_x), i4_wd);
i4_y_idx = i4_y;
i4_pos_y = i4_y_ref & 0x7;
i4_max_pos_y = 8;
/* Advance i4_y to the top edge of the next row of MBs */
i4_y += (8 - i4_pos_y);
/* Get the transform block edge pos */
i4_idx = (i4_const_val - (i4_pos_y & 0x3));
i4_y_idx += i4_idx;
/* One row per transform block, until the MB or the array ends */
while((i4_pos_y < i4_max_pos_y) && (i4_y_idx < i4_act_ary_ht))
{
WORD32 i4_i;
UWORD8 *pu1_incr;
pu1_incr = pu1_incr_y + i4_idx * i4_refary_wd;
for(i4_i = 0; i4_i < i4_wd; i4_i++)
{ /* Fill the block edge with 0s */
*pu1_incr = 0;
pu1_incr++;
}
i4_pos_y += i4_trans_size;
pu1_incr_y += i4_trans_size * i4_refary_wd;
i4_y_idx += MIN(i4_trans_size, (i4_act_ary_ht - i4_y_idx));
}
if(i4_x < i4_act_ary_wd)
{
/* More MBs in this row: go back to the top of the row */
i4_y = i4_prev_y;
}
else if(i4_y < i4_act_ary_ht)
{
/* Row of MBs finished: start the next one from the left */
i4_prev_y = i4_y;
i4_x = 0;
}
} while((i4_y < i4_act_ary_ht) || (i4_x < i4_act_ary_wd));
}
}
/**
*******************************************************************************
*
* @brief
*  Constructs the reference sample array of the current MB in the scratch
*  buffer of the residual prediction state, and refreshes the x/y pointer
*  increment maps for that array
*
* @param[in] ps_res_pred_ctxt
*  Residual prediction context; supplies the MB position, the spatial layer
*  id, the per-MB offsets and reference array dimensions, and the scratch
*  buffer that receives the array
*
* @param[in] pi2_inp_data
*  Reference layer residual samples. A non-negative offset is subtracted from
*  the clamped sample position before indexing, a negative one is not
*
* @param[in] i4_inp_data_stride
*  Stride of pi2_inp_data
*
* @param[in] ps_ref_mb_mode
*  Reference layer MB mode map; must not be NULL
*
* @param[in] i4_ref_mb_mode_stride
*  Stride of the MB mode map
*
* @param[out] pi4_refarr_wd
*  Receives the width of the constructed reference array
*
* @param[in] i4_chroma_flag
*  Non-zero for chroma. Halves the reference layer dimensions used for
*  clamping and doubles the horizontal step in pi2_inp_data
*
* @notes
*  Samples whose reference MB is reported as SVC_INTER_MB are copied from the
*  input; every other sample of the array is written as 0
*
*******************************************************************************
*/
void isvce_residual_reflayer_const(svc_res_pred_ctxt_t *ps_res_pred_ctxt, WORD16 *pi2_inp_data,
                                   WORD32 i4_inp_data_stride, WORD8 *ps_ref_mb_mode,
                                   WORD32 i4_ref_mb_mode_stride, WORD32 *pi4_refarr_wd,
                                   WORD32 i4_chroma_flag)
{
    res_pred_constants_t *ps_consts = &ps_res_pred_ctxt->s_res_pred_constants;
    res_pred_variables_t *ps_vars = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_state_t *ps_state = (res_pred_state_t *) ps_consts->pv_state;
    svc_ilp_data_t *ps_ilp = ps_vars->ps_svc_ilp_data;
    coordinates_t *ps_cur_mb = &ps_vars->s_mb_pos;
    UWORD8 u1_layer_id = ps_vars->u1_spatial_layer_id;
    res_pred_layer_state_t *ps_cur_layer = &ps_state->ps_layer_state[u1_layer_id];
    WORD16 *pi2_dst_row = (WORD16 *) ps_state->s_mem_store.s_scratch.pv_data;
    WORD8 *pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode;

    res_pred_mb_state_t *ps_mb;
    WORD32 i4_pic_wd_in_mbs;
    WORD32 i4_max_x, i4_max_y;
    WORD32 i4_off_x, i4_off_y;
    WORD32 i4_arr_wd, i4_arr_ht;
    WORD32 i4_row, i4_col;

    ASSERT(NULL != pi1_ref_mb_modes);

    /* Per-MB state of the current MB for the requested component type */
    i4_pic_wd_in_mbs = ((WORD32) ps_ilp->ps_residual_bufs[u1_layer_id].u4_width) / MB_SIZE;
    if(i4_chroma_flag)
    {
        ps_mb = ps_cur_layer->ps_chroma_mb_states;
    }
    else
    {
        ps_mb = ps_cur_layer->ps_luma_mb_states;
    }
    ps_mb += ps_cur_mb->i4_abscissa + ps_cur_mb->i4_ordinate * i4_pic_wd_in_mbs;

    /* Last valid sample position in the reference layer component */
    i4_max_x =
        (((WORD32) ps_ilp->ps_residual_bufs[u1_layer_id - 1].u4_width) >> i4_chroma_flag) - 1;
    i4_max_y =
        (((WORD32) ps_ilp->ps_residual_bufs[u1_layer_id - 1].u4_height) >> i4_chroma_flag) - 1;

    i4_off_x = ps_mb->s_offsets.i4_abscissa;
    i4_off_y = ps_mb->s_offsets.i4_ordinate;
    i4_arr_wd = ps_mb->s_ref_array_dims.i4_abscissa;
    i4_arr_ht = ps_mb->s_ref_array_dims.i4_ordinate;

    isvce_ref_layer_ptr_incr(pi1_ref_mb_modes, i4_ref_mb_mode_stride, 1, i4_off_x, i4_off_y,
                             i4_arr_wd, i4_arr_ht, ps_state->pu1_ref_x_ptr_incr,
                             ps_state->pu1_ref_y_ptr_incr, i4_chroma_flag);

    for(i4_row = 0; i4_row < i4_arr_ht; i4_row++)
    {
        WORD32 i4_src_y = i4_row + i4_off_y;
        WORD32 i4_buf_y;

        /* Clamp to [0, i4_max_y]; the lower bound is applied last */
        if(i4_src_y > i4_max_y)
        {
            i4_src_y = i4_max_y;
        }
        if(i4_src_y < 0)
        {
            i4_src_y = 0;
        }
        i4_buf_y = (0 <= i4_off_y) ? (i4_src_y - i4_off_y) : i4_src_y;

        for(i4_col = 0; i4_col < i4_arr_wd; i4_col++)
        {
            WORD32 i4_mb_type, i4_tx_size;
            WORD32 i4_src_x = i4_col + i4_off_x;
            WORD32 i4_buf_x;

            if(i4_src_x > i4_max_x)
            {
                i4_src_x = i4_max_x;
            }
            if(i4_src_x < 0)
            {
                i4_src_x = 0;
            }

            isvce_get_ref_layer_mbtype_tx_size(pi1_ref_mb_modes, i4_ref_mb_mode_stride, 1,
                                               i4_src_x, i4_src_y, &i4_mb_type, &i4_tx_size,
                                               i4_chroma_flag);

            i4_buf_x = (0 <= i4_off_x) ? (i4_src_x - i4_off_x) : i4_src_x;

            if(SVC_INTER_MB == i4_mb_type)
            {
                WORD16 *pi2_src = pi2_inp_data + (i4_buf_x << i4_chroma_flag);

                pi2_src += i4_buf_y * i4_inp_data_stride;
                pi2_dst_row[i4_col] = (WORD16) (*pi2_src);
            }
            else
            {
                pi2_dst_row[i4_col] = 0;
            }
        }

        pi2_dst_row += i4_arr_wd;
    }

    *pi4_refarr_wd = i4_arr_wd;
}
/**
*******************************************************************************
*
* @brief
*  Interpolates the residual prediction of one MB component from the
*  reference sample array held in the scratch buffer, using the per-MB
*  positions and phases together with the x/y pointer-increment maps
*
* @param[in] ps_res_pred_ctxt
*  Residual prediction context; supplies the spatial layer id, the scratch
*  buffer with the reference array, and the pointer-increment maps
*
* @param[out] pi2_out
*  Destination of the interpolated residual. Consecutive output samples of a
*  row are (1 << i4_chroma_flag) elements apart
*
* @param[in] i4_out_stride
*  Stride of pi2_out
*
* @param[in] i4_refarray_wd
*  Width of the reference array; used as the stride of the array and of both
*  increment maps
*
* @param[in] i4_chroma_flag
*  Non-zero for chroma; the processed block is (MB_SIZE >> i4_chroma_flag)
*  samples wide and high
*
* @param[in] ps_mb_pos
*  Position of the current MB, in MB units
*
* @notes
*  Each output sample is a two-tap horizontal filter applied on two
*  consecutive reference rows, followed by a two-tap vertical filter, with a
*  final rounding of (+128) >> 8. Where an increment map entry is 0, both taps
*  of that filter read the same sample, chosen by (phase >> 3)
*
*******************************************************************************
*/
void isvce_interpolate_residual(svc_res_pred_ctxt_t *ps_res_pred_ctxt, WORD16 *pi2_out,
                                WORD32 i4_out_stride, WORD32 i4_refarray_wd, WORD32 i4_chroma_flag,
                                coordinates_t *ps_mb_pos)
{
    res_pred_constants_t *ps_consts = &ps_res_pred_ctxt->s_res_pred_constants;
    res_pred_variables_t *ps_vars = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_state_t *ps_state = (res_pred_state_t *) ps_consts->pv_state;
    svc_ilp_data_t *ps_ilp = ps_vars->ps_svc_ilp_data;
    UWORD8 u1_layer_id = ps_vars->u1_spatial_layer_id;
    res_pred_layer_state_t *ps_cur_layer = &ps_state->ps_layer_state[u1_layer_id];

    WORD16 *pi2_ref = (WORD16 *) ps_state->s_mem_store.s_scratch.pv_data;
    UWORD8 *pu1_x_incr = ps_state->pu1_ref_x_ptr_incr;
    UWORD8 *pu1_y_incr = ps_state->pu1_ref_y_ptr_incr;

    WORD32 i4_blk_wd = MB_SIZE >> i4_chroma_flag;
    WORD32 i4_blk_ht = MB_SIZE >> i4_chroma_flag;
    WORD32 i4_pic_wd_in_mbs = ((WORD32) ps_ilp->ps_residual_bufs[u1_layer_id].u4_width) / MB_SIZE;

    res_pred_mb_state_t *ps_mb;
    coordinates_t *ps_phases;
    coordinates_t *ps_positions;
    WORD32 i4_row, i4_col;

    if(i4_chroma_flag)
    {
        ps_mb = ps_cur_layer->ps_chroma_mb_states;
    }
    else
    {
        ps_mb = ps_cur_layer->ps_luma_mb_states;
    }
    ps_mb = &ps_mb[ps_mb_pos->i4_abscissa + (ps_mb_pos->i4_ordinate * i4_pic_wd_in_mbs)];

    ps_phases = ps_mb->ps_ref_array_phases;
    ps_positions = ps_mb->ps_ref_array_positions;

    for(i4_row = 0; i4_row < i4_blk_ht; i4_row++)
    {
        /* Vertical phase comes from entry 0, 3 or 4 of the phase table */
        WORD32 i4_row_mod = i4_row % 3;
        WORD32 i4_phase_y = ps_phases[(i4_row_mod > 0) * 2 + i4_row_mod].i4_ordinate;
        coordinates_t *ps_row_pos = ps_positions + (i4_blk_wd * i4_row);

        for(i4_col = 0; i4_col < i4_blk_wd; i4_col++)
        {
            WORD32 ai4_hor[2];
            WORD32 i4_tap;
            WORD32 i4_first;
            UWORD8 u1_step_y;

            WORD32 i4_ref_y = ps_row_pos[i4_col].i4_ordinate;
            WORD32 i4_ref_x = ps_row_pos[i4_col].i4_abscissa;
            WORD32 i4_phase_x = ps_phases[i4_col % 3].i4_abscissa;

            /* horizontal processing */
            for(i4_tap = 0; i4_tap < 2; i4_tap++)
            {
                WORD32 i4_line = (i4_ref_y + i4_tap) * i4_refarray_wd;
                UWORD8 u1_step_x = pu1_x_incr[i4_ref_x + i4_line];
                WORD16 *pi2_left = pi2_ref + i4_ref_x + i4_line;

                if(0 == u1_step_x)
                {
                    /* No step allowed: both taps use the sample picked by the phase */
                    pi2_left += (i4_phase_x >> 3);
                }

                ai4_hor[i4_tap] =
                    (16 - i4_phase_x) * pi2_left[0] + i4_phase_x * pi2_left[u1_step_x];
            }

            /* vertical processing */
            u1_step_y =
                pu1_y_incr[(i4_ref_x + (i4_phase_x >> 3)) + (i4_ref_y * i4_refarray_wd)];
            i4_first = (0 == u1_step_y) ? (i4_phase_y >> 3) : 0;

            pi2_out[(i4_col << i4_chroma_flag) + (i4_row * i4_out_stride)] =
                (WORD16) (((16 - i4_phase_y) * ai4_hor[i4_first] +
                           i4_phase_y * ai4_hor[i4_first + u1_step_y] + 128) >>
                          8);
        }
    }
}
/**
*******************************************************************************
*
* @brief
*  Produces the residual prediction of the current MB through the generic
*  path: for every component the reference sample array is constructed and
*  then interpolated into the output buffer
*
* @param[in] ps_res_pred_ctxt
*  Residual prediction context. The MB position and spatial layer id are read
*  from its 'variables', the result is written through the component buffers
*  of its 'outputs'
*
* @notes
*  1. The spatial layer id must be greater than 0; the residual buffer and the
*     MB mode map of layer (id - 1) are used as the reference
*  2. The offsets applied to the reference residual pointer are clamped to
*     [0, dim - 1]
*  3. NOTE(review): 'dim' above is taken from the current layer's residual
*     buffer, although the offsets are applied to the reference layer's
*     buffer - confirm that this is intended
*
*******************************************************************************
*/
void isvce_get_mb_residual_pred_non_dyadic(svc_res_pred_ctxt_t *ps_res_pred_ctxt)
{
    res_pred_constants_t *ps_consts = &ps_res_pred_ctxt->s_res_pred_constants;
    res_pred_variables_t *ps_vars = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_outputs_t *ps_outputs = &ps_res_pred_ctxt->s_res_pred_outputs;
    res_pred_state_t *ps_state = (res_pred_state_t *) ps_consts->pv_state;
    svc_ilp_data_t *ps_ilp = ps_vars->ps_svc_ilp_data;
    coordinates_t *ps_cur_mb = &ps_vars->s_mb_pos;
    UWORD8 u1_spatial_layer_id = ps_vars->u1_spatial_layer_id;

    res_pred_layer_state_t *ps_cur_layer;
    res_pred_layer_state_t *ps_base_layer;
    yuv_buf_props_t *ps_base_residual;
    WORD32 i4_pic_wd, i4_pic_ht;
    WORD32 i4_mb_idx;
    WORD32 i4_arr_wd;
    WORD32 i4_comp;

    ASSERT(u1_spatial_layer_id > 0);

    i4_pic_wd = (WORD32) ps_ilp->ps_residual_bufs[u1_spatial_layer_id].u4_width;
    i4_pic_ht = (WORD32) ps_ilp->ps_residual_bufs[u1_spatial_layer_id].u4_height;

    /* Raster index of the current MB inside the current layer */
    i4_mb_idx = ps_cur_mb->i4_abscissa + ps_cur_mb->i4_ordinate * (i4_pic_wd / MB_SIZE);

    ps_cur_layer = &ps_state->ps_layer_state[u1_spatial_layer_id];
    ps_base_layer = &ps_state->ps_layer_state[u1_spatial_layer_id - 1];
    ps_base_residual = &ps_ilp->ps_residual_bufs[u1_spatial_layer_id - 1];

    for(i4_comp = 0; i4_comp < NUM_COMPONENTS; i4_comp++)
    {
        buffer_container_t *ps_src_buf;
        buffer_container_t *ps_dst_buf;
        res_pred_mb_state_t *ps_mb;
        WORD16 *pi2_src;
        WORD16 *pi2_dst;
        WORD32 i4_src_stride, i4_dst_stride;
        WORD32 i4_off_x, i4_off_y;

        UWORD8 u1_chroma = (Y != ((COMPONENT_TYPE) i4_comp));
        UWORD8 u1_is_v = (V == ((COMPONENT_TYPE) i4_comp));
        WORD32 i4_plane = u1_chroma ? UV : Y;

        if(u1_chroma)
        {
            ps_mb = &ps_cur_layer->ps_chroma_mb_states[i4_mb_idx];
        }
        else
        {
            ps_mb = &ps_cur_layer->ps_luma_mb_states[i4_mb_idx];
        }

        i4_off_x = MAX(0, MIN(ps_mb->s_offsets.i4_abscissa, (i4_pic_wd >> u1_chroma) - 1));
        i4_off_y = MAX(0, MIN(ps_mb->s_offsets.i4_ordinate, (i4_pic_ht >> u1_chroma) - 1));

        ps_src_buf = &ps_base_residual->as_component_bufs[i4_plane];
        ps_dst_buf = &ps_outputs->s_res_pred.as_component_bufs[i4_plane];
        i4_src_stride = ps_src_buf->i4_data_stride;
        i4_dst_stride = ps_dst_buf->i4_data_stride;

        /* U and V share the interleaved UV buffer; V starts one sample in */
        pi2_src = (WORD16 *) ps_src_buf->pv_data;
        pi2_src += u1_is_v;
        pi2_src += (i4_off_x << u1_chroma);
        pi2_src += i4_off_y * i4_src_stride;

        pi2_dst = ((WORD16 *) ps_dst_buf->pv_data) + u1_is_v;

        /* ------- Constructing refSampleArray ----------------------- */
        isvce_residual_reflayer_const(ps_res_pred_ctxt, pi2_src, i4_src_stride,
                                      ps_base_layer->pi1_mb_mode,
                                      ps_base_layer->i4_mb_mode_stride, &i4_arr_wd, u1_chroma);

        /* ---- Interpolation process for Residual prediction ------ */
        isvce_interpolate_residual(ps_res_pred_ctxt, pi2_dst, i4_dst_stride, i4_arr_wd,
                                   u1_chroma, ps_cur_mb);
    }
}
/**
*******************************************************************************
*
* @brief
*  Computes the sum of |src - pred - res| over a block
*
* @param[in] ps_src
*  Source samples, read as UWORD8
*
* @param[in] ps_pred
*  Prediction samples, read as UWORD8
*
* @param[in] ps_res
*  Residual prediction samples, read as WORD16
*
* @param[in] u4_mb_wd
*  Block width in samples
*
* @param[in] u4_mb_ht
*  Block height in samples
*
* @returns Accumulated SAD; 0 when either dimension is 0
*
*******************************************************************************
*/
UWORD32 isvce_get_sad_with_residual_pred(buffer_container_t *ps_src, buffer_container_t *ps_pred,
                                         buffer_container_t *ps_res, UWORD32 u4_mb_wd,
                                         UWORD32 u4_mb_ht)
{
    UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data;
    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
    UWORD32 u4_row, u4_col;
    UWORD32 u4_total = 0;

    for(u4_row = 0; u4_row < u4_mb_ht; u4_row++)
    {
        /* Row offsets stay in unsigned arithmetic, as in the per-sample index */
        UWORD32 u4_src_row = u4_row * ps_src->i4_data_stride;
        UWORD32 u4_pred_row = u4_row * ps_pred->i4_data_stride;
        UWORD32 u4_res_row = u4_row * ps_res->i4_data_stride;

        for(u4_col = 0; u4_col < u4_mb_wd; u4_col++)
        {
            WORD16 i2_src_val = pu1_src[u4_col + u4_src_row];
            WORD16 i2_pred_val = pu1_pred[u4_col + u4_pred_row];
            WORD16 i2_res_val = pi2_res[u4_col + u4_res_row];
            WORD32 i4_diff = i2_src_val - i2_pred_val - i2_res_val;

            u4_total += ABS(i4_diff);
        }
    }

    return u4_total;
}
/**
*******************************************************************************
*
* @brief
*  Evaluates residual_prediction_flag for an MB by comparing the SAD obtained
*  with residual prediction against the winning mode's SAD
*
* @param[in] ps_res_pred_ctxt
*  Residual prediction context; its state supplies the SAD function
*
* @param[in] ps_src
*  Pointer to MB src buffers
*
* @param[in] ps_pred
*  Pointer to MB pred buffers
*
* @param[in] ps_res
*  Pointer to MB res buffers
*
* @param[out] pu4_res_pred_sad
*  Receives the SAD computed with residual prediction
*
* @param[out] pu1_residual_prediction_flag
*  Receives 1 when the computed SAD is strictly below u4_winning_sad, else 0
*
* @param[in] u4_winning_sad
*  Winning mode's SAD
*
* @notes The algorithm currently uses only luma (an MB_SIZE x MB_SIZE block)
*  for evaluating residual_prediction_flag.
*
*******************************************************************************
*/
void isvce_residual_pred_eval(svc_res_pred_ctxt_t *ps_res_pred_ctxt, yuv_buf_props_t *ps_src,
                              yuv_buf_props_t *ps_pred, yuv_buf_props_t *ps_res,
                              UWORD32 *pu4_res_pred_sad, UWORD8 *pu1_residual_prediction_flag,
                              UWORD32 u4_winning_sad)
{
    res_pred_state_t *ps_state =
        (res_pred_state_t *) ps_res_pred_ctxt->s_res_pred_constants.pv_state;
    buffer_container_t *ps_src_luma = &ps_src->as_component_bufs[Y];
    buffer_container_t *ps_pred_luma = &ps_pred->as_component_bufs[Y];
    buffer_container_t *ps_res_luma = &ps_res->as_component_bufs[Y];

    *pu4_res_pred_sad = ps_state->pf_get_sad_with_residual_pred(ps_src_luma, ps_pred_luma,
                                                                ps_res_luma, MB_SIZE, MB_SIZE);

    *pu1_residual_prediction_flag = (*pu4_res_pred_sad < u4_winning_sad);
}
/**
*******************************************************************************
*
* @brief
*  Records the mode of the MB just processed in the MB mode map of its
*  spatial layer
*
* @param[in] ps_proc
*  Process context; supplies the MB coordinates, the spatial layer id, the MB
*  info and the residual prediction context
*
* @notes
*  Nothing is done unless more than one spatial layer is configured. The
*  stored value is SVC_IBL_MB for an intra MB with u1_base_mode_flag equal to
*  1, SVC_INTRA_MB for any other intra MB, and SVC_INTER_MB otherwise
*
*******************************************************************************
*/
void isvce_update_res_pred_info(isvce_process_ctxt_t *ps_proc)
{
    svc_res_pred_ctxt_t *ps_ctxt;
    res_pred_state_t *ps_state;
    res_pred_layer_state_t *ps_layer;
    WORD8 *pi1_entry;
    WORD8 i1_intra;

    if(ps_proc->s_svc_params.u1_num_spatial_layers <= 1)
    {
        return;
    }

    ps_ctxt = ps_proc->ps_res_pred_ctxt;
    ps_state = (res_pred_state_t *) ps_ctxt->s_res_pred_constants.pv_state;
    ps_layer = &ps_state->ps_layer_state[ps_proc->u1_spatial_layer_id];

    i1_intra = ps_proc->ps_mb_info->u1_is_intra;
    pi1_entry =
        &ps_layer->pi1_mb_mode[ps_proc->i4_mb_x + (ps_proc->i4_mb_y * ps_layer->i4_mb_mode_stride)];

    if(!i1_intra)
    {
        *pi1_entry = SVC_INTER_MB;
    }
    else if(ps_proc->ps_mb_info->u1_base_mode_flag == 1)
    {
        *pi1_entry = SVC_IBL_MB;
    }
    else
    {
        *pi1_entry = SVC_INTRA_MB;
    }
}