unplugged-system/external/libaom/av1/encoder/sparse_linear_solver.c

/*
 * Copyright (c) 2021, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include "av1/common/av1_common_int.h"
#include "av1/encoder/sparse_linear_solver.h"
#include "config/aom_config.h"
#include "aom_mem/aom_mem.h"
#include "av1/common/alloccommon.h"

#if CONFIG_OPTICAL_FLOW_API
/*
 * Input:
 * rows: array of row positions
 * cols: array of column positions
 * values: array of element values
 * num_elem: total number of elements in the matrix
 * num_rows: number of rows in the matrix
 * num_cols: number of columns in the matrix
 *
 * Output:
 * sm: pointer to the sparse matrix to be initialized
 *
 * Return: 0  - success
 *         -1 - failed
 */
int av1_init_sparse_mtx(const int *rows, const int *cols, const double *values,
                        int num_elem, int num_rows, int num_cols,
                        SPARSE_MTX *sm) {
  sm->n_elem = num_elem;
  sm->n_rows = num_rows;
  sm->n_cols = num_cols;
  if (num_elem == 0) {
    sm->row_pos = NULL;
    sm->col_pos = NULL;
    sm->value = NULL;
    return 0;
  }
  sm->row_pos = aom_calloc(num_elem, sizeof(*sm->row_pos));
  sm->col_pos = aom_calloc(num_elem, sizeof(*sm->col_pos));
  sm->value = aom_calloc(num_elem, sizeof(*sm->value));

  if (!sm->row_pos || !sm->col_pos || !sm->value) {
    av1_free_sparse_mtx_elems(sm);
    return -1;
  }

  memcpy(sm->row_pos, rows, num_elem * sizeof(*sm->row_pos));
  memcpy(sm->col_pos, cols, num_elem * sizeof(*sm->col_pos));
  memcpy(sm->value, values, num_elem * sizeof(*sm->value));

  return 0;
}

/*
 * Combines two sparse matrices (allocating new space).
 *
 * Input:
 * sm1, sm2: matrices to be combined
 * row_offset1, row_offset2: row offset of each matrix in the new matrix
 * col_offset1, col_offset2: column offset of each matrix in the new matrix
 * new_n_rows, new_n_cols: number of rows and columns in the new matrix
 *
 * Output:
 * sm: the combined matrix
 *
 * Return: 0  - success
 *         -1 - failed
 */
int av1_init_combine_sparse_mtx(const SPARSE_MTX *sm1, const SPARSE_MTX *sm2,
                                SPARSE_MTX *sm, int row_offset1,
                                int col_offset1, int row_offset2,
                                int col_offset2, int new_n_rows,
                                int new_n_cols) {
  sm->n_elem = sm1->n_elem + sm2->n_elem;
  sm->n_cols = new_n_cols;
  sm->n_rows = new_n_rows;

  if (sm->n_elem == 0) {
    sm->row_pos = NULL;
    sm->col_pos = NULL;
    sm->value = NULL;
    return 0;
  }

  sm->row_pos = aom_calloc(sm->n_elem, sizeof(*sm->row_pos));
  sm->col_pos = aom_calloc(sm->n_elem, sizeof(*sm->col_pos));
  sm->value = aom_calloc(sm->n_elem, sizeof(*sm->value));

  if (!sm->row_pos || !sm->col_pos || !sm->value) {
    av1_free_sparse_mtx_elems(sm);
    return -1;
  }

  for (int i = 0; i < sm1->n_elem; i++) {
    sm->row_pos[i] = sm1->row_pos[i] + row_offset1;
    sm->col_pos[i] = sm1->col_pos[i] + col_offset1;
  }
  memcpy(sm->value, sm1->value, sm1->n_elem * sizeof(*sm1->value));
  int n_elem1 = sm1->n_elem;
  for (int i = 0; i < sm2->n_elem; i++) {
    sm->row_pos[n_elem1 + i] = sm2->row_pos[i] + row_offset2;
    sm->col_pos[n_elem1 + i] = sm2->col_pos[i] + col_offset2;
  }
  memcpy(sm->value + n_elem1, sm2->value, sm2->n_elem * sizeof(*sm2->value));
  return 0;
}

void av1_free_sparse_mtx_elems(SPARSE_MTX *sm) {
  sm->n_cols = 0;
  sm->n_rows = 0;
  if (sm->n_elem != 0) {
    aom_free(sm->row_pos);
    aom_free(sm->col_pos);
    aom_free(sm->value);
  }
  sm->n_elem = 0;
}

/*
 * Calculate matrix and vector multiplication: A*b
 *
 * Input:
 * sm: matrix A
 * srcv: the vector b to be multiplied to
 * dstl: the length of vectors
 *
 * Output:
 * dstv: pointer to the resulting vector
 */
void av1_mtx_vect_multi_right(const SPARSE_MTX *sm, const double *srcv,
                              double *dstv, int dstl) {
  memset(dstv, 0, sizeof(*dstv) * dstl);
  for (int i = 0; i < sm->n_elem; i++) {
    dstv[sm->row_pos[i]] += srcv[sm->col_pos[i]] * sm->value[i];
  }
}
/*
 * Calculate matrix and vector multiplication: b*A
 *
 * Input:
 * sm: matrix A
 * srcv: the vector b to be multiplied to
 * dstl: the length of vectors
 *
 * Output:
 * dstv: pointer to the resulting vector
 */
void av1_mtx_vect_multi_left(const SPARSE_MTX *sm, const double *srcv,
                             double *dstv, int dstl) {
  memset(dstv, 0, sizeof(*dstv) * dstl);
  for (int i = 0; i < sm->n_elem; i++) {
    dstv[sm->col_pos[i]] += srcv[sm->row_pos[i]] * sm->value[i];
  }
}

/*
 * Calculate inner product of two vectors
 *
 * Input:
 * src1, scr2: the vectors to be multiplied
 * src1l: length of the vectors
 *
 * Output:
 * the inner product
 */
double av1_vect_vect_multi(const double *src1, int src1l, const double *src2) {
  double result = 0;
  for (int i = 0; i < src1l; i++) {
    result += src1[i] * src2[i];
  }
  return result;
}

/*
 * Multiply each element in the matrix sm with a constant c
 */
void av1_constant_multiply_sparse_matrix(SPARSE_MTX *sm, double c) {
  for (int i = 0; i < sm->n_elem; i++) {
    sm->value[i] *= c;
  }
}

static INLINE void free_solver_local_buf(double *buf1, double *buf2,
                                         double *buf3, double *buf4,
                                         double *buf5, double *buf6,
                                         double *buf7) {
  aom_free(buf1);
  aom_free(buf2);
  aom_free(buf3);
  aom_free(buf4);
  aom_free(buf5);
  aom_free(buf6);
  aom_free(buf7);
}

/*
 * Solve for Ax = b
 * no requirement on A
 *
 * Input:
 * A: the sparse matrix
 * b: the vector b
 * bl: length of b
 * x: the vector x
 *
 * Output:
 * x: pointer to the solution vector
 *
 * Return: 0  - success
 *         -1 - failed
 */
int av1_bi_conjugate_gradient_sparse(const SPARSE_MTX *A, const double *b,
                                     int bl, double *x) {
  double *r = NULL, *r_hat = NULL, *p = NULL, *p_hat = NULL, *Ap = NULL,
         *p_hatA = NULL, *x_hat = NULL;
  double alpha, beta, rtr, r_norm_2;
  double denormtemp;

  // initialize
  r = aom_calloc(bl, sizeof(*r));
  r_hat = aom_calloc(bl, sizeof(*r_hat));
  p = aom_calloc(bl, sizeof(*p));
  p_hat = aom_calloc(bl, sizeof(*p_hat));
  Ap = aom_calloc(bl, sizeof(*Ap));
  p_hatA = aom_calloc(bl, sizeof(*p_hatA));
  x_hat = aom_calloc(bl, sizeof(*x_hat));
  if (!r || !r_hat || !p || !p_hat || !Ap || !p_hatA || !x_hat) {
    free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat);
    return -1;
  }

  int i;
  for (i = 0; i < bl; i++) {
    r[i] = b[i];
    r_hat[i] = b[i];
    p[i] = r[i];
    p_hat[i] = r_hat[i];
    x[i] = 0;
    x_hat[i] = 0;
  }
  r_norm_2 = av1_vect_vect_multi(r_hat, bl, r);
  for (int k = 0; k < MAX_CG_SP_ITER; k++) {
    rtr = r_norm_2;
    av1_mtx_vect_multi_right(A, p, Ap, bl);
    av1_mtx_vect_multi_left(A, p_hat, p_hatA, bl);

    denormtemp = av1_vect_vect_multi(p_hat, bl, Ap);
    if (denormtemp < 1e-10) break;
    alpha = rtr / denormtemp;
    r_norm_2 = 0;
    for (i = 0; i < bl; i++) {
      x[i] += alpha * p[i];
      x_hat[i] += alpha * p_hat[i];
      r[i] -= alpha * Ap[i];
      r_hat[i] -= alpha * p_hatA[i];
      r_norm_2 += r_hat[i] * r[i];
    }
    if (sqrt(r_norm_2) < 1e-2) {
      break;
    }
    if (rtr < 1e-10) break;
    beta = r_norm_2 / rtr;
    for (i = 0; i < bl; i++) {
      p[i] = r[i] + beta * p[i];
      p_hat[i] = r_hat[i] + beta * p_hat[i];
    }
  }
  // free
  free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat);
  return 0;
}

/*
 * Solve for Ax = b when A is symmetric and positive definite
 *
 * Input:
 * A: the sparse matrix
 * b: the vector b
 * bl: length of b
 * x: the vector x
 *
 * Output:
 * x: pointer to the solution vector
 *
 * Return: 0  - success
 *         -1 - failed
 */
int av1_conjugate_gradient_sparse(const SPARSE_MTX *A, const double *b, int bl,
                                  double *x) {
  double *r = NULL, *p = NULL, *Ap = NULL;
  double alpha, beta, rtr, r_norm_2;
  double denormtemp;

  // initialize
  r = aom_calloc(bl, sizeof(*r));
  p = aom_calloc(bl, sizeof(*p));
  Ap = aom_calloc(bl, sizeof(*Ap));
  if (!r || !p || !Ap) {
    free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL);
    return -1;
  }

  int i;
  for (i = 0; i < bl; i++) {
    r[i] = b[i];
    p[i] = r[i];
    x[i] = 0;
  }
  r_norm_2 = av1_vect_vect_multi(r, bl, r);
  int k;
  for (k = 0; k < MAX_CG_SP_ITER; k++) {
    rtr = r_norm_2;
    av1_mtx_vect_multi_right(A, p, Ap, bl);
    denormtemp = av1_vect_vect_multi(p, bl, Ap);
    if (denormtemp < 1e-10) break;
    alpha = rtr / denormtemp;
    r_norm_2 = 0;
    for (i = 0; i < bl; i++) {
      x[i] += alpha * p[i];
      r[i] -= alpha * Ap[i];
      r_norm_2 += r[i] * r[i];
    }
    if (r_norm_2 < 1e-8 * bl) break;
    if (rtr < 1e-10) break;
    beta = r_norm_2 / rtr;
    for (i = 0; i < bl; i++) {
      p[i] = r[i] + beta * p[i];
    }
  }
  // free
  free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL);

  return 0;
}

/*
 * Solve for Ax = b using Jacobi method
 *
 * Input:
 * A: the sparse matrix
 * b: the vector b
 * bl: length of b
 * x: the vector x
 *
 * Output:
 * x: pointer to the solution vector
 *
 * Return: 0  - success
 *         -1 - failed
 */
int av1_jacobi_sparse(const SPARSE_MTX *A, const double *b, int bl, double *x) {
  double *diags = NULL, *Rx = NULL, *x_last = NULL, *x_cur = NULL,
         *tempx = NULL;
  double resi2;

  diags = aom_calloc(bl, sizeof(*diags));
  Rx = aom_calloc(bl, sizeof(*Rx));
  x_last = aom_calloc(bl, sizeof(*x_last));
  x_cur = aom_calloc(bl, sizeof(*x_cur));

  if (!diags || !Rx || !x_last || !x_cur) {
    free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL);
    return -1;
  }

  int i;
  memset(x_last, 0, sizeof(*x_last) * bl);
  // get the diagonals of A
  memset(diags, 0, sizeof(*diags) * bl);
  for (int c = 0; c < A->n_elem; c++) {
    if (A->row_pos[c] != A->col_pos[c]) continue;
    diags[A->row_pos[c]] = A->value[c];
  }
  int k;
  for (k = 0; k < MAX_CG_SP_ITER; k++) {
    // R = A - diag(diags)
    // get R*x_last
    memset(Rx, 0, sizeof(*Rx) * bl);
    for (int c = 0; c < A->n_elem; c++) {
      if (A->row_pos[c] == A->col_pos[c]) continue;
      Rx[A->row_pos[c]] += x_last[A->col_pos[c]] * A->value[c];
    }
    resi2 = 0;
    for (i = 0; i < bl; i++) {
      x_cur[i] = (b[i] - Rx[i]) / diags[i];
      resi2 += (x_last[i] - x_cur[i]) * (x_last[i] - x_cur[i]);
    }
    if (resi2 <= 1e-10 * bl) break;
    // swap last & cur buffer ptrs
    tempx = x_last;
    x_last = x_cur;
    x_cur = tempx;
  }
  printf("\n numiter: %d\n", k);
  for (i = 0; i < bl; i++) {
    x[i] = x_cur[i];
  }
  free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL);
  return 0;
}

/*
 * Solve for Ax = b using Steepest descent method
 *
 * Input:
 * A: the sparse matrix
 * b: the vector b
 * bl: length of b
 * x: the vector x
 *
 * Output:
 * x: pointer to the solution vector
 *
 * Return: 0  - success
 *         -1 - failed
 */
int av1_steepest_descent_sparse(const SPARSE_MTX *A, const double *b, int bl,
                                double *x) {
  double *d = NULL, *Ad = NULL, *Ax = NULL;
  double resi2, resi2_last, dAd, temp;

  d = aom_calloc(bl, sizeof(*d));
  Ax = aom_calloc(bl, sizeof(*Ax));
  Ad = aom_calloc(bl, sizeof(*Ad));

  if (!d || !Ax || !Ad) {
    free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL);
    return -1;
  }

  int i;
  // initialize with 0s
  resi2 = 0;
  for (i = 0; i < bl; i++) {
    x[i] = 0;
    d[i] = b[i];
    resi2 += d[i] * d[i] / bl;
  }
  int k;
  for (k = 0; k < MAX_CG_SP_ITER; k++) {
    // get A*x_last
    av1_mtx_vect_multi_right(A, d, Ad, bl);
    dAd = resi2 * bl / av1_vect_vect_multi(d, bl, Ad);
    for (i = 0; i < bl; i++) {
      temp = dAd * d[i];
      x[i] = x[i] + temp;
    }
    av1_mtx_vect_multi_right(A, x, Ax, bl);
    resi2_last = resi2;
    resi2 = 0;
    for (i = 0; i < bl; i++) {
      d[i] = b[i] - Ax[i];
      resi2 += d[i] * d[i] / bl;
    }
    if (resi2 <= 1e-8) break;
    if (resi2_last - resi2 < 1e-8) {
      break;
    }
  }
  free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL);

  return 0;
}

#endif  // CONFIG_OPTICAL_FLOW_API
Initial commit: AOSP 14 with modifications for Unplugged OS 2025-10-06 13:59:42 +00:00			`/*`
			`* Copyright (c) 2021, Alliance for Open Media. All rights reserved`
			`*`
			`* This source code is subject to the terms of the BSD 2 Clause License and`
			`* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License`
			`* was not distributed with this source code in the LICENSE file, you can`
			`* obtain it at www.aomedia.org/license/software. If the Alliance for Open`
			`* Media Patent License 1.0 was not distributed with this source code in the`
			`* PATENTS file, you can obtain it at www.aomedia.org/license/patent.`
			`*/`
			`#include "av1/common/av1_common_int.h"`
			`#include "av1/encoder/sparse_linear_solver.h"`
			`#include "config/aom_config.h"`
			`#include "aom_mem/aom_mem.h"`
			`#include "av1/common/alloccommon.h"`

			`#if CONFIG_OPTICAL_FLOW_API`
			`/*`
			`* Input:`
			`* rows: array of row positions`
			`* cols: array of column positions`
			`* values: array of element values`
			`* num_elem: total number of elements in the matrix`
			`* num_rows: number of rows in the matrix`
			`* num_cols: number of columns in the matrix`
			`*`
			`* Output:`
			`* sm: pointer to the sparse matrix to be initialized`
			`*`
			`* Return: 0 - success`
			`* -1 - failed`
			`*/`
			`int av1_init_sparse_mtx(const int rows, const int cols, const double *values,`
			`int num_elem, int num_rows, int num_cols,`
			`SPARSE_MTX *sm) {`
			`sm->n_elem = num_elem;`
			`sm->n_rows = num_rows;`
			`sm->n_cols = num_cols;`
			`if (num_elem == 0) {`
			`sm->row_pos = NULL;`
			`sm->col_pos = NULL;`
			`sm->value = NULL;`
			`return 0;`
			`}`
			`sm->row_pos = aom_calloc(num_elem, sizeof(*sm->row_pos));`
			`sm->col_pos = aom_calloc(num_elem, sizeof(*sm->col_pos));`
			`sm->value = aom_calloc(num_elem, sizeof(*sm->value));`

			`if (!sm->row_pos \|\| !sm->col_pos \|\| !sm->value) {`
			`av1_free_sparse_mtx_elems(sm);`
			`return -1;`
			`}`

			`memcpy(sm->row_pos, rows, num_elem * sizeof(*sm->row_pos));`
			`memcpy(sm->col_pos, cols, num_elem * sizeof(*sm->col_pos));`
			`memcpy(sm->value, values, num_elem * sizeof(*sm->value));`

			`return 0;`
			`}`

			`/*`
			`* Combines two sparse matrices (allocating new space).`
			`*`
			`* Input:`
			`* sm1, sm2: matrices to be combined`
			`* row_offset1, row_offset2: row offset of each matrix in the new matrix`
			`* col_offset1, col_offset2: column offset of each matrix in the new matrix`
			`* new_n_rows, new_n_cols: number of rows and columns in the new matrix`
			`*`
			`* Output:`
			`* sm: the combined matrix`
			`*`
			`* Return: 0 - success`
			`* -1 - failed`
			`*/`
			`int av1_init_combine_sparse_mtx(const SPARSE_MTX sm1, const SPARSE_MTX sm2,`
			`SPARSE_MTX *sm, int row_offset1,`
			`int col_offset1, int row_offset2,`
			`int col_offset2, int new_n_rows,`
			`int new_n_cols) {`
			`sm->n_elem = sm1->n_elem + sm2->n_elem;`
			`sm->n_cols = new_n_cols;`
			`sm->n_rows = new_n_rows;`

			`if (sm->n_elem == 0) {`
			`sm->row_pos = NULL;`
			`sm->col_pos = NULL;`
			`sm->value = NULL;`
			`return 0;`
			`}`

			`sm->row_pos = aom_calloc(sm->n_elem, sizeof(*sm->row_pos));`
			`sm->col_pos = aom_calloc(sm->n_elem, sizeof(*sm->col_pos));`
			`sm->value = aom_calloc(sm->n_elem, sizeof(*sm->value));`

			`if (!sm->row_pos \|\| !sm->col_pos \|\| !sm->value) {`
			`av1_free_sparse_mtx_elems(sm);`
			`return -1;`
			`}`

			`for (int i = 0; i < sm1->n_elem; i++) {`
			`sm->row_pos[i] = sm1->row_pos[i] + row_offset1;`
			`sm->col_pos[i] = sm1->col_pos[i] + col_offset1;`
			`}`
			`memcpy(sm->value, sm1->value, sm1->n_elem * sizeof(*sm1->value));`
			`int n_elem1 = sm1->n_elem;`
			`for (int i = 0; i < sm2->n_elem; i++) {`
			`sm->row_pos[n_elem1 + i] = sm2->row_pos[i] + row_offset2;`
			`sm->col_pos[n_elem1 + i] = sm2->col_pos[i] + col_offset2;`
			`}`
			`memcpy(sm->value + n_elem1, sm2->value, sm2->n_elem * sizeof(*sm2->value));`
			`return 0;`
			`}`

			`void av1_free_sparse_mtx_elems(SPARSE_MTX *sm) {`
			`sm->n_cols = 0;`
			`sm->n_rows = 0;`
			`if (sm->n_elem != 0) {`
			`aom_free(sm->row_pos);`
			`aom_free(sm->col_pos);`
			`aom_free(sm->value);`
			`}`
			`sm->n_elem = 0;`
			`}`

			`/*`
			`* Calculate matrix and vector multiplication: A*b`
			`*`
			`* Input:`
			`* sm: matrix A`
			`* srcv: the vector b to be multiplied to`
			`* dstl: the length of vectors`
			`*`
			`* Output:`
			`* dstv: pointer to the resulting vector`
			`*/`
			`void av1_mtx_vect_multi_right(const SPARSE_MTX sm, const double srcv,`
			`double *dstv, int dstl) {`
			`memset(dstv, 0, sizeof(dstv) dstl);`
			`for (int i = 0; i < sm->n_elem; i++) {`
			`dstv[sm->row_pos[i]] += srcv[sm->col_pos[i]] * sm->value[i];`
			`}`
			`}`
			`/*`
			`* Calculate matrix and vector multiplication: b*A`
			`*`
			`* Input:`
			`* sm: matrix A`
			`* srcv: the vector b to be multiplied to`
			`* dstl: the length of vectors`
			`*`
			`* Output:`
			`* dstv: pointer to the resulting vector`
			`*/`
			`void av1_mtx_vect_multi_left(const SPARSE_MTX sm, const double srcv,`
			`double *dstv, int dstl) {`
			`memset(dstv, 0, sizeof(dstv) dstl);`
			`for (int i = 0; i < sm->n_elem; i++) {`
			`dstv[sm->col_pos[i]] += srcv[sm->row_pos[i]] * sm->value[i];`
			`}`
			`}`

			`/*`
			`* Calculate inner product of two vectors`
			`*`
			`* Input:`
			`* src1, scr2: the vectors to be multiplied`
			`* src1l: length of the vectors`
			`*`
			`* Output:`
			`* the inner product`
			`*/`
			`double av1_vect_vect_multi(const double src1, int src1l, const double src2) {`
			`double result = 0;`
			`for (int i = 0; i < src1l; i++) {`
			`result += src1[i] * src2[i];`
			`}`
			`return result;`
			`}`

			`/*`
			`* Multiply each element in the matrix sm with a constant c`
			`*/`
			`void av1_constant_multiply_sparse_matrix(SPARSE_MTX *sm, double c) {`
			`for (int i = 0; i < sm->n_elem; i++) {`
			`sm->value[i] *= c;`
			`}`
			`}`

			`static INLINE void free_solver_local_buf(double buf1, double buf2,`
			`double buf3, double buf4,`
			`double buf5, double buf6,`
			`double *buf7) {`
			`aom_free(buf1);`
			`aom_free(buf2);`
			`aom_free(buf3);`
			`aom_free(buf4);`
			`aom_free(buf5);`
			`aom_free(buf6);`
			`aom_free(buf7);`
			`}`

			`/*`
			`* Solve for Ax = b`
			`* no requirement on A`
			`*`
			`* Input:`
			`* A: the sparse matrix`
			`* b: the vector b`
			`* bl: length of b`
			`* x: the vector x`
			`*`
			`* Output:`
			`* x: pointer to the solution vector`
			`*`
			`* Return: 0 - success`
			`* -1 - failed`
			`*/`
			`int av1_bi_conjugate_gradient_sparse(const SPARSE_MTX A, const double b,`
			`int bl, double *x) {`
			`double r = NULL, r_hat = NULL, p = NULL, p_hat = NULL, *Ap = NULL,`
			`p_hatA = NULL, x_hat = NULL;`
			`double alpha, beta, rtr, r_norm_2;`
			`double denormtemp;`

			`// initialize`
			`r = aom_calloc(bl, sizeof(*r));`
			`r_hat = aom_calloc(bl, sizeof(*r_hat));`
			`p = aom_calloc(bl, sizeof(*p));`
			`p_hat = aom_calloc(bl, sizeof(*p_hat));`
			`Ap = aom_calloc(bl, sizeof(*Ap));`
			`p_hatA = aom_calloc(bl, sizeof(*p_hatA));`
			`x_hat = aom_calloc(bl, sizeof(*x_hat));`
			`if (!r \|\| !r_hat \|\| !p \|\| !p_hat \|\| !Ap \|\| !p_hatA \|\| !x_hat) {`
			`free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat);`
			`return -1;`
			`}`

			`int i;`
			`for (i = 0; i < bl; i++) {`
			`r[i] = b[i];`
			`r_hat[i] = b[i];`
			`p[i] = r[i];`
			`p_hat[i] = r_hat[i];`
			`x[i] = 0;`
			`x_hat[i] = 0;`
			`}`
			`r_norm_2 = av1_vect_vect_multi(r_hat, bl, r);`
			`for (int k = 0; k < MAX_CG_SP_ITER; k++) {`
			`rtr = r_norm_2;`
			`av1_mtx_vect_multi_right(A, p, Ap, bl);`
			`av1_mtx_vect_multi_left(A, p_hat, p_hatA, bl);`

			`denormtemp = av1_vect_vect_multi(p_hat, bl, Ap);`
			`if (denormtemp < 1e-10) break;`
			`alpha = rtr / denormtemp;`
			`r_norm_2 = 0;`
			`for (i = 0; i < bl; i++) {`
			`x[i] += alpha * p[i];`
			`x_hat[i] += alpha * p_hat[i];`
			`r[i] -= alpha * Ap[i];`
			`r_hat[i] -= alpha * p_hatA[i];`
			`r_norm_2 += r_hat[i] * r[i];`
			`}`
			`if (sqrt(r_norm_2) < 1e-2) {`
			`break;`
			`}`
			`if (rtr < 1e-10) break;`
			`beta = r_norm_2 / rtr;`
			`for (i = 0; i < bl; i++) {`
			`p[i] = r[i] + beta * p[i];`
			`p_hat[i] = r_hat[i] + beta * p_hat[i];`
			`}`
			`}`
			`// free`
			`free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat);`
			`return 0;`
			`}`

			`/*`
			`* Solve for Ax = b when A is symmetric and positive definite`
			`*`
			`* Input:`
			`* A: the sparse matrix`
			`* b: the vector b`
			`* bl: length of b`
			`* x: the vector x`
			`*`
			`* Output:`
			`* x: pointer to the solution vector`
			`*`
			`* Return: 0 - success`
			`* -1 - failed`
			`*/`
			`int av1_conjugate_gradient_sparse(const SPARSE_MTX A, const double b, int bl,`
			`double *x) {`
			`double r = NULL, p = NULL, *Ap = NULL;`
			`double alpha, beta, rtr, r_norm_2;`
			`double denormtemp;`

			`// initialize`
			`r = aom_calloc(bl, sizeof(*r));`
			`p = aom_calloc(bl, sizeof(*p));`
			`Ap = aom_calloc(bl, sizeof(*Ap));`
			`if (!r \|\| !p \|\| !Ap) {`
			`free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL);`
			`return -1;`
			`}`

			`int i;`
			`for (i = 0; i < bl; i++) {`
			`r[i] = b[i];`
			`p[i] = r[i];`
			`x[i] = 0;`
			`}`
			`r_norm_2 = av1_vect_vect_multi(r, bl, r);`
			`int k;`
			`for (k = 0; k < MAX_CG_SP_ITER; k++) {`
			`rtr = r_norm_2;`
			`av1_mtx_vect_multi_right(A, p, Ap, bl);`
			`denormtemp = av1_vect_vect_multi(p, bl, Ap);`
			`if (denormtemp < 1e-10) break;`
			`alpha = rtr / denormtemp;`
			`r_norm_2 = 0;`
			`for (i = 0; i < bl; i++) {`
			`x[i] += alpha * p[i];`
			`r[i] -= alpha * Ap[i];`
			`r_norm_2 += r[i] * r[i];`
			`}`
			`if (r_norm_2 < 1e-8 * bl) break;`
			`if (rtr < 1e-10) break;`
			`beta = r_norm_2 / rtr;`
			`for (i = 0; i < bl; i++) {`
			`p[i] = r[i] + beta * p[i];`
			`}`
			`}`
			`// free`
			`free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL);`

			`return 0;`
			`}`

			`/*`
			`* Solve for Ax = b using Jacobi method`
			`*`
			`* Input:`
			`* A: the sparse matrix`
			`* b: the vector b`
			`* bl: length of b`
			`* x: the vector x`
			`*`
			`* Output:`
			`* x: pointer to the solution vector`
			`*`
			`* Return: 0 - success`
			`* -1 - failed`
			`*/`
			`int av1_jacobi_sparse(const SPARSE_MTX A, const double b, int bl, double *x) {`
			`double diags = NULL, Rx = NULL, x_last = NULL, x_cur = NULL,`
			`*tempx = NULL;`
			`double resi2;`

			`diags = aom_calloc(bl, sizeof(*diags));`
			`Rx = aom_calloc(bl, sizeof(*Rx));`
			`x_last = aom_calloc(bl, sizeof(*x_last));`
			`x_cur = aom_calloc(bl, sizeof(*x_cur));`

			`if (!diags \|\| !Rx \|\| !x_last \|\| !x_cur) {`
			`free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL);`
			`return -1;`
			`}`

			`int i;`
			`memset(x_last, 0, sizeof(x_last) bl);`
			`// get the diagonals of A`
			`memset(diags, 0, sizeof(diags) bl);`
			`for (int c = 0; c < A->n_elem; c++) {`
			`if (A->row_pos[c] != A->col_pos[c]) continue;`
			`diags[A->row_pos[c]] = A->value[c];`
			`}`
			`int k;`
			`for (k = 0; k < MAX_CG_SP_ITER; k++) {`
			`// R = A - diag(diags)`
			`// get R*x_last`
			`memset(Rx, 0, sizeof(Rx) bl);`
			`for (int c = 0; c < A->n_elem; c++) {`
			`if (A->row_pos[c] == A->col_pos[c]) continue;`
			`Rx[A->row_pos[c]] += x_last[A->col_pos[c]] * A->value[c];`
			`}`
			`resi2 = 0;`
			`for (i = 0; i < bl; i++) {`
			`x_cur[i] = (b[i] - Rx[i]) / diags[i];`
			`resi2 += (x_last[i] - x_cur[i]) * (x_last[i] - x_cur[i]);`
			`}`
			`if (resi2 <= 1e-10 * bl) break;`
			`// swap last & cur buffer ptrs`
			`tempx = x_last;`
			`x_last = x_cur;`
			`x_cur = tempx;`
			`}`
			`printf("\n numiter: %d\n", k);`
			`for (i = 0; i < bl; i++) {`
			`x[i] = x_cur[i];`
			`}`
			`free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL);`
			`return 0;`
			`}`

			`/*`
			`* Solve for Ax = b using Steepest descent method`
			`*`
			`* Input:`
			`* A: the sparse matrix`
			`* b: the vector b`
			`* bl: length of b`
			`* x: the vector x`
			`*`
			`* Output:`
			`* x: pointer to the solution vector`
			`*`
			`* Return: 0 - success`
			`* -1 - failed`
			`*/`
			`int av1_steepest_descent_sparse(const SPARSE_MTX A, const double b, int bl,`
			`double *x) {`
			`double d = NULL, Ad = NULL, *Ax = NULL;`
			`double resi2, resi2_last, dAd, temp;`

			`d = aom_calloc(bl, sizeof(*d));`
			`Ax = aom_calloc(bl, sizeof(*Ax));`
			`Ad = aom_calloc(bl, sizeof(*Ad));`

			`if (!d \|\| !Ax \|\| !Ad) {`
			`free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL);`
			`return -1;`
			`}`

			`int i;`
			`// initialize with 0s`
			`resi2 = 0;`
			`for (i = 0; i < bl; i++) {`
			`x[i] = 0;`
			`d[i] = b[i];`
			`resi2 += d[i] * d[i] / bl;`
			`}`
			`int k;`
			`for (k = 0; k < MAX_CG_SP_ITER; k++) {`
			`// get A*x_last`
			`av1_mtx_vect_multi_right(A, d, Ad, bl);`
			`dAd = resi2 * bl / av1_vect_vect_multi(d, bl, Ad);`
			`for (i = 0; i < bl; i++) {`
			`temp = dAd * d[i];`
			`x[i] = x[i] + temp;`
			`}`
			`av1_mtx_vect_multi_right(A, x, Ax, bl);`
			`resi2_last = resi2;`
			`resi2 = 0;`
			`for (i = 0; i < bl; i++) {`
			`d[i] = b[i] - Ax[i];`
			`resi2 += d[i] * d[i] / bl;`
			`}`
			`if (resi2 <= 1e-8) break;`
			`if (resi2_last - resi2 < 1e-8) {`
			`break;`
			`}`
			`}`
			`free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL);`

			`return 0;`
			`}`

			`#endif // CONFIG_OPTICAL_FLOW_API`