/*
 * Single-precision erfc(x) function.
 *
 * Copyright (c) 2019-2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#include "erfcf.h"
#include "math_config.h"
#include "pl_sig.h"
#include "pl_test.h"

/* P(i) selects entry i of __erfcf_poly_data.poly; erfcf passes each entry as
   the `const double *coeff' argument of the approximation helpers.  */
#define P(i) __erfcf_poly_data.poly[i]

/* erfcf on the upper sub-intervals (|x| > 4.0).
   SIGN is non-zero for negative x, in which case the result is returned as
   exactly 2 without evaluating anything.  COEFF holds the polynomial
   coefficients for the sub-interval containing |x|.  */
static inline float
approx_erfcf_hi (float x, uint32_t sign, const double *coeff)
{
  /* Negative side: nothing to compute.  */
  if (sign != 0)
    return 2.0f;

  double ax = (double) fabs (x);
  /* Polynomial and Gaussian factors are evaluated on the double-precision
     magnitude, then each is rounded to float before being multiplied.  */
  float poly = (float) eval_poly (ax, coeff);
  float gauss = (float) eval_exp_mx2 (ax);

  return poly * gauss;
}

/* erfcf on the lower sub-intervals (|x| < 4.0).
   SIGN is non-zero for negative x.  COEFF holds the polynomial coefficients
   for the sub-interval containing |x|.  */
static inline float
approx_erfcf_lo (float x, uint32_t sign, const double *coeff)
{
  double ax = (double) fabs (x);
  /* Polynomial and Gaussian factors, each rounded to float.  */
  float poly = (float) eval_poly (ax, coeff);
  float gauss = (float) eval_exp_mx2 (ax);

  /* Positive x: plain product.  Negative x: 2 - poly * gauss, formed with a
     single fused multiply-add.  */
  return sign ? fmaf (-poly, gauss, 2.0f) : poly * gauss;
}

/* Top 12 bits of a float with the sign bit masked off.  Assuming asuint ()
   yields the 32-bit encoding of x, that is the exponent field followed by
   the three leading mantissa bits.  */
static inline uint32_t
abstop12 (float x)
{
  const uint32_t hi = asuint (x) >> 20;
  return hi & 0x7ff;
}

/* Top 12 bits of a float: asuint (x) shifted right by 20, sign bit
   included.  */
static inline uint32_t
top12 (float x)
{
  const uint32_t hi = asuint (x) >> 20;
  return hi;
}

/* Fast erfcf: a polynomial approximation multiplied by a gaussian.
   The bulk of the work is done in double precision and the result is very
   sensitive to the accuracy of the polynomial and exp evaluation.
   Worst-case error is 1.968ulps, obtained for x = 2.0412941.
   erfcf(0x1.05492p+1) got 0x1.fe10f6p-9 want 0x1.fe10f2p-9 ulp
   err 1.46788.  */
float
erfcf (float x)
{
  /* Sign bit and sign-free top 12 bits of the input; 32 bits are enough for
     every comparison below.  */
  uint32_t bits = asuint (x);
  uint32_t sign = bits >> 31;
  uint32_t ia12 = top12 (x) & 0x7ff;

  /* One unsigned comparison catches both tiny and non-finite inputs:
       abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small)
     (values below the small threshold wrap around to a large number).

     Special cases
       erfcf(nan)=nan, erfcf(+inf)=0 and erfcf(-inf)=2

     Errno
       EDOM does not have to be set in case of erfcf(nan).
       Only ERANGE may be set in case of underflow.

     Small values (|x|<small)
       |x|<0x1.0p-26 => accurate to 0.5 ULP (top12(0x1p-26) = 0x328).  */
  const uint32_t small12 = 0x328;
  const uint32_t nonfinite12 = abstop12 (INFINITY) & 0x7f8;
  if (unlikely (ia12 - small12 >= nonfinite12 - small12))
    {
      /* Small case.  */
      if (ia12 < 0x7f8)
	return 1.0f - x;
      /* Special cases: 1/x is +-0 for infinities and nan for nan.  */
      return (float) (sign << 1) + 1.0f / x;
    }

  /* Remaining inputs fall into 4 intervals bounded by 2.0, 4.0, 8.0 and
     10.0.  10 was chosen as the upper bound for the interesting region as it
     is the smallest value, representable as a 12-bit integer, for which
     returning 0 gives <1.5 ULP.  */

  /* |x| < 4.0: split at 2.0.  */
  if (ia12 < 0x408)
    return approx_erfcf_lo (x, sign, ia12 < 0x400 ? P (0) : P (1));

  /* 4.0 <= |x| < 10.0: split at 8.0.  */
  if (ia12 < 0x412)
    return approx_erfcf_hi (x, sign, ia12 < 0x410 ? P (2) : P (3));

  /* |x| >= 10.0: 2 on the negative side, underflow on the positive side.  */
  if (sign)
    return 2.0f;
  return __math_uflowf (0);
}

/* Signature and test registration for erfcf through the PL_* macros
   (presumably provided by pl_sig.h and pl_test.h; their expansions are not
   visible in this file).
   NOTE(review): the 1.5 given to PL_TEST_ULP looks like the harness error
   threshold, and the last PL_TEST_INTERVAL argument a sample count for the
   interval given by the two preceding arguments - confirm against the macro
   definitions.  */
PL_SIG (S, F, 1, erfc, -4.0, 10.0)
PL_TEST_ULP (erfcf, 1.5)
PL_TEST_INTERVAL (erfcf, 0, 0xffff0000, 10000)
PL_TEST_INTERVAL (erfcf, 0x1p-127, 0x1p-26, 40000)
PL_TEST_INTERVAL (erfcf, -0x1p-127, -0x1p-26, 40000)
PL_TEST_INTERVAL (erfcf, 0x1p-26, 0x1p5, 40000)
PL_TEST_INTERVAL (erfcf, -0x1p-26, -0x1p3, 40000)
PL_TEST_INTERVAL (erfcf, 0, inf, 40000)