unplugged-vendor/system/bt/embdrv/lc3/Common/DctIV.cpp

/*
 * DctIV.cpp
 *
 * Copyright 2019 HIMSA II K/S - www.himsa.dk. Represented by EHIMA - www.ehima.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "DctIV.hpp"
#include <cmath>

/*
 * Notes on the choice of the applied FFT library:
 *  - Different fast transform implementations for the DCT-IV can be selected via
 *      USE_FFTW
 *      USE_FFTW_FOR_FFT  // assume NF being 4 times an integer
 *      USE_KISSFFT       // assume NF being 4 times an integer
 *     where only one of them is meaningful to be defined
 *
 *  - Defining none of the fast transforms will lead to a direct implementation
 *     which is good for testing but usually to slow for practial applications
 *
 *  - USE_FFTW: The free "fftw" library should be fastest, but may be more complicated to be build
 *     for embedded target devices. It provides dedicated optimization for the DCT-IV
 *     and thus is preferred from a technical perspective. In terms of licences "fftw"
 *     is free in the sense of GPL, which is not liked in some products. There might be the option
 *     to ask the authors for other licences (which they propose). Nevertheless, the "license" issue
 *     has driven us to not bundle "fftw" directly with the LC3 implementation, although the API
 *     for using it is provided and tested.
 *
 *  - USE_FFTW_FOR_FFT: this is a not fully optimized alternative approach using the complex fft
 *     provided by "fftw" instead of the dedicated optimized DCT-IV implementation. This option
 *     is mainly provided to demonstrate the transition from USE_FFTW to USE_KISSFFT
 *
 *  - USE_KISSFFT: the free "kissfft" library is sligthly slower than "fftw", but gives a majority
 *     of the possible performance gain over a "stupid" direct DCT-IV implementation.
 *     "kissfft" does not provide a dedicated optimization for DCT-IV so that some additional code
 *     for implementing a DCT-IV by a complex fft has to be added. This also implies additional
 *     memory needed.
 *      The big advantage of the "kissfft" is that it is less restrictive
 *     in terms of its license. Further, the template based C++ implementation reduces its impact
 *     on the build system to a minimum (we just need to include the right header without needing
 *     to compile and link a separate file).
 *
 *   - USE_KISSFFT is defined below to be the default choice. The other alternatives may be selecting
 *      by providing its define via the compiler switch -D<define>
 *
 *   - The impact of the defines is restricted to this *.cpp file for clarity reasons.
 *      Ok, to make this work, we needed a few "ugly" pointer casts, but we really wanted to make
 *      sure, that no other code or build impact is generated by the options in this file.
 */
#define USE_KISSFFT


#if defined USE_FFTW  || defined USE_FFTW_FOR_FFT
#include <fftw3.h>

#elif defined USE_KISSFFT
#include "kissfft.hh"

class KissfftConfig
{
public:
    KissfftConfig(uint16_t N)
     : inbuf(nullptr), outbuf(nullptr), fft(nullptr)
    {
        inbuf  = new std::complex<double>[N];
        outbuf = new std::complex<double>[N];
        twiddle = new std::complex<double>[N];
        fft = new kissfft<double>(N, false);

        const double pi = std::acos(-1);
        for (uint16_t n=0; n < N; n++)
        {
            twiddle[n] = std::complex<double>( std::cos(-pi*(8*n+1)/(8.0*N*2)), std::sin(-pi*(8*n+1)/(8.0*N*2)) );
        }

    }

    ~KissfftConfig()
    {
        if (nullptr != fft)
        {
            delete fft;
        }
        if (nullptr != inbuf)
        {
            delete[] inbuf;
        }
        if (nullptr != outbuf)
        {
            delete[] outbuf;
        }
        if (nullptr != twiddle)
        {
            delete[] twiddle;
        }
    }

    void transform()
    {
        if (nullptr != fft)
        {
            fft->transform( inbuf, outbuf );
        }
    }

    std::complex<double>* inbuf;
    std::complex<double>* outbuf;
    std::complex<double>* twiddle;
    kissfft<double>* fft;
};

#endif

DctIVDbl::DctIVDbl(uint16_t NF_) :
        NF(NF_),
        in(nullptr),
        out(nullptr),
        dctIVconfig(nullptr)
{
    in  = new double[NF];
    out = new double[NF];

#if defined USE_FFTW
    dctIVconfig = fftw_plan_r2r_1d(NF, in, out, FFTW_REDFT11, FFTW_ESTIMATE);

#elif defined USE_FFTW_FOR_FFT
    dctIVconfig = fftw_plan_dft_1d( NF/2,
                           reinterpret_cast<fftw_complex*>(in),
                           reinterpret_cast<fftw_complex*>(out),
                           FFTW_FORWARD, FFTW_ESTIMATE);

#elif defined USE_KISSFFT
    dctIVconfig = new KissfftConfig(NF/2);

#endif

    for (uint16_t n=0; n < NF; n++)
    {
        in[n]=0;
        out[n]=0;
    }
}

DctIVDbl::~DctIVDbl()
{
#if defined USE_FFTW || defined USE_FFTW_FOR_FFT
    if (nullptr != dctIVconfig)
    {
        fftw_destroy_plan(reinterpret_cast<fftw_plan>(dctIVconfig));
    }

#elif defined USE_KISSFFT
    if (nullptr != dctIVconfig)
    {
        KissfftConfig* kissfftConfig = reinterpret_cast<KissfftConfig*>(dctIVconfig);
        delete kissfftConfig;
    }

#endif
    if (nullptr != in)
    {
        delete[] in;
    }
    if (nullptr != out)
    {
        delete[] out;
    }
}

void DctIVDirectDbl(uint16_t N, const double* const tw, double* const X)
{
    const double pi = std::acos(-1);
    for (uint16_t k=0; k < N; k++)
    {
        X[k] = 0;
        for (uint16_t n=0; n < N; n++)
        {
            X[k] += tw[n] * std::cos( pi / N * (n+0.5)*(k+0.5) );
        }
        X[k] *= 2;
    }

}

void DctIVDbl::run()
{
#ifdef USE_FFTW
    fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));

#elif defined USE_FFTW_FOR_FFT
    const double pi = std::acos(-1);
    // assume NF being 4 times an integer
    for (uint16_t n=1; n < NF/2; n+=2)
    {
        double buffer;
        buffer   = in[n];
        in[n]    = in[NF-n];
        in[NF-n] = buffer;
    }
    for (uint16_t n=0; n < NF; n+=2)
    {
        double real = in[n+0];
        double imag = in[n+1];
        in[n+0] = real * std::cos( -pi*(4*n+1)/(8.0*NF) )
                  - imag * std::sin( -pi*(4*n+1)/(8.0*NF) );
        in[n+1] = real * std::sin( -pi*(4*n+1)/(8.0*NF) )
                  + imag * std::cos( -pi*(4*n+1)/(8.0*NF) );
    }

    fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));

    for (uint16_t n=0; n < NF; n+=2)
    {
        double real = out[n+0];
        double imag = out[n+1];
        out[n+0] = 2*(real * std::cos( -pi*(4*n+1)/(8.0*NF) )
                    - imag * std::sin( -pi*(4*n+1)/(8.0*NF) ) );
        out[n+1] = 2*(real * std::sin( -pi*(4*n+1)/(8.0*NF) )
                    + imag * std::cos( -pi*(4*n+1)/(8.0*NF) ) );
    }
    for (uint16_t n=1; n < NF/2; n+=2)
    {
        double buffer;
        buffer    = out[n];
        out[n]    = -out[NF-n];
        out[NF-n] = -buffer;
    }

#elif defined USE_KISSFFT
    // assume NF being 4 times an integer
    KissfftConfig* kissfftConfig = reinterpret_cast<KissfftConfig*>(dctIVconfig);
    for (uint16_t n=0; n < NF/2; n++)
    {
        kissfftConfig->inbuf[n] = kissfftConfig->twiddle[n] * std::complex<double>( in[2*n], in[NF-2*n-1] );
    }

    kissfftConfig->transform();

    for (uint16_t n=0; n < NF/2; n++)
    {
        std::complex<double> complexOut = kissfftConfig->twiddle[n] * kissfftConfig->outbuf[n];
        out[2*n]      =  complexOut.real()*2;
        out[NF-2*n-1] = -complexOut.imag()*2;
    }

#else
    DctIVDirectDbl(NF, in, out);
#endif
}