unplugged-vendor/system/bt/embdrv/lc3/Common/DctIV.cpp

262 lines
8.1 KiB
C++

/*
* DctIV.cpp
*
* Copyright 2019 HIMSA II K/S - www.himsa.dk. Represented by EHIMA - www.ehima.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "DctIV.hpp"
#include <cmath>
/*
* Notes on the choice of the applied FFT library:
* - Different fast transform implementations for the DCT-IV can be selected via
* USE_FFTW
* USE_FFTW_FOR_FFT // assume NF being 4 times an integer
* USE_KISSFFT // assume NF being 4 times an integer
* where only one of them is meaningful to be defined
*
* - Defining none of the fast transforms will lead to a direct implementation
* which is good for testing but usually to slow for practial applications
*
* - USE_FFTW: The free "fftw" library should be fastest, but may be more complicated to be build
* for embedded target devices. It provides dedicated optimization for the DCT-IV
* and thus is preferred from a technical perspective. In terms of licences "fftw"
* is free in the sense of GPL, which is not liked in some products. There might be the option
* to ask the authors for other licences (which they propose). Nevertheless, the "license" issue
* has driven us to not bundle "fftw" directly with the LC3 implementation, although the API
* for using it is provided and tested.
*
* - USE_FFTW_FOR_FFT: this is a not fully optimized alternative approach using the complex fft
* provided by "fftw" instead of the dedicated optimized DCT-IV implementation. This option
* is mainly provided to demonstrate the transition from USE_FFTW to USE_KISSFFT
*
* - USE_KISSFFT: the free "kissfft" library is sligthly slower than "fftw", but gives a majority
* of the possible performance gain over a "stupid" direct DCT-IV implementation.
* "kissfft" does not provide a dedicated optimization for DCT-IV so that some additional code
* for implementing a DCT-IV by a complex fft has to be added. This also implies additional
* memory needed.
* The big advantage of the "kissfft" is that it is less restrictive
* in terms of its license. Further, the template based C++ implementation reduces its impact
* on the build system to a minimum (we just need to include the right header without needing
* to compile and link a separate file).
*
* - USE_KISSFFT is defined below to be the default choice. The other alternatives may be selecting
* by providing its define via the compiler switch -D<define>
*
* - The impact of the defines is restricted to this *.cpp file for clarity reasons.
* Ok, to make this work, we needed a few "ugly" pointer casts, but we really wanted to make
* sure, that no other code or build impact is generated by the options in this file.
*/
#define USE_KISSFFT
#if defined USE_FFTW || defined USE_FFTW_FOR_FFT
#include <fftw3.h>
#elif defined USE_KISSFFT
#include "kissfft.hh"
class KissfftConfig
{
public:
KissfftConfig(uint16_t N)
: inbuf(nullptr), outbuf(nullptr), fft(nullptr)
{
inbuf = new std::complex<double>[N];
outbuf = new std::complex<double>[N];
twiddle = new std::complex<double>[N];
fft = new kissfft<double>(N, false);
const double pi = std::acos(-1);
for (uint16_t n=0; n < N; n++)
{
twiddle[n] = std::complex<double>( std::cos(-pi*(8*n+1)/(8.0*N*2)), std::sin(-pi*(8*n+1)/(8.0*N*2)) );
}
}
~KissfftConfig()
{
if (nullptr != fft)
{
delete fft;
}
if (nullptr != inbuf)
{
delete[] inbuf;
}
if (nullptr != outbuf)
{
delete[] outbuf;
}
if (nullptr != twiddle)
{
delete[] twiddle;
}
}
void transform()
{
if (nullptr != fft)
{
fft->transform( inbuf, outbuf );
}
}
std::complex<double>* inbuf;
std::complex<double>* outbuf;
std::complex<double>* twiddle;
kissfft<double>* fft;
};
#endif
DctIVDbl::DctIVDbl(uint16_t NF_) :
NF(NF_),
in(nullptr),
out(nullptr),
dctIVconfig(nullptr)
{
in = new double[NF];
out = new double[NF];
#if defined USE_FFTW
dctIVconfig = fftw_plan_r2r_1d(NF, in, out, FFTW_REDFT11, FFTW_ESTIMATE);
#elif defined USE_FFTW_FOR_FFT
dctIVconfig = fftw_plan_dft_1d( NF/2,
reinterpret_cast<fftw_complex*>(in),
reinterpret_cast<fftw_complex*>(out),
FFTW_FORWARD, FFTW_ESTIMATE);
#elif defined USE_KISSFFT
dctIVconfig = new KissfftConfig(NF/2);
#endif
for (uint16_t n=0; n < NF; n++)
{
in[n]=0;
out[n]=0;
}
}
DctIVDbl::~DctIVDbl()
{
#if defined USE_FFTW || defined USE_FFTW_FOR_FFT
if (nullptr != dctIVconfig)
{
fftw_destroy_plan(reinterpret_cast<fftw_plan>(dctIVconfig));
}
#elif defined USE_KISSFFT
if (nullptr != dctIVconfig)
{
KissfftConfig* kissfftConfig = reinterpret_cast<KissfftConfig*>(dctIVconfig);
delete kissfftConfig;
}
#endif
if (nullptr != in)
{
delete[] in;
}
if (nullptr != out)
{
delete[] out;
}
}
void DctIVDirectDbl(uint16_t N, const double* const tw, double* const X)
{
const double pi = std::acos(-1);
for (uint16_t k=0; k < N; k++)
{
X[k] = 0;
for (uint16_t n=0; n < N; n++)
{
X[k] += tw[n] * std::cos( pi / N * (n+0.5)*(k+0.5) );
}
X[k] *= 2;
}
}
void DctIVDbl::run()
{
#ifdef USE_FFTW
fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));
#elif defined USE_FFTW_FOR_FFT
const double pi = std::acos(-1);
// assume NF being 4 times an integer
for (uint16_t n=1; n < NF/2; n+=2)
{
double buffer;
buffer = in[n];
in[n] = in[NF-n];
in[NF-n] = buffer;
}
for (uint16_t n=0; n < NF; n+=2)
{
double real = in[n+0];
double imag = in[n+1];
in[n+0] = real * std::cos( -pi*(4*n+1)/(8.0*NF) )
- imag * std::sin( -pi*(4*n+1)/(8.0*NF) );
in[n+1] = real * std::sin( -pi*(4*n+1)/(8.0*NF) )
+ imag * std::cos( -pi*(4*n+1)/(8.0*NF) );
}
fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));
for (uint16_t n=0; n < NF; n+=2)
{
double real = out[n+0];
double imag = out[n+1];
out[n+0] = 2*(real * std::cos( -pi*(4*n+1)/(8.0*NF) )
- imag * std::sin( -pi*(4*n+1)/(8.0*NF) ) );
out[n+1] = 2*(real * std::sin( -pi*(4*n+1)/(8.0*NF) )
+ imag * std::cos( -pi*(4*n+1)/(8.0*NF) ) );
}
for (uint16_t n=1; n < NF/2; n+=2)
{
double buffer;
buffer = out[n];
out[n] = -out[NF-n];
out[NF-n] = -buffer;
}
#elif defined USE_KISSFFT
// assume NF being 4 times an integer
KissfftConfig* kissfftConfig = reinterpret_cast<KissfftConfig*>(dctIVconfig);
for (uint16_t n=0; n < NF/2; n++)
{
kissfftConfig->inbuf[n] = kissfftConfig->twiddle[n] * std::complex<double>( in[2*n], in[NF-2*n-1] );
}
kissfftConfig->transform();
for (uint16_t n=0; n < NF/2; n++)
{
std::complex<double> complexOut = kissfftConfig->twiddle[n] * kissfftConfig->outbuf[n];
out[2*n] = complexOut.real()*2;
out[NF-2*n-1] = -complexOut.imag()*2;
}
#else
DctIVDirectDbl(NF, in, out);
#endif
}