/* * DctIV.cpp * * Copyright 2019 HIMSA II K/S - www.himsa.dk. Represented by EHIMA - www.ehima.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "DctIV.hpp" #include /* * Notes on the choice of the applied FFT library: * - Different fast transform implementations for the DCT-IV can be selected via * USE_FFTW * USE_FFTW_FOR_FFT // assume NF being 4 times an integer * USE_KISSFFT // assume NF being 4 times an integer * where only one of them is meaningful to be defined * * - Defining none of the fast transforms will lead to a direct implementation * which is good for testing but usually to slow for practial applications * * - USE_FFTW: The free "fftw" library should be fastest, but may be more complicated to be build * for embedded target devices. It provides dedicated optimization for the DCT-IV * and thus is preferred from a technical perspective. In terms of licences "fftw" * is free in the sense of GPL, which is not liked in some products. There might be the option * to ask the authors for other licences (which they propose). Nevertheless, the "license" issue * has driven us to not bundle "fftw" directly with the LC3 implementation, although the API * for using it is provided and tested. * * - USE_FFTW_FOR_FFT: this is a not fully optimized alternative approach using the complex fft * provided by "fftw" instead of the dedicated optimized DCT-IV implementation. This option * is mainly provided to demonstrate the transition from USE_FFTW to USE_KISSFFT * * - USE_KISSFFT: the free "kissfft" library is sligthly slower than "fftw", but gives a majority * of the possible performance gain over a "stupid" direct DCT-IV implementation. * "kissfft" does not provide a dedicated optimization for DCT-IV so that some additional code * for implementing a DCT-IV by a complex fft has to be added. This also implies additional * memory needed. * The big advantage of the "kissfft" is that it is less restrictive * in terms of its license. Further, the template based C++ implementation reduces its impact * on the build system to a minimum (we just need to include the right header without needing * to compile and link a separate file). * * - USE_KISSFFT is defined below to be the default choice. The other alternatives may be selecting * by providing its define via the compiler switch -D * * - The impact of the defines is restricted to this *.cpp file for clarity reasons. * Ok, to make this work, we needed a few "ugly" pointer casts, but we really wanted to make * sure, that no other code or build impact is generated by the options in this file. */ #define USE_KISSFFT #if defined USE_FFTW || defined USE_FFTW_FOR_FFT #include #elif defined USE_KISSFFT #include "kissfft.hh" class KissfftConfig { public: KissfftConfig(uint16_t N) : inbuf(nullptr), outbuf(nullptr), fft(nullptr) { inbuf = new std::complex[N]; outbuf = new std::complex[N]; twiddle = new std::complex[N]; fft = new kissfft(N, false); const double pi = std::acos(-1); for (uint16_t n=0; n < N; n++) { twiddle[n] = std::complex( std::cos(-pi*(8*n+1)/(8.0*N*2)), std::sin(-pi*(8*n+1)/(8.0*N*2)) ); } } ~KissfftConfig() { if (nullptr != fft) { delete fft; } if (nullptr != inbuf) { delete[] inbuf; } if (nullptr != outbuf) { delete[] outbuf; } if (nullptr != twiddle) { delete[] twiddle; } } void transform() { if (nullptr != fft) { fft->transform( inbuf, outbuf ); } } std::complex* inbuf; std::complex* outbuf; std::complex* twiddle; kissfft* fft; }; #endif DctIVDbl::DctIVDbl(uint16_t NF_) : NF(NF_), in(nullptr), out(nullptr), dctIVconfig(nullptr) { in = new double[NF]; out = new double[NF]; #if defined USE_FFTW dctIVconfig = fftw_plan_r2r_1d(NF, in, out, FFTW_REDFT11, FFTW_ESTIMATE); #elif defined USE_FFTW_FOR_FFT dctIVconfig = fftw_plan_dft_1d( NF/2, reinterpret_cast(in), reinterpret_cast(out), FFTW_FORWARD, FFTW_ESTIMATE); #elif defined USE_KISSFFT dctIVconfig = new KissfftConfig(NF/2); #endif for (uint16_t n=0; n < NF; n++) { in[n]=0; out[n]=0; } } DctIVDbl::~DctIVDbl() { #if defined USE_FFTW || defined USE_FFTW_FOR_FFT if (nullptr != dctIVconfig) { fftw_destroy_plan(reinterpret_cast(dctIVconfig)); } #elif defined USE_KISSFFT if (nullptr != dctIVconfig) { KissfftConfig* kissfftConfig = reinterpret_cast(dctIVconfig); delete kissfftConfig; } #endif if (nullptr != in) { delete[] in; } if (nullptr != out) { delete[] out; } } void DctIVDirectDbl(uint16_t N, const double* const tw, double* const X) { const double pi = std::acos(-1); for (uint16_t k=0; k < N; k++) { X[k] = 0; for (uint16_t n=0; n < N; n++) { X[k] += tw[n] * std::cos( pi / N * (n+0.5)*(k+0.5) ); } X[k] *= 2; } } void DctIVDbl::run() { #ifdef USE_FFTW fftw_execute(reinterpret_cast(dctIVconfig)); #elif defined USE_FFTW_FOR_FFT const double pi = std::acos(-1); // assume NF being 4 times an integer for (uint16_t n=1; n < NF/2; n+=2) { double buffer; buffer = in[n]; in[n] = in[NF-n]; in[NF-n] = buffer; } for (uint16_t n=0; n < NF; n+=2) { double real = in[n+0]; double imag = in[n+1]; in[n+0] = real * std::cos( -pi*(4*n+1)/(8.0*NF) ) - imag * std::sin( -pi*(4*n+1)/(8.0*NF) ); in[n+1] = real * std::sin( -pi*(4*n+1)/(8.0*NF) ) + imag * std::cos( -pi*(4*n+1)/(8.0*NF) ); } fftw_execute(reinterpret_cast(dctIVconfig)); for (uint16_t n=0; n < NF; n+=2) { double real = out[n+0]; double imag = out[n+1]; out[n+0] = 2*(real * std::cos( -pi*(4*n+1)/(8.0*NF) ) - imag * std::sin( -pi*(4*n+1)/(8.0*NF) ) ); out[n+1] = 2*(real * std::sin( -pi*(4*n+1)/(8.0*NF) ) + imag * std::cos( -pi*(4*n+1)/(8.0*NF) ) ); } for (uint16_t n=1; n < NF/2; n+=2) { double buffer; buffer = out[n]; out[n] = -out[NF-n]; out[NF-n] = -buffer; } #elif defined USE_KISSFFT // assume NF being 4 times an integer KissfftConfig* kissfftConfig = reinterpret_cast(dctIVconfig); for (uint16_t n=0; n < NF/2; n++) { kissfftConfig->inbuf[n] = kissfftConfig->twiddle[n] * std::complex( in[2*n], in[NF-2*n-1] ); } kissfftConfig->transform(); for (uint16_t n=0; n < NF/2; n++) { std::complex complexOut = kissfftConfig->twiddle[n] * kissfftConfig->outbuf[n]; out[2*n] = complexOut.real()*2; out[NF-2*n-1] = -complexOut.imag()*2; } #else DctIVDirectDbl(NF, in, out); #endif }