262 lines
8.1 KiB
C++
262 lines
8.1 KiB
C++
/*
|
|
* DctIV.cpp
|
|
*
|
|
* Copyright 2019 HIMSA II K/S - www.himsa.dk. Represented by EHIMA - www.ehima.com
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "DctIV.hpp"
|
|
#include <cmath>
|
|
|
|
/*
|
|
* Notes on the choice of the applied FFT library:
|
|
* - Different fast transform implementations for the DCT-IV can be selected via
|
|
* USE_FFTW
|
|
* USE_FFTW_FOR_FFT // assume NF being 4 times an integer
|
|
* USE_KISSFFT // assume NF being 4 times an integer
|
|
* where only one of them is meaningful to be defined
|
|
*
|
|
* - Defining none of the fast transforms will lead to a direct implementation
|
|
* which is good for testing but usually to slow for practial applications
|
|
*
|
|
* - USE_FFTW: The free "fftw" library should be fastest, but may be more complicated to be build
|
|
* for embedded target devices. It provides dedicated optimization for the DCT-IV
|
|
* and thus is preferred from a technical perspective. In terms of licences "fftw"
|
|
* is free in the sense of GPL, which is not liked in some products. There might be the option
|
|
* to ask the authors for other licences (which they propose). Nevertheless, the "license" issue
|
|
* has driven us to not bundle "fftw" directly with the LC3 implementation, although the API
|
|
* for using it is provided and tested.
|
|
*
|
|
* - USE_FFTW_FOR_FFT: this is a not fully optimized alternative approach using the complex fft
|
|
* provided by "fftw" instead of the dedicated optimized DCT-IV implementation. This option
|
|
* is mainly provided to demonstrate the transition from USE_FFTW to USE_KISSFFT
|
|
*
|
|
* - USE_KISSFFT: the free "kissfft" library is sligthly slower than "fftw", but gives a majority
|
|
* of the possible performance gain over a "stupid" direct DCT-IV implementation.
|
|
* "kissfft" does not provide a dedicated optimization for DCT-IV so that some additional code
|
|
* for implementing a DCT-IV by a complex fft has to be added. This also implies additional
|
|
* memory needed.
|
|
* The big advantage of the "kissfft" is that it is less restrictive
|
|
* in terms of its license. Further, the template based C++ implementation reduces its impact
|
|
* on the build system to a minimum (we just need to include the right header without needing
|
|
* to compile and link a separate file).
|
|
*
|
|
* - USE_KISSFFT is defined below to be the default choice. The other alternatives may be selecting
|
|
* by providing its define via the compiler switch -D<define>
|
|
*
|
|
* - The impact of the defines is restricted to this *.cpp file for clarity reasons.
|
|
* Ok, to make this work, we needed a few "ugly" pointer casts, but we really wanted to make
|
|
* sure, that no other code or build impact is generated by the options in this file.
|
|
*/
|
|
#define USE_KISSFFT
|
|
|
|
|
|
#if defined USE_FFTW || defined USE_FFTW_FOR_FFT
|
|
#include <fftw3.h>
|
|
|
|
#elif defined USE_KISSFFT
|
|
#include "kissfft.hh"
|
|
|
|
class KissfftConfig
|
|
{
|
|
public:
|
|
KissfftConfig(uint16_t N)
|
|
: inbuf(nullptr), outbuf(nullptr), fft(nullptr)
|
|
{
|
|
inbuf = new std::complex<double>[N];
|
|
outbuf = new std::complex<double>[N];
|
|
twiddle = new std::complex<double>[N];
|
|
fft = new kissfft<double>(N, false);
|
|
|
|
const double pi = std::acos(-1);
|
|
for (uint16_t n=0; n < N; n++)
|
|
{
|
|
twiddle[n] = std::complex<double>( std::cos(-pi*(8*n+1)/(8.0*N*2)), std::sin(-pi*(8*n+1)/(8.0*N*2)) );
|
|
}
|
|
|
|
}
|
|
|
|
~KissfftConfig()
|
|
{
|
|
if (nullptr != fft)
|
|
{
|
|
delete fft;
|
|
}
|
|
if (nullptr != inbuf)
|
|
{
|
|
delete[] inbuf;
|
|
}
|
|
if (nullptr != outbuf)
|
|
{
|
|
delete[] outbuf;
|
|
}
|
|
if (nullptr != twiddle)
|
|
{
|
|
delete[] twiddle;
|
|
}
|
|
}
|
|
|
|
void transform()
|
|
{
|
|
if (nullptr != fft)
|
|
{
|
|
fft->transform( inbuf, outbuf );
|
|
}
|
|
}
|
|
|
|
std::complex<double>* inbuf;
|
|
std::complex<double>* outbuf;
|
|
std::complex<double>* twiddle;
|
|
kissfft<double>* fft;
|
|
};
|
|
|
|
#endif
|
|
|
|
DctIVDbl::DctIVDbl(uint16_t NF_) :
|
|
NF(NF_),
|
|
in(nullptr),
|
|
out(nullptr),
|
|
dctIVconfig(nullptr)
|
|
{
|
|
in = new double[NF];
|
|
out = new double[NF];
|
|
|
|
#if defined USE_FFTW
|
|
dctIVconfig = fftw_plan_r2r_1d(NF, in, out, FFTW_REDFT11, FFTW_ESTIMATE);
|
|
|
|
#elif defined USE_FFTW_FOR_FFT
|
|
dctIVconfig = fftw_plan_dft_1d( NF/2,
|
|
reinterpret_cast<fftw_complex*>(in),
|
|
reinterpret_cast<fftw_complex*>(out),
|
|
FFTW_FORWARD, FFTW_ESTIMATE);
|
|
|
|
#elif defined USE_KISSFFT
|
|
dctIVconfig = new KissfftConfig(NF/2);
|
|
|
|
#endif
|
|
|
|
for (uint16_t n=0; n < NF; n++)
|
|
{
|
|
in[n]=0;
|
|
out[n]=0;
|
|
}
|
|
}
|
|
|
|
DctIVDbl::~DctIVDbl()
|
|
{
|
|
#if defined USE_FFTW || defined USE_FFTW_FOR_FFT
|
|
if (nullptr != dctIVconfig)
|
|
{
|
|
fftw_destroy_plan(reinterpret_cast<fftw_plan>(dctIVconfig));
|
|
}
|
|
|
|
#elif defined USE_KISSFFT
|
|
if (nullptr != dctIVconfig)
|
|
{
|
|
KissfftConfig* kissfftConfig = reinterpret_cast<KissfftConfig*>(dctIVconfig);
|
|
delete kissfftConfig;
|
|
}
|
|
|
|
#endif
|
|
if (nullptr != in)
|
|
{
|
|
delete[] in;
|
|
}
|
|
if (nullptr != out)
|
|
{
|
|
delete[] out;
|
|
}
|
|
}
|
|
|
|
void DctIVDirectDbl(uint16_t N, const double* const tw, double* const X)
|
|
{
|
|
const double pi = std::acos(-1);
|
|
for (uint16_t k=0; k < N; k++)
|
|
{
|
|
X[k] = 0;
|
|
for (uint16_t n=0; n < N; n++)
|
|
{
|
|
X[k] += tw[n] * std::cos( pi / N * (n+0.5)*(k+0.5) );
|
|
}
|
|
X[k] *= 2;
|
|
}
|
|
|
|
}
|
|
|
|
void DctIVDbl::run()
|
|
{
|
|
#ifdef USE_FFTW
|
|
fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));
|
|
|
|
#elif defined USE_FFTW_FOR_FFT
|
|
const double pi = std::acos(-1);
|
|
// assume NF being 4 times an integer
|
|
for (uint16_t n=1; n < NF/2; n+=2)
|
|
{
|
|
double buffer;
|
|
buffer = in[n];
|
|
in[n] = in[NF-n];
|
|
in[NF-n] = buffer;
|
|
}
|
|
for (uint16_t n=0; n < NF; n+=2)
|
|
{
|
|
double real = in[n+0];
|
|
double imag = in[n+1];
|
|
in[n+0] = real * std::cos( -pi*(4*n+1)/(8.0*NF) )
|
|
- imag * std::sin( -pi*(4*n+1)/(8.0*NF) );
|
|
in[n+1] = real * std::sin( -pi*(4*n+1)/(8.0*NF) )
|
|
+ imag * std::cos( -pi*(4*n+1)/(8.0*NF) );
|
|
}
|
|
|
|
fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));
|
|
|
|
for (uint16_t n=0; n < NF; n+=2)
|
|
{
|
|
double real = out[n+0];
|
|
double imag = out[n+1];
|
|
out[n+0] = 2*(real * std::cos( -pi*(4*n+1)/(8.0*NF) )
|
|
- imag * std::sin( -pi*(4*n+1)/(8.0*NF) ) );
|
|
out[n+1] = 2*(real * std::sin( -pi*(4*n+1)/(8.0*NF) )
|
|
+ imag * std::cos( -pi*(4*n+1)/(8.0*NF) ) );
|
|
}
|
|
for (uint16_t n=1; n < NF/2; n+=2)
|
|
{
|
|
double buffer;
|
|
buffer = out[n];
|
|
out[n] = -out[NF-n];
|
|
out[NF-n] = -buffer;
|
|
}
|
|
|
|
#elif defined USE_KISSFFT
|
|
// assume NF being 4 times an integer
|
|
KissfftConfig* kissfftConfig = reinterpret_cast<KissfftConfig*>(dctIVconfig);
|
|
for (uint16_t n=0; n < NF/2; n++)
|
|
{
|
|
kissfftConfig->inbuf[n] = kissfftConfig->twiddle[n] * std::complex<double>( in[2*n], in[NF-2*n-1] );
|
|
}
|
|
|
|
kissfftConfig->transform();
|
|
|
|
for (uint16_t n=0; n < NF/2; n++)
|
|
{
|
|
std::complex<double> complexOut = kissfftConfig->twiddle[n] * kissfftConfig->outbuf[n];
|
|
out[2*n] = complexOut.real()*2;
|
|
out[NF-2*n-1] = -complexOut.imag()*2;
|
|
}
|
|
|
|
#else
|
|
DctIVDirectDbl(NF, in, out);
|
|
#endif
|
|
}
|