add pffft
This commit is contained in:
171
pffft/pffastconv.h
Normal file
171
pffft/pffastconv.h
Normal file
@@ -0,0 +1,171 @@
|
||||
/* Copyright (c) 2019 Hayati Ayguen ( h_ayguen@web.de )
|
||||
|
||||
Redistribution and use of the Software in source and binary forms,
|
||||
with or without modification, is permitted provided that the
|
||||
following conditions are met:
|
||||
|
||||
- Neither the names of PFFFT, PFFASTCONV, nor the names of its
|
||||
sponsors or contributors may be used to endorse or promote products
|
||||
derived from this Software without specific prior written permission.
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notices, this list of conditions, and the disclaimer below.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions, and the disclaimer below in the
|
||||
documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
PFFASTCONV : a Pretty Fast Fast Convolution
|
||||
|
||||
This is basically the implementation of fast convolution,
|
||||
utilizing the FFT (pffft).
|
||||
|
||||
Restrictions:
|
||||
|
||||
- 1D transforms only, with 32-bit single precision.
|
||||
|
||||
- all (float*) pointers in the functions below are expected to
|
||||
have an "simd-compatible" alignment, that is 16 bytes on x86 and
|
||||
powerpc CPUs.
|
||||
|
||||
You can allocate such buffers with the functions
|
||||
pffft_aligned_malloc / pffft_aligned_free (or with stuff like
|
||||
posix_memalign..)
|
||||
|
||||
*/
|
||||
|
||||
#ifndef PFFASTCONV_H
|
||||
#define PFFASTCONV_H
|
||||
|
||||
#include <stddef.h> /* for size_t */
|
||||
#include "pffft.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* opaque struct holding internal stuff
|
||||
this struct can't be shared by many threads as it contains
|
||||
temporary data, computed within the convolution
|
||||
*/
|
||||
typedef struct PFFASTCONV_Setup PFFASTCONV_Setup;
|
||||
|
||||
typedef enum {
|
||||
PFFASTCONV_CPLX_INP_OUT = 1,
|
||||
/* set when input and output is complex,
|
||||
* with real and imag part interleaved in both vectors.
|
||||
* input[] has inputLen complex values: 2 * inputLen floats,
|
||||
* output[] is also written with complex values.
|
||||
* without this flag, the input is interpreted as real vector
|
||||
*/
|
||||
|
||||
PFFASTCONV_CPLX_FILTER = 2,
|
||||
/* set when filterCoeffs is complex,
|
||||
* with real and imag part interleaved.
|
||||
* filterCoeffs[] has filterLen complex values: 2 * filterLen floats
|
||||
* without this flag, the filter is interpreted as real vector
|
||||
* ATTENTION: this is not implemented yet!
|
||||
*/
|
||||
|
||||
PFFASTCONV_DIRECT_INP = 4,
|
||||
/* set PFFASTCONV_DIRECT_INP only, when following conditions are met:
|
||||
* 1- input vecor X must be aligned
|
||||
* 2- (all) inputLen <= ouput blockLen
|
||||
* 3- X must have minimum length of output BlockLen
|
||||
* 4- the additional samples from inputLen .. BlockLen-1
|
||||
* must contain valid small and non-NAN samples (ideally zero)
|
||||
*
|
||||
* this option is ignored when PFFASTCONV_CPLX_INP_OUT is set
|
||||
*/
|
||||
|
||||
PFFASTCONV_DIRECT_OUT = 8,
|
||||
/* set PFFASTCONV_DIRECT_OUT only when following conditions are met:
|
||||
* 1- output vector Y must be aligned
|
||||
* 2- (all) inputLen <= ouput blockLen
|
||||
* 3- Y must have minimum length of output blockLen
|
||||
*
|
||||
* this option is ignored when PFFASTCONV_CPLX_INP_OUT is set
|
||||
*/
|
||||
|
||||
PFFASTCONV_CPLX_SINGLE_FFT = 16,
|
||||
/* hint to process complex data with one single FFT;
|
||||
* default is to use 2 FFTs: one for real part, one for imag part
|
||||
* */
|
||||
|
||||
|
||||
PFFASTCONV_SYMMETRIC = 32,
|
||||
/* just informal, that filter is symmetric .. and filterLen is multiple of 8 */
|
||||
|
||||
PFFASTCONV_CORRELATION = 64,
|
||||
/* filterCoeffs[] of pffastconv_new_setup are for correlation;
|
||||
* thus, do not flip them for the internal fft calculation
|
||||
* - as necessary for the fast convolution */
|
||||
|
||||
} pffastconv_flags_t;
|
||||
|
||||
/*
|
||||
prepare for performing fast convolution(s) of 'filterLen' with input 'blockLen'.
|
||||
The output 'blockLen' might be bigger to allow the fast convolution.
|
||||
|
||||
'flags' are bitmask over the 'pffastconv_flags_t' enum.
|
||||
|
||||
PFFASTCONV_Setup structure can't be shared accross multiple filters
|
||||
or concurrent threads.
|
||||
*/
|
||||
PFFASTCONV_Setup * pffastconv_new_setup( const float * filterCoeffs, int filterLen, int * blockLen, int flags );
|
||||
|
||||
void pffastconv_destroy_setup(PFFASTCONV_Setup *);
|
||||
|
||||
/*
|
||||
Perform the fast convolution.
|
||||
|
||||
'input' and 'output' don't need to be aligned - unless any of
|
||||
PFFASTCONV_DIRECT_INP or PFFASTCONV_DIRECT_OUT is set in 'flags'.
|
||||
|
||||
inputLen > output 'blockLen' (from pffastconv_new_setup()) is allowed.
|
||||
in this case, multiple FFTs are called internally, to process the
|
||||
input[].
|
||||
|
||||
'output' vector must have size >= (inputLen - filterLen + 1)
|
||||
|
||||
set bool option 'applyFlush' to process the full input[].
|
||||
with this option, 'tail samples' of input are also processed.
|
||||
This might be inefficient, because the FFT is called to produce
|
||||
few(er) output samples, than possible.
|
||||
This option is useful to process the last samples of an input (file)
|
||||
or to reduce latency.
|
||||
|
||||
return value is the number of produced samples in output[].
|
||||
the same amount of samples is processed from input[]. to continue
|
||||
processing, the caller must save/move the remaining samples of
|
||||
input[].
|
||||
|
||||
*/
|
||||
int pffastconv_apply(PFFASTCONV_Setup * s, const float *input, int inputLen, float *output, int applyFlush);
|
||||
|
||||
void *pffastconv_malloc(size_t nb_bytes);
|
||||
void pffastconv_free(void *);
|
||||
|
||||
/* return 4 or 1 wether support SSE/Altivec instructions was enabled when building pffft.c */
|
||||
int pffastconv_simd_size();
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* PFFASTCONV_H */
|
||||
Reference in New Issue
Block a user