323 lines
8.7 KiB
C++
323 lines
8.7 KiB
C++
|
|
#include "pf_conv.h"
|
|
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
|
|
#include <algorithm>
|
|
|
|
// Debug tracing switch.
// With the condition below set to 0 (as it is here) DPRINT() expands to an empty
// statement; changing it to 1 makes DPRINT() a printf-style write to stderr.
#if 0
|
|
#include <stdio.h>
|
|
|
|
// tracing enabled: forward all arguments to fprintf(stderr, ...)
#define DPRINT(...) fprintf(stderr, __VA_ARGS__)
|
|
|
|
#else
|
|
// tracing disabled: the do/while(0) form keeps `DPRINT(...);` a single statement,
// so it stays valid as the body of an if/else
#define DPRINT(...) do { } while (0)
|
|
#endif
|
|
|
|
|
|
#ifdef HAVE_MIPP
|
|
#include <mipp.h>
|
|
#endif
|
|
|
|
|
|
#ifndef CONV_ARCH_POST
|
|
#error CONV_ARCH_POST not defined
|
|
#endif
|
|
|
|
// Preprocessor helpers.
// PP_STRINGIFY turns its argument into a string literal without expanding it.
#define PP_STRINGIFY(X) #X
|
|
// PP_TOSTRING adds one level of indirection so that a macro argument is expanded
// first, e.g. PP_TOSTRING(CONV_ARCH_POST) gives the value of CONV_ARCH_POST as a string.
#define PP_TOSTRING(X) PP_STRINGIFY(X)
|
|
// PP_CONCAT_IMPL pastes two tokens together as written.
#define PP_CONCAT_IMPL(x, y) x##y
|
|
// PP_CONCAT expands its arguments before pasting them (same indirection trick as above).
#define PP_CONCAT(x, y) PP_CONCAT_IMPL( x, y )
|
|
|
|
// ARCHFUNCNAME(X) builds the symbol name X_<CONV_ARCH_POST>,
// e.g. ARCHFUNCNAME(id) becomes id_foo when CONV_ARCH_POST is defined as foo.
#define ARCHFUNCNAME(X) PP_CONCAT(X##_,CONV_ARCH_POST)
|
|
|
|
|
|
// Returns the architecture tag of this build: the value of CONV_ARCH_POST
// as a string literal.
const char * ARCHFUNCNAME(id)()
{
    const char * const arch_name = PP_TOSTRING(CONV_ARCH_POST);
    return arch_name;
}
|
|
|
|
|
|
// Number of floats handled per SIMD register in this build.
// Returns mipp::N<float>() when MIPP is available with intrinsics enabled,
// otherwise 1 (MIPP missing, or MIPP_NO_INTRINSICS defined).
int ARCHFUNCNAME(conv_float_simd_size)()
{
#if defined(HAVE_MIPP) && !defined(MIPP_NO_INTRINSICS)
    return mipp::N<float>();
#else
    // scalar build: completely independent of MIPP
    return 1;
#endif
}
|
|
|
|
|
|
// Moves the samples that a previous convolution call left unprocessed,
// i.e. s[state->offset .. state->size), to the beginning of the buffer.
//   s     : sample buffer, modified in place
//   state : on return, offset is 0 and size is the number of leftover samples
//           (0 when nothing was left)
void ARCHFUNCNAME(conv_float_move_rest)(float * RESTRICT s, conv_buffer_state * RESTRICT state)
{
    // this many samples from the previous call were not processed
    int remaining = state->size - state->offset;
    if (remaining < 0)
        remaining = 0;

    // Shift the leftovers to the front. Skipped when they already start at index 0:
    // std::copy() requires that the destination does not lie inside the source
    // range, and the copy would change nothing anyway.
    // For offset > 0 the destination precedes the source, which std::copy() permits
    // even though the two ranges may overlap.
    if (remaining > 0 && state->offset != 0)
        std::copy(&s[state->offset], &s[state->size], s);

    state->offset = 0;        // data still to be processed now starts at the beginning
    state->size = remaining;  // this many unprocessed samples
}
|
|
|
|
|
|
// Complex-sample counterpart of conv_float_move_rest: moves the samples that a
// previous convolution call left unprocessed, s[state->offset .. state->size),
// to the beginning of the buffer.
//   s     : complex sample buffer, modified in place
//   state : on return, offset is 0 and size is the number of leftover samples
//           (0 when nothing was left)
void ARCHFUNCNAME(conv_cplx_move_rest)(complexf * RESTRICT s, conv_buffer_state * RESTRICT state)
{
    // this many samples from the previous call were not processed
    int remaining = state->size - state->offset;
    if (remaining < 0)
        remaining = 0;

    // Shift the leftovers to the front. Skipped when they already start at index 0:
    // std::copy() requires that the destination does not lie inside the source
    // range, and the copy would change nothing anyway.
    // For offset > 0 the destination precedes the source, which std::copy() permits
    // even though the two ranges may overlap.
    if (remaining > 0 && state->offset != 0)
        std::copy(&s[state->offset], &s[state->size], s);

    state->offset = 0;        // data still to be processed now starts at the beginning
    state->size = remaining;  // this many unprocessed samples
}
|
|
|
|
|
|
#if defined(MIPP_NO_INTRINSICS)
|
|
// have a completely MIPP independent implementation
|
|
// #error missing HAVE_MIPP: there is no MIPP-independent implementation
|
|
|
|
// Scalar in-place convolution (the variant compiled when MIPP_NO_INTRINSICS is defined).
// For every position p, starting at state->offset, for which a full window of
// sz_filter samples fits below state->size, s[p] is overwritten with
//   sum over k of s[p+k] * filter[k].
// On return state->offset is the first position that was not processed.
// Returns the number of output samples written.
int ARCHFUNCNAME(conv_float_inplace)(
    float * RESTRICT s, conv_buffer_state * RESTRICT state,
    const float * RESTRICT filter, const int sz_filter
    )
{
    const int first = state->offset;
    const int count = state->size;

    int pos = first;
    while (pos + sz_filter <= count)
    {
        const float * window = s + pos;
        float sum = 0.0F;
        for (int tap = 0; tap < sz_filter; ++tap)
            sum += window[tap] * filter[tap];

        // overwriting is safe: later windows start at pos+1 and never read s[pos]
        s[pos] = sum;
        ++pos;
    }

    state->offset = pos;
    return pos - first;
}
|
|
|
|
|
|
// Scalar out-of-place convolution (the variant compiled when MIPP_NO_INTRINSICS is defined).
// For every position p, starting at state->offset, for which a full window of
// sz_filter samples fits below state->size, writes
//   y[p] = sum over k of s[p+k] * filter[k].
// Note that the output is indexed by the same position p as the input.
// On return state->offset is the first position that was not processed.
// Returns the number of output samples written.
int ARCHFUNCNAME(conv_float_oop)(
    const float * RESTRICT s, conv_buffer_state * RESTRICT state,
    const float * RESTRICT filter, const int sz_filter,
    float * RESTRICT y
    )
{
    const int first = state->offset;
    const int count = state->size;

    int pos = first;
    while (pos + sz_filter <= count)
    {
        const float * window = s + pos;
        float sum = 0.0F;
        for (int tap = 0; tap < sz_filter; ++tap)
            sum += window[tap] * filter[tap];

        y[pos] = sum;
        ++pos;
    }

    state->offset = pos;
    return pos - first;
}
|
|
|
|
|
|
// Scalar out-of-place convolution of complex samples with a real-valued filter
// (the variant compiled when MIPP_NO_INTRINSICS is defined).
// For every position p, starting at state->offset, for which a full window of
// sz_filter samples fits below state->size, writes
//   y_cplx[p].i = sum over k of s_cplx[p+k].i * filter[k]
//   y_cplx[p].q = sum over k of s_cplx[p+k].q * filter[k]
// On return state->offset is the first position that was not processed.
// Returns the number of output samples written.
int ARCHFUNCNAME(conv_cplx_float_oop)(
    const complexf * RESTRICT s_cplx, conv_buffer_state * RESTRICT state,
    const float * RESTRICT filter, const int sz_filter,
    complexf * RESTRICT y_cplx
    )
{
    const int off0 = state->offset;
    const int sz_s = state->size;
    int offset;

    for ( offset = off0; offset + sz_filter <= sz_s; ++offset)
    {
        float accu_re = 0.0F;
        float accu_im = 0.0F;
        for (int k = 0; k < sz_filter; ++k)
        {
            // accumulate over all taps; plain assignment here would keep only
            // the product of the last tap
            accu_re += s_cplx[offset+k].i * filter[k];
            accu_im += s_cplx[offset+k].q * filter[k];
        }
        y_cplx[offset].i = accu_re;  // sum of real parts
        y_cplx[offset].q = accu_im;  // sum of imag parts
    }

    state->offset = offset;
    return offset - off0;
}
|
|
|
|
|
|
#elif defined(HAVE_MIPP)
|
|
|
|
|
|
// SIMD (MIPP) in-place convolution.
// For every position p, starting at state->offset, for which a full window of
// sz_filter samples fits below state->size, s[p] is overwritten with
//   sum over k of s[p+k] * filter[k].
// Preconditions:
//   - sz_filter must be a multiple of mipp::N<float>() (checked by assert)
//   - filter is read with the aligned load(), so it is assumed to be suitably
//     aligned by the caller -- NOTE(review): confirm against the callers
// On return state->offset is the first position that was not processed.
// Returns the number of output samples written.
int ARCHFUNCNAME(conv_float_inplace)(
    float * RESTRICT s, conv_buffer_state * RESTRICT state,
    const float * RESTRICT filter, const int sz_filter
    )
{
    assert( (sz_filter % mipp::N<float>()) == 0 );  // size of filter must be divisible by conv_float_simd_size()

    mipp::Reg<float> accu, rS, rH;
    const int off0 = state->offset;
    const int sz_s = state->size;
    int offset;

    for ( offset = off0; offset + sz_filter <= sz_s; ++offset)
    {
        accu.set0();
        for (int k = 0; k < sz_filter; k += mipp::N<float>())
        {
            // offset advances one float at a time, so &s[offset+k] is in general
            // not register-aligned: use the unaligned load, as conv_float_oop does
            rS.loadu(&s[offset+k]);
            rH.load(&filter[k]);
            accu = mipp::fmadd(rS, rH, accu);   // accu += rS * rH;
        }
        // overwriting is safe: later windows start at offset+1 and never read s[offset]
        s[offset] = accu.sum(); // == hadd()
    }

    state->offset = offset;
    return offset - off0;
}
|
|
|
|
|
|
// SIMD (MIPP) out-of-place convolution.
// For every position p, starting at state->offset, for which a full window of
// sz_filter samples fits below state->size, writes
//   y[p] = sum over k of s[p+k] * filter[k].
// sz_filter must be a multiple of mipp::N<float>() (checked by assert).
// The input window is read with unaligned loads, the filter with aligned loads.
// On return state->offset is the first position that was not processed.
// Returns the number of output samples written.
int ARCHFUNCNAME(conv_float_oop)(
    const float * RESTRICT s, conv_buffer_state * RESTRICT state,
    const float * RESTRICT filter, const int sz_filter,
    float * RESTRICT y
    )
{
    assert( (sz_filter % mipp::N<float>()) == 0 );  // filter length must be a whole number of SIMD registers

    const int first = state->offset;
    const int count = state->size;
    mipp::Reg<float> acc, samples, taps;

    int pos = first;
    while (pos + sz_filter <= count)
    {
        acc.set0();
        for (int k = 0; k < sz_filter; k += mipp::N<float>())
        {
            samples.loadu(&s[pos + k]);
            taps.load(&filter[k]);
            acc = mipp::fmadd(samples, taps, acc);  // acc += samples * taps
        }
        y[pos] = acc.sum();  // horizontal add over all lanes
        ++pos;
    }

    state->offset = pos;
    return pos - first;
}
|
|
|
|
|
|
// SIMD (MIPP) out-of-place convolution of complex samples with a real-valued filter.
// The complex arrays are accessed as plain float arrays through the address of
// element 0's `i` member, so all positions below are counted in floats
// (two per complex sample).
// NOTE(review): this assumes complexf is laid out as { float i; float q; } without
// padding -- confirm against pf_conv.h.
// sz_filter must be a multiple of mipp::N<float>() (checked by assert).
// On return state->offset (in complex samples) is the first position that was
// not processed. Returns the number of complex output samples written.
int ARCHFUNCNAME(conv_cplx_float_oop)(
    const complexf * RESTRICT s_cplx, conv_buffer_state * RESTRICT state,
    const float * RESTRICT filter, const int sz_filter,
    complexf * RESTRICT y_cplx
    )
{
    assert( (sz_filter % mipp::N<float>()) == 0 );  // filter length must be a whole number of SIMD registers

    const float * RESTRICT in  = &(s_cplx[0].i);
    float * RESTRICT out = &(y_cplx[0].i);

    // bounds converted from complex samples to floats
    const int first_f = 2 * state->offset;
    const int count_f = 2 * state->size;
    const int taps_f  = 2 * sz_filter;

    mipp::Regx2<float> acc2, sig2, pair2;

    int pos_f = first_f;
    while (pos_f + taps_f <= count_f)
    {
        acc2.val[0].set0();
        acc2.val[1].set0();
        for (int k = 0; k < sz_filter; k += mipp::N<float>())
        {
            mipp::Reg<float> taps;
            sig2.loadu(&in[pos_f + 2*k]);   // two registers of re/im pairs
            taps.load(&filter[k]);
            // interleave the taps with themselves so that each tap lines up with
            // both the real and the imaginary part of its sample
            pair2 = mipp::interleave<float>(taps, taps);
            acc2.val[0] = mipp::fmadd(sig2.val[0], pair2.val[0], acc2.val[0]);  // acc += sig * taps
            acc2.val[1] = mipp::fmadd(sig2.val[1], pair2.val[1], acc2.val[1]);  // acc += sig * taps
        }
        // separate the accumulated real and imaginary lanes, then reduce each
        pair2 = mipp::deinterleave(acc2);
        out[pos_f]     = pair2.val[0].sum();  // sum of real parts
        out[pos_f + 1] = pair2.val[1].sum();  // sum of imag parts
        pos_f += 2;
    }

    state->offset = pos_f / 2;
    return (pos_f - first_f) / 2;
}
|
|
|
|
#endif
|
|
|
|
|
|
// Function-pointer table describing this architecture build.
// The initializers are positional; the member they fill is determined by the
// declaration of conv_f_ptrs (not visible in this file).
static const conv_f_ptrs conv_ptrs =
{
    // architecture name as a string
    PP_TOSTRING(CONV_ARCH_POST),

    // flag: 0 when MIPP intrinsics are disabled, 1 otherwise
    // (presumably the using_mipp member -- confirm against pf_conv.h)
#ifdef MIPP_NO_INTRINSICS
    0,
#else
    1,
#endif

    ARCHFUNCNAME(id),
    ARCHFUNCNAME(conv_float_simd_size),

#if !defined(MIPP_NO_INTRINSICS) && !defined(HAVE_MIPP)
    // neither the scalar nor the MIPP kernels were compiled: no entry points
    nullptr,
    nullptr,
    nullptr,

    nullptr,
    nullptr
#else
    // float kernels
    ARCHFUNCNAME(conv_float_move_rest),
    ARCHFUNCNAME(conv_float_inplace),
    ARCHFUNCNAME(conv_float_oop),

    // complex kernels
    ARCHFUNCNAME(conv_cplx_move_rest),
    ARCHFUNCNAME(conv_cplx_float_oop)
#endif
};
|
|
|
|
|
|
// Returns the function table of this architecture build, or nullptr when the
// build offers no usable implementation.
//   - id "none"                  : table is always returned
//   - MIPP_NO_INTRINSICS defined : table is always returned (scalar kernels)
//   - HAVE_MIPP defined          : table is returned only if MIPP is in use and
//                                  the SIMD width is greater than 1
//   - otherwise                  : nullptr
const conv_f_ptrs* ARCHFUNCNAME(conv_ptrs)()
{
    DPRINT("arch pointer for '%s':\n", conv_ptrs.id);

    const bool is_generic_build = (strcmp(conv_ptrs.id, "none") == 0);
    if (is_generic_build)
        return &conv_ptrs;

#if defined(MIPP_NO_INTRINSICS)
    DPRINT("arch pointer for '%s' - BUT defined(MIPP_NO_INTRINSICS)\n", conv_ptrs.id);
    return &conv_ptrs;
#elif defined(HAVE_MIPP)
    DPRINT("arch pointer for '%s' - defined(HAVE_MIPP)\n", conv_ptrs.id);
    DPRINT("'%s': conv_ptrs.using_mipp %d\n", conv_ptrs.id, conv_ptrs.using_mipp);
    DPRINT("'%s': simd_size() %d\n", conv_ptrs.id, conv_ptrs.fp_conv_float_simd_size());

    // a SIMD width of 1 means this build has no real vector support
    const bool simd_usable = conv_ptrs.using_mipp && conv_ptrs.fp_conv_float_simd_size() > 1;
    if (simd_usable)
        return &conv_ptrs;

    DPRINT("arch pointer for '%s': HAVE_MIPP BUT using_mipp %d, float_simd_size %d\n", conv_ptrs.id, conv_ptrs.using_mipp, conv_ptrs.fp_conv_float_simd_size());
#else
    DPRINT("arch pointer for '%s': neither MIPP_NO_INTRINSICS nor HAVE_MIPP\n", conv_ptrs.id);
#endif

    DPRINT("arch pointer for '%s' => nullptr\n", conv_ptrs.id);
    return nullptr;
}
|
|
|
|
// Compile-time signature check: initializing an f_conv_ptrs variable with the
// accessor above only compiles if the function matches that type.
// NOTE(review): f_conv_ptrs is presumably the function-pointer typedef declared
// in pf_conv.h -- confirm.
// The variable is not referenced anywhere in the visible part of this file, so
// from C++17 on it is marked [[maybe_unused]].
#if defined(__cplusplus) && (__cplusplus >= 201703L)
|
|
[[maybe_unused]]
|
|
#endif
|
|
static f_conv_ptrs test_f_ptrs = ARCHFUNCNAME(conv_ptrs);
|
|
|