From 634bb608fe69f6aa5b49f0dd19e5517fe9f317e0 Mon Sep 17 00:00:00 2001 From: aikiriao Date: Sun, 16 Jun 2024 23:55:58 +0900 Subject: [PATCH] add 4-sample procedure to avx2 routine. --- libs/srla_decoder/src/srla_lpc_synthesize.c | 38 ++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/libs/srla_decoder/src/srla_lpc_synthesize.c b/libs/srla_decoder/src/srla_lpc_synthesize.c index 6561cfb..7c9ed5a 100644 --- a/libs/srla_decoder/src/srla_lpc_synthesize.c +++ b/libs/srla_decoder/src/srla_lpc_synthesize.c @@ -121,7 +121,6 @@ void SRLALPC_Synthesize( { int32_t smpl, ord; const int32_t half = 1 << (coef_rshift - 1); /* 固定小数の0.5 */ - int32_t predict; /* 引数チェック */ SRLA_ASSERT(data != NULL); @@ -178,6 +177,43 @@ void SRLALPC_Synthesize( data[smpl + i] -= (predict[i] >> coef_rshift); } } + } else if (coef_order >= 4) { + uint32_t i; + __m128i vcoef[SRLA_MAX_COEFFICIENT_ORDER]; + /* 係数をベクトル化 */ + for (i = 0; i < coef_order; i++) { + vcoef[i] = _mm_set1_epi32(coef[i]); + } + for (; smpl < num_samples - coef_order - 4; smpl += 4) { + /* 4サンプル並列に処理 */ + DECLALIGN(16) int32_t predict[4]; + __m128i vdata; + __m128i vpred = _mm_set1_epi32(half); + for (ord = 0; ord < (int32_t)coef_order - 3 - 4; ord += 4) { + const int32_t *dat = &data[smpl - coef_order + ord]; + vdata = _mm_loadu_epi32(&dat[0]); + vpred = _mm_add_epi32(vpred, _mm_mullo_epi32(vcoef[ord + 0], vdata)); + vdata = _mm_loadu_epi32(&dat[1]); + vpred = _mm_add_epi32(vpred, _mm_mullo_epi32(vcoef[ord + 1], vdata)); + vdata = _mm_loadu_epi32(&dat[2]); + vpred = _mm_add_epi32(vpred, _mm_mullo_epi32(vcoef[ord + 2], vdata)); + vdata = _mm_loadu_epi32(&dat[3]); + vpred = _mm_add_epi32(vpred, _mm_mullo_epi32(vcoef[ord + 3], vdata)); + } + for (; ord < coef_order - 3; ord++) { + vdata = _mm_loadu_epi32(&data[smpl - coef_order + ord]); + vpred = _mm_add_epi32(vpred, _mm_mullo_epi32(vcoef[ord], vdata)); + } + _mm_store_si128(&predict, vpred); + + /* ord = coef_order - 3 */ + for (i = 0; i < 4; i++) { + predict[i] += (coef[ord + 0] * data[smpl - 3 + i + 0]); + predict[i] += (coef[ord + 1] * data[smpl - 3 + i + 1]); + predict[i] += (coef[ord + 2] * data[smpl - 3 + i + 2]); + data[smpl + i] -= (predict[i] >> coef_rshift); + } + } } /* 余ったサンプル分の処理 */