Skip to content

Commit

Permalink
Add 4-sample procedure to AVX2 routine.
Browse files Browse the repository at this point in the history
  • Loading branch information
aikiriao committed Jun 16, 2024
1 parent e28cc54 commit 634bb60
Showing 1 changed file with 37 additions and 1 deletion.
38 changes: 37 additions & 1 deletion libs/srla_decoder/src/srla_lpc_synthesize.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ void SRLALPC_Synthesize(
{
int32_t smpl, ord;
const int32_t half = 1 << (coef_rshift - 1); /* 固定小数の0.5 */
int32_t predict;

/* 引数チェック */
SRLA_ASSERT(data != NULL);
Expand Down Expand Up @@ -178,6 +177,43 @@ void SRLALPC_Synthesize(
data[smpl + i] -= (predict[i] >> coef_rshift);
}
}
} else if (coef_order >= 4) {
uint32_t i;
__m128i vcoef[SRLA_MAX_COEFFICIENT_ORDER];
/* 係数をベクトル化 */
for (i = 0; i < coef_order; i++) {
vcoef[i] = _mm_set1_epi32(coef[i]);
}
for (; smpl < num_samples - coef_order - 4; smpl += 4) {
/* 4サンプル並列に処理 */
DECLALIGN(16) int32_t predict[4];
__m128i vdata;
__m128i vpred = _mm_set1_epi32(half);
for (ord = 0; ord < (int32_t)coef_order - 3 - 4; ord += 4) {
const int32_t *dat = &data[smpl - coef_order + ord];
vdata = _mm_loadu_epi32(&dat[0]);
vpred = _mm_add_epi32(vpred, _mm_mullo_epi32(vcoef[ord + 0], vdata));
vdata = _mm_loadu_epi32(&dat[1]);
vpred = _mm_add_epi32(vpred, _mm_mullo_epi32(vcoef[ord + 1], vdata));
vdata = _mm_loadu_epi32(&dat[2]);
vpred = _mm_add_epi32(vpred, _mm_mullo_epi32(vcoef[ord + 2], vdata));
vdata = _mm_loadu_epi32(&dat[3]);
vpred = _mm_add_epi32(vpred, _mm_mullo_epi32(vcoef[ord + 3], vdata));
}
for (; ord < coef_order - 3; ord++) {
vdata = _mm_loadu_epi32(&data[smpl - coef_order + ord]);
vpred = _mm_add_epi32(vpred, _mm_mullo_epi32(vcoef[ord], vdata));
}
_mm_store_si128(&predict, vpred);

/* ord = coef_order - 3 */
for (i = 0; i < 4; i++) {
predict[i] += (coef[ord + 0] * data[smpl - 3 + i + 0]);
predict[i] += (coef[ord + 1] * data[smpl - 3 + i + 1]);
predict[i] += (coef[ord + 2] * data[smpl - 3 + i + 2]);
data[smpl + i] -= (predict[i] >> coef_rshift);
}
}
}

/* 余ったサンプル分の処理 */
Expand Down

0 comments on commit 634bb60

Please sign in to comment.