From 5460067b61f86843a0435ebb06a6ebb8223c3dca Mon Sep 17 00:00:00 2001 From: Takamichi Horikawa Date: Fri, 31 Mar 2017 07:15:28 +0000 Subject: opnassg: 3 -> 4 samples per frame to ease SSE data load --- libopna/opnassg-sinc-c.c | 2 +- libopna/opnassg-sinc-neon.s | 17 ++++++++--------- libopna/opnassg.c | 19 ++++++++++--------- libopna/opnassg.h | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/libopna/opnassg-sinc-c.c b/libopna/opnassg-sinc-c.c index bf93039..5f9baee 100644 --- a/libopna/opnassg-sinc-c.c +++ b/libopna/opnassg-sinc-c.c @@ -6,7 +6,7 @@ void opna_ssg_sinc_calc_c(unsigned resampler_index, const int16_t *inbuf, int32_ for (int j = 0; j < OPNA_SSG_SINCTABLELEN; j++) { unsigned sincindex = j; if (!(resampler_index&1)) sincindex += OPNA_SSG_SINCTABLELEN; - chsample += inbuf[(((resampler_index)>>1)+j)*3+c] * opna_ssg_sinctable[sincindex]; + chsample += inbuf[(((resampler_index)>>1)+j)*4+c] * opna_ssg_sinctable[sincindex]; } outbuf[c] = chsample; } diff --git a/libopna/opnassg-sinc-neon.s b/libopna/opnassg-sinc-neon.s index 19dc3c6..1396562 100644 --- a/libopna/opnassg-sinc-neon.s +++ b/libopna/opnassg-sinc-neon.s @@ -22,8 +22,7 @@ opna_ssg_sinc_calc_neon: @ add offset to ssg input buffer address bic r0, #1 - add r0, r0, lsl #1 - add r1, r0 + add r1, r0, lsl #2 @ initialize output register vmov.i64 q12, #0 @@ -39,12 +38,12 @@ opna_ssg_sinc_calc_neon: blo .end @ load SSG channel data - vld3.16 {d0-d2}, [r1]! - vld3.16 {d3-d5}, [r1]! - vld3.16 {d6-d8}, [r1]! - vld3.16 {d9-d11}, [r1]! - vld3.16 {d12-d14}, [r1]! - vld3.16 {d15-d17}, [r1]! + vld4.16 {d0-d3}, [r1]! + vld4.16 {d3-d6}, [r1]! + vld4.16 {d6-d9}, [r1]! + vld4.16 {d9-d12}, [r1]! + vld4.16 {d12-d15}, [r1]! + vld4.16 {d15-d18}, [r1]! @ load sinc data vld1.16 {d18-d21}, [r3]! @@ -73,7 +72,7 @@ opna_ssg_sinc_calc_neon: .end: @ 8 samples left - vld3.16 {d0-d2}, [r1]! + vld4.16 {d0-d3}, [r1]! vld3.16 {d3-d5}, [r1] vld1.16 {d18-d19}, [r3] diff --git a/libopna/opnassg.c b/libopna/opnassg.c index 4a12f76..86d9c9a 100644 --- a/libopna/opnassg.c +++ b/libopna/opnassg.c @@ -208,6 +208,7 @@ int opna_ssg_channel_level(const struct opna_ssg *ssg, int ch) { #define COEFFSH 14 // 3 samples per frame +// output buf: 0 1 2 x 0 1 2 x ... void opna_ssg_generate_raw(struct opna_ssg *ssg, int16_t *buf, int samples) { for (int i = 0; i < samples; i++) { if (((++ssg->noise_counter) >> 1) >= opna_ssg_noise_period(ssg)) { @@ -234,7 +235,7 @@ void opna_ssg_generate_raw(struct opna_ssg *ssg, int16_t *buf, int samples) { //int16_t out = 0; for (int ch = 0; ch < 3; ch++) { - buf[i*3+ch] = 0; + buf[i*4+ch] = 0; if (++ssg->ch[ch].tone_counter >= opna_ssg_tone_period(ssg, ch)) { ssg->ch[ch].tone_counter = 0; ssg->ch[ch].out = !ssg->ch[ch].out; @@ -252,13 +253,13 @@ void opna_ssg_generate_raw(struct opna_ssg *ssg, int16_t *buf, int samples) { previntmp *= COEFF; ssg->prevout[ch] = previntmp - ssg->previn[ch] + ((((int64_t)COEFF)*ssg->prevout[ch]) >> COEFFSH); ssg->previn[ch] = previntmp; - buf[i*3+ch] = ssg->prevout[ch] >> COEFFSH; - //buf[i*3+ch] = voltable[level]/2; + buf[i*4+ch] = ssg->prevout[ch] >> COEFFSH; + //buf[i*4+ch] = voltable[level]/2; #else if (!opna_ssg_tone_silent(ssg, ch)) { int level = opna_ssg_channel_level(ssg, ch); //out += (opna_ssg_tone_out(ssg, ch) ? voltable[level] : -voltable[level]) / 2; - buf[i*3+ch] = (opna_ssg_tone_out(ssg, ch) ? voltable[level] : -voltable[level]) / 4; + buf[i*4+ch] = (opna_ssg_tone_out(ssg, ch) ? voltable[level] : -voltable[level]) / 4; } #endif @@ -288,18 +289,18 @@ void opna_ssg_mix_55466( for (int i = 0; i < samples; i++) { { int ssg_samples = ((resampler->index + 9)>>1) - ((resampler->index)>>1); - int16_t ssgbuf[15]; + int16_t ssgbuf[20]; opna_ssg_generate_raw(ssg, ssgbuf, ssg_samples); for (int j = 0; j < ssg_samples; j++) { - resampler->buf[BUFINDEX(j)*3+0] = ssgbuf[j*3+0]; - resampler->buf[BUFINDEX(j)*3+1] = ssgbuf[j*3+1]; - resampler->buf[BUFINDEX(j)*3+2] = ssgbuf[j*3+2]; + resampler->buf[BUFINDEX(j)*4+0] = ssgbuf[j*4+0]; + resampler->buf[BUFINDEX(j)*4+1] = ssgbuf[j*4+1]; + resampler->buf[BUFINDEX(j)*4+2] = ssgbuf[j*4+2]; } resampler->index += 9; } int32_t sample = 0; resampler->index &= (1u<<(OPNA_SSG_SINCTABLEBIT+1))-1; - memcpy(resampler->buf + OPNA_SSG_SINCTABLELEN*3, resampler->buf, OPNA_SSG_SINCTABLELEN*3*sizeof(*resampler->buf)); + memcpy(resampler->buf + OPNA_SSG_SINCTABLELEN*4, resampler->buf, OPNA_SSG_SINCTABLELEN*4*sizeof(*resampler->buf)); int32_t outbuf[3]; opna_ssg_sinc_calc_func(resampler->index, resampler->buf, outbuf); for (int ch = 0; ch < 3; ch++) { diff --git a/libopna/opnassg.h b/libopna/opnassg.h index 231db4d..223d542 100644 --- a/libopna/opnassg.h +++ b/libopna/opnassg.h @@ -33,7 +33,7 @@ struct opna_ssg { }; struct opna_ssg_resampler { - int16_t buf[OPNA_SSG_SINCTABLELEN*3 * 2]; + int16_t buf[OPNA_SSG_SINCTABLELEN*4 * 2]; unsigned index; }; -- cgit v1.2.3