about | summary | refs | log | tree | commit | diff
path: root/libopna
diff options
context:
space:
mode:
author: Takamichi Horikawa <takamichiho@gmail.com> 2017-03-31 07:15:28 +0000
committer: Takamichi Horikawa <takamichiho@gmail.com> 2017-03-31 07:15:28 +0000
commit: 5460067b61f86843a0435ebb06a6ebb8223c3dca (patch)
tree: f10ab9c06edc70c72c141e9d31035f47a776c7d1 /libopna
parent: f47eba7d7d4c6a1d9501e027b63bbab04bb7d417 (diff)
opnassg: 3 -> 4 samples per frame to ease SSE data load
Diffstat (limited to 'libopna')
-rw-r--r-- libopna/opnassg-sinc-c.c 2
-rw-r--r-- libopna/opnassg-sinc-neon.s 17
-rw-r--r-- libopna/opnassg.c 19
-rw-r--r-- libopna/opnassg.h 2
4 files changed, 20 insertions, 20 deletions
diff --git a/libopna/opnassg-sinc-c.c b/libopna/opnassg-sinc-c.c
index bf93039..5f9baee 100644
--- a/libopna/opnassg-sinc-c.c
+++ b/libopna/opnassg-sinc-c.c
@@ -6,7 +6,7 @@ void opna_ssg_sinc_calc_c(unsigned resampler_index, const int16_t *inbuf, int32_
for (int j = 0; j < OPNA_SSG_SINCTABLELEN; j++) {
unsigned sincindex = j;
if (!(resampler_index&1)) sincindex += OPNA_SSG_SINCTABLELEN;
- chsample += inbuf[(((resampler_index)>>1)+j)*3+c] * opna_ssg_sinctable[sincindex];
+ chsample += inbuf[(((resampler_index)>>1)+j)*4+c] * opna_ssg_sinctable[sincindex];
}
outbuf[c] = chsample;
}
diff --git a/libopna/opnassg-sinc-neon.s b/libopna/opnassg-sinc-neon.s
index 19dc3c6..1396562 100644
--- a/libopna/opnassg-sinc-neon.s
+++ b/libopna/opnassg-sinc-neon.s
@@ -22,8 +22,7 @@ opna_ssg_sinc_calc_neon:
@ add offset to ssg input buffer address
bic r0, #1
- add r0, r0, lsl #1
- add r1, r0
+ add r1, r0, lsl #2
@ initialize output register
vmov.i64 q12, #0
@@ -39,12 +38,12 @@ opna_ssg_sinc_calc_neon:
blo .end
@ load SSG channel data
- vld3.16 {d0-d2}, [r1]!
- vld3.16 {d3-d5}, [r1]!
- vld3.16 {d6-d8}, [r1]!
- vld3.16 {d9-d11}, [r1]!
- vld3.16 {d12-d14}, [r1]!
- vld3.16 {d15-d17}, [r1]!
+ vld4.16 {d0-d3}, [r1]!
+ vld4.16 {d3-d6}, [r1]!
+ vld4.16 {d6-d9}, [r1]!
+ vld4.16 {d9-d12}, [r1]!
+ vld4.16 {d12-d15}, [r1]!
+ vld4.16 {d15-d18}, [r1]!
@ load sinc data
vld1.16 {d18-d21}, [r3]!
@@ -73,7 +72,7 @@ opna_ssg_sinc_calc_neon:
.end:
@ 8 samples left
- vld3.16 {d0-d2}, [r1]!
+ vld4.16 {d0-d3}, [r1]!
vld3.16 {d3-d5}, [r1]
vld1.16 {d18-d19}, [r3]
diff --git a/libopna/opnassg.c b/libopna/opnassg.c
index 4a12f76..86d9c9a 100644
--- a/libopna/opnassg.c
+++ b/libopna/opnassg.c
@@ -208,6 +208,7 @@ int opna_ssg_channel_level(const struct opna_ssg *ssg, int ch) {
#define COEFFSH 14
// 3 samples per frame
+// output buf: 0 1 2 x 0 1 2 x ...
void opna_ssg_generate_raw(struct opna_ssg *ssg, int16_t *buf, int samples) {
for (int i = 0; i < samples; i++) {
if (((++ssg->noise_counter) >> 1) >= opna_ssg_noise_period(ssg)) {
@@ -234,7 +235,7 @@ void opna_ssg_generate_raw(struct opna_ssg *ssg, int16_t *buf, int samples) {
//int16_t out = 0;
for (int ch = 0; ch < 3; ch++) {
- buf[i*3+ch] = 0;
+ buf[i*4+ch] = 0;
if (++ssg->ch[ch].tone_counter >= opna_ssg_tone_period(ssg, ch)) {
ssg->ch[ch].tone_counter = 0;
ssg->ch[ch].out = !ssg->ch[ch].out;
@@ -252,13 +253,13 @@ void opna_ssg_generate_raw(struct opna_ssg *ssg, int16_t *buf, int samples) {
previntmp *= COEFF;
ssg->prevout[ch] = previntmp - ssg->previn[ch] + ((((int64_t)COEFF)*ssg->prevout[ch]) >> COEFFSH);
ssg->previn[ch] = previntmp;
- buf[i*3+ch] = ssg->prevout[ch] >> COEFFSH;
- //buf[i*3+ch] = voltable[level]/2;
+ buf[i*4+ch] = ssg->prevout[ch] >> COEFFSH;
+ //buf[i*4+ch] = voltable[level]/2;
#else
if (!opna_ssg_tone_silent(ssg, ch)) {
int level = opna_ssg_channel_level(ssg, ch);
//out += (opna_ssg_tone_out(ssg, ch) ? voltable[level] : -voltable[level]) / 2;
- buf[i*3+ch] = (opna_ssg_tone_out(ssg, ch) ? voltable[level] : -voltable[level]) / 4;
+ buf[i*4+ch] = (opna_ssg_tone_out(ssg, ch) ? voltable[level] : -voltable[level]) / 4;
}
#endif
@@ -288,18 +289,18 @@ void opna_ssg_mix_55466(
for (int i = 0; i < samples; i++) {
{
int ssg_samples = ((resampler->index + 9)>>1) - ((resampler->index)>>1);
- int16_t ssgbuf[15];
+ int16_t ssgbuf[20];
opna_ssg_generate_raw(ssg, ssgbuf, ssg_samples);
for (int j = 0; j < ssg_samples; j++) {
- resampler->buf[BUFINDEX(j)*3+0] = ssgbuf[j*3+0];
- resampler->buf[BUFINDEX(j)*3+1] = ssgbuf[j*3+1];
- resampler->buf[BUFINDEX(j)*3+2] = ssgbuf[j*3+2];
+ resampler->buf[BUFINDEX(j)*4+0] = ssgbuf[j*4+0];
+ resampler->buf[BUFINDEX(j)*4+1] = ssgbuf[j*4+1];
+ resampler->buf[BUFINDEX(j)*4+2] = ssgbuf[j*4+2];
}
resampler->index += 9;
}
int32_t sample = 0;
resampler->index &= (1u<<(OPNA_SSG_SINCTABLEBIT+1))-1;
- memcpy(resampler->buf + OPNA_SSG_SINCTABLELEN*3, resampler->buf, OPNA_SSG_SINCTABLELEN*3*sizeof(*resampler->buf));
+ memcpy(resampler->buf + OPNA_SSG_SINCTABLELEN*4, resampler->buf, OPNA_SSG_SINCTABLELEN*4*sizeof(*resampler->buf));
int32_t outbuf[3];
opna_ssg_sinc_calc_func(resampler->index, resampler->buf, outbuf);
for (int ch = 0; ch < 3; ch++) {
diff --git a/libopna/opnassg.h b/libopna/opnassg.h
index 231db4d..223d542 100644
--- a/libopna/opnassg.h
+++ b/libopna/opnassg.h
@@ -33,7 +33,7 @@ struct opna_ssg {
};
struct opna_ssg_resampler {
- int16_t buf[OPNA_SSG_SINCTABLELEN*3 * 2];
+ int16_t buf[OPNA_SSG_SINCTABLELEN*4 * 2];
unsigned index;
};