diff options
Diffstat (limited to 'libopna')
| -rw-r--r-- | libopna/opna.c | 5 | ||||
| -rw-r--r-- | libopna/opnassg-sinc-c.c | 13 | ||||
| -rw-r--r-- | libopna/opnassg-sinc-neon.s | 118 | ||||
| -rw-r--r-- | libopna/opnassg.c | 81 | ||||
| -rw-r--r-- | libopna/opnassg.h | 13 | 
5 files changed, 210 insertions, 20 deletions
| diff --git a/libopna/opna.c b/libopna/opna.c index 1b0aa6c..567c913 100644 --- a/libopna/opna.c +++ b/libopna/opna.c @@ -37,8 +37,9 @@ void opna_mix_oscillo(struct opna *opna, int16_t *buf, unsigned samples, struct      }    }    unsigned offset = OSCILLO_SAMPLE_COUNT - samples; -  opna_fm_mix(&opna->fm, buf, samples, &oscillo[0], offset); -  opna_ssg_mix_55466(&opna->ssg, &opna->resampler, buf, samples, &oscillo[6], offset); +  opna_fm_mix(&opna->fm, buf, samples, oscillo ? &oscillo[0] : 0, offset); +  opna_ssg_mix_55466(&opna->ssg, &opna->resampler, buf, samples, +                     oscillo ? &oscillo[6] : 0, offset);    opna_drum_mix(&opna->drum, buf, samples);    opna_adpcm_mix(&opna->adpcm, buf, samples);  } diff --git a/libopna/opnassg-sinc-c.c b/libopna/opnassg-sinc-c.c new file mode 100644 index 0000000..bf93039 --- /dev/null +++ b/libopna/opnassg-sinc-c.c @@ -0,0 +1,13 @@ +#include "libopna/opnassg.h" + +void opna_ssg_sinc_calc_c(unsigned resampler_index, const int16_t *inbuf, int32_t *outbuf) { +  for (int c = 0; c < 3; c++) { +    int32_t chsample = 0; +    for (int j = 0; j < OPNA_SSG_SINCTABLELEN; j++) { +      unsigned sincindex = j; +      if (!(resampler_index&1)) sincindex += OPNA_SSG_SINCTABLELEN; +      chsample += inbuf[(((resampler_index)>>1)+j)*3+c] * opna_ssg_sinctable[sincindex]; +    } +    outbuf[c] = chsample; +  } +} diff --git a/libopna/opnassg-sinc-neon.s b/libopna/opnassg-sinc-neon.s new file mode 100644 index 0000000..19dc3c6 --- /dev/null +++ b/libopna/opnassg-sinc-neon.s @@ -0,0 +1,118 @@ +@ neon register map: +@  0,  3,  6,  9, 12, 15 ssg1 +@  1,  4,  7, 10, 13, 16 ssg2 +@  2,  5,  8, 11, 14, 17 ssg3 +@ 18, 19, 20, 21, 22, 23 sinc +@ 24-25 (q12): ssg1 out +@ 26-27 (q13): ssg2 out +@ 28-29 (q14): ssg3 out + +.global opna_ssg_sinc_calc_neon +@ r0: resampler_index +@ r1: const int16_t *inbuf +@ r2: int32_t *outbuf + +opna_ssg_sinc_calc_neon: +  push {r4-r10,lr} +@ sinc table to r3 +  movw r3, #:lower16:opna_ssg_sinctable +  movt r3, #:upper16:opna_ssg_sinctable +  tst r0, #1 +  addeq r3, #256 + +@ add offset to ssg input buffer address +  bic r0, #1 +  add r0, r0, lsl #1 +  add r1, r0 + +@ initialize output register +  vmov.i64 q12, #0 +  vmov.i64 q13, #0 +  vmov.i64 q14, #0 + +@ sinc sample length +  mov r0, #128 + +.loop: +@ +  subs r0, #24 +  blo .end + +@ load SSG channel data +  vld3.16 {d0-d2}, [r1]! +  vld3.16 {d3-d5}, [r1]! +  vld3.16 {d6-d8}, [r1]! +  vld3.16 {d9-d11}, [r1]! +  vld3.16 {d12-d14}, [r1]! +  vld3.16 {d15-d17}, [r1]! + +@ load sinc data +  vld1.16 {d18-d21}, [r3]! +  vld1.16 {d22-d23}, [r3]! + +@ multiply and accumulate +  vmlal.s16 q12, d0,  d18 +  vmlal.s16 q13, d1,  d18 +  vmlal.s16 q14, d2,  d18 +  vmlal.s16 q12, d3,  d19 +  vmlal.s16 q13, d4,  d19 +  vmlal.s16 q14, d5,  d19 +  vmlal.s16 q12, d6,  d20 +  vmlal.s16 q13, d7,  d20 +  vmlal.s16 q14, d8,  d20 +  vmlal.s16 q12, d9,  d21 +  vmlal.s16 q13, d10, d21 +  vmlal.s16 q14, d11, d21 +  vmlal.s16 q12, d12, d22 +  vmlal.s16 q13, d13, d22 +  vmlal.s16 q14, d14, d22 +  vmlal.s16 q12, d15, d23 +  vmlal.s16 q13, d16, d23 +  vmlal.s16 q14, d17, d23 +  b .loop + +.end: +@ 8 samples left +  vld3.16 {d0-d2}, [r1]! +  vld3.16 {d3-d5}, [r1] +  vld1.16 {d18-d19}, [r3] + +  vmlal.s16 q12, d0, d18 +  vmlal.s16 q13, d1, d18 +  vmlal.s16 q14, d2, d18 +  vmlal.s16 q12, d3, d19 +  vmlal.s16 q13, d4, d19 +  vmlal.s16 q14, d5, d19 + +@ extract data from result SIMD registers + +  vmov.32 r0,  d24[0] +  vmov.32 r1,  d24[1] +  vmov.32 r3,  d25[0] +  vmov.32 r12, d25[1] + +  vmov.32 r14, d26[0] +  vmov.32 r4,  d26[1] +  vmov.32 r5,  d27[0] +  vmov.32 r6,  d27[1] + +  vmov.32 r7,  d28[0] +  vmov.32 r8,  d28[1] +  vmov.32 r9,  d29[0] +  vmov.32 r10, d29[1] + +  add r0, r1 +  add r3, r12 + +  add r14, r4 +  add r5, r6 + +  add r7, r8 +  add r9, r10 + +  add r4, r0, r3 +  add r5, r14 +  add r6, r7, r9 + +  stmia r2, {r4-r6} +  pop {r4-r10,pc} diff --git a/libopna/opnassg.c b/libopna/opnassg.c index ec03437..4a12f76 100644 --- a/libopna/opnassg.c +++ b/libopna/opnassg.c @@ -1,5 +1,6 @@  #include "opnassg.h"  #include "oscillo/oscillo.h" +#include <string.h>  /*  static const float voltable[32] = {    0.0f,           0.0f,           0x1.ae89f9p-8f, 0x1.000000p-7f, @@ -27,9 +28,6 @@ static const int16_t voltable[32] = {     6494,  7723,  9185, 10922  }; -#define SINCTABLEBIT 7 -#define SINCTABLELEN (1<<SINCTABLEBIT) -  // GNU Octave  // Fc = 7987200  // Ff = Fc/144 @@ -39,7 +37,8 @@ static const int16_t voltable[32] = {  // B = 128 * O / 2  // FILTER=sinc(linspace(-127.5,127.5,256)*2/9/2).*rotdim(kaiser(256,B))  // FILTERI=round(FILTER(1:128).*32768) -static const int16_t sinctable[SINCTABLELEN] = { +#if 0 +const int16_t opna_ssg_sinctable[OPNA_SSG_SINCTABLELEN*2] = {        1,     0,    -1,    -2,    -3,    -5,    -6,    -6,       -6,    -5,    -2,     2,     7,    11,    16,    19,       20,    18,    13,     5,    -5,   -17,   -29,   -38, @@ -56,8 +55,61 @@ static const int16_t sinctable[SINCTABLELEN] = {     3306,  3714,  3690,  3185,  2206,   815,  -868, -2673,    -4391, -5798, -6670, -6809, -6067, -4359, -1681,  1886,     6178, 10957, 15928, 20765, 25133, 28724, 31275, 32600, +  32600, 31275, 28724, 25133, 20765, 15928, 10957,  6178, +   1886, -1681, -4359, -6067, -6809, -6670, -5798, -4391, +  -2673,  -868,   815,  2206,  3185,  3690,  3714,  3306, +   2557,  1585,   523,  -498, -1365, -1994, -2333, -2369, +  -2125, -1655, -1032,  -343,   328,   902,  1322,  1552, +   1580,  1421,  1108,   692,   230,  -220,  -607,  -889, +  -1043, -1062,  -954,  -744,  -464,  -154,   147,   405, +    593,   694,   705,   632,   491,   306,   101,   -96, +   -264,  -385,  -450,  -455,  -406,  -315,  -195,   -64, +     61,   166,   241,   280,   282,   251,   193,   119, +     39,   -37,  -100,  -144,  -166,  -166,  -146,  -112, +    -68,   -22,    21,    56,    80,    91,    90,    79, +     60,    36,    12,   -11,   -29,   -40,   -45,   -44, +    -38,   -29,   -17,    -5,     5,    13,    18,    20, +     19,    16,    11,     7,     2,    -2,    -5,    -6, +     -6,    -6,    -5,    -3,    -2,    -1,     0,     1, +}; +#endif +const int16_t opna_ssg_sinctable[OPNA_SSG_SINCTABLELEN*2] = { +      1,    -1,    -3,    -6,    -6,    -2,     7,    16, +     20,    13,    -5,   -29,   -44,   -40,   -11,    36, +     79,    91,    56,   -22,  -112,  -166,  -144,   -37, +    119,   251,   280,   166,   -64,  -315,  -455,  -385, +    -96,   306,   632,   694,   405,  -154,  -744, -1062, +   -889,  -220,   692,  1421,  1552,   902,  -343, -1655, +  -2369, -1994,  -498,  1585,  3306,  3690,  2206,  -868, +  -4391, -6670, -6067, -1681,  6178, 15928, 25133, 31275, +  32600, 28724, 20765, 10957,  1886, -4359, -6809, -5798, +  -2673,   815,  3185,  3714,  2557,   523, -1365, -2333, +  -2125, -1032,   328,  1322,  1580,  1108,   230,  -607, +  -1043,  -954,  -464,   147,   593,   705,   491,   101, +   -264,  -450,  -406,  -195,    61,   241,   282,   193, +     39,  -100,  -166,  -146,   -68,    21,    80,    90, +     60,    12,   -29,   -45,   -38,   -17,     5,    18, +     19,    11,     2,    -5,    -6,    -5,    -2,     0, +      0,    -2,    -5,    -6,    -5,     2,    11,    19, +     18,     5,   -17,   -38,   -45,   -29,    12,    60, +     90,    80,    21,   -68,  -146,  -166,  -100,    39, +    193,   282,   241,    61,  -195,  -406,  -450,  -264, +    101,   491,   705,   593,   147,  -464,  -954, -1043, +   -607,   230,  1108,  1580,  1322,   328, -1032, -2125, +  -2333, -1365,   523,  2557,  3714,  3185,   815, -2673, +  -5798, -6809, -4359,  1886, 10957, 20765, 28724, 32600, +  31275, 25133, 15928,  6178, -1681, -6067, -6670, -4391, +   -868,  2206,  3690,  3306,  1585,  -498, -1994, -2369, +  -1655,  -343,   902,  1552,  1421,   692,  -220,  -889, +  -1062,  -744,  -154,   405,   694,   632,   306,   -96, +   -385,  -455,  -315,   -64,   166,   280,   251,   119, +    -37,  -144,  -166,  -112,   -22,    56,    91,    79, +     36,   -11,   -40,   -44,   -29,    -5,    13,    20, +     16,     7,    -2,    -6,    -6,    -3,    -1,     1,  }; +opna_ssg_sinc_calc_func_type opna_ssg_sinc_calc_func = opna_ssg_sinc_calc_c; +  void opna_ssg_reset(struct opna_ssg *ssg) {    for (int i = 0; i < 3; i++) {      ssg->ch[i].tone_counter = 0; @@ -78,7 +130,7 @@ void opna_ssg_reset(struct opna_ssg *ssg) {  }  void opna_ssg_resampler_reset(struct opna_ssg_resampler *resampler) { -  for (int i = 0; i < SINCTABLELEN; i++) { +  for (int i = 0; i < OPNA_SSG_SINCTABLELEN; i++) {      resampler->buf[i] = 0;    }    resampler->index = 0; @@ -215,7 +267,7 @@ void opna_ssg_generate_raw(struct opna_ssg *ssg, int16_t *buf, int samples) {    }  } -#define BUFINDEX(n) ((((resampler->index)>>1)+n)&(SINCTABLELEN-1)) +#define BUFINDEX(n) ((((resampler->index)>>1)+n)&(OPNA_SSG_SINCTABLELEN-1))  void opna_ssg_mix_55466(    struct opna_ssg *ssg, struct opna_ssg_resampler *resampler, @@ -246,18 +298,13 @@ void opna_ssg_mix_55466(        resampler->index += 9;      }      int32_t sample = 0; +    resampler->index &= (1u<<(OPNA_SSG_SINCTABLEBIT+1))-1; +    memcpy(resampler->buf + OPNA_SSG_SINCTABLELEN*3, resampler->buf, OPNA_SSG_SINCTABLELEN*3*sizeof(*resampler->buf)); +    int32_t outbuf[3]; +    opna_ssg_sinc_calc_func(resampler->index, resampler->buf, outbuf);      for (int ch = 0; ch < 3; ch++) { -      int32_t chsample = 0; -      for (int j = 0; j < SINCTABLELEN; j++) { -        unsigned sincindex = j*2; -        if (!(resampler->index&1)) sincindex++; -        bool sincsign = sincindex & (1<<(SINCTABLEBIT)); -        unsigned sincmask = ((1<<(SINCTABLEBIT))-1); -        sincindex = (sincindex & sincmask) ^ (sincsign ? sincmask : 0); -        chsample += (resampler->buf[BUFINDEX(j)*3+ch] * sinctable[sincindex])>>2; -      } -      if (oscillo) oscillo[ch].buf[offset+i] = chsample >> 13; -      if (!(ssg->mask & (1<<ch))) sample += chsample; +      if (oscillo) oscillo[ch].buf[offset+i] = outbuf[ch] >> 15; +      if (!(ssg->mask & (1<<ch))) sample += outbuf[ch] >> 2;      }      sample >>= 16;      sample *= 13000; diff --git a/libopna/opnassg.h b/libopna/opnassg.h index 0321163..231db4d 100644 --- a/libopna/opnassg.h +++ b/libopna/opnassg.h @@ -8,6 +8,9 @@  extern "C" {  #endif +#define OPNA_SSG_SINCTABLEBIT 7 +#define OPNA_SSG_SINCTABLELEN (1<<OPNA_SSG_SINCTABLEBIT) +  struct opna_ssg_ch {    uint16_t tone_counter;    bool out; @@ -30,7 +33,7 @@ struct opna_ssg {  };  struct opna_ssg_resampler { -  int16_t buf[(1<<7)*3]; +  int16_t buf[OPNA_SSG_SINCTABLELEN*3 * 2];    unsigned index;  }; @@ -58,6 +61,14 @@ unsigned opna_ssg_readreg(const struct opna_ssg *ssg, unsigned reg);  int opna_ssg_channel_level(const struct opna_ssg *ssg, int ch);  unsigned opna_ssg_tone_period(const struct opna_ssg *ssg, int ch); +typedef void (*opna_ssg_sinc_calc_func_type)(unsigned resampler_index, +                                             const int16_t *inbuf, int32_t *outbuf); +extern opna_ssg_sinc_calc_func_type opna_ssg_sinc_calc_func; +void opna_ssg_sinc_calc_c(unsigned resampler_index, +                          const int16_t *inbuf, int32_t *outbuf); + +extern const int16_t opna_ssg_sinctable[OPNA_SSG_SINCTABLELEN*2]; +  #ifdef __cplusplus  }  #endif | 
