diff options
| -rw-r--r-- | curses/Makefile.am | 1 | ||||
| -rw-r--r-- | fmdsp/fmdsp-vramlookup-c.c | 14 | ||||
| -rw-r--r-- | fmdsp/fmdsp-vramlookup-neon.s | 83 | ||||
| -rw-r--r-- | fmdsp/fmdsp.c | 14 | ||||
| -rw-r--r-- | fmdsp/fmdsp.h | 10 | ||||
| -rw-r--r-- | gtk/Makefile.am | 18 | ||||
| -rw-r--r-- | gtk/configure.ac | 7 | ||||
| -rw-r--r-- | gtk/main.c | 22 | ||||
| -rw-r--r-- | libopna/opna.c | 5 | ||||
| -rw-r--r-- | libopna/opnassg-sinc-c.c | 13 | ||||
| -rw-r--r-- | libopna/opnassg-sinc-neon.s | 118 | ||||
| -rw-r--r-- | libopna/opnassg.c | 81 | ||||
| -rw-r--r-- | libopna/opnassg.h | 13 | 
13 files changed, 359 insertions, 40 deletions
| diff --git a/curses/Makefile.am b/curses/Makefile.am index 586f76d..d4c7585 100644 --- a/curses/Makefile.am +++ b/curses/Makefile.am @@ -9,6 +9,7 @@ LIBOPNA_SOURCES=../libopna/opnaadpcm.c \                  ../libopna/opna.c  FMDRIVER_SOURCES=../fmdriver/fmdriver_fmp.c \ +                 ../fmdriver/fmdriver_common.c \                   ../fmdriver/ppz8.c  fmpc_SOURCES=main.c \               $(LIBOPNA_SOURCES) \ diff --git a/fmdsp/fmdsp-vramlookup-c.c b/fmdsp/fmdsp-vramlookup-c.c new file mode 100644 index 0000000..f900c8d --- /dev/null +++ b/fmdsp/fmdsp-vramlookup-c.c @@ -0,0 +1,14 @@ +#include "fmdsp/fmdsp.h" + +void fmdsp_vramlookup_c(uint8_t *vram32, const uint8_t *vram, const uint8_t *palette, int stride) { +  for (int y = 0; y < PC98_H; y++) { +    for (int x = 0; x < PC98_W; x++) { +      uint8_t r = palette[vram[y*PC98_W+x]*3+0]; +      uint8_t g = palette[vram[y*PC98_W+x]*3+1]; +      uint8_t b = palette[vram[y*PC98_W+x]*3+2]; +      uint32_t data = (((uint32_t)r)<<16) | (((uint32_t)g)<<8) | ((uint32_t)b); +      uint32_t *row = (uint32_t *)(vram32 + y*stride); +      row[x] = data; +    } +  } +} diff --git a/fmdsp/fmdsp-vramlookup-neon.s b/fmdsp/fmdsp-vramlookup-neon.s new file mode 100644 index 0000000..3cfb957 --- /dev/null +++ b/fmdsp/fmdsp-vramlookup-neon.s @@ -0,0 +1,83 @@ +@ neon register map +@  0,  3,  6,  9, 12, 15, 18, 21  b +@  1,  4,  7, 10, 13, 16, 19, 22  g +@  2,  5,  8, 11, 14, 17, 20, 23  r + +@ 16, 17, 18, 19, 20, 21, 22, 23  vram + +@ 26, 27 r palette +@ 28, 29 g palette +@ 30, 31 b palette + +.global fmdsp_vramlookup_neon + +@ r0: uint8_t *vram32 +@   4 bytes aligned +@   b, g, r, 0, +@ r1: const uint8_t *vram +@ r2: const uint8_t *palette +@   r0, g0, b0, r1, g1, b1, ... +@ r3: int stride +fmdsp_vramlookup_neon: +  push {lr} +@ load palette +  vld3.8 {d26, d28, d30}, [r2]! +  vld1.8 {d27}, [r2]! +  vld1.8 {d29}, [r2]! +  vld1.8 {d31}, [r2]! 
+ +  mov r14, #400 +.loopcol: +  mov r2, r0 +  mov r12, #10 +.looprow: +@ row address + +@ load vram +  vld1.8 {d16-d19}, [r1]! +  vld1.8 {d20-d23}, [r1]! + +@ lookup +  vtbl.8 d0,  {d30-d31}, d16 +  vtbl.8 d1,  {d28-d29}, d16 +  vtbl.8 d2,  {d26-d27}, d16 +  vtbl.8 d3,  {d30-d31}, d17 +  vtbl.8 d4,  {d28-d29}, d17 +  vtbl.8 d5,  {d26-d27}, d17 +  vtbl.8 d6,  {d30-d31}, d18 +  vtbl.8 d7,  {d28-d29}, d18 +  vtbl.8 d8,  {d26-d27}, d18 +  vtbl.8 d9,  {d30-d31}, d19 +  vtbl.8 d10, {d28-d29}, d19 +  vtbl.8 d11, {d26-d27}, d19 +  vtbl.8 d12, {d30-d31}, d20 +  vtbl.8 d13, {d28-d29}, d20 +  vtbl.8 d14, {d26-d27}, d20 +  vtbl.8 d15, {d30-d31}, d21 +  vtbl.8 d16, {d28-d29}, d21 +  vtbl.8 d17, {d26-d27}, d21 +  vtbl.8 d18, {d30-d31}, d22 +  vtbl.8 d19, {d28-d29}, d22 +  vtbl.8 d20, {d26-d27}, d22 +  vtbl.8 d21, {d30-d31}, d23 +  vtbl.8 d22, {d28-d29}, d23 +  vtbl.8 d23, {d26-d27}, d23 + +@ store vram32 +  vst4.8 {d0-d3},   [r2]! +  vst4.8 {d3-d6},   [r2]! +  vst4.8 {d6-d9},   [r2]! +  vst4.8 {d9-d12},  [r2]! +  vst4.8 {d12-d15}, [r2]! +  vst4.8 {d15-d18}, [r2]! +  vst4.8 {d18-d21}, [r2]! +  vst4.8 {d21-d24}, [r2]! 
+ +  subs r12, #1 +  bne .looprow + +  add r0, r3 +  subs r14, #1 +  bne .loopcol + +  pop {pc} diff --git a/fmdsp/fmdsp.c b/fmdsp/fmdsp.c index 3d082af..2f708dd 100644 --- a/fmdsp/fmdsp.c +++ b/fmdsp/fmdsp.c @@ -5,6 +5,8 @@  #include <stdio.h>  #include "libopna/opna.h" +fmdsp_vramlookup_type fmdsp_vramlookup_func = fmdsp_vramlookup_c; +  static void vramblit(uint8_t *vram, int x, int y,                       const uint8_t *data, int w, int h) {    for (int yi = 0; yi < h; yi++) { @@ -727,17 +729,9 @@ void fmdsp_update(struct fmdsp *fmdsp,    }    fmdsp_palette_fade(fmdsp);  } +  void fmdsp_vrampalette(struct fmdsp *fmdsp, const uint8_t *vram, uint8_t *vram32, int stride) { -  for (int y = 0; y < PC98_H; y++) { -    for (int x = 0; x < PC98_W; x++) { -      uint8_t r = fmdsp->palette[vram[y*PC98_W+x]*3+0]; -      uint8_t g = fmdsp->palette[vram[y*PC98_W+x]*3+1]; -      uint8_t b = fmdsp->palette[vram[y*PC98_W+x]*3+2]; -      uint32_t data = (((uint32_t)r)<<16) | (((uint32_t)g)<<8) | ((uint32_t)b); -      uint32_t *row = (uint32_t *)(vram32 + y*stride); -      row[x] = data; -    } -  } +  fmdsp_vramlookup_func(vram32, vram, fmdsp->palette, stride);  }  void fmdsp_dispstyle_set(struct fmdsp *fmdsp, enum FMDSP_DISPSTYLE style) { diff --git a/fmdsp/fmdsp.h b/fmdsp/fmdsp.h index cce2310..a7e4aab 100644 --- a/fmdsp/fmdsp.h +++ b/fmdsp/fmdsp.h @@ -49,6 +49,16 @@ void fmdsp_vrampalette(struct fmdsp *fmdsp, const uint8_t *vram, uint8_t *vram32  void fmdsp_font_from_fontrom(uint8_t *font, const uint8_t *fontrom);  void fmdsp_palette_set(struct fmdsp *fmdsp, int p);  void fmdsp_dispstyle_set(struct fmdsp *fmdsp, enum FMDSP_DISPSTYLE style); + +typedef void (*fmdsp_vramlookup_type)(uint8_t *vram32, +                                      const uint8_t *vram, +                                      const uint8_t *palette, +                                      int stride); +extern fmdsp_vramlookup_type fmdsp_vramlookup_func; +void fmdsp_vramlookup_c(uint8_t *vram32, +           
             const uint8_t *vram, +                        const uint8_t *palette, +                        int stride);  #ifdef __cplusplus  }  #endif diff --git a/gtk/Makefile.am b/gtk/Makefile.am index 174f5e9..0a9a95b 100644 --- a/gtk/Makefile.am +++ b/gtk/Makefile.am @@ -1,11 +1,12 @@  bin_PROGRAMS=fmplayer  LIBOPNA_SRC=../libopna/opnaadpcm.c \ -                ../libopna/opnadrum.c \ -                ../libopna/opnafm.c \ -                ../libopna/opnassg.c \ -                ../libopna/opnatimer.c \ -                ../libopna/opna.c +            ../libopna/opnadrum.c \ +            ../libopna/opnafm.c \ +            ../libopna/opnassg.c \ +            ../libopna/opnassg-sinc-c.c \ +            ../libopna/opnatimer.c \ +            ../libopna/opna.c  FMDRIVER_SRC=../fmdriver/fmdriver_fmp.c \               ../fmdriver/fmdriver_pmd.c \ @@ -13,9 +14,16 @@ FMDRIVER_SRC=../fmdriver/fmdriver_fmp.c \               ../fmdriver/ppz8.c  FMDSP_SRC=../fmdsp/fmdsp.c \ +          ../fmdsp/fmdsp-vramlookup-c.c \            ../fmdsp/font_rom.c \            ../fmdsp/font_fmdsp_small.c +if ENABLE_NEON +LIBOPNA_SRC+=../libopna/opnassg-sinc-neon.s +FMDSP_SRC+=../fmdsp/fmdsp-vramlookup-neon.s +fmplayer_CCASFLAGS=-march=armv8-a -mfpu=crypto-neon-fp-armv8 +endif +  fmplayer_SOURCES=main.c \                   toneview.c \                   oscilloview.c \ diff --git a/gtk/configure.ac b/gtk/configure.ac index 8e13a34..2727888 100644 --- a/gtk/configure.ac +++ b/gtk/configure.ac @@ -2,10 +2,17 @@ AC_INIT([fmplayer], [0.1.0])  AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects])  AM_SILENT_RULES([yes])  AC_PROG_CC_C99 +AM_PROG_AS  dnl AM_PATH_SDL2([2.0.5])  PKG_CHECK_MODULES([PORTAUDIO], [portaudio-2.0])  PKG_CHECK_MODULES([GTK3], [gtk+-3.0 cairo]) +AC_ARG_ENABLE([neon], AS_HELP_STRING([--enable-neon], [Enable NEON optimized functions for SSG sinc filtering and fmdsp palette lookup. 
Tested on Cortex-A53 (Raspberry PI 3)])) +AM_CONDITIONAL([ENABLE_NEON], [test "x$enable_neon" = "xyes"]) +AS_IF([test "x$enable_neon" = "xyes"], [ +  AC_DEFINE([ENABLE_NEON]) +]) +  AC_CONFIG_FILES([Makefile])  AC_OUTPUT @@ -53,6 +53,7 @@ static struct {    void *vram32;    int vram32_stride;    const char *current_uri; +  bool oscillo_should_update;    struct oscillodata oscillodata_audiothread[LIBOPNA_OSCILLO_TRACK_COUNT];  } g; @@ -97,17 +98,21 @@ static int pastream_cb(const void *inptr, void *outptr, unsigned long frames,    struct opna_timer *timer = (struct opna_timer *)userdata;    int16_t *buf = (int16_t *)outptr;    memset(outptr, 0, sizeof(int16_t)*frames*2); -  opna_timer_mix_oscillo(timer, buf, frames, g.oscillodata_audiothread); +  opna_timer_mix_oscillo(timer, buf, frames, +                         g.oscillo_should_update ? +                         g.oscillodata_audiothread : 0);    if (!atomic_flag_test_and_set_explicit(        &toneview_g.flag, memory_order_acquire)) {      tonedata_from_opna(&toneview_g.tonedata, &g.opna);      atomic_flag_clear_explicit(&toneview_g.flag, memory_order_release);    } -  if (!atomic_flag_test_and_set_explicit( -    &oscilloview_g.flag, memory_order_acquire)) { -    memcpy(oscilloview_g.oscillodata, g.oscillodata_audiothread, sizeof(oscilloview_g.oscillodata)); -    atomic_flag_clear_explicit(&oscilloview_g.flag, memory_order_release); +  if (g.oscillo_should_update) { +    if (!atomic_flag_test_and_set_explicit( +      &oscilloview_g.flag, memory_order_acquire)) { +      memcpy(oscilloview_g.oscillodata, g.oscillodata_audiothread, sizeof(oscilloview_g.oscillodata)); +      atomic_flag_clear_explicit(&oscilloview_g.flag, memory_order_release); +    }    }    return paContinue;  } @@ -479,7 +484,14 @@ static void drag_data_recv_cb(    gtk_drag_finish(ctx, TRUE, FALSE, time);  } +void opna_ssg_sinc_calc_neon(unsigned, const int16_t *, int32_t *); +void fmdsp_vramlookup_neon(uint8_t *, const uint8_t *, const uint8_t *, 
int); +  int main(int argc, char **argv) { +#ifdef ENABLE_NEON +  opna_ssg_sinc_calc_func = opna_ssg_sinc_calc_neon; +  fmdsp_vramlookup_func = fmdsp_vramlookup_neon; +#endif    load_fontrom();    gtk_init(&argc, &argv);    GtkWidget *w = gtk_window_new(GTK_WINDOW_TOPLEVEL); diff --git a/libopna/opna.c b/libopna/opna.c index 1b0aa6c..567c913 100644 --- a/libopna/opna.c +++ b/libopna/opna.c @@ -37,8 +37,9 @@ void opna_mix_oscillo(struct opna *opna, int16_t *buf, unsigned samples, struct      }    }    unsigned offset = OSCILLO_SAMPLE_COUNT - samples; -  opna_fm_mix(&opna->fm, buf, samples, &oscillo[0], offset); -  opna_ssg_mix_55466(&opna->ssg, &opna->resampler, buf, samples, &oscillo[6], offset); +  opna_fm_mix(&opna->fm, buf, samples, oscillo ? &oscillo[0] : 0, offset); +  opna_ssg_mix_55466(&opna->ssg, &opna->resampler, buf, samples, +                     oscillo ? &oscillo[6] : 0, offset);    opna_drum_mix(&opna->drum, buf, samples);    opna_adpcm_mix(&opna->adpcm, buf, samples);  } diff --git a/libopna/opnassg-sinc-c.c b/libopna/opnassg-sinc-c.c new file mode 100644 index 0000000..bf93039 --- /dev/null +++ b/libopna/opnassg-sinc-c.c @@ -0,0 +1,13 @@ +#include "libopna/opnassg.h" + +void opna_ssg_sinc_calc_c(unsigned resampler_index, const int16_t *inbuf, int32_t *outbuf) { +  for (int c = 0; c < 3; c++) { +    int32_t chsample = 0; +    for (int j = 0; j < OPNA_SSG_SINCTABLELEN; j++) { +      unsigned sincindex = j; +      if (!(resampler_index&1)) sincindex += OPNA_SSG_SINCTABLELEN; +      chsample += inbuf[(((resampler_index)>>1)+j)*3+c] * opna_ssg_sinctable[sincindex]; +    } +    outbuf[c] = chsample; +  } +} diff --git a/libopna/opnassg-sinc-neon.s b/libopna/opnassg-sinc-neon.s new file mode 100644 index 0000000..19dc3c6 --- /dev/null +++ b/libopna/opnassg-sinc-neon.s @@ -0,0 +1,118 @@ +@ neon register map: +@  0,  3,  6,  9, 12, 15 ssg1 +@  1,  4,  7, 10, 13, 16 ssg2 +@  2,  5,  8, 11, 14, 17 ssg3 +@ 18, 19, 20, 21, 22, 23 sinc +@ 24-25 (q12): ssg1 out +@ 
26-27 (q13): ssg2 out +@ 28-29 (q14): ssg3 out + +.global opna_ssg_sinc_calc_neon +@ r0: resampler_index +@ r1: const int16_t *inbuf +@ r2: int32_t *outbuf + +opna_ssg_sinc_calc_neon: +  push {r4-r10,lr} +@ sinc table to r3 +  movw r3, #:lower16:opna_ssg_sinctable +  movt r3, #:upper16:opna_ssg_sinctable +  tst r0, #1 +  addeq r3, #256 + +@ add offset to ssg input buffer address +  bic r0, #1 +  add r0, r0, lsl #1 +  add r1, r0 + +@ initialize output register +  vmov.i64 q12, #0 +  vmov.i64 q13, #0 +  vmov.i64 q14, #0 + +@ sinc sample length +  mov r0, #128 + +.loop: +@ +  subs r0, #24 +  blo .end + +@ load SSG channel data +  vld3.16 {d0-d2}, [r1]! +  vld3.16 {d3-d5}, [r1]! +  vld3.16 {d6-d8}, [r1]! +  vld3.16 {d9-d11}, [r1]! +  vld3.16 {d12-d14}, [r1]! +  vld3.16 {d15-d17}, [r1]! + +@ load sinc data +  vld1.16 {d18-d21}, [r3]! +  vld1.16 {d22-d23}, [r3]! + +@ multiply and accumulate +  vmlal.s16 q12, d0,  d18 +  vmlal.s16 q13, d1,  d18 +  vmlal.s16 q14, d2,  d18 +  vmlal.s16 q12, d3,  d19 +  vmlal.s16 q13, d4,  d19 +  vmlal.s16 q14, d5,  d19 +  vmlal.s16 q12, d6,  d20 +  vmlal.s16 q13, d7,  d20 +  vmlal.s16 q14, d8,  d20 +  vmlal.s16 q12, d9,  d21 +  vmlal.s16 q13, d10, d21 +  vmlal.s16 q14, d11, d21 +  vmlal.s16 q12, d12, d22 +  vmlal.s16 q13, d13, d22 +  vmlal.s16 q14, d14, d22 +  vmlal.s16 q12, d15, d23 +  vmlal.s16 q13, d16, d23 +  vmlal.s16 q14, d17, d23 +  b .loop + +.end: +@ 8 samples left +  vld3.16 {d0-d2}, [r1]! 
+  vld3.16 {d3-d5}, [r1] +  vld1.16 {d18-d19}, [r3] + +  vmlal.s16 q12, d0, d18 +  vmlal.s16 q13, d1, d18 +  vmlal.s16 q14, d2, d18 +  vmlal.s16 q12, d3, d19 +  vmlal.s16 q13, d4, d19 +  vmlal.s16 q14, d5, d19 + +@ extract data from result SIMD registers + +  vmov.32 r0,  d24[0] +  vmov.32 r1,  d24[1] +  vmov.32 r3,  d25[0] +  vmov.32 r12, d25[1] + +  vmov.32 r14, d26[0] +  vmov.32 r4,  d26[1] +  vmov.32 r5,  d27[0] +  vmov.32 r6,  d27[1] + +  vmov.32 r7,  d28[0] +  vmov.32 r8,  d28[1] +  vmov.32 r9,  d29[0] +  vmov.32 r10, d29[1] + +  add r0, r1 +  add r3, r12 + +  add r14, r4 +  add r5, r6 + +  add r7, r8 +  add r9, r10 + +  add r4, r0, r3 +  add r5, r14 +  add r6, r7, r9 + +  stmia r2, {r4-r6} +  pop {r4-r10,pc} diff --git a/libopna/opnassg.c b/libopna/opnassg.c index ec03437..4a12f76 100644 --- a/libopna/opnassg.c +++ b/libopna/opnassg.c @@ -1,5 +1,6 @@  #include "opnassg.h"  #include "oscillo/oscillo.h" +#include <string.h>  /*  static const float voltable[32] = {    0.0f,           0.0f,           0x1.ae89f9p-8f, 0x1.000000p-7f, @@ -27,9 +28,6 @@ static const int16_t voltable[32] = {     6494,  7723,  9185, 10922  }; -#define SINCTABLEBIT 7 -#define SINCTABLELEN (1<<SINCTABLEBIT) -  // GNU Octave  // Fc = 7987200  // Ff = Fc/144 @@ -39,7 +37,8 @@ static const int16_t voltable[32] = {  // B = 128 * O / 2  // FILTER=sinc(linspace(-127.5,127.5,256)*2/9/2).*rotdim(kaiser(256,B))  // FILTERI=round(FILTER(1:128).*32768) -static const int16_t sinctable[SINCTABLELEN] = { +#if 0 +const int16_t opna_ssg_sinctable[OPNA_SSG_SINCTABLELEN*2] = {        1,     0,    -1,    -2,    -3,    -5,    -6,    -6,       -6,    -5,    -2,     2,     7,    11,    16,    19,       20,    18,    13,     5,    -5,   -17,   -29,   -38, @@ -56,8 +55,61 @@ static const int16_t sinctable[SINCTABLELEN] = {     3306,  3714,  3690,  3185,  2206,   815,  -868, -2673,    -4391, -5798, -6670, -6809, -6067, -4359, -1681,  1886,     6178, 10957, 15928, 20765, 25133, 28724, 31275, 32600, +  32600, 
31275, 28724, 25133, 20765, 15928, 10957,  6178, +   1886, -1681, -4359, -6067, -6809, -6670, -5798, -4391, +  -2673,  -868,   815,  2206,  3185,  3690,  3714,  3306, +   2557,  1585,   523,  -498, -1365, -1994, -2333, -2369, +  -2125, -1655, -1032,  -343,   328,   902,  1322,  1552, +   1580,  1421,  1108,   692,   230,  -220,  -607,  -889, +  -1043, -1062,  -954,  -744,  -464,  -154,   147,   405, +    593,   694,   705,   632,   491,   306,   101,   -96, +   -264,  -385,  -450,  -455,  -406,  -315,  -195,   -64, +     61,   166,   241,   280,   282,   251,   193,   119, +     39,   -37,  -100,  -144,  -166,  -166,  -146,  -112, +    -68,   -22,    21,    56,    80,    91,    90,    79, +     60,    36,    12,   -11,   -29,   -40,   -45,   -44, +    -38,   -29,   -17,    -5,     5,    13,    18,    20, +     19,    16,    11,     7,     2,    -2,    -5,    -6, +     -6,    -6,    -5,    -3,    -2,    -1,     0,     1, +}; +#endif +const int16_t opna_ssg_sinctable[OPNA_SSG_SINCTABLELEN*2] = { +      1,    -1,    -3,    -6,    -6,    -2,     7,    16, +     20,    13,    -5,   -29,   -44,   -40,   -11,    36, +     79,    91,    56,   -22,  -112,  -166,  -144,   -37, +    119,   251,   280,   166,   -64,  -315,  -455,  -385, +    -96,   306,   632,   694,   405,  -154,  -744, -1062, +   -889,  -220,   692,  1421,  1552,   902,  -343, -1655, +  -2369, -1994,  -498,  1585,  3306,  3690,  2206,  -868, +  -4391, -6670, -6067, -1681,  6178, 15928, 25133, 31275, +  32600, 28724, 20765, 10957,  1886, -4359, -6809, -5798, +  -2673,   815,  3185,  3714,  2557,   523, -1365, -2333, +  -2125, -1032,   328,  1322,  1580,  1108,   230,  -607, +  -1043,  -954,  -464,   147,   593,   705,   491,   101, +   -264,  -450,  -406,  -195,    61,   241,   282,   193, +     39,  -100,  -166,  -146,   -68,    21,    80,    90, +     60,    12,   -29,   -45,   -38,   -17,     5,    18, +     19,    11,     2,    -5,    -6,    -5,    -2,     0, +      0,    -2,    -5,    -6,    -5,     2,   
 11,    19, +     18,     5,   -17,   -38,   -45,   -29,    12,    60, +     90,    80,    21,   -68,  -146,  -166,  -100,    39, +    193,   282,   241,    61,  -195,  -406,  -450,  -264, +    101,   491,   705,   593,   147,  -464,  -954, -1043, +   -607,   230,  1108,  1580,  1322,   328, -1032, -2125, +  -2333, -1365,   523,  2557,  3714,  3185,   815, -2673, +  -5798, -6809, -4359,  1886, 10957, 20765, 28724, 32600, +  31275, 25133, 15928,  6178, -1681, -6067, -6670, -4391, +   -868,  2206,  3690,  3306,  1585,  -498, -1994, -2369, +  -1655,  -343,   902,  1552,  1421,   692,  -220,  -889, +  -1062,  -744,  -154,   405,   694,   632,   306,   -96, +   -385,  -455,  -315,   -64,   166,   280,   251,   119, +    -37,  -144,  -166,  -112,   -22,    56,    91,    79, +     36,   -11,   -40,   -44,   -29,    -5,    13,    20, +     16,     7,    -2,    -6,    -6,    -3,    -1,     1,  }; +opna_ssg_sinc_calc_func_type opna_ssg_sinc_calc_func = opna_ssg_sinc_calc_c; +  void opna_ssg_reset(struct opna_ssg *ssg) {    for (int i = 0; i < 3; i++) {      ssg->ch[i].tone_counter = 0; @@ -78,7 +130,7 @@ void opna_ssg_reset(struct opna_ssg *ssg) {  }  void opna_ssg_resampler_reset(struct opna_ssg_resampler *resampler) { -  for (int i = 0; i < SINCTABLELEN; i++) { +  for (int i = 0; i < OPNA_SSG_SINCTABLELEN; i++) {      resampler->buf[i] = 0;    }    resampler->index = 0; @@ -215,7 +267,7 @@ void opna_ssg_generate_raw(struct opna_ssg *ssg, int16_t *buf, int samples) {    }  } -#define BUFINDEX(n) ((((resampler->index)>>1)+n)&(SINCTABLELEN-1)) +#define BUFINDEX(n) ((((resampler->index)>>1)+n)&(OPNA_SSG_SINCTABLELEN-1))  void opna_ssg_mix_55466(    struct opna_ssg *ssg, struct opna_ssg_resampler *resampler, @@ -246,18 +298,13 @@ void opna_ssg_mix_55466(        resampler->index += 9;      }      int32_t sample = 0; +    resampler->index &= (1u<<(OPNA_SSG_SINCTABLEBIT+1))-1; +    memcpy(resampler->buf + OPNA_SSG_SINCTABLELEN*3, resampler->buf, 
OPNA_SSG_SINCTABLELEN*3*sizeof(*resampler->buf)); +    int32_t outbuf[3]; +    opna_ssg_sinc_calc_func(resampler->index, resampler->buf, outbuf);      for (int ch = 0; ch < 3; ch++) { -      int32_t chsample = 0; -      for (int j = 0; j < SINCTABLELEN; j++) { -        unsigned sincindex = j*2; -        if (!(resampler->index&1)) sincindex++; -        bool sincsign = sincindex & (1<<(SINCTABLEBIT)); -        unsigned sincmask = ((1<<(SINCTABLEBIT))-1); -        sincindex = (sincindex & sincmask) ^ (sincsign ? sincmask : 0); -        chsample += (resampler->buf[BUFINDEX(j)*3+ch] * sinctable[sincindex])>>2; -      } -      if (oscillo) oscillo[ch].buf[offset+i] = chsample >> 13; -      if (!(ssg->mask & (1<<ch))) sample += chsample; +      if (oscillo) oscillo[ch].buf[offset+i] = outbuf[ch] >> 15; +      if (!(ssg->mask & (1<<ch))) sample += outbuf[ch] >> 2;      }      sample >>= 16;      sample *= 13000; diff --git a/libopna/opnassg.h b/libopna/opnassg.h index 0321163..231db4d 100644 --- a/libopna/opnassg.h +++ b/libopna/opnassg.h @@ -8,6 +8,9 @@  extern "C" {  #endif +#define OPNA_SSG_SINCTABLEBIT 7 +#define OPNA_SSG_SINCTABLELEN (1<<OPNA_SSG_SINCTABLEBIT) +  struct opna_ssg_ch {    uint16_t tone_counter;    bool out; @@ -30,7 +33,7 @@ struct opna_ssg {  };  struct opna_ssg_resampler { -  int16_t buf[(1<<7)*3]; +  int16_t buf[OPNA_SSG_SINCTABLELEN*3 * 2];    unsigned index;  }; @@ -58,6 +61,14 @@ unsigned opna_ssg_readreg(const struct opna_ssg *ssg, unsigned reg);  int opna_ssg_channel_level(const struct opna_ssg *ssg, int ch);  unsigned opna_ssg_tone_period(const struct opna_ssg *ssg, int ch); +typedef void (*opna_ssg_sinc_calc_func_type)(unsigned resampler_index, +                                             const int16_t *inbuf, int32_t *outbuf); +extern opna_ssg_sinc_calc_func_type opna_ssg_sinc_calc_func; +void opna_ssg_sinc_calc_c(unsigned resampler_index, +                          const int16_t *inbuf, int32_t *outbuf); + +extern const int16_t 
opna_ssg_sinctable[OPNA_SSG_SINCTABLELEN*2]; +  #ifdef __cplusplus  }  #endif | 
