aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTakamichi Horikawa <takamichiho@gmail.com>2017-03-30 23:31:39 +0000
committerTakamichi Horikawa <takamichiho@gmail.com>2017-03-30 23:31:39 +0000
commitf47eba7d7d4c6a1d9501e027b63bbab04bb7d417 (patch)
treec6d8ebf4f84b76e9d32d4998d28a69d36fddd4a9
parenta86bb48b9f3acb081afa92e5efc50d0192c4f68c (diff)
add NEON optimization
-rw-r--r--curses/Makefile.am1
-rw-r--r--fmdsp/fmdsp-vramlookup-c.c14
-rw-r--r--fmdsp/fmdsp-vramlookup-neon.s83
-rw-r--r--fmdsp/fmdsp.c14
-rw-r--r--fmdsp/fmdsp.h10
-rw-r--r--gtk/Makefile.am18
-rw-r--r--gtk/configure.ac7
-rw-r--r--gtk/main.c22
-rw-r--r--libopna/opna.c5
-rw-r--r--libopna/opnassg-sinc-c.c13
-rw-r--r--libopna/opnassg-sinc-neon.s118
-rw-r--r--libopna/opnassg.c81
-rw-r--r--libopna/opnassg.h13
13 files changed, 359 insertions, 40 deletions
diff --git a/curses/Makefile.am b/curses/Makefile.am
index 586f76d..d4c7585 100644
--- a/curses/Makefile.am
+++ b/curses/Makefile.am
@@ -9,6 +9,7 @@ LIBOPNA_SOURCES=../libopna/opnaadpcm.c \
../libopna/opna.c
FMDRIVER_SOURCES=../fmdriver/fmdriver_fmp.c \
+ ../fmdriver/fmdriver_common.c \
../fmdriver/ppz8.c
fmpc_SOURCES=main.c \
$(LIBOPNA_SOURCES) \
diff --git a/fmdsp/fmdsp-vramlookup-c.c b/fmdsp/fmdsp-vramlookup-c.c
new file mode 100644
index 0000000..f900c8d
--- /dev/null
+++ b/fmdsp/fmdsp-vramlookup-c.c
@@ -0,0 +1,14 @@
+#include "fmdsp/fmdsp.h"
+
+// Portable palette lookup: expands the indexed image `vram` (PC98_W x PC98_H,
+// one palette index per byte) into 32-bit pixels written to `vram32`.
+// `palette` holds r,g,b byte triplets; every output pixel is stored as the
+// uint32_t value (r<<16)|(g<<8)|b, and output rows start `stride` bytes apart.
+void fmdsp_vramlookup_c(uint8_t *vram32, const uint8_t *vram, const uint8_t *palette, int stride) {
+  for (int line = 0; line < PC98_H; line++) {
+    // Start of this output row and of the matching row of palette indices.
+    uint32_t *dst = (uint32_t *)(vram32 + line*stride);
+    const uint8_t *src = vram + line*PC98_W;
+    for (int col = 0; col < PC98_W; col++) {
+      const uint8_t *rgb = palette + src[col]*3;
+      uint32_t pixel = (uint32_t)rgb[2];
+      pixel |= ((uint32_t)rgb[1]) << 8;
+      pixel |= ((uint32_t)rgb[0]) << 16;
+      dst[col] = pixel;
+    }
+  }
+}
diff --git a/fmdsp/fmdsp-vramlookup-neon.s b/fmdsp/fmdsp-vramlookup-neon.s
new file mode 100644
index 0000000..3cfb957
--- /dev/null
+++ b/fmdsp/fmdsp-vramlookup-neon.s
@@ -0,0 +1,83 @@
+@ neon register map
+@ 0, 3, 6, 9, 12, 15, 18, 21 b
+@ 1, 4, 7, 10, 13, 16, 19, 22 g
+@ 2, 5, 8, 11, 14, 17, 20, 23 r
+
+@ 16, 17, 18, 19, 20, 21, 22, 23 vram
+
+@ 26, 27 r palette
+@ 28, 29 g palette
+@ 30, 31 b palette
+
+.global fmdsp_vramlookup_neon
+
+@ r0: uint8_t *vram32
+@ 4 bytes aligned
+@ b, g, r, 0,
+@ r1: const uint8_t *vram
+@ r2: const uint8_t *palette
+@ r0, g0, b0, r1, g1, b1, ...
+@ r3: int stride
+fmdsp_vramlookup_neon:
+@ Converts the 8-bit indexed image at r1 into 4-byte pixels at r0 by table
+@ lookup in the r,g,b palette at r2; r3 is the byte distance between the
+@ starts of consecutive output rows.
+ push {lr}
+@ d8-d15 are callee-saved under the AAPCS and are overwritten by the lookups below
+ vpush {d8-d15}
+@ load palette, de-interleaving 16 r,g,b triplets (48 bytes) so that
+@ d26-d27 = r[0..15], d28-d29 = g[0..15], d30-d31 = b[0..15].
+@ Both halves must use vld3: a plain vld1 would leave entries 8..15 interleaved.
+ vld3.8 {d26, d28, d30}, [r2]!
+ vld3.8 {d27, d29, d31}, [r2]
+
+@ r14: remaining output rows
+ mov r14, #400
+.loopcol:
+@ r2: write pointer inside the current output row (r0 stays at the row start)
+ mov r2, r0
+@ r12: remaining 64-pixel groups in this row
+ mov r12, #10
+.looprow:
+@ load vram: 64 palette indices into d16-d23
+ vld1.8 {d16-d19}, [r1]!
+ vld1.8 {d20-d23}, [r1]!
+
+@ lookup: b, g, r for each group of 8 pixels.
+@ d16-d23 are reused as outputs only after their indices have been consumed.
+ vtbl.8 d0, {d30-d31}, d16
+ vtbl.8 d1, {d28-d29}, d16
+ vtbl.8 d2, {d26-d27}, d16
+ vtbl.8 d3, {d30-d31}, d17
+ vtbl.8 d4, {d28-d29}, d17
+ vtbl.8 d5, {d26-d27}, d17
+ vtbl.8 d6, {d30-d31}, d18
+ vtbl.8 d7, {d28-d29}, d18
+ vtbl.8 d8, {d26-d27}, d18
+ vtbl.8 d9, {d30-d31}, d19
+ vtbl.8 d10, {d28-d29}, d19
+ vtbl.8 d11, {d26-d27}, d19
+ vtbl.8 d12, {d30-d31}, d20
+ vtbl.8 d13, {d28-d29}, d20
+ vtbl.8 d14, {d26-d27}, d20
+ vtbl.8 d15, {d30-d31}, d21
+ vtbl.8 d16, {d28-d29}, d21
+ vtbl.8 d17, {d26-d27}, d21
+ vtbl.8 d18, {d30-d31}, d22
+ vtbl.8 d19, {d28-d29}, d22
+ vtbl.8 d20, {d26-d27}, d22
+ vtbl.8 d21, {d30-d31}, d23
+ vtbl.8 d22, {d28-d29}, d23
+ vtbl.8 d23, {d26-d27}, d23
+
+@ store vram32: each vst4.8 interleaves four registers into 8 pixels of 4 bytes.
+@ The fourth register is the next group's b register (for the last group it is
+@ d24, which this routine never writes), so the fourth byte of a pixel is not
+@ cleared -- NOTE(review): confirm the consumer ignores that byte.
+ vst4.8 {d0-d3}, [r2]!
+ vst4.8 {d3-d6}, [r2]!
+ vst4.8 {d6-d9}, [r2]!
+ vst4.8 {d9-d12}, [r2]!
+ vst4.8 {d12-d15}, [r2]!
+ vst4.8 {d15-d18}, [r2]!
+ vst4.8 {d18-d21}, [r2]!
+ vst4.8 {d21-d24}, [r2]!
+
+ subs r12, #1
+ bne .looprow
+
+@ advance to the next output row
+ add r0, r3
+ subs r14, #1
+ bne .loopcol
+
+ vpop {d8-d15}
+ pop {pc}
diff --git a/fmdsp/fmdsp.c b/fmdsp/fmdsp.c
index 3d082af..2f708dd 100644
--- a/fmdsp/fmdsp.c
+++ b/fmdsp/fmdsp.c
@@ -5,6 +5,8 @@
#include <stdio.h>
#include "libopna/opna.h"
+fmdsp_vramlookup_type fmdsp_vramlookup_func = fmdsp_vramlookup_c;
+
static void vramblit(uint8_t *vram, int x, int y,
const uint8_t *data, int w, int h) {
for (int yi = 0; yi < h; yi++) {
@@ -727,17 +729,9 @@ void fmdsp_update(struct fmdsp *fmdsp,
}
fmdsp_palette_fade(fmdsp);
}
+
void fmdsp_vrampalette(struct fmdsp *fmdsp, const uint8_t *vram, uint8_t *vram32, int stride) {
- for (int y = 0; y < PC98_H; y++) {
- for (int x = 0; x < PC98_W; x++) {
- uint8_t r = fmdsp->palette[vram[y*PC98_W+x]*3+0];
- uint8_t g = fmdsp->palette[vram[y*PC98_W+x]*3+1];
- uint8_t b = fmdsp->palette[vram[y*PC98_W+x]*3+2];
- uint32_t data = (((uint32_t)r)<<16) | (((uint32_t)g)<<8) | ((uint32_t)b);
- uint32_t *row = (uint32_t *)(vram32 + y*stride);
- row[x] = data;
- }
- }
+ fmdsp_vramlookup_func(vram32, vram, fmdsp->palette, stride); // dispatch through the function pointer (fmdsp_vramlookup_c unless reassigned); note the callee takes the destination first
}
void fmdsp_dispstyle_set(struct fmdsp *fmdsp, enum FMDSP_DISPSTYLE style) {
diff --git a/fmdsp/fmdsp.h b/fmdsp/fmdsp.h
index cce2310..a7e4aab 100644
--- a/fmdsp/fmdsp.h
+++ b/fmdsp/fmdsp.h
@@ -49,6 +49,16 @@ void fmdsp_vrampalette(struct fmdsp *fmdsp, const uint8_t *vram, uint8_t *vram32
void fmdsp_font_from_fontrom(uint8_t *font, const uint8_t *fontrom);
void fmdsp_palette_set(struct fmdsp *fmdsp, int p);
void fmdsp_dispstyle_set(struct fmdsp *fmdsp, enum FMDSP_DISPSTYLE style);
+
+typedef void (*fmdsp_vramlookup_type)(uint8_t *vram32, // signature of a palette lookup routine: destination pixels first,
+ const uint8_t *vram, // then the indexed source image,
+ const uint8_t *palette, // the r,g,b palette bytes,
+ int stride); // and the byte distance between output rows
+extern fmdsp_vramlookup_type fmdsp_vramlookup_func; // routine called by fmdsp_vrampalette(); defined in fmdsp.c, initially fmdsp_vramlookup_c
+void fmdsp_vramlookup_c(uint8_t *vram32, // portable C implementation of fmdsp_vramlookup_type
+ const uint8_t *vram,
+ const uint8_t *palette,
+ int stride);
#ifdef __cplusplus
}
#endif
diff --git a/gtk/Makefile.am b/gtk/Makefile.am
index 174f5e9..0a9a95b 100644
--- a/gtk/Makefile.am
+++ b/gtk/Makefile.am
@@ -1,11 +1,12 @@
bin_PROGRAMS=fmplayer
LIBOPNA_SRC=../libopna/opnaadpcm.c \
- ../libopna/opnadrum.c \
- ../libopna/opnafm.c \
- ../libopna/opnassg.c \
- ../libopna/opnatimer.c \
- ../libopna/opna.c
+ ../libopna/opnadrum.c \
+ ../libopna/opnafm.c \
+ ../libopna/opnassg.c \
+ ../libopna/opnassg-sinc-c.c \
+ ../libopna/opnatimer.c \
+ ../libopna/opna.c
FMDRIVER_SRC=../fmdriver/fmdriver_fmp.c \
../fmdriver/fmdriver_pmd.c \
@@ -13,9 +14,16 @@ FMDRIVER_SRC=../fmdriver/fmdriver_fmp.c \
../fmdriver/ppz8.c
FMDSP_SRC=../fmdsp/fmdsp.c \
+ ../fmdsp/fmdsp-vramlookup-c.c \
../fmdsp/font_rom.c \
../fmdsp/font_fmdsp_small.c
+if ENABLE_NEON
+LIBOPNA_SRC+=../libopna/opnassg-sinc-neon.s
+FMDSP_SRC+=../fmdsp/fmdsp-vramlookup-neon.s
+fmplayer_CCASFLAGS=-march=armv8-a -mfpu=crypto-neon-fp-armv8
+endif
+
fmplayer_SOURCES=main.c \
toneview.c \
oscilloview.c \
diff --git a/gtk/configure.ac b/gtk/configure.ac
index 8e13a34..2727888 100644
--- a/gtk/configure.ac
+++ b/gtk/configure.ac
@@ -2,10 +2,17 @@ AC_INIT([fmplayer], [0.1.0])
AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects])
AM_SILENT_RULES([yes])
AC_PROG_CC_C99
+AM_PROG_AS
dnl AM_PATH_SDL2([2.0.5])
PKG_CHECK_MODULES([PORTAUDIO], [portaudio-2.0])
PKG_CHECK_MODULES([GTK3], [gtk+-3.0 cairo])
+AC_ARG_ENABLE([neon], AS_HELP_STRING([--enable-neon], [Enable NEON optimized functions for SSG sinc filtering and fmdsp palette lookup. Tested on Cortex-A53 (Raspberry PI 3)]))
+AM_CONDITIONAL([ENABLE_NEON], [test "x$enable_neon" = "xyes"])
+AS_IF([test "x$enable_neon" = "xyes"], [
+ AC_DEFINE([ENABLE_NEON])
+])
+
AC_CONFIG_FILES([Makefile])
AC_OUTPUT
diff --git a/gtk/main.c b/gtk/main.c
index af0d2db..fc72d62 100644
--- a/gtk/main.c
+++ b/gtk/main.c
@@ -53,6 +53,7 @@ static struct {
void *vram32;
int vram32_stride;
const char *current_uri;
+ bool oscillo_should_update;
struct oscillodata oscillodata_audiothread[LIBOPNA_OSCILLO_TRACK_COUNT];
} g;
@@ -97,17 +98,21 @@ static int pastream_cb(const void *inptr, void *outptr, unsigned long frames,
struct opna_timer *timer = (struct opna_timer *)userdata;
int16_t *buf = (int16_t *)outptr;
memset(outptr, 0, sizeof(int16_t)*frames*2);
- opna_timer_mix_oscillo(timer, buf, frames, g.oscillodata_audiothread);
+ opna_timer_mix_oscillo(timer, buf, frames,
+ g.oscillo_should_update ?
+ g.oscillodata_audiothread : 0);
if (!atomic_flag_test_and_set_explicit(
&toneview_g.flag, memory_order_acquire)) {
tonedata_from_opna(&toneview_g.tonedata, &g.opna);
atomic_flag_clear_explicit(&toneview_g.flag, memory_order_release);
}
- if (!atomic_flag_test_and_set_explicit(
- &oscilloview_g.flag, memory_order_acquire)) {
- memcpy(oscilloview_g.oscillodata, g.oscillodata_audiothread, sizeof(oscilloview_g.oscillodata));
- atomic_flag_clear_explicit(&oscilloview_g.flag, memory_order_release);
+ if (g.oscillo_should_update) {
+ if (!atomic_flag_test_and_set_explicit(
+ &oscilloview_g.flag, memory_order_acquire)) {
+ memcpy(oscilloview_g.oscillodata, g.oscillodata_audiothread, sizeof(oscilloview_g.oscillodata));
+ atomic_flag_clear_explicit(&oscilloview_g.flag, memory_order_release);
+ }
}
return paContinue;
}
@@ -479,7 +484,14 @@ static void drag_data_recv_cb(
gtk_drag_finish(ctx, TRUE, FALSE, time);
}
+void opna_ssg_sinc_calc_neon(unsigned, const int16_t *, int32_t *);
+void fmdsp_vramlookup_neon(uint8_t *, const uint8_t *, const uint8_t *, int);
+
int main(int argc, char **argv) {
+#ifdef ENABLE_NEON
+ opna_ssg_sinc_calc_func = opna_ssg_sinc_calc_neon;
+ fmdsp_vramlookup_func = fmdsp_vramlookup_neon;
+#endif
load_fontrom();
gtk_init(&argc, &argv);
GtkWidget *w = gtk_window_new(GTK_WINDOW_TOPLEVEL);
diff --git a/libopna/opna.c b/libopna/opna.c
index 1b0aa6c..567c913 100644
--- a/libopna/opna.c
+++ b/libopna/opna.c
@@ -37,8 +37,9 @@ void opna_mix_oscillo(struct opna *opna, int16_t *buf, unsigned samples, struct
}
}
unsigned offset = OSCILLO_SAMPLE_COUNT - samples;
- opna_fm_mix(&opna->fm, buf, samples, &oscillo[0], offset);
- opna_ssg_mix_55466(&opna->ssg, &opna->resampler, buf, samples, &oscillo[6], offset);
+ opna_fm_mix(&opna->fm, buf, samples, oscillo ? &oscillo[0] : 0, offset);
+ opna_ssg_mix_55466(&opna->ssg, &opna->resampler, buf, samples,
+ oscillo ? &oscillo[6] : 0, offset);
opna_drum_mix(&opna->drum, buf, samples);
opna_adpcm_mix(&opna->adpcm, buf, samples);
}
diff --git a/libopna/opnassg-sinc-c.c b/libopna/opnassg-sinc-c.c
new file mode 100644
index 0000000..bf93039
--- /dev/null
+++ b/libopna/opnassg-sinc-c.c
@@ -0,0 +1,13 @@
+#include "libopna/opnassg.h"
+
+// Portable sinc filter kernel: for each of the 3 interleaved SSG channels in
+// `inbuf`, accumulates OPNA_SSG_SINCTABLELEN products of input samples and
+// filter coefficients and writes the sum to outbuf[channel].
+// Bit 0 of `resampler_index` selects the coefficient run (the second run of
+// opna_ssg_sinctable when the bit is clear); the remaining bits give the first
+// input frame.
+void opna_ssg_sinc_calc_c(unsigned resampler_index, const int16_t *inbuf, int32_t *outbuf) {
+  const int16_t *taps = opna_ssg_sinctable;
+  if ((resampler_index & 1) == 0) taps += OPNA_SSG_SINCTABLELEN;
+  const int16_t *frames = inbuf + (resampler_index >> 1) * 3;
+  for (int ch = 0; ch < 3; ch++) {
+    int32_t acc = 0;
+    for (int tap = 0; tap < OPNA_SSG_SINCTABLELEN; tap++) {
+      acc += frames[tap*3 + ch] * taps[tap];
+    }
+    outbuf[ch] = acc;
+  }
+}
diff --git a/libopna/opnassg-sinc-neon.s b/libopna/opnassg-sinc-neon.s
new file mode 100644
index 0000000..19dc3c6
--- /dev/null
+++ b/libopna/opnassg-sinc-neon.s
@@ -0,0 +1,118 @@
+@ neon register map:
+@ 0, 3, 6, 9, 12, 15 ssg1
+@ 1, 4, 7, 10, 13, 16 ssg2
+@ 2, 5, 8, 11, 14, 17 ssg3
+@ 18, 19, 20, 21, 22, 23 sinc
+@ 24-25 (q12): ssg1 out
+@ 26-27 (q13): ssg2 out
+@ 28-29 (q14): ssg3 out
+
+.global opna_ssg_sinc_calc_neon
+@ r0: resampler_index
+@ r1: const int16_t *inbuf
+@ r2: int32_t *outbuf
+
+opna_ssg_sinc_calc_neon:
+@ NEON sinc filter kernel: accumulates 128 sample*coefficient products for
+@ each of the 3 interleaved SSG channels and stores the three int32 sums at r2.
+ push {r4-r10,lr}
+@ d8-d15 are callee-saved under the AAPCS and are used as input registers below
+ vpush {d8-d15}
+@ sinc table to r3; an even resampler_index selects the second run of
+@ coefficients (256 bytes = 128 int16 entries further on)
+ movw r3, #:lower16:opna_ssg_sinctable
+ movt r3, #:upper16:opna_ssg_sinctable
+ tst r0, #1
+ addeq r3, #256
+
+@ add offset to ssg input buffer address:
+@ (index & ~1) * 3 bytes = (index >> 1) frames of three int16 samples
+ bic r0, #1
+ add r0, r0, lsl #1
+ add r1, r0
+
+@ initialize output register
+ vmov.i64 q12, #0
+ vmov.i64 q13, #0
+ vmov.i64 q14, #0
+
+@ sinc sample length
+ mov r0, #128
+
+.loop:
+@ 24 taps per pass; leave the loop once fewer than 24 remain (5 passes, 8 taps left)
+ subs r0, #24
+ blo .end
+
+@ load SSG channel data: each vld3.16 de-interleaves 4 frames into one
+@ register per channel
+ vld3.16 {d0-d2}, [r1]!
+ vld3.16 {d3-d5}, [r1]!
+ vld3.16 {d6-d8}, [r1]!
+ vld3.16 {d9-d11}, [r1]!
+ vld3.16 {d12-d14}, [r1]!
+ vld3.16 {d15-d17}, [r1]!
+
+@ load sinc data: 24 coefficients
+ vld1.16 {d18-d21}, [r3]!
+ vld1.16 {d22-d23}, [r3]!
+
+@ multiply and accumulate: four 32-bit partial sums per channel
+ vmlal.s16 q12, d0, d18
+ vmlal.s16 q13, d1, d18
+ vmlal.s16 q14, d2, d18
+ vmlal.s16 q12, d3, d19
+ vmlal.s16 q13, d4, d19
+ vmlal.s16 q14, d5, d19
+ vmlal.s16 q12, d6, d20
+ vmlal.s16 q13, d7, d20
+ vmlal.s16 q14, d8, d20
+ vmlal.s16 q12, d9, d21
+ vmlal.s16 q13, d10, d21
+ vmlal.s16 q14, d11, d21
+ vmlal.s16 q12, d12, d22
+ vmlal.s16 q13, d13, d22
+ vmlal.s16 q14, d14, d22
+ vmlal.s16 q12, d15, d23
+ vmlal.s16 q13, d16, d23
+ vmlal.s16 q14, d17, d23
+ b .loop
+
+.end:
+@ 8 samples left
+ vld3.16 {d0-d2}, [r1]!
+ vld3.16 {d3-d5}, [r1]
+ vld1.16 {d18-d19}, [r3]
+
+ vmlal.s16 q12, d0, d18
+ vmlal.s16 q13, d1, d18
+ vmlal.s16 q14, d2, d18
+ vmlal.s16 q12, d3, d19
+ vmlal.s16 q13, d4, d19
+ vmlal.s16 q14, d5, d19
+
+@ extract data from result SIMD registers
+
+@ ssg1 partial sums
+ vmov.32 r0, d24[0]
+ vmov.32 r1, d24[1]
+ vmov.32 r3, d25[0]
+ vmov.32 r12, d25[1]
+
+@ ssg2 partial sums
+ vmov.32 r14, d26[0]
+ vmov.32 r4, d26[1]
+ vmov.32 r5, d27[0]
+ vmov.32 r6, d27[1]
+
+@ ssg3 partial sums
+ vmov.32 r7, d28[0]
+ vmov.32 r8, d28[1]
+ vmov.32 r9, d29[0]
+ vmov.32 r10, d29[1]
+
+@ pairwise sums of the four lanes of each channel
+ add r0, r1
+ add r3, r12
+
+ add r14, r4
+ add r5, r6
+
+ add r7, r8
+ add r9, r10
+
+@ r4, r5, r6 = ssg1, ssg2, ssg3 totals
+ add r4, r0, r3
+ add r5, r14
+ add r6, r7, r9
+
+ stmia r2, {r4-r6}
+ vpop {d8-d15}
+ pop {r4-r10,pc}
diff --git a/libopna/opnassg.c b/libopna/opnassg.c
index ec03437..4a12f76 100644
--- a/libopna/opnassg.c
+++ b/libopna/opnassg.c
@@ -1,5 +1,6 @@
#include "opnassg.h"
#include "oscillo/oscillo.h"
+#include <string.h>
/*
static const float voltable[32] = {
0.0f, 0.0f, 0x1.ae89f9p-8f, 0x1.000000p-7f,
@@ -27,9 +28,6 @@ static const int16_t voltable[32] = {
6494, 7723, 9185, 10922
};
-#define SINCTABLEBIT 7
-#define SINCTABLELEN (1<<SINCTABLEBIT)
-
// GNU Octave
// Fc = 7987200
// Ff = Fc/144
@@ -39,7 +37,8 @@ static const int16_t voltable[32] = {
// B = 128 * O / 2
// FILTER=sinc(linspace(-127.5,127.5,256)*2/9/2).*rotdim(kaiser(256,B))
// FILTERI=round(FILTER(1:128).*32768)
-static const int16_t sinctable[SINCTABLELEN] = {
+#if 0
+const int16_t opna_ssg_sinctable[OPNA_SSG_SINCTABLELEN*2] = {
1, 0, -1, -2, -3, -5, -6, -6,
-6, -5, -2, 2, 7, 11, 16, 19,
20, 18, 13, 5, -5, -17, -29, -38,
@@ -56,8 +55,61 @@ static const int16_t sinctable[SINCTABLELEN] = {
3306, 3714, 3690, 3185, 2206, 815, -868, -2673,
-4391, -5798, -6670, -6809, -6067, -4359, -1681, 1886,
6178, 10957, 15928, 20765, 25133, 28724, 31275, 32600,
+ 32600, 31275, 28724, 25133, 20765, 15928, 10957, 6178,
+ 1886, -1681, -4359, -6067, -6809, -6670, -5798, -4391,
+ -2673, -868, 815, 2206, 3185, 3690, 3714, 3306,
+ 2557, 1585, 523, -498, -1365, -1994, -2333, -2369,
+ -2125, -1655, -1032, -343, 328, 902, 1322, 1552,
+ 1580, 1421, 1108, 692, 230, -220, -607, -889,
+ -1043, -1062, -954, -744, -464, -154, 147, 405,
+ 593, 694, 705, 632, 491, 306, 101, -96,
+ -264, -385, -450, -455, -406, -315, -195, -64,
+ 61, 166, 241, 280, 282, 251, 193, 119,
+ 39, -37, -100, -144, -166, -166, -146, -112,
+ -68, -22, 21, 56, 80, 91, 90, 79,
+ 60, 36, 12, -11, -29, -40, -45, -44,
+ -38, -29, -17, -5, 5, 13, 18, 20,
+ 19, 16, 11, 7, 2, -2, -5, -6,
+ -6, -6, -5, -3, -2, -1, 0, 1,
+};
+#endif
// Same 256 coefficients as the natural-order table kept under #if 0 above,
// split by tap parity: entries [0, OPNA_SSG_SINCTABLELEN) are the
// even-numbered taps and entries [OPNA_SSG_SINCTABLELEN, 2*OPNA_SSG_SINCTABLELEN)
// the odd-numbered taps, so each run can be read contiguously.
const int16_t opna_ssg_sinctable[OPNA_SSG_SINCTABLELEN*2] = {
1, -1, -3, -6, -6, -2, 7, 16,
20, 13, -5, -29, -44, -40, -11, 36,
79, 91, 56, -22, -112, -166, -144, -37,
119, 251, 280, 166, -64, -315, -455, -385,
-96, 306, 632, 694, 405, -154, -744, -1062,
-889, -220, 692, 1421, 1552, 902, -343, -1655,
-2369, -1994, -498, 1585, 3306, 3690, 2206, -868,
-4391, -6670, -6067, -1681, 6178, 15928, 25133, 31275,
32600, 28724, 20765, 10957, 1886, -4359, -6809, -5798,
-2673, 815, 3185, 3714, 2557, 523, -1365, -2333,
-2125, -1032, 328, 1322, 1580, 1108, 230, -607,
-1043, -954, -464, 147, 593, 705, 491, 101,
-264, -450, -406, -195, 61, 241, 282, 193,
39, -100, -166, -146, -68, 21, 80, 90,
60, 12, -29, -45, -38, -17, 5, 18,
19, 11, 2, -5, -6, -5, -2, 0,
0, -2, -5, -6, -5, 2, 11, 19,
18, 5, -17, -38, -45, -29, 12, 60,
90, 80, 21, -68, -146, -166, -100, 39,
193, 282, 241, 61, -195, -406, -450, -264,
101, 491, 705, 593, 147, -464, -954, -1043,
-607, 230, 1108, 1580, 1322, 328, -1032, -2125,
-2333, -1365, 523, 2557, 3714, 3185, 815, -2673,
-5798, -6809, -4359, 1886, 10957, 20765, 28724, 32600,
31275, 25133, 15928, 6178, -1681, -6067, -6670, -4391,
-868, 2206, 3690, 3306, 1585, -498, -1994, -2369,
-1655, -343, 902, 1552, 1421, 692, -220, -889,
-1062, -744, -154, 405, 694, 632, 306, -96,
-385, -455, -315, -64, 166, 280, 251, 119,
-37, -144, -166, -112, -22, 56, 91, 79,
36, -11, -40, -44, -29, -5, 13, 20,
16, 7, -2, -6, -6, -3, -1, 1,
};
+opna_ssg_sinc_calc_func_type opna_ssg_sinc_calc_func = opna_ssg_sinc_calc_c;
+
void opna_ssg_reset(struct opna_ssg *ssg) {
for (int i = 0; i < 3; i++) {
ssg->ch[i].tone_counter = 0;
@@ -78,7 +130,7 @@ void opna_ssg_reset(struct opna_ssg *ssg) {
}
void opna_ssg_resampler_reset(struct opna_ssg_resampler *resampler) {
- for (int i = 0; i < SINCTABLELEN; i++) {
+ for (int i = 0; i < OPNA_SSG_SINCTABLELEN; i++) {
resampler->buf[i] = 0;
}
resampler->index = 0;
@@ -215,7 +267,7 @@ void opna_ssg_generate_raw(struct opna_ssg *ssg, int16_t *buf, int samples) {
}
}
-#define BUFINDEX(n) ((((resampler->index)>>1)+n)&(SINCTABLELEN-1))
+#define BUFINDEX(n) ((((resampler->index)>>1)+n)&(OPNA_SSG_SINCTABLELEN-1))
void opna_ssg_mix_55466(
struct opna_ssg *ssg, struct opna_ssg_resampler *resampler,
@@ -246,18 +298,13 @@ void opna_ssg_mix_55466(
resampler->index += 9;
}
int32_t sample = 0;
+ resampler->index &= (1u<<(OPNA_SSG_SINCTABLEBIT+1))-1;
+ memcpy(resampler->buf + OPNA_SSG_SINCTABLELEN*3, resampler->buf, OPNA_SSG_SINCTABLELEN*3*sizeof(*resampler->buf));
+ int32_t outbuf[3];
+ opna_ssg_sinc_calc_func(resampler->index, resampler->buf, outbuf);
for (int ch = 0; ch < 3; ch++) {
- int32_t chsample = 0;
- for (int j = 0; j < SINCTABLELEN; j++) {
- unsigned sincindex = j*2;
- if (!(resampler->index&1)) sincindex++;
- bool sincsign = sincindex & (1<<(SINCTABLEBIT));
- unsigned sincmask = ((1<<(SINCTABLEBIT))-1);
- sincindex = (sincindex & sincmask) ^ (sincsign ? sincmask : 0);
- chsample += (resampler->buf[BUFINDEX(j)*3+ch] * sinctable[sincindex])>>2;
- }
- if (oscillo) oscillo[ch].buf[offset+i] = chsample >> 13;
- if (!(ssg->mask & (1<<ch))) sample += chsample;
+ if (oscillo) oscillo[ch].buf[offset+i] = outbuf[ch] >> 15;
+ if (!(ssg->mask & (1<<ch))) sample += outbuf[ch] >> 2;
}
sample >>= 16;
sample *= 13000;
diff --git a/libopna/opnassg.h b/libopna/opnassg.h
index 0321163..231db4d 100644
--- a/libopna/opnassg.h
+++ b/libopna/opnassg.h
@@ -8,6 +8,9 @@
extern "C" {
#endif
+#define OPNA_SSG_SINCTABLEBIT 7
+#define OPNA_SSG_SINCTABLELEN (1<<OPNA_SSG_SINCTABLEBIT)
+
struct opna_ssg_ch {
uint16_t tone_counter;
bool out;
@@ -30,7 +33,7 @@ struct opna_ssg {
};
struct opna_ssg_resampler {
- int16_t buf[(1<<7)*3];
+ int16_t buf[OPNA_SSG_SINCTABLELEN*3 * 2]; // 3 interleaved channels; opna_ssg_mix_55466() copies the first half into the second before filtering
unsigned index;
};
@@ -58,6 +61,14 @@ unsigned opna_ssg_readreg(const struct opna_ssg *ssg, unsigned reg);
int opna_ssg_channel_level(const struct opna_ssg *ssg, int ch);
unsigned opna_ssg_tone_period(const struct opna_ssg *ssg, int ch);
+typedef void (*opna_ssg_sinc_calc_func_type)(unsigned resampler_index, // signature of a sinc filter kernel
+ const int16_t *inbuf, int32_t *outbuf); // inbuf: 3 interleaved channels; outbuf: one int32_t sum per channel
+extern opna_ssg_sinc_calc_func_type opna_ssg_sinc_calc_func; // kernel called by opna_ssg_mix_55466(); initially opna_ssg_sinc_calc_c
+void opna_ssg_sinc_calc_c(unsigned resampler_index, // portable C kernel
+ const int16_t *inbuf, int32_t *outbuf);
+
+extern const int16_t opna_ssg_sinctable[OPNA_SSG_SINCTABLELEN*2]; // filter coefficients: two runs of OPNA_SSG_SINCTABLELEN entries, one per value of bit 0 of resampler_index
+
#ifdef __cplusplus
}
#endif