diff options
| -rw-r--r-- | fmdsp/fmdsp-vramlookup-c.c | 13 | ||||
| -rw-r--r-- | fmdsp/fmdsp-vramlookup-ssse3.c | 48 | ||||
| -rw-r--r-- | fmdsp/fmdsp.h | 3 | ||||
| -rw-r--r-- | gtk/Makefile.am | 21 | ||||
| -rw-r--r-- | gtk/configure.ac | 8 | ||||
| -rw-r--r-- | gtk/main.c | 32 | ||||
| -rw-r--r-- | libopna/opnassg.h | 2 | ||||
| -rw-r--r-- | win32/amd64/Makefile | 10 | ||||
| -rw-r--r-- | win32/fmplayer.mak | 4 | ||||
| -rw-r--r-- | win32/main.c | 79 | ||||
| -rw-r--r-- | win32/x86/Makefile | 6 | 
11 files changed, 181 insertions, 45 deletions
| diff --git a/fmdsp/fmdsp-vramlookup-c.c b/fmdsp/fmdsp-vramlookup-c.c index f900c8d..3d06f71 100644 --- a/fmdsp/fmdsp-vramlookup-c.c +++ b/fmdsp/fmdsp-vramlookup-c.c @@ -1,14 +1,17 @@  #include "fmdsp/fmdsp.h"  void fmdsp_vramlookup_c(uint8_t *vram32, const uint8_t *vram, const uint8_t *palette, int stride) { +  uint32_t palette32[FMDSP_PALETTE_COLORS]; +  for (int i = 0; i < FMDSP_PALETTE_COLORS; i++) { +    uint8_t r = palette[i*3+0]; +    uint8_t g = palette[i*3+1]; +    uint8_t b = palette[i*3+2]; +    palette32[i] = (((uint32_t)r)<<16) | (((uint32_t)g)<<8) | ((uint32_t)b); +  }    for (int y = 0; y < PC98_H; y++) {      for (int x = 0; x < PC98_W; x++) { -      uint8_t r = palette[vram[y*PC98_W+x]*3+0]; -      uint8_t g = palette[vram[y*PC98_W+x]*3+1]; -      uint8_t b = palette[vram[y*PC98_W+x]*3+2]; -      uint32_t data = (((uint32_t)r)<<16) | (((uint32_t)g)<<8) | ((uint32_t)b);        uint32_t *row = (uint32_t *)(vram32 + y*stride); -      row[x] = data; +      row[x] = palette32[vram[y*PC98_W+x]];      }    }  } diff --git a/fmdsp/fmdsp-vramlookup-ssse3.c b/fmdsp/fmdsp-vramlookup-ssse3.c new file mode 100644 index 0000000..30e7311 --- /dev/null +++ b/fmdsp/fmdsp-vramlookup-ssse3.c @@ -0,0 +1,48 @@ +#include "fmdsp/fmdsp.h" +#include <tmmintrin.h> + +void fmdsp_vramlookup_ssse3(uint8_t *vram32, const uint8_t *vram, const uint8_t *palette, int stride) { +  __m128i z = _mm_setzero_si128(); +  __m128i p[3]; +  { +    union { +      __m128i xmm; +      uint8_t u8[16]; +    } pi[3]; +    for (int i = 0; i < FMDSP_PALETTE_COLORS; i++) { +      for (int c = 0; c < 3; c++) { +        pi[c].u8[i] = palette[i*3+c]; +      } +    } +    for (int c = 0; c < 3; c++) { +      p[c] = _mm_load_si128(&pi[c].xmm); +    } +  } + +  for (int y = 0; y < PC98_H; y++) { +    for (int x = 0; x < 40; x++) { +      // 16 pixels +      __m128i v = _mm_loadu_si128((__m128i *)&vram[y*PC98_W+x*16]); + +      __m128i r = _mm_shuffle_epi8(p[0], v); +      __m128i g = _mm_shuffle_epi8(p[1], v); +      __m128i b = _mm_shuffle_epi8(p[2], v); + +      __m128i gb[2], zr[2]; +      gb[0] = _mm_unpacklo_epi8(b, g); +      gb[1] = _mm_unpackhi_epi8(b, g); +      zr[0] = _mm_unpacklo_epi8(r, z); +      zr[1] = _mm_unpackhi_epi8(r, z); +       +      __m128i o[4]; +      o[0] = _mm_unpacklo_epi16(gb[0], zr[0]); +      o[1] = _mm_unpackhi_epi16(gb[0], zr[0]); +      o[2] = _mm_unpacklo_epi16(gb[1], zr[1]); +      o[3] = _mm_unpackhi_epi16(gb[1], zr[1]); +      for (int i = 0; i < 4; i++) { +        _mm_storeu_si128((__m128i *)&vram32[(x*4+i)*16], o[i]); +      } +    } +    vram32 += stride; +  } +} diff --git a/fmdsp/fmdsp.h b/fmdsp/fmdsp.h index a7e4aab..a8c3edc 100644 --- a/fmdsp/fmdsp.h +++ b/fmdsp/fmdsp.h @@ -59,6 +59,9 @@ void fmdsp_vramlookup_c(uint8_t *vram32,                          const uint8_t *vram,                          const uint8_t *palette,                          int stride); + +void fmdsp_vramlookup_neon(uint8_t *, const uint8_t *, const uint8_t *, int); +void fmdsp_vramlookup_ssse3(uint8_t *, const uint8_t *, const uint8_t *, int);  #ifdef __cplusplus  }  #endif diff --git a/gtk/Makefile.am b/gtk/Makefile.am index 0a9a95b..5d75f84 100644 --- a/gtk/Makefile.am +++ b/gtk/Makefile.am @@ -18,12 +18,29 @@ FMDSP_SRC=../fmdsp/fmdsp.c \            ../fmdsp/font_rom.c \            ../fmdsp/font_fmdsp_small.c +#fmplayer_CFLAGS=$(CFLAGS) +#CFLAGS= +fmplayer_CPPFLAGS=-Wall -Wextra -pedantic \ +                  -I.. \ +                  $(GTK3_CFLAGS) $(PORTAUDIO_CFLAGS) +fmplayer_LDADD=$(GTK3_LIBS) $(PORTAUDIO_LIBS) +  if ENABLE_NEON  LIBOPNA_SRC+=../libopna/opnassg-sinc-neon.s  FMDSP_SRC+=../fmdsp/fmdsp-vramlookup-neon.s  fmplayer_CCASFLAGS=-march=armv8-a -mfpu=crypto-neon-fp-armv8  endif +if ENABLE_SSE +noinst_LIBRARIES=libsse.a +fmplayer_LDADD+=libsse.a +libsse_a_SOURCES=../libopna/opnassg-sinc-sse2.c \ +                 ../fmdsp/fmdsp-vramlookup-ssse3.c +libsse_a_CPPFLAGS=$(fmplayer_CPPFLAGS) +#no way to add -O3?? (always overridden by CFLAGS) +libsse_a_CFLAGS=-mssse3 +endif +  fmplayer_SOURCES=main.c \                   toneview.c \                   oscilloview.c \ @@ -34,7 +51,3 @@ fmplayer_SOURCES=main.c \                   $(FMDRIVER_SRC) \                   $(FMDSP_SRC) -fmplayer_CPPFLAGS=-Wall -Wextra -pedantic \ -                  -I.. \ -                  $(GTK3_CFLAGS) $(PORTAUDIO_CFLAGS) -fmplayer_LDADD=$(GTK3_LIBS) $(PORTAUDIO_LIBS) diff --git a/gtk/configure.ac b/gtk/configure.ac index 2727888..f49bd74 100644 --- a/gtk/configure.ac +++ b/gtk/configure.ac @@ -2,6 +2,8 @@ AC_INIT([fmplayer], [0.1.0])  AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects])  AM_SILENT_RULES([yes])  AC_PROG_CC_C99 +AC_PROG_RANLIB +AM_PROG_AR  AM_PROG_AS  dnl AM_PATH_SDL2([2.0.5]) @@ -14,5 +16,11 @@ AS_IF([test "x$enable_neon" = "xyes"], [    AC_DEFINE([ENABLE_NEON])  ]) +AC_CHECK_HEADER([emmintrin.h], [emmintrin_found=yes]) +AM_CONDITIONAL([ENABLE_SSE], [test "x$emmintrin_found" = "xyes"]) +AS_IF([test "x$emmintrin_found" = "xyes"], [ +  AC_DEFINE([ENABLE_SSE]) +]) +  AC_CONFIG_FILES([Makefile])  AC_OUTPUT @@ -55,7 +55,9 @@ static struct {    const char *current_uri;    bool oscillo_should_update;    struct oscillodata oscillodata_audiothread[LIBOPNA_OSCILLO_TRACK_COUNT]; -} g; +} g = { +  .oscillo_should_update = true +};  static void quit(void) {    if (g.pastream) { @@ -67,18 +69,26 @@ static void quit(void) {  }  static void on_destroy(GtkWidget *w, gpointer ptr) { +  (void)w; +  (void)ptr;    quit();  }  static void on_menu_quit(GtkMenuItem *menuitem, gpointer ptr) { +  (void)menuitem; +  (void)ptr;    quit();  }  static void on_tone_view(GtkMenuItem *menuitem, gpointer ptr) { +  (void)menuitem; +  (void)ptr;    show_toneview();  }  static void on_oscillo_view(GtkMenuItem *menuitem, gpointer ptr) { +  (void)menuitem; +  (void)ptr;    show_oscilloview();  } @@ -95,6 +105,9 @@ static int pastream_cb(const void *inptr, void *outptr, unsigned long frames,                         const PaStreamCallbackTimeInfo *timeinfo,                         PaStreamCallbackFlags statusFlags,                         void *userdata) { +  (void)inptr; +  (void)timeinfo; +  (void)statusFlags;    struct opna_timer *timer = (struct opna_timer *)userdata;    int16_t *buf = (int16_t *)outptr;    memset(outptr, 0, sizeof(int16_t)*frames*2); @@ -133,7 +146,8 @@ static void opna_writereg_libopna(struct fmdriver_work *work, unsigned addr, uns  }  static unsigned opna_readreg_libopna(struct fmdriver_work *work, unsigned addr) { -  struct opna_timer *timer = (struct opna_timer *)work->opna; +  (void)work; +  //struct opna_timer *timer = (struct opna_timer *)work->opna;    return opna_readreg(&g.opna, addr);  } @@ -280,7 +294,7 @@ static bool openfile(const char *uri) {    g.pa_paused = false;    {      const char *turi = strdup(uri); -    free(g.current_uri); +    free((void *)g.current_uri);      g.current_uri = turi;    }    return true; @@ -290,6 +304,7 @@ err:  }  static void on_file_activated(GtkFileChooser *chooser, gpointer ptr) { +  (void)ptr;    gchar *filename = gtk_file_chooser_get_uri(chooser);    if (filename) {      openfile(filename); @@ -326,6 +341,8 @@ static GtkWidget *create_menubar() {  static gboolean draw_cb(GtkWidget *w,                   cairo_t *cr,                   gpointer p) { +  (void)w; +  (void)p;    fmdsp_update(&g.fmdsp, &g.work, &g.opna, g.vram);    fmdsp_vrampalette(&g.fmdsp, g.vram, g.vram32, g.vram32_stride);    cairo_surface_t *s = cairo_image_surface_create_for_data( @@ -341,6 +358,7 @@ static gboolean draw_cb(GtkWidget *w,  static gboolean tick_cb(GtkWidget *w,                          GdkFrameClock *frame_clock,                          gpointer p) { +  (void)w;    (void)frame_clock;    gtk_widget_queue_draw(GTK_WIDGET(p));    return G_SOURCE_CONTINUE; @@ -472,6 +490,7 @@ static void drag_data_recv_cb(    gint x, gint y,    GtkSelectionData *data,    guint info, guint time, gpointer ptr) { +  (void)w;    (void)x;    (void)y;    (void)info; @@ -484,14 +503,15 @@ static void drag_data_recv_cb(    gtk_drag_finish(ctx, TRUE, FALSE, time);  } -void opna_ssg_sinc_calc_neon(unsigned, const int16_t *, int32_t *); -void fmdsp_vramlookup_neon(uint8_t *, const uint8_t *, const uint8_t *, int); -  int main(int argc, char **argv) {  #ifdef ENABLE_NEON    opna_ssg_sinc_calc_func = opna_ssg_sinc_calc_neon;    fmdsp_vramlookup_func = fmdsp_vramlookup_neon;  #endif +#ifdef ENABLE_SSE +  if (__builtin_cpu_supports("sse2")) opna_ssg_sinc_calc_func = opna_ssg_sinc_calc_sse2; +  if (__builtin_cpu_supports("ssse3")) fmdsp_vramlookup_func = fmdsp_vramlookup_ssse3; +#endif    load_fontrom();    gtk_init(&argc, &argv);    GtkWidget *w = gtk_window_new(GTK_WINDOW_TOPLEVEL); diff --git a/libopna/opnassg.h b/libopna/opnassg.h index 223d542..aadf53e 100644 --- a/libopna/opnassg.h +++ b/libopna/opnassg.h @@ -66,6 +66,8 @@ typedef void (*opna_ssg_sinc_calc_func_type)(unsigned resampler_index,  extern opna_ssg_sinc_calc_func_type opna_ssg_sinc_calc_func;  void opna_ssg_sinc_calc_c(unsigned resampler_index,                            const int16_t *inbuf, int32_t *outbuf); +void opna_ssg_sinc_calc_neon(unsigned, const int16_t *, int32_t *); +void opna_ssg_sinc_calc_sse2(unsigned, const int16_t *, int32_t *);  extern const int16_t opna_ssg_sinctable[OPNA_SSG_SINCTABLELEN*2]; diff --git a/win32/amd64/Makefile b/win32/amd64/Makefile index 177ef90..22ef073 100644 --- a/win32/amd64/Makefile +++ b/win32/amd64/Makefile @@ -9,14 +9,16 @@ vpath %.rc ..  include ../fmplayer.mak  OBJS=$(addsuffix .o,$(OBJBASE) $(RESBASE)) +OBJS+=$(addsuffix .sse.o,$(SSEOBJBASE))  ARCH=x86_64  PREFIX=$(ARCH)-w64-mingw32-  CC=$(PREFIX)gcc  WINDRES=$(PREFIX)windres  STRIP=$(PREFIX)strip -CFLAGS=-std=c99 -O2 -Wall -Wextra -Werror -pedantic -Wno-unused-parameter -Wno-missing-field-initializers -I../.. \ +CFLAGS=-std=c99 -O2 -Wall -Werror -Wextra -pedantic -Wno-unused-parameter -Wno-missing-field-initializers -I../.. \         $(addprefix -D,$(DEFINES)) -LIBS=-s -mwindows -municode \ +SSECFLAGS=-mssse3 -O3 +LIBS=-mwindows -municode \       $(addprefix -l,$(LIBBASE))  $(TARGET):	$(OBJS) @@ -28,6 +30,10 @@ $(TARGET):	$(OBJS)  	@echo "  CC       $@"  	@$(CC) $(CFLAGS) -c $< -o $@ +%.sse.o:	%.c +	@echo "  CC       $@" +	@$(CC) $(CFLAGS) $(SSECFLAGS) -c $< -o $@ +  %.o:	%.rc $(ICON)  	@echo "  WINDRES  $@"  	@$(WINDRES) -o $@ -i $< diff --git a/win32/fmplayer.mak b/win32/fmplayer.mak index 1f68f4d..180de1c 100644 --- a/win32/fmplayer.mak +++ b/win32/fmplayer.mak @@ -15,12 +15,16 @@ LIBOPNA_OBJS=opna \               opnatimer \               opnafm \               opnassg \ +             opnassg-sinc-c \               opnadrum \               opnaadpcm  FMDSP_OBJS=fmdsp \ +           fmdsp-vramlookup-c \             font_rom \             font_fmdsp_small  TONEDATA_OBJS=tonedata +SSEOBJBASE=opnassg-sinc-sse2 \ +           fmdsp-vramlookup-ssse3  OBJBASE=main \          toneview \          oscilloview \ diff --git a/win32/main.c b/win32/main.c index ab038ae..8dc6bb8 100644 --- a/win32/main.c +++ b/win32/main.c @@ -52,6 +52,7 @@ static struct {    struct fmplayer_file *fmfile;    struct fmdsp fmdsp;    uint8_t vram[PC98_W*PC98_H]; +  //uint8_t *vram;    struct fmdsp_font font;    uint8_t fontrom[FONT_ROM_FILESIZE];    bool font_loaded; @@ -66,6 +67,8 @@ static struct {    bool fmdsp_2x;    struct oscillodata oscillodata_audiothread[LIBOPNA_OSCILLO_TRACK_COUNT];    UINT mmtimer; +  HBITMAP bitmap_vram; +  uint8_t *vram32;  } g;  HWND g_currentdlg; @@ -445,6 +448,20 @@ static void CALLBACK mmtimer_cb(UINT timerid, UINT msg,  static bool on_create(HWND hwnd, CREATESTRUCT *cs) {    (void)cs; +  struct bitmap_info_fmdsp { +    BITMAPINFOHEADER head; +    RGBQUAD colors[FMDSP_PALETTE_COLORS]; +  } bmi = {0}; +  bmi.head.biSize = sizeof(bmi.head); +  bmi.head.biWidth = PC98_W; +  bmi.head.biHeight = -PC98_H; +  bmi.head.biPlanes = 1; +  bmi.head.biBitCount = 32; +  bmi.head.biCompression = BI_RGB; +  //bmi.head.biClrUsed = FMDSP_PALETTE_COLORS; +  g.bitmap_vram = CreateDIBSection( +    0, (BITMAPINFO *)&bmi, DIB_RGB_COLORS, (void **)&g.vram32, 0, 0 +  );    HWND button = CreateWindowEx(      0,      L"BUTTON", @@ -563,40 +580,26 @@ static void on_destroy(HWND hwnd) {  static void on_paint(HWND hwnd) {    fmdsp_update(&g.fmdsp, &g.work, &g.opna, g.vram); +  fmdsp_vrampalette(&g.fmdsp, g.vram, g.vram32, PC98_W*4);    PAINTSTRUCT ps; -  static BITMAPINFO *bi = 0; -  if (!bi) { -    bi = HeapAlloc(g.heap, HEAP_ZERO_MEMORY, -              sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD)*FMDSP_PALETTE_COLORS); -    if (!bi) return; -    bi->bmiHeader.biSize = sizeof(bi->bmiHeader); -    bi->bmiHeader.biWidth = PC98_W; -    bi->bmiHeader.biHeight = -PC98_H; -    bi->bmiHeader.biPlanes = 1; -    bi->bmiHeader.biBitCount = 8; -    bi->bmiHeader.biCompression = BI_RGB; -    bi->bmiHeader.biClrUsed = FMDSP_PALETTE_COLORS; -  } -  for (int p = 0; p < FMDSP_PALETTE_COLORS; p++) { -    bi->bmiColors[p].rgbRed = g.fmdsp.palette[p*3+0]; -    bi->bmiColors[p].rgbGreen = g.fmdsp.palette[p*3+1]; -    bi->bmiColors[p].rgbBlue = g.fmdsp.palette[p*3+2]; -  }    HDC dc = BeginPaint(hwnd, &ps);    HDC mdc = CreateCompatibleDC(dc); -  HBITMAP bitmap = CreateDIBitmap( -    dc, -    &bi->bmiHeader, CBM_INIT, -    g.vram, -    bi, DIB_RGB_COLORS); -  SelectObject(mdc, bitmap); +  SelectObject(mdc, g.bitmap_vram); +  /* +  RGBQUAD palette[FMDSP_PALETTE_COLORS]; +  for (int p = 0; p < FMDSP_PALETTE_COLORS; p++) { +    palette[p].rgbRed = g.fmdsp.palette[p*3+0]; +    palette[p].rgbGreen = g.fmdsp.palette[p*3+1]; +    palette[p].rgbBlue = g.fmdsp.palette[p*3+2]; +  } +  SetDIBColorTable(mdc, 0, FMDSP_PALETTE_COLORS, palette); +  */    if (g.fmdsp_2x) {      StretchBlt(dc, 0, 80, 1280, 800, mdc, 0, 0, 640, 400, SRCCOPY);    } else {      BitBlt(dc, 0, 80, 640, 400, mdc, 0, 0, SRCCOPY);    }    DeleteDC(mdc); -  DeleteObject(bitmap);    EndPaint(hwnd, &ps);  } @@ -665,8 +668,20 @@ static LRESULT CALLBACK wndproc(    HANDLE_MSG(hwnd, WM_SYSKEYUP, on_syskey);    HANDLE_MSG(hwnd, WM_ACTIVATE, on_activate);    case WM_USER: -    InvalidateRect(hwnd, 0, FALSE); -    return 0; +    { +      RECT r = { +        .left = 0, +        .top = 80, +        .right = 640, +        .bottom = 480, +      }; +      if (g.fmdsp_2x) { +        r.right = 1280; +        r.bottom = 880; +      } +      InvalidateRect(hwnd, &r, FALSE); +      return 0; +    }    }    return DefWindowProc(hwnd, msg, wParam, lParam);  } @@ -688,6 +703,9 @@ int CALLBACK wWinMain(HINSTANCE hinst, HINSTANCE hpinst,    (void)hpinst;    (void)cmdline_; +  if (__builtin_cpu_supports("sse2")) opna_ssg_sinc_calc_func = opna_ssg_sinc_calc_sse2; +  if (__builtin_cpu_supports("ssse3")) fmdsp_vramlookup_func = fmdsp_vramlookup_ssse3; +    const wchar_t *argfile = 0;    {      wchar_t *cmdline = GetCommandLine(); @@ -725,9 +743,14 @@ int CALLBACK wWinMain(HINSTANCE hinst, HINSTANCE hpinst,    wr.top = 0;    wr.bottom = 480;    AdjustWindowRectEx(&wr, style, 0, exStyle); +#ifdef _WIN64 +#define WIN64STR "(amd64)" +#else +#define WIN64STR "" +#endif    g.mainwnd = CreateWindowEx(      exStyle, -    (wchar_t*)((uintptr_t)wcatom), L"FMPlayer/Win32 v" FMPLAYER_VERSION_STR, +    (wchar_t*)((uintptr_t)wcatom), L"FMPlayer/Win32 " WIN64STR " v" FMPLAYER_VERSION_STR,      style,      CW_USEDEFAULT, CW_USEDEFAULT,      wr.right-wr.left, wr.bottom-wr.top, diff --git a/win32/x86/Makefile b/win32/x86/Makefile index b26f3e1..6146d28 100644 --- a/win32/x86/Makefile +++ b/win32/x86/Makefile @@ -9,6 +9,7 @@ vpath %.rc ..  include ../fmplayer.mak  OBJS=$(addsuffix .o,$(OBJBASE) $(RESBASE)) +OBJS+=$(addsuffix .sse.o,$(SSEOBJBASE))  ARCH=i686  PREFIX=$(ARCH)-w64-mingw32-  CC=$(PREFIX)gcc @@ -17,6 +18,7 @@ STRIP=$(PREFIX)strip  CFLAGS=-std=c99 -O2 -Wall -Wextra -Werror -pedantic -I../.. \         $(addprefix -D,$(DEFINES)) \         -march=i586 -Wno-unused-parameter -Wno-missing-field-initializers +SSECFLAGS=-mssse3 -O3  LIBS=-s -mwindows -municode \       $(addprefix -l,$(LIBBASE)) @@ -29,6 +31,10 @@ $(TARGET):	$(OBJS)  	@echo "  CC       $@"  	@$(CC) $(CFLAGS) -c $< -o $@ +%.sse.o:	%.c +	@echo "  CC       $@" +	@$(CC) $(CFLAGS) $(SSECFLAGS) -c $< -o $@ +  %.o:	%.rc $(ICON)  	@echo "  WINDRES  $@"  	@$(WINDRES) -o $@ -i $< | 
