about summary refs log tree commit diff
path: root/libopna/opnassg-sinc-neon.s
diff options
context:
space:
mode:
author Takamichi Horikawa <takamichiho@gmail.com> 2017-03-31 07:15:28 +0000
committer Takamichi Horikawa <takamichiho@gmail.com> 2017-03-31 07:15:28 +0000
commit 5460067b61f86843a0435ebb06a6ebb8223c3dca (patch)
tree f10ab9c06edc70c72c141e9d31035f47a776c7d1 /libopna/opnassg-sinc-neon.s
parent f47eba7d7d4c6a1d9501e027b63bbab04bb7d417 (diff)
opnassg: 3 -> 4 samples per frame to ease SSE data load
Diffstat (limited to 'libopna/opnassg-sinc-neon.s')
-rw-r--r-- libopna/opnassg-sinc-neon.s 17
1 files changed, 8 insertions, 9 deletions
diff --git a/libopna/opnassg-sinc-neon.s b/libopna/opnassg-sinc-neon.s
index 19dc3c6..1396562 100644
--- a/libopna/opnassg-sinc-neon.s
+++ b/libopna/opnassg-sinc-neon.s
@@ -22,8 +22,7 @@ opna_ssg_sinc_calc_neon:
@ add offset to ssg input buffer address
bic r0, #1
- add r0, r0, lsl #1
- add r1, r0
+ add r1, r0, lsl #2
@ initialize output register
vmov.i64 q12, #0
@@ -39,12 +38,12 @@ opna_ssg_sinc_calc_neon:
blo .end
@ load SSG channel data
- vld3.16 {d0-d2}, [r1]!
- vld3.16 {d3-d5}, [r1]!
- vld3.16 {d6-d8}, [r1]!
- vld3.16 {d9-d11}, [r1]!
- vld3.16 {d12-d14}, [r1]!
- vld3.16 {d15-d17}, [r1]!
+ vld4.16 {d0-d3}, [r1]!
+ vld4.16 {d3-d6}, [r1]!
+ vld4.16 {d6-d9}, [r1]!
+ vld4.16 {d9-d12}, [r1]!
+ vld4.16 {d12-d15}, [r1]!
+ vld4.16 {d15-d18}, [r1]!
@ load sinc data
vld1.16 {d18-d21}, [r3]!
@@ -73,7 +72,7 @@ opna_ssg_sinc_calc_neon:
.end:
@ 8 samples left
- vld3.16 {d0-d2}, [r1]!
+ vld4.16 {d0-d3}, [r1]!
vld3.16 {d3-d5}, [r1]
vld1.16 {d18-d19}, [r3]