From 5460067b61f86843a0435ebb06a6ebb8223c3dca Mon Sep 17 00:00:00 2001
From: Takamichi Horikawa
Date: Fri, 31 Mar 2017 07:15:28 +0000
Subject: opnassg: 3 -> 4 samples per frame to ease SSE data load

---
 libopna/opnassg-sinc-neon.s | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'libopna/opnassg-sinc-neon.s')

diff --git a/libopna/opnassg-sinc-neon.s b/libopna/opnassg-sinc-neon.s
index 19dc3c6..1396562 100644
--- a/libopna/opnassg-sinc-neon.s
+++ b/libopna/opnassg-sinc-neon.s
@@ -22,8 +22,7 @@ opna_ssg_sinc_calc_neon:

   @ add offset to ssg input buffer address
   bic r0, #1
-  add r0, r0, lsl #1
-  add r1, r0
+  add r1, r0, lsl #2

   @ initialize output register
   vmov.i64 q12, #0
@@ -39,12 +38,12 @@ opna_ssg_sinc_calc_neon:
   blo .end

   @ load SSG channel data
-  vld3.16 {d0-d2}, [r1]!
-  vld3.16 {d3-d5}, [r1]!
-  vld3.16 {d6-d8}, [r1]!
-  vld3.16 {d9-d11}, [r1]!
-  vld3.16 {d12-d14}, [r1]!
-  vld3.16 {d15-d17}, [r1]!
+  vld4.16 {d0-d3}, [r1]!
+  vld4.16 {d3-d6}, [r1]!
+  vld4.16 {d6-d9}, [r1]!
+  vld4.16 {d9-d12}, [r1]!
+  vld4.16 {d12-d15}, [r1]!
+  vld4.16 {d15-d18}, [r1]!

   @ load sinc data
   vld1.16 {d18-d21}, [r3]!
@@ -73,7 +72,7 @@ opna_ssg_sinc_calc_neon:

 .end:
   @ 8 samples left
-  vld3.16 {d0-d2}, [r1]!
+  vld4.16 {d0-d3}, [r1]!
   vld3.16 {d3-d5}, [r1]
   vld1.16 {d18-d19}, [r3]

--
cgit v1.2.3