diff options
author | Takamichi Horikawa <takamichiho@gmail.com> | 2017-03-31 07:15:28 +0000 |
---|---|---|
committer | Takamichi Horikawa <takamichiho@gmail.com> | 2017-03-31 07:15:28 +0000 |
commit | 5460067b61f86843a0435ebb06a6ebb8223c3dca (patch) | |
tree | f10ab9c06edc70c72c141e9d31035f47a776c7d1 /libopna/opnassg-sinc-neon.s | |
parent | f47eba7d7d4c6a1d9501e027b63bbab04bb7d417 (diff) |
opnassg: 3 -> 4 samples per frame to ease SSE data load
Diffstat (limited to 'libopna/opnassg-sinc-neon.s')
-rw-r--r-- | libopna/opnassg-sinc-neon.s | 17 |
1 files changed, 8 insertions, 9 deletions
```diff
diff --git a/libopna/opnassg-sinc-neon.s b/libopna/opnassg-sinc-neon.s
index 19dc3c6..1396562 100644
--- a/libopna/opnassg-sinc-neon.s
+++ b/libopna/opnassg-sinc-neon.s
@@ -22,8 +22,7 @@ opna_ssg_sinc_calc_neon:
  @ add offset to ssg input buffer address
  bic r0, #1
- add r0, r0, lsl #1
- add r1, r0
+ add r1, r0, lsl #2
  @ initialize output register
  vmov.i64 q12, #0
@@ -39,12 +38,12 @@ opna_ssg_sinc_calc_neon:
  blo .end
  @ load SSG channel data
- vld3.16 {d0-d2}, [r1]!
- vld3.16 {d3-d5}, [r1]!
- vld3.16 {d6-d8}, [r1]!
- vld3.16 {d9-d11}, [r1]!
- vld3.16 {d12-d14}, [r1]!
- vld3.16 {d15-d17}, [r1]!
+ vld4.16 {d0-d3}, [r1]!
+ vld4.16 {d3-d6}, [r1]!
+ vld4.16 {d6-d9}, [r1]!
+ vld4.16 {d9-d12}, [r1]!
+ vld4.16 {d12-d15}, [r1]!
+ vld4.16 {d15-d18}, [r1]!
  @ load sinc data
  vld1.16 {d18-d21}, [r3]!
@@ -73,7 +72,7 @@ opna_ssg_sinc_calc_neon:
  .end:
  @ 8 samples left
- vld3.16 {d0-d2}, [r1]!
+ vld4.16 {d0-d3}, [r1]!
  vld3.16 {d3-d5}, [r1]
  vld1.16 {d18-d19}, [r3]
```