annotate arm/simple_idct_neon.S @ 10163:24c03529ccbe libavcodec

Fix a typo in the documentation. Patch by Lars Täuber: firstname taeuber gmx net
author benoit
date Fri, 11 Sep 2009 06:20:05 +0000
parents c65cfd4ad000
children be725249ea67
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
1 /*
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
2 * ARM NEON IDCT
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
3 *
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
4 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
5 *
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
6 * Based on Simple IDCT
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
7 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
8 *
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
9 * This file is part of FFmpeg.
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
10 *
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
11 * FFmpeg is free software; you can redistribute it and/or
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
12 * modify it under the terms of the GNU Lesser General Public
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
13 * License as published by the Free Software Foundation; either
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
14 * version 2.1 of the License, or (at your option) any later version.
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
15 *
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
16 * FFmpeg is distributed in the hope that it will be useful,
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
19 * Lesser General Public License for more details.
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
20 *
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
21 * You should have received a copy of the GNU Lesser General Public
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
22 * License along with FFmpeg; if not, write to the Free Software
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
24 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
25
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
26 #include "asm.S"
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
27
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * Fixed-point IDCT constants.
 *   W1..W7    cosine coefficients scaled by sqrt(2) * (1 << 14)
 *   W4c       column-pass rounding term (1 << (COL_SHIFT-1)) pre-divided
 *             by W4, so it can be added to col[0] before the W4 multiply
 *             (see idct_col4_neon)
 *   ROW_SHIFT right shift applied to the row-pass results
 *   COL_SHIFT total right shift of the column pass
 *   w1..w4c   NEON scalar lanes of d0/d1 holding the values above, in the
 *             order they are stored in the idct_coeff_neon table
 */
28 #define W1 22725 //cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
29 #define W2 21407 //cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
30 #define W3 19266 //cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
31 #define W4 16383 //cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 gives 16384; this is one less -- NOTE(review): presumably intentional, confirm against the C simple_idct
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
32 #define W5 12873 //cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
33 #define W6 8867 //cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
34 #define W7 4520 //cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
35 #define W4c ((1<<(COL_SHIFT-1))/W4)
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
36 #define ROW_SHIFT 11
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
37 #define COL_SHIFT 20
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
38
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
39 #define w1 d0[0]
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
40 #define w2 d0[1]
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
41 #define w3 d0[2]
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
42 #define w4 d0[3]
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
43 #define w5 d1[0]
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
44 #define w6 d1[1]
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
45 #define w7 d1[2]
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
46 #define w4c d1[3]
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
47
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
48 .fpu neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
49
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_col4_top: first part of a 4-lane 1-D IDCT step, expanded in both
 * idct_row4_neon and idct_col4_neon.
 *
 * In:   q15 = W4 * col[0] plus the rounding term (prepared by the caller)
 *       d4 = col[1], d6 = col[2], d8 = col[3]
 *       d0/d1 = coefficient lanes w1..w7
 * Out:  q11 = q15 + W2*col[2]      q12 = q15 + W6*col[2]
 *       q13 = q15 - W6*col[2]      q14 = q15 - W2*col[2]
 *       q9  = W1*col[1] + W3*col[3]
 *       q10 = W3*col[1] - W7*col[3]
 *       q5  = W5*col[1] - W1*col[3]
 *       q6  = W7*col[1] - W5*col[3]
 * Also overwrites q7 and q8.  Only NEON arithmetic is used here, so the
 * integer condition flags are left as they were; idct_col4_neon expands
 * this macro between an orrs and the conditional instructions that use it.
 */
50 .macro idct_col4_top
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
51 vmull.s16 q7, d6, w2 /* q7 = W2 * col[2] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
52 vmull.s16 q8, d6, w6 /* q8 = W6 * col[2] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
53 vmull.s16 q9, d4, w1 /* q9 = W1 * col[1] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
54 vadd.i32 q11, q15, q7 /* q11 = q15 + W2 * col[2] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
55 vmull.s16 q10, d4, w3 /* q10 = W3 * col[1] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
56 vadd.i32 q12, q15, q8 /* q12 = q15 + W6 * col[2] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
57 vmull.s16 q5, d4, w5 /* q5 = W5 * col[1] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
58 vsub.i32 q13, q15, q8 /* q13 = q15 - W6 * col[2] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
59 vmull.s16 q6, d4, w7 /* q6 = W7 * col[1] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
60 vsub.i32 q14, q15, q7 /* q14 = q15 - W2 * col[2] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
61
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
62 vmlal.s16 q9, d8, w3 /* q9 += W3 * col[3] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
63 vmlsl.s16 q10, d8, w7 /* q10 -= W7 * col[3] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
64 vmlsl.s16 q5, d8, w1 /* q5 -= W1 * col[3] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
65 vmlsl.s16 q6, d8, w5 /* q6 -= W5 * col[3] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
66 .endm
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
67
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
68 .text
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
69 .align 6
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
70
9724
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
/*
 * idct_row4_pld_neon: issue cache preloads for the eight addresses
 * r0 + k*r1, k = 0..7 (the callers in this file pass dst in r0 and
 * line_size in r1).  Overwrites r3.
 *
 * There is no return instruction in this body: execution runs on into
 * whatever is assembled next, which in this file is idct_row4_neon.
 * NOTE(review): this assumes the function/.endfunc macros from asm.S emit
 * no code or padding between the two bodies -- confirm.
 */
71 function idct_row4_pld_neon
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
72 pld [r0] /* line 0 */
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
73 add r3, r0, r1, lsl #2 /* r3 = r0 + 4*r1 */
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
74 pld [r0, r1] /* line 1 */
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
75 pld [r0, r1, lsl #1] /* line 2 */
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
76 pld [r3, -r1] /* line 3 */
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
77 pld [r3] /* line 4 */
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
78 pld [r3, r1] /* line 5 */
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
79 add r3, r3, r1, lsl #1 /* r3 = r0 + 6*r1 */
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
80 pld [r3] /* line 6 */
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
81 pld [r3, r1] /* line 7 */
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
82 .endfunc
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
83
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_row4_neon: first (row) pass over 64 bytes of 16-bit coefficients,
 * four lanes at a time, in place.
 *
 * In:   r2 = pointer to the 64 bytes (the loads/stores carry a 128-bit
 *            alignment hint); d0/d1 = coefficient lanes (see idct_start)
 * Out:  the 64 bytes at the original r2 are overwritten with the results,
 *       each shifted right by ROW_SHIFT and transposed in 4x4 groups;
 *       r2 ends up 64 bytes past its entry value
 * Clobbers: r3, r4, condition flags, q1-q15
 *
 * d3, d5, d7 and d9 (named col[4]..col[7] in the comments below) are OR-ed
 * together; when the result is zero their contributions are skipped.
 */
84 function idct_row4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
85 vmov.i32 q15, #(1<<(ROW_SHIFT-1)) /* q15 = rounding term */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
86 vld1.64 {d2-d5}, [r2,:128]!
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
87 vmlal.s16 q15, d2, w4 /* q15 += W4 * col[0] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
88 vld1.64 {d6,d7}, [r2,:128]!
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
89 vorr d10, d3, d5 /* start OR-ing d3, d5, d7, d9 together */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
90 vld1.64 {d8,d9}, [r2,:128]!
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
91 add r2, r2, #-64 /* rewind r2 over the 64 bytes just loaded */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
92
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
93 vorr d11, d7, d9
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
94 vorr d10, d10, d11
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
95 vmov r3, r4, d10 /* r3:r4 = d3 | d5 | d7 | d9 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
96
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
97 idct_col4_top
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
98
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
99 orrs r3, r3, r4 /* Z set when d3, d5, d7 and d9 are all zero */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
100 beq 1f /* nothing to add from col[4]..col[7] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
101
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
102 vmull.s16 q7, d3, w4 /* q7 = W4 * col[4] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
103 vmlal.s16 q9, d5, w5 /* q9 += W5 * col[5] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
104 vmlsl.s16 q10, d5, w1 /* q10 -= W1 * col[5] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
105 vmull.s16 q8, d7, w2 /* q8 = W2 * col[6] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
106 vmlal.s16 q5, d5, w7 /* q5 += W7 * col[5] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
107 vadd.i32 q11, q11, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
108 vsub.i32 q12, q12, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
109 vsub.i32 q13, q13, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
110 vadd.i32 q14, q14, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
111 vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
112 vmull.s16 q7, d7, w6 /* q7 = W6 * col[6] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
113 vmlal.s16 q9, d9, w7 /* q9 += W7 * col[7] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
114 vmlsl.s16 q10, d9, w5 /* q10 -= W5 * col[7] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
115 vmlal.s16 q5, d9, w3 /* q5 += W3 * col[7] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
116 vmlsl.s16 q6, d9, w1 /* q6 -= W1 * col[7] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
117 vadd.i32 q11, q11, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
118 vsub.i32 q12, q12, q8
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
119 vadd.i32 q13, q13, q8
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
120 vsub.i32 q14, q14, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
121
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
122 1: vadd.i32 q3, q11, q9 /* inputs d6-d9 are dead here; q3/q4 become temporaries */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
123 vadd.i32 q4, q12, q10
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
124 vshrn.i32 d2, q3, #ROW_SHIFT /* d2 = (q11 + q9) >> ROW_SHIFT, narrowed to 16 bits */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
125 vshrn.i32 d4, q4, #ROW_SHIFT /* d4 = (q12 + q10) >> ROW_SHIFT */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
126 vadd.i32 q7, q13, q5
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
127 vadd.i32 q8, q14, q6
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
128 vtrn.16 d2, d4 /* vtrn.16 + vtrn.32 pairs: 4x4 transpose of d2, d4, d6, d8 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
129 vshrn.i32 d6, q7, #ROW_SHIFT /* d6 = (q13 + q5) >> ROW_SHIFT */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
130 vshrn.i32 d8, q8, #ROW_SHIFT /* d8 = (q14 + q6) >> ROW_SHIFT */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
131 vsub.i32 q14, q14, q6
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
132 vsub.i32 q11, q11, q9
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
133 vtrn.16 d6, d8
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
134 vsub.i32 q13, q13, q5
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
135 vshrn.i32 d3, q14, #ROW_SHIFT /* d3 = (q14 - q6) >> ROW_SHIFT */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
136 vtrn.32 d2, d6
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
137 vsub.i32 q12, q12, q10
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
138 vtrn.32 d4, d8
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
139 vshrn.i32 d5, q13, #ROW_SHIFT /* d5 = (q13 - q5) >> ROW_SHIFT */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
140 vshrn.i32 d7, q12, #ROW_SHIFT /* d7 = (q12 - q10) >> ROW_SHIFT */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
141 vshrn.i32 d9, q11, #ROW_SHIFT /* d9 = (q11 - q9) >> ROW_SHIFT */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
142
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
143 vtrn.16 d3, d5 /* same 4x4 transpose for d3, d5, d7, d9 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
144 vtrn.16 d7, d9
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
145 vtrn.32 d3, d7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
146 vtrn.32 d5, d9
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
147
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
148 vst1.64 {d2-d5}, [r2,:128]! /* write back over the input; r2 += 32 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
149 vst1.64 {d6-d9}, [r2,:128]! /* r2 += 32 again: 64 past the entry value */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
150
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
151 bx lr
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
152 .endfunc
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
153
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_col4_neon: second (column) pass for four lanes.
 *
 * In:   r2 = pointer to the first of eight 8-byte groups spaced 16 bytes
 *            apart (col[0]..col[7]); d0/d1 = coefficient lanes
 * Out:  d2..d9 = high 16 bits of the eight 32-bit results:
 *         d2 = hi(q11 + q9)    d9 = hi(q11 - q9)
 *         d3 = hi(q12 + q10)   d8 = hi(q12 - q10)
 *         d4 = hi(q13 + q5)    d7 = hi(q13 - q5)
 *         d5 = hi(q14 + q6)    d6 = hi(q14 - q6)
 *       r2 is advanced by 8 * 16 = 128 bytes
 * Clobbers: r4-r7, ip, condition flags, q1-q15
 *
 * col[0]..col[3] are always loaded.  For col[4]..col[7] the 8 bytes are
 * first read into core registers with ldrd; when they are all zero the
 * NEON load and multiplies are skipped and r2 is stepped by 16 instead.
 */
154 function idct_col4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
155 mov ip, #16 /* byte stride between successive col[n] groups */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
156 vld1.64 {d2}, [r2,:64], ip /* d2 = col[0] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
157 vdup.16 d30, w4c
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
158 vld1.64 {d4}, [r2,:64], ip /* d4 = col[1] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
159 vadd.i16 d30, d30, d2 /* d30 = col[0] + W4c */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
160 vld1.64 {d6}, [r2,:64], ip /* d6 = col[2] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
161 vmull.s16 q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
162 vld1.64 {d8}, [r2,:64], ip /* d8 = col[3] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
163
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
164 ldrd r4, [r2] /* r4:r5 = raw bits of col[4] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
165 ldrd r6, [r2, #16] /* r6:r7 = raw bits of col[5] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
166 orrs r4, r4, r5 /* Z set when col[4] is all zero */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
167
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
168 idct_col4_top
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
169 addeq r2, r2, #16 /* flags still from the orrs above: skip col[4] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
170 beq 1f
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
171
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
172 vld1.64 {d3}, [r2,:64], ip /* d3 = col[4] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
173 vmull.s16 q7, d3, w4 /* q7 = W4 * col[4] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
174 vadd.i32 q11, q11, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
175 vsub.i32 q12, q12, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
176 vsub.i32 q13, q13, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
177 vadd.i32 q14, q14, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
178
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
179 1: orrs r6, r6, r7 /* Z set when col[5] is all zero */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
180 ldrd r4, [r2, #16] /* r2 is at col[5] here: r4:r5 = raw bits of col[6] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
181 addeq r2, r2, #16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
182 beq 2f
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
183
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
184 vld1.64 {d5}, [r2,:64], ip /* d5 = col[5] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
185 vmlal.s16 q9, d5, w5 /* q9 += W5 * col[5] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
186 vmlsl.s16 q10, d5, w1 /* q10 -= W1 * col[5] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
187 vmlal.s16 q5, d5, w7 /* q5 += W7 * col[5] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
188 vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
189
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
190 2: orrs r4, r4, r5 /* Z set when col[6] is all zero */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
191 ldrd r4, [r2, #16] /* r2 is at col[6] here: r4:r5 = raw bits of col[7] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
192 addeq r2, r2, #16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
193 beq 3f
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
194
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
195 vld1.64 {d7}, [r2,:64], ip /* d7 = col[6] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
196 vmull.s16 q7, d7, w6 /* q7 = W6 * col[6] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
197 vmull.s16 q8, d7, w2 /* q8 = W2 * col[6] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
198 vadd.i32 q11, q11, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
199 vsub.i32 q14, q14, q7
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
200 vsub.i32 q12, q12, q8
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
201 vadd.i32 q13, q13, q8
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
202
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
203 3: orrs r4, r4, r5 /* Z set when col[7] is all zero */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
204 addeq r2, r2, #16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
205 beq 4f
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
206
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
207 vld1.64 {d9}, [r2,:64], ip /* d9 = col[7] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
208 vmlal.s16 q9, d9, w7 /* q9 += W7 * col[7] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
209 vmlsl.s16 q10, d9, w5 /* q10 -= W5 * col[7] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
210 vmlal.s16 q5, d9, w3 /* q5 += W3 * col[7] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
211 vmlsl.s16 q6, d9, w1 /* q6 -= W1 * col[7] */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
212
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
213 4: vaddhn.i32 d2, q11, q9 /* keep only the high 16 bits of each 32-bit sum */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
214 vaddhn.i32 d3, q12, q10
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
215 vaddhn.i32 d4, q13, q5
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
216 vaddhn.i32 d5, q14, q6
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
217 vsubhn.i32 d9, q11, q9 /* the differences fill d9 down to d6 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
218 vsubhn.i32 d8, q12, q10
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
219 vsubhn.i32 d7, q13, q5
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
220 vsubhn.i32 d6, q14, q6
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
221
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
222 bx lr
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
223 .endfunc
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
224
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
225 .align 6
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
226
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_col4_st8_neon: finish the column pass and store 4 pixels on each
 * of 8 lines.
 *
 * In:   q1-q4 (d2..d9) = 16-bit results left by idct_col4_neon
 *       r0 = destination, r1 = byte stride between lines
 * Out:  32 bits written at r0, r0 + r1, ..., r0 + 7*r1 (stores carry a
 *       32-bit alignment hint); r0 is advanced by 8 * r1
 * Clobbers: d2-d5
 *
 * The shift is COL_SHIFT-16 because idct_col4_neon already dropped the low
 * 16 bits; vqshrun saturates the signed values to unsigned 8 bits.
 */
227 function idct_col4_st8_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
228 vqshrun.s16 d2, q1, #COL_SHIFT-16 /* d2 = lines 0 and 1, 4 bytes each */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
229 vqshrun.s16 d3, q2, #COL_SHIFT-16 /* d3 = lines 2 and 3 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
230 vqshrun.s16 d4, q3, #COL_SHIFT-16 /* d4 = lines 4 and 5 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
231 vqshrun.s16 d5, q4, #COL_SHIFT-16 /* d5 = lines 6 and 7 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
232 vst1.32 {d2[0]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
233 vst1.32 {d2[1]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
234 vst1.32 {d3[0]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
235 vst1.32 {d3[1]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
236 vst1.32 {d4[0]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
237 vst1.32 {d4[1]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
238 vst1.32 {d5[0]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
239 vst1.32 {d5[1]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
240
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
241 bx lr
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
242 .endfunc
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
243
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_coeff_neon: the eight 16-bit constants loaded into d0/d1 by
 * idct_start.  The order must match the w1..w4c lane defines (d0[0]..d1[3]).
 * On ARM GAS .align 4 is a power-of-two alignment (16 bytes), which matches
 * the 128-bit alignment hint on the load in idct_start.
 */
244 .section .rodata
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
245 .align 4
8506
1a629032b24a ARM: rename coefficient table in NEON IDCT
mru
parents: 8359
diff changeset
246 idct_coeff_neon:
1a629032b24a ARM: rename coefficient table in NEON IDCT
mru
parents: 8359
diff changeset
247 .short W1, W2, W3, W4, W5, W6, W7, W4c
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
248 .previous
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
249
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_start data: common prologue of the exported IDCT entry points.
 *   \data  register holding the coefficient block pointer
 * Saves r4-r7 and lr, then d8-d15; preloads \data and \data + 64; loads the
 * coefficient table into d0/d1.  Overwrites r3.  Must be paired with
 * idct_end, which pops in the reverse order.
 */
250 .macro idct_start data
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
251 push {r4-r7, lr}
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
252 pld [\data]
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
253 pld [\data, #64]
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
254 vpush {d8-d15} /* the helpers in this file overwrite q4-q7, i.e. d8-d15 */
8507
779a9c93bf61 ARM: work around linker bug with movw/movt relocations in shared libs
mru
parents: 8506
diff changeset
255 movrel r3, idct_coeff_neon /* r3 = address of the table; movrel is not defined in this file (presumably asm.S) */
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
256 vld1.64 {d0,d1}, [r3,:128] /* d0 = W1..W4, d1 = W5..W7, W4c */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
257 .endm
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
258
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_end: common epilogue matching idct_start.  Restores d8-d15 and
 * r4-r7, and returns by popping the saved lr straight into pc.
 */
259 .macro idct_end
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
260 vpop {d8-d15}
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
261 pop {r4-r7, pc}
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
262 .endm
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
263
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * ff_simple_idct_put_neon: inverse DCT of one 8x8 block, result written
 * (not added) to dst.
 *
 * In:   r0 = dst, r1 = line_size, r2 = data (per the prototype below)
 * The block at r2 is overwritten by the row pass.
 *
 * Flow: row pass over both 64-byte halves of the block, then the column
 * pass and store for the left four pixel columns, then the same for the
 * right four.
 */
264 /* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, DCTELEM *data); */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
265 function ff_simple_idct_put_neon, export=1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
266 idct_start r2
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
267
9724
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
268 bl idct_row4_pld_neon /* prefetch dst lines; that body has no return and runs on into idct_row4_neon (first 64 bytes) */
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
269 bl idct_row4_neon /* second 64 bytes */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
270 add r2, r2, #-128 /* back to the start of the block */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
271 bl idct_col4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
272 bl idct_col4_st8_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
273 sub r0, r0, r1, lsl #3 /* undo the 8 line steps made by the store */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
274 add r0, r0, #4 /* next four pixels of each line */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
275 add r2, r2, #-120 /* col pass advanced r2 by 128: net +8 bytes, the next four coefficients */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
276 bl idct_col4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
277 bl idct_col4_st8_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
278
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
279 idct_end
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
280 .endfunc
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
281
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
282 .align 6
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
283
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_col4_add8_neon: finish the column pass and add the result to the
 * 4 existing pixels on each of 8 lines.
 *
 * In:   q1-q4 (d2..d9) = 16-bit results left by idct_col4_neon
 *       r0 = destination, r1 = byte stride between lines
 * Out:  32 bits rewritten at r0, r0 + r1, ..., r0 + 7*r1; r0 is advanced
 *       by 8 * r1
 * Clobbers: ip, q1-q4, d10-d13
 *
 * Pixel loads (through r0), arithmetic and stores (through ip) are
 * interleaved; all eight loads are issued before the first store.
 */
284 function idct_col4_add8_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
285 mov ip, r0 /* ip = store pointer; r0 is used for the loads */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
286
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
287 vld1.32 {d10[0]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
288 vshr.s16 q1, q1, #COL_SHIFT-16 /* remaining shift; the first 16 bits were dropped in idct_col4_neon */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
289 vld1.32 {d10[1]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
290 vshr.s16 q2, q2, #COL_SHIFT-16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
291 vld1.32 {d11[0]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
292 vshr.s16 q3, q3, #COL_SHIFT-16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
293 vld1.32 {d11[1]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
294 vshr.s16 q4, q4, #COL_SHIFT-16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
295 vld1.32 {d12[0]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
296 vaddw.u8 q1, q1, d10 /* add the widened pixels of lines 0 and 1 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
297 vld1.32 {d12[1]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
298 vaddw.u8 q2, q2, d11 /* lines 2 and 3 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
299 vld1.32 {d13[0]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
300 vqmovun.s16 d2, q1 /* saturate to unsigned 8 bits */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
301 vld1.32 {d13[1]}, [r0,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
302 vaddw.u8 q3, q3, d12 /* lines 4 and 5 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
303 vst1.32 {d2[0]}, [ip,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
304 vqmovun.s16 d3, q2
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
305 vst1.32 {d2[1]}, [ip,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
306 vaddw.u8 q4, q4, d13 /* lines 6 and 7 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
307 vst1.32 {d3[0]}, [ip,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
308 vqmovun.s16 d4, q3
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
309 vst1.32 {d3[1]}, [ip,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
310 vqmovun.s16 d5, q4
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
311 vst1.32 {d4[0]}, [ip,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
312 vst1.32 {d4[1]}, [ip,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
313 vst1.32 {d5[0]}, [ip,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
314 vst1.32 {d5[1]}, [ip,:32], r1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
315
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
316 bx lr
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
317 .endfunc
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
318
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
319 /* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * Exported entry point. Per the C prototype in the comment just above,
 * the arguments are (uint8_t *dst, int line_size, DCTELEM *data), which
 * under the ARM procedure call standard arrive as r0 = dst,
 * r1 = line_size, r2 = data.
 *
 * Call sequence: two row-pass helpers, then two rounds of
 * idct_col4_neon + idct_col4_add8_neon, with r0 and r2 adjusted between
 * the rounds.  The helpers and the idct_start/idct_end macros are
 * defined outside this view; notes about what they do are assumptions
 * and are marked as such.
 */
320 function ff_simple_idct_add_neon, export=1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* Macro defined elsewhere; it is given r2, the register holding the
   data pointer.  Presumably the common prologue -- TODO confirm. */
321 idct_start r2
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
322
9724
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
/* First row-pass call.  Going by its name and the changeset message
   above, a PLD-prefetching variant of idct_row4_neon -- TODO confirm. */
323 bl idct_row4_pld_neon
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
324 bl idct_row4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r2 -= 128.  Presumably rewinds the data pointer to the start of the
   block after the two row passes advanced it -- TODO confirm. */
325 add r2, r2, #-128
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
326 bl idct_col4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
327 bl idct_col4_add8_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r0 -= 8 * r1 (eight line_size strides).  Presumably undoes the
   post-increments idct_col4_add8_neon applied to r0 -- TODO confirm. */
328 sub r0, r0, r1, lsl #3
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r0 += 4: move dst four bytes to the right for the second round. */
329 add r0, r0, #4
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r2 -= 120.  NOTE(review): looks like "rewind 128, then step 8 bytes"
   to reach the next group of coefficients -- confirm against
   idct_col4_neon. */
330 add r2, r2, #-120
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
331 bl idct_col4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
332 bl idct_col4_add8_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
333
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* Macro defined elsewhere; presumably the matching epilogue and
   return -- TODO confirm. */
334 idct_end
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
335 .endfunc
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
336
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
337 .align 6
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
338
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * File-local helper (declared without export=1, unlike the ff_* entry
 * points).  Shifts the 16-bit lanes of q1-q4 right and stores the
 * result to memory at r2.
 *
 * In:      q1-q4 = values to store (each q register is two d registers:
 *                  q1 = d2:d3, q2 = d4:d5, q3 = d6:d7, q4 = d8:d9)
 *          r2    = destination; the :64 qualifier tells the CPU the
 *                  address is 64-bit aligned
 * Out:     r2 advanced by 8 * 16 = 128 bytes
 * Clobber: ip, q1-q4
 *
 * Eight 8-byte stores are issued, one per d register, 16 bytes apart.
 */
339 function idct_col4_st16_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* ip = post-increment applied to r2 after every store below. */
340 mov ip, #16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
341
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* Arithmetic right shift of every signed 16-bit lane by COL_SHIFT-16.
   COL_SHIFT is defined elsewhere; presumably the first 16 bits of the
   column shift were already applied before this helper is called --
   TODO confirm. */
342 vshr.s16 q1, q1, #COL_SHIFT-16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
343 vshr.s16 q2, q2, #COL_SHIFT-16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* Stores start once q1 is shifted; the remaining shifts are
   interleaved with them (presumably for instruction scheduling). */
344 vst1.64 {d2}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
345 vshr.s16 q3, q3, #COL_SHIFT-16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
346 vst1.64 {d3}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
347 vshr.s16 q4, q4, #COL_SHIFT-16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
348 vst1.64 {d4}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
349 vst1.64 {d5}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
350 vst1.64 {d6}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
351 vst1.64 {d7}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
352 vst1.64 {d8}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
353 vst1.64 {d9}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
354
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* Return to the caller; lr is not modified in this routine. */
355 bx lr
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
356 .endfunc
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
357
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
358 /* void ff_simple_idct_neon(DCTELEM *data); */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * Exported entry point.  Per the C prototype in the comment just above,
 * the single argument is (DCTELEM *data), which under the ARM procedure
 * call standard arrives in r0.  The result is written back through the
 * same pointer by idct_col4_st16_neon.
 *
 * Call sequence: two row-pass helper calls, then two rounds of
 * idct_col4_neon + idct_col4_st16_neon.  idct_row4_neon, idct_col4_neon
 * and the idct_start/idct_end macros are defined outside this view;
 * notes about what they do are assumptions and are marked as such.
 */
359 function ff_simple_idct_neon, export=1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* Macro defined elsewhere; it is given r0, the register holding the
   data pointer.  Presumably the common prologue -- TODO confirm. */
360 idct_start r0
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
361
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* Copy the data pointer into r2, the register idct_col4_st16_neon
   stores through (and presumably the one the other helpers read
   from -- TODO confirm). */
362 mov r2, r0
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
363 bl idct_row4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
364 bl idct_row4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r2 -= 128.  Presumably rewinds the data pointer to the start of the
   block after the two row passes advanced it -- TODO confirm. */
365 add r2, r2, #-128
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
366 bl idct_col4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r2 -= 128 again, so the store helper writes from the position the
   column pass presumably started reading at -- TODO confirm that
   idct_col4_neon advances r2 by 128. */
367 add r2, r2, #-128
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
368 bl idct_col4_st16_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* idct_col4_st16_neon left r2 128 bytes past where it began storing;
   r2 -= 120 therefore puts r2 8 bytes past that starting point for the
   second round. */
369 add r2, r2, #-120
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
370 bl idct_col4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
371 add r2, r2, #-128
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
372 bl idct_col4_st16_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
373
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* Macro defined elsewhere; presumably the matching epilogue and
   return -- TODO confirm. */
374 idct_end
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
375 .endfunc