annotate arm/simple_idct_neon.S @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 17a110bfdeb6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * ARM NEON IDCT
 *
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * Based on Simple IDCT
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
25
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
#include "asm.S"

/*
 * Fixed-point IDCT coefficients.  W<i> follows the formula
 * cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, truncated to an integer.
 * NOTE(review): that formula gives 16384 for i = 4, whereas W4 is 16383
 * here -- presumably intentional; confirm against the C simple IDCT.
 */
#define W1  22725   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W2  21407   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W3  19266   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W4  16383   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W5  12873   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W6  8867    //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W7  4520    //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5

/*
 * Column-pass rounding bias expressed in units of W4: idct_col4_neon adds it
 * to col[0] before the multiplication by W4.  COL_SHIFT is defined further
 * down; the preprocessor only expands it where W4c is used.
 */
#define W4c ((1<<(COL_SHIFT-1))/W4)

/* Right shifts applied after the row pass and the column pass respectively. */
#define ROW_SHIFT 11
#define COL_SHIFT 20

/*
 * Scalar lanes of d0/d1 holding the coefficients.  idct_start loads them from
 * idct_coeff_neon, whose element order must match these aliases.
 */
#define w1  d0[0]
#define w2  d0[1]
#define w3  d0[2]
#define w4  d0[3]
#define w5  d1[0]
#define w6  d1[1]
#define w7  d1[2]
#define w4c d1[3]
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
47
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_col4_top: shared opening of the row and column passes, four lanes wide.
 *
 * In:   q15 = W4 * col[0] plus the rounding bias (prepared by the caller)
 *       d4  = col[1], d6 = col[2], d8 = col[3]
 *       d0/d1 = coefficient lanes w1..w7
 * Out:  q11 = q15 + W2*col[2]      q12 = q15 + W6*col[2]
 *       q13 = q15 - W6*col[2]      q14 = q15 - W2*col[2]
 *       q9  = W1*col[1] + W3*col[3]
 *       q10 = W3*col[1] - W7*col[3]
 *       q5  = W5*col[1] - W1*col[3]
 *       q6  = W7*col[1] - W5*col[3]
 * Clobbers q7 and q8 (left holding W2*col[2] and W6*col[2]).
 * Only NEON instructions are used, so the ARM condition flags set by the
 * caller before the macro are still valid after it.
 */
.macro idct_col4_top
        vmull.s16       q7,  d6,  w2    /* q7   = W2 * col[2] */
        vmull.s16       q8,  d6,  w6    /* q8   = W6 * col[2] */
        vmull.s16       q9,  d4,  w1    /* q9   = W1 * col[1] */
        vadd.i32        q11, q15, q7
        vmull.s16       q10, d4,  w3    /* q10  = W3 * col[1] */
        vadd.i32        q12, q15, q8
        vmull.s16       q5,  d4,  w5    /* q5   = W5 * col[1] */
        vsub.i32        q13, q15, q8
        vmull.s16       q6,  d4,  w7    /* q6   = W7 * col[1] */
        vsub.i32        q14, q15, q7

        vmlal.s16       q9,  d8,  w3    /* q9  += W3 * col[3] */
        vmlsl.s16       q10, d8,  w7    /* q10 -= W7 * col[3] */
        vmlsl.s16       q5,  d8,  w1    /* q5  -= W1 * col[3] */
        vmlsl.s16       q6,  d8,  w5    /* q6  -= W5 * col[3] */
.endm
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
65
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_row4_pld_neon: issue prefetches for eight lines starting at r0, spaced
 * r1 bytes apart (r0 + 0*r1 ... r0 + 7*r1).
 *
 * In:       r0 = address of the first line, r1 = line stride in bytes
 * Clobbers: r3
 *
 * There is no return instruction: execution continues into whatever is
 * assembled next, which in this file is idct_row4_neon.  Calling this entry
 * point therefore also runs one idct_row4_neon pass and returns through its
 * "bx lr".  Nothing may be inserted between the two functions.
 */
        .text
        .align 6

function idct_row4_pld_neon
        pld             [r0]
        add             r3,  r0,  r1,  lsl #2   /* r3 = r0 + 4*r1 */
        pld             [r0, r1]
        pld             [r0, r1, lsl #1]
        pld             [r3, -r1]               /* line 3 */
        pld             [r3]                    /* line 4 */
        pld             [r3, r1]                /* line 5 */
        add             r3,  r3,  r1,  lsl #1   /* r3 = r0 + 6*r1 */
        pld             [r3]
        pld             [r3, r1]
endfunc
9724
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
81
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_row4_neon: first-pass transform of 64 bytes (32 16-bit coefficients)
 * at r2, performed in place.
 *
 * In:       r2 = coefficient pointer, 16-byte aligned (":128" hints)
 *           d0/d1 = coefficient lanes w1..w7 (loaded by idct_start)
 * Out:      the 64 bytes at r2 are overwritten with the results, each group
 *           of four d registers having gone through a 4x4 transpose of its
 *           16-bit elements; r2 is advanced by 64
 * Clobbers: r3, r4, q1-q15, condition flags
 *
 * The eight transform inputs are taken lane-wise from d2, d4, d6, d8, d3, d5,
 * d7, d9 in that order (named col[0]..col[7] in the comments below).  When
 * d3, d5, d7 and d9 are all zero their contributions are skipped.
 */
function idct_row4_neon
        vmov.i32        q15, #(1<<(ROW_SHIFT-1))    /* rounding bias */
        vld1.64         {d2-d5},  [r2,:128]!
        vmlal.s16       q15, d2,  w4    /* q15 += W4 * col[0] */
        vld1.64         {d6,d7},  [r2,:128]!
        vorr            d10, d3,  d5
        vld1.64         {d8,d9},  [r2,:128]!
        add             r2,  r2,  #-64  /* back to the start of the loaded data */

        vorr            d11, d7,  d9
        vorr            d10, d10, d11   /* d10 = d3 | d5 | d7 | d9 */
        vmov            r3,  r4,  d10

        idct_col4_top

        orrs            r3,  r3,  r4
        beq             1f              /* col[4..7] all zero: nothing to add */

        vmull.s16       q7,  d3,  w4    /* q7   = W4 * col[4] */
        vmlal.s16       q9,  d5,  w5    /* q9  += W5 * col[5] */
        vmlsl.s16       q10, d5,  w1    /* q10 -= W1 * col[5] */
        vmull.s16       q8,  d7,  w2    /* q8   = W2 * col[6] */
        vmlal.s16       q5,  d5,  w7    /* q5  += W7 * col[5] */
        vadd.i32        q11, q11, q7
        vsub.i32        q12, q12, q7
        vsub.i32        q13, q13, q7
        vadd.i32        q14, q14, q7
        vmlal.s16       q6,  d5,  w3    /* q6  += W3 * col[5] */
        vmull.s16       q7,  d7,  w6    /* q7   = W6 * col[6] */
        vmlal.s16       q9,  d9,  w7    /* q9  += W7 * col[7] */
        vmlsl.s16       q10, d9,  w5    /* q10 -= W5 * col[7] */
        vmlal.s16       q5,  d9,  w3    /* q5  += W3 * col[7] */
        vmlsl.s16       q6,  d9,  w1    /* q6  -= W1 * col[7] */
        vadd.i32        q11, q11, q7
        vsub.i32        q12, q12, q8
        vadd.i32        q13, q13, q8
        vsub.i32        q14, q14, q7

        /*
         * Combine the q11..q14 and q9/q10/q5/q6 accumulators (sum and
         * difference), narrow with a right shift by ROW_SHIFT, and transpose.
         */
1:      vadd.i32        q3,  q11, q9
        vadd.i32        q4,  q12, q10
        vshrn.i32       d2,  q3,  #ROW_SHIFT
        vshrn.i32       d4,  q4,  #ROW_SHIFT
        vadd.i32        q7,  q13, q5
        vadd.i32        q8,  q14, q6
        vtrn.16         d2,  d4
        vshrn.i32       d6,  q7,  #ROW_SHIFT
        vshrn.i32       d8,  q8,  #ROW_SHIFT
        vsub.i32        q14, q14, q6
        vsub.i32        q11, q11, q9
        vtrn.16         d6,  d8
        vsub.i32        q13, q13, q5
        vshrn.i32       d3,  q14, #ROW_SHIFT
        vtrn.32         d2,  d6
        vsub.i32        q12, q12, q10
        vtrn.32         d4,  d8
        vshrn.i32       d5,  q13, #ROW_SHIFT
        vshrn.i32       d7,  q12, #ROW_SHIFT
        vshrn.i32       d9,  q11, #ROW_SHIFT

        vtrn.16         d3,  d5
        vtrn.16         d7,  d9
        vtrn.32         d3,  d7
        vtrn.32         d5,  d9

        vst1.64         {d2-d5},  [r2,:128]!
        vst1.64         {d6-d9},  [r2,:128]!

        bx              lr
endfunc
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
151
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_col4_neon: second-pass transform of four lanes.
 *
 * In:       r2 = pointer to col[0]; col[1]..col[7] follow at 16-byte steps,
 *           each being four 16-bit values, 8-byte aligned (":64" hints)
 *           d0/d1 = coefficient lanes w1..w7 and w4c
 * Out:      d2..d9 = the eight result vectors in order, each the high 16 bits
 *           of a 32-bit sum (vaddhn/vsubhn); the remaining COL_SHIFT-16 shift
 *           is left to idct_col4_st8_neon / idct_col4_add8_neon
 *           r2 is advanced by 128
 * Clobbers: r4-r7, ip, q5-q15, condition flags
 *
 * col[4]..col[7] are first read with ldrd and tested for zero; an all-zero
 * vector is skipped (r2 is still stepped by 16) instead of loaded and
 * multiplied.
 */
function idct_col4_neon
        mov             ip,  #16                /* byte step between col[n] and col[n+1] */
        vld1.64         {d2}, [r2,:64], ip      /* d2 = col[0] */
        vdup.16         d30, w4c
        vld1.64         {d4}, [r2,:64], ip      /* d4 = col[1] */
        vadd.i16        d30, d30, d2
        vld1.64         {d6}, [r2,:64], ip      /* d6 = col[2] */
        vmull.s16       q15, d30, w4    /* q15 = W4*(col[0]+(1<<(COL_SHIFT-1))/W4) */
        vld1.64         {d8}, [r2,:64], ip      /* d8 = col[3] */

        ldrd            r4,  [r2]               /* r4:r5 = col[4] */
        ldrd            r6,  [r2, #16]          /* r6:r7 = col[5] */
        orrs            r4,  r4,  r5            /* Z set when col[4] == 0 */

        idct_col4_top                           /* NEON only: flags preserved */
        addeq           r2,  r2,  #16
        beq             1f

        vld1.64         {d3}, [r2,:64], ip      /* d3 = col[4] */
        vmull.s16       q7,  d3,  w4    /* q7   = W4 * col[4] */
        vadd.i32        q11, q11, q7
        vsub.i32        q12, q12, q7
        vsub.i32        q13, q13, q7
        vadd.i32        q14, q14, q7

1:      orrs            r6,  r6,  r7            /* Z set when col[5] == 0 */
        ldrd            r4,  [r2, #16]          /* r4:r5 = col[6] */
        addeq           r2,  r2,  #16
        beq             2f

        vld1.64         {d5}, [r2,:64], ip      /* d5 = col[5] */
        vmlal.s16       q9,  d5,  w5    /* q9  += W5 * col[5] */
        vmlsl.s16       q10, d5,  w1    /* q10 -= W1 * col[5] */
        vmlal.s16       q5,  d5,  w7    /* q5  += W7 * col[5] */
        vmlal.s16       q6,  d5,  w3    /* q6  += W3 * col[5] */

2:      orrs            r4,  r4,  r5            /* Z set when col[6] == 0 */
        ldrd            r4,  [r2, #16]          /* r4:r5 = col[7] */
        addeq           r2,  r2,  #16
        beq             3f

        vld1.64         {d7}, [r2,:64], ip      /* d7 = col[6] */
        vmull.s16       q7,  d7,  w6    /* q7   = W6 * col[6] */
        vmull.s16       q8,  d7,  w2    /* q8   = W2 * col[6] */
        vadd.i32        q11, q11, q7
        vsub.i32        q14, q14, q7
        vsub.i32        q12, q12, q8
        vadd.i32        q13, q13, q8

3:      orrs            r4,  r4,  r5            /* Z set when col[7] == 0 */
        addeq           r2,  r2,  #16
        beq             4f

        vld1.64         {d9}, [r2,:64], ip      /* d9 = col[7] */
        vmlal.s16       q9,  d9,  w7    /* q9  += W7 * col[7] */
        vmlsl.s16       q10, d9,  w5    /* q10 -= W5 * col[7] */
        vmlal.s16       q5,  d9,  w3    /* q5  += W3 * col[7] */
        vmlsl.s16       q6,  d9,  w1    /* q6  -= W1 * col[7] */

        /* Sums go to d2..d5, differences to d9..d6 (reverse order). */
4:      vaddhn.i32      d2,  q11, q9
        vaddhn.i32      d3,  q12, q10
        vaddhn.i32      d4,  q13, q5
        vaddhn.i32      d5,  q14, q6
        vsubhn.i32      d9,  q11, q9
        vsubhn.i32      d8,  q12, q10
        vsubhn.i32      d7,  q13, q5
        vsubhn.i32      d6,  q14, q6

        bx              lr
endfunc
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
222
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_col4_st8_neon: finish the column pass and store the result as bytes.
 *
 * In:       q1-q4 (d2..d9) = eight vectors of four 16-bit values, as left by
 *           idct_col4_neon
 *           r0 = destination, 4-byte aligned (":32" hints); r1 = line stride
 * Out:      four bytes written to each of eight lines; r0 advanced by 8*r1
 * Clobbers: d2-d5
 *
 * vqshrun applies the remaining right shift of COL_SHIFT-16 and saturates the
 * signed 16-bit values to unsigned 8 bits.
 */
        .align 6

function idct_col4_st8_neon
        vqshrun.s16     d2,  q1,  #COL_SHIFT-16
        vqshrun.s16     d3,  q2,  #COL_SHIFT-16
        vqshrun.s16     d4,  q3,  #COL_SHIFT-16
        vqshrun.s16     d5,  q4,  #COL_SHIFT-16
        vst1.32         {d2[0]}, [r0,:32], r1
        vst1.32         {d2[1]}, [r0,:32], r1
        vst1.32         {d3[0]}, [r0,:32], r1
        vst1.32         {d3[1]}, [r0,:32], r1
        vst1.32         {d4[0]}, [r0,:32], r1
        vst1.32         {d4[1]}, [r0,:32], r1
        vst1.32         {d5[0]}, [r0,:32], r1
        vst1.32         {d5[1]}, [r0,:32], r1

        bx              lr
endfunc
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
241
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * Coefficient table loaded into d0/d1 by idct_start.  The element order must
 * match the w1..w7 / w4c lane aliases (d0[0..3], d1[0..3]).  The 16-byte
 * alignment is required by the ":128" hint on that load.
 */
        .section .rodata
        .align 4
idct_coeff_neon:
        .short W1, W2, W3, W4, W5, W6, W7, W4c
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
246
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_start data: common prologue of the exported IDCT entry points.
 *
 *   data  register holding the coefficient block address; the first 128
 *         bytes are prefetched with two pld instructions.
 *
 * Saves r4-r7, lr and d8-d15 (restored by idct_end), then loads the eight
 * 16-bit entries of idct_coeff_neon into d0/d1.
 * Clobbers r3.  movrel is not defined in this file (presumably a macro from
 * asm.S that materialises the symbol address -- confirm there).
 */
.macro idct_start data
        push            {r4-r7, lr}
        pld             [\data]
        pld             [\data, #64]
        vpush           {d8-d15}
        movrel          r3,  idct_coeff_neon
        vld1.64         {d0,d1}, [r3,:128]
.endm
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
255
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_end: common epilogue matching idct_start.  Restores d8-d15 and r4-r7,
 * and returns to the caller by popping the saved lr into pc.
 */
.macro idct_end
        vpop            {d8-d15}
        pop             {r4-r7, pc}
.endm
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
260
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, DCTELEM *data); */
/*
 * Register use on entry: r0 = dst, r1 = line_size, r2 = data.
 * Runs the row pass over all 128 bytes of data, then the column pass in two
 * groups of four lanes, storing each group to dst as saturated bytes.
 */
function ff_simple_idct_put_neon, export=1
        idct_start      r2

        bl              idct_row4_pld_neon      /* prefetch dst, falls through into a row pass */
        bl              idct_row4_neon          /* second row pass; r2 = data + 128 afterwards */
        add             r2,  r2,  #-128         /* r2 = data */
        bl              idct_col4_neon          /* lanes 0-3; r2 = data + 128 afterwards */
        bl              idct_col4_st8_neon      /* r0 = dst + 8*line_size afterwards */
        sub             r0,  r0,  r1, lsl #3    /* back to the first line ... */
        add             r0,  r0,  #4            /* ... four bytes to the right */
        add             r2,  r2,  #-120         /* r2 = data + 8: lanes 4-7 */
        bl              idct_col4_neon
        bl              idct_col4_st8_neon

        idct_end
endfunc
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
278
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * idct_col4_add8_neon: finish the column pass and add the result to the bytes
 * already at the destination.
 *
 * In:       q1-q4 (d2..d9) = eight vectors of four 16-bit values, as left by
 *           idct_col4_neon
 *           r0 = destination, 4-byte aligned (":32" hints); r1 = line stride
 * Out:      four bytes updated on each of eight lines; r0 advanced by 8*r1
 * Clobbers: ip, q1-q4, d10-d13
 *
 * Each value is shifted right by COL_SHIFT-16, the existing byte is added
 * with unsigned widening (vaddw.u8), and the sum is saturated to unsigned
 * 8 bits (vqmovun).  r0 walks the lines for the loads while ip, a copy of the
 * initial r0, walks them for the stores.
 */
        .align 6

function idct_col4_add8_neon
        mov             ip,  r0

        vld1.32         {d10[0]}, [r0,:32], r1
        vshr.s16        q1,  q1,  #COL_SHIFT-16
        vld1.32         {d10[1]}, [r0,:32], r1
        vshr.s16        q2,  q2,  #COL_SHIFT-16
        vld1.32         {d11[0]}, [r0,:32], r1
        vshr.s16        q3,  q3,  #COL_SHIFT-16
        vld1.32         {d11[1]}, [r0,:32], r1
        vshr.s16        q4,  q4,  #COL_SHIFT-16
        vld1.32         {d12[0]}, [r0,:32], r1
        vaddw.u8        q1,  q1,  d10
        vld1.32         {d12[1]}, [r0,:32], r1
        vaddw.u8        q2,  q2,  d11
        vld1.32         {d13[0]}, [r0,:32], r1
        vqmovun.s16     d2,  q1
        vld1.32         {d13[1]}, [r0,:32], r1
        vaddw.u8        q3,  q3,  d12
        vst1.32         {d2[0]},  [ip,:32], r1
        vqmovun.s16     d3,  q2
        vst1.32         {d2[1]},  [ip,:32], r1
        vaddw.u8        q4,  q4,  d13
        vst1.32         {d3[0]},  [ip,:32], r1
        vqmovun.s16     d4,  q3
        vst1.32         {d3[1]},  [ip,:32], r1
        vqmovun.s16     d5,  q4
        vst1.32         {d4[0]},  [ip,:32], r1
        vst1.32         {d4[1]},  [ip,:32], r1
        vst1.32         {d5[0]},  [ip,:32], r1
        vst1.32         {d5[1]},  [ip,:32], r1

        bx              lr
endfunc
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
315
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
316 /* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * Exported entry point for the "add" flavour of the NEON simple IDCT.
 *
 * Call sequence, as written below: idct_start, two row passes (the first
 * through the _pld variant), then twice the pair idct_col4_neon +
 * idct_col4_add8_neon, then idct_end.
 *
 * NOTE(review): per the C prototype (dst, line_size, data) the arguments
 * presumably arrive as r0 = dst, r1 = line_size, r2 = data under the ARM
 * procedure call standard; this matches idct_start being handed r2 and the
 * r0/r1 arithmetic below -- confirm against the macro definitions.
 *
 * The idct_start/idct_end macros and the idct_row4_*, idct_col4_* helpers are
 * defined elsewhere in this file; their register usage is not visible here.
 */
317 function ff_simple_idct_add_neon, export=1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
318 idct_start r2
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
319
9724
c65cfd4ad000 ARM: add some PLD in NEON IDCT
mru
parents: 8507
diff changeset
320 bl idct_row4_pld_neon
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
321 bl idct_row4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r2 -= 128. Presumably rewinds the coefficient pointer to the start of the
   block after the two row passes -- TODO confirm how far the row helpers
   advance r2. */
322 add r2, r2, #-128
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
323 bl idct_col4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
324 bl idct_col4_add8_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r0 -= 8 * r1: step the destination pointer back by eight line strides.
   Presumably undoes the advance made by idct_col4_add8_neon -- confirm. */
325 sub r0, r0, r1, lsl #3
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r0 += 4: move four bytes to the right for the second group of columns. */
326 add r0, r0, #4
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r2 -= 120. If idct_col4_neon advances r2 by 128 (not visible here), this
   leaves r2 eight bytes past where the first column pass started, i.e. at
   the next four 16-bit coefficients of each row -- TODO confirm. */
327 add r2, r2, #-120
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
328 bl idct_col4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
329 bl idct_col4_add8_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
330
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
331 idct_end
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10349
diff changeset
332 endfunc
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
333
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
334 .align 6
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * Internal helper (not exported): scale q1-q4 and store them as 16-bit data.
 *
 * Each of q1..q4 is shifted right, as signed 16-bit lanes, by COL_SHIFT-16
 * (COL_SHIFT is defined elsewhere in this file). The eight 64-bit halves
 * d2..d9 (d2/d3 = q1, d4/d5 = q2, d6/d7 = q3, d8/d9 = q4) are then written
 * to [r2], with r2 post-incremented by ip = 16 bytes after every store.
 *
 * In:       r2 = destination address; the :64 qualifier requires it to be
 *           64-bit aligned.
 *           q1-q4 = values to store. NOTE(review): presumably the column
 *           results left by idct_col4_neon -- confirm against that routine.
 * Out:      r2 advanced by 8 * 16 = 128 bytes.
 * Clobbers: ip, q1-q4.
 *
 * The shifts and stores are interleaved; keep the order when editing.
 */
336 function idct_col4_st16_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* ip = store stride in bytes. NOTE(review): 16 bytes would be one row of
   eight 16-bit coefficients -- confirm DCTELEM is 16 bits wide. */
337 mov ip, #16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
338
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
339 vshr.s16 q1, q1, #COL_SHIFT-16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
340 vshr.s16 q2, q2, #COL_SHIFT-16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
341 vst1.64 {d2}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
342 vshr.s16 q3, q3, #COL_SHIFT-16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
343 vst1.64 {d3}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
344 vshr.s16 q4, q4, #COL_SHIFT-16
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
345 vst1.64 {d4}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
346 vst1.64 {d5}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
347 vst1.64 {d6}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
348 vst1.64 {d7}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
349 vst1.64 {d8}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
350 vst1.64 {d9}, [r2,:64], ip
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
351
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* Return to the caller; lr is not modified by this routine. */
352 bx lr
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10349
diff changeset
353 endfunc
8335
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
354
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
355 /* void ff_simple_idct_neon(DCTELEM *data); */
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/*
 * Exported entry point for the in-place NEON simple IDCT; the C prototype
 * takes a single DCTELEM pointer.
 *
 * Call sequence, as written below: idct_start, two row passes, then twice
 * the pair idct_col4_neon + idct_col4_st16_neon, then idct_end. Results are
 * written back through r2 by idct_col4_st16_neon.
 *
 * NOTE(review): the single argument presumably arrives in r0 under the ARM
 * procedure call standard, which is why r0 is passed to idct_start and then
 * copied to r2 -- confirm against the macro definition.
 *
 * The idct_start/idct_end macros, idct_row4_neon and idct_col4_neon are
 * defined elsewhere in this file; their register usage is not visible here.
 */
356 function ff_simple_idct_neon, export=1
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
357 idct_start r0
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
358
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* Copy the pointer into r2, the register adjusted between the helper calls
   below and used as the store address by idct_col4_st16_neon. */
359 mov r2, r0
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
360 bl idct_row4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
361 bl idct_row4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r2 -= 128. Presumably rewinds the pointer to the start of the block after
   the two row passes -- TODO confirm how far idct_row4_neon advances r2. */
362 add r2, r2, #-128
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
363 bl idct_col4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* r2 -= 128 again before storing. Presumably idct_col4_neon advanced r2 by
   128 while reading, so this points the store back at the same columns --
   TODO confirm. */
364 add r2, r2, #-128
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
365 bl idct_col4_st16_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
/* idct_col4_st16_neon advanced r2 by 128 bytes (eight stores, 16-byte
   stride), so r2 -= 120 leaves it 8 bytes past the first column group,
   i.e. four 16-bit coefficients further along each row. */
366 add r2, r2, #-120
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
367 bl idct_col4_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
368 add r2, r2, #-128
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
369 bl idct_col4_st16_neon
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
370
f19fe0cade86 ARM: NEON optimised simple_idct
mru
parents:
diff changeset
371 idct_end
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10349
diff changeset
372 endfunc