annotate arm/h264idct_neon.S @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 8e7fd2d2193f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8339
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
1 /*
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
3 *
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
4 * This file is part of FFmpeg.
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
5 *
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
10 *
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
14 * Lesser General Public License for more details.
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
15 *
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
19 */
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
20
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
21 #include "asm.S"
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
22
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
23 preserve8
8339
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
24 .text
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
25
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
/*
 * ff_h264_idct_add_neon
 *
 * Two-pass 4x4 inverse transform of sixteen 16-bit coefficients; the
 * result is rounded, shifted right by 6, added to a 4x4 block of 8-bit
 * pixels and stored back with unsigned saturation.
 *
 * In:    r0 = pixel block, four rows of 4 bytes (32-bit alignment hint)
 *        r1 = sixteen 16-bit coefficients (128-bit alignment hint)
 *        r2 = byte distance between pixel rows
 * Out:   r0 = entry value + 4 * r2;  r1, r2 unchanged
 * Clobb: q0-q3, q8, q9
 *
 * NOTE(review): presumably declared in C as
 * void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride)
 * -- confirm against the C prototype.
 */
function ff_h264_idct_add_neon, export=1
        vld1.64         {d0-d3},  [r1,:128]     @ d0..d3 = coefficient vectors v0..v3

        @ first pass
        vswp            d1,  d2                 @ d1 = v2, d2 = v1
        vadd.i16        d4,  d0,  d1            @ e0 = v0 + v2
        vshr.s16        q8,  q1,  #1            @ d16 = v1 >> 1, d17 = v3 >> 1
        vsub.i16        d5,  d0,  d1            @ e1 = v0 - v2
        vadd.i16        d6,  d2,  d17           @ e2 = v1 + (v3 >> 1)
        vsub.i16        d7,  d16, d3            @ e3 = (v1 >> 1) - v3
        vadd.i16        q0,  q2,  q3            @ d0 = e0 + e2, d1 = e1 + e3
        vsub.i16        q1,  q2,  q3            @ d2 = e0 - e2, d3 = e1 - e3

        @ 4x4 transpose; afterwards d0 = t0, d1 = t1, d3 = t2, d2 = t3
        vtrn.16         d0,  d1
        vtrn.16         d3,  d2
        vtrn.32         d0,  d3
        vtrn.32         d1,  d2

        @ second pass, interleaved with the loads of the four pixel rows
        vadd.i16        d4,  d0,  d3            @ f0 = t0 + t2
        vld1.32         {d18[0]}, [r0,:32], r2  @ pixel row 0
        vswp            d1,  d3                 @ d1 = t2, d3 = t1
        vshr.s16        q8,  q1,  #1            @ d16 = t3 >> 1, d17 = t1 >> 1
        vld1.32         {d19[1]}, [r0,:32], r2  @ pixel row 1
        vsub.i16        d5,  d0,  d1            @ f1 = t0 - t2
        vld1.32         {d18[1]}, [r0,:32], r2  @ pixel row 2
        vadd.i16        d6,  d16, d3            @ f2 = t1 + (t3 >> 1)
        vld1.32         {d19[0]}, [r0,:32], r2  @ pixel row 3
        vsub.i16        d7,  d2,  d17           @ f3 = t3 - (t1 >> 1)
        sub             r0,  r0,  r2, lsl #2    @ rewind r0 to pixel row 0
        vadd.i16        q0,  q2,  q3            @ d0 = row 0, d1 = row 2
        vsub.i16        q1,  q2,  q3            @ d2 = row 3, d3 = row 1

        vrshr.s16       q0,  q0,  #6            @ (x + 32) >> 6
        vrshr.s16       q1,  q1,  #6

        vaddw.u8        q0,  q0,  d18           @ add pixel rows 0 and 2
        vaddw.u8        q1,  q1,  d19           @ add pixel rows 3 and 1

        vqmovun.s16     d0,  q0                 @ saturate to unsigned 8 bit
        vqmovun.s16     d1,  q1

        @ lane order mirrors the loads above
        vst1.32         {d0[0]},  [r0,:32], r2  @ row 0
        vst1.32         {d1[1]},  [r0,:32], r2  @ row 1
        vst1.32         {d0[1]},  [r0,:32], r2  @ row 2
        vst1.32         {d1[0]},  [r0,:32], r2  @ row 3

        bx              lr
endfunc
8340
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
73
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
/*
 * ff_h264_idct_dc_add_neon
 *
 * DC-only variant for a 4x4 block: the first coefficient is rounded and
 * shifted right by 6, then added to every pixel of a 4x4 block with
 * unsigned saturation.
 *
 * In:    r0 = pixel block, four rows of 4 bytes (32-bit alignment hint)
 *        r1 = coefficient block; only the first 16-bit value is read
 *        r2 = byte distance between pixel rows
 * Out:   r0 = entry value + 4 * r2;  r1, r2 unchanged
 * Clobb: q0-q2
 */
function ff_h264_idct_dc_add_neon, export=1
        vld1.16         {d2[],d3[]}, [r1,:16]   @ q1 = first coefficient in all 8 lanes
        vrshr.s16       q1,  q1,  #6            @ q1 = (dc + 32) >> 6
        vld1.32         {d0[0]},  [r0,:32], r2  @ pixel row 0
        vld1.32         {d0[1]},  [r0,:32], r2  @ pixel row 1
        vaddw.u8        q2,  q1,  d0            @ rows 0-1 + dc, widened to 16 bit
        vld1.32         {d1[0]},  [r0,:32], r2  @ pixel row 2
        vld1.32         {d1[1]},  [r0,:32], r2  @ pixel row 3
        vaddw.u8        q1,  q1,  d1            @ rows 2-3 + dc, widened to 16 bit
        vqmovun.s16     d0,  q2                 @ saturate to unsigned 8 bit
        vqmovun.s16     d1,  q1
        sub             r0,  r0,  r2, lsl #2    @ rewind r0 to pixel row 0
        vst1.32         {d0[0]},  [r0,:32], r2  @ row 0
        vst1.32         {d0[1]},  [r0,:32], r2  @ row 1
        vst1.32         {d1[0]},  [r0,:32], r2  @ row 2
        vst1.32         {d1[1]},  [r0,:32], r2  @ row 3
        bx              lr
endfunc
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
92
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/*
 * ff_h264_idct_add16_neon
 *
 * Walks sixteen 4x4 blocks and, per block, either skips it, adds only the
 * DC term, or runs the full 4x4 transform:
 *
 *   count == 0                       -> nothing
 *   count == 1 and coefficient 0 != 0 -> ff_h264_idct_dc_add_neon
 *   otherwise                        -> ff_h264_idct_add_neon
 *
 * where count is the byte table[scan8[i]] taken from the fifth argument.
 *
 * In:    r0   = destination base pointer
 *        r1   = table of sixteen 32-bit offsets, added to r0 per block
 *        r2   = coefficient blocks, 32 bytes each
 *        r3   = byte distance between pixel rows
 *        [sp] = byte table indexed through scan8
 *               (NOTE(review): presumably the non-zero coefficient
 *               counts -- confirm against the caller)
 *
 * The loop keeps its state in r1, r2, r4-r8 and ip across the calls; the
 * two callees only write r0 and NEON registers.
 */
function ff_h264_idct_add16_neon, export=1
        push            {r4-r8,lr}
        mov             r4,  r0                 @ r4 = destination base
        mov             r5,  r1                 @ r5 = cursor in the offset table
        mov             r1,  r2                 @ r1 = current coefficient block (callee arg)
        mov             r2,  r3                 @ r2 = row distance (callee arg)
        ldr             r6,  [sp, #24]          @ first stack argument, above the 6 pushed words
        movrel          r7,  scan8
        mov             ip,  #16                @ sixteen blocks
1:      ldrb            r8,  [r7], #1           @ r8 = next scan8 entry
        ldr             r0,  [r5], #4           @ r0 = offset of this block
        ldrb            r8,  [r6, r8]           @ r8 = count byte for this block
        subs            r8,  r8,  #1
        blt             2f                      @ count == 0: skip
        ldrsh           lr,  [r1]               @ lr = first coefficient
        add             r0,  r0,  r4            @ r0 = destination of this block
        movne           lr,  #0                 @ count != 1 (flags from subs): force full transform
        cmp             lr,  #0
        adrne           lr,  ff_h264_idct_dc_add_neon
        adreq           lr,  ff_h264_idct_add_neon
        blx             lr
2:      subs            ip,  ip,  #1
        add             r1,  r1,  #32           @ next coefficient block
        bne             1b
        pop             {r4-r8,pc}
endfunc
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
119
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/*
 * ff_h264_idct_add16intra_neon
 *
 * Walks sixteen 4x4 blocks and dispatches per block:
 *
 *   count != 0                       -> ff_h264_idct_add_neon
 *   count == 0 and coefficient 0 != 0 -> ff_h264_idct_dc_add_neon
 *   count == 0 and coefficient 0 == 0 -> nothing
 *
 * where count is the byte table[scan8[i]] taken from the fifth argument.
 *
 * In:    r0   = destination base pointer
 *        r1   = table of sixteen 32-bit offsets, added to r0 per block
 *        r2   = coefficient blocks, 32 bytes each
 *        r3   = byte distance between pixel rows
 *        [sp] = byte table indexed through scan8
 *               (NOTE(review): presumably the non-zero coefficient
 *               counts -- confirm against the caller)
 *
 * The loop keeps its state in r1, r2, r4-r8 and ip across the calls; the
 * two callees only write r0 and NEON registers.
 */
function ff_h264_idct_add16intra_neon, export=1
        push            {r4-r8,lr}
        mov             r4,  r0                 @ r4 = destination base
        mov             r5,  r1                 @ r5 = cursor in the offset table
        mov             r1,  r2                 @ r1 = current coefficient block (callee arg)
        mov             r2,  r3                 @ r2 = row distance (callee arg)
        ldr             r6,  [sp, #24]          @ first stack argument, above the 6 pushed words
        movrel          r7,  scan8
        mov             ip,  #16                @ sixteen blocks
1:      ldrb            r8,  [r7], #1           @ r8 = next scan8 entry
        ldr             r0,  [r5], #4           @ r0 = offset of this block
        ldrb            r8,  [r6, r8]           @ r8 = count byte for this block
        add             r0,  r0,  r4            @ r0 = destination of this block
        cmp             r8,  #0
        ldrsh           r8,  [r1]               @ r8 = first coefficient (flags untouched)
        adrne           lr,  ff_h264_idct_add_neon
        adreq           lr,  ff_h264_idct_dc_add_neon
        cmpeq           r8,  #0                 @ count == 0: decide on the first coefficient
        blxne           lr                      @ skipped only if count == 0 and coefficient 0 == 0
        subs            ip,  ip,  #1
        add             r1,  r1,  #32           @ next coefficient block
        bne             1b
        pop             {r4-r8,pc}
endfunc
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
144
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/*
 * ff_h264_idct_add8_neon
 *
 * Handles eight 4x4 blocks, numbered 16..23 in the offset table, the
 * coefficient array and scan8.  The first four go to the first destination
 * pointer, the last four to the second.  Per block:
 *
 *   count != 0                       -> ff_h264_idct_add_neon
 *   count == 0 and coefficient 0 != 0 -> ff_h264_idct_dc_add_neon
 *   count == 0 and coefficient 0 == 0 -> nothing
 *
 * In:    r0   = pointer to two destination pointers
 *               (NOTE(review): presumably the two chroma planes -- confirm)
 *        r1   = table of 32-bit offsets; entries 16..23 are used
 *        r2   = coefficient blocks, 32 bytes each; blocks 16..23 are used
 *        r3   = byte distance between pixel rows
 *        [sp] = byte table indexed through scan8
 *
 * r10 is saved and restored but not otherwise used here; presumably it
 * only keeps the pushed size a multiple of 8 bytes -- confirm.
 */
function ff_h264_idct_add8_neon, export=1
        push            {r4-r10,lr}
        ldm             r0,  {r4,r9}            @ r4 = first destination, r9 = second
        add             r5,  r1,  #16*4         @ r5 = offset table, entry 16
        add             r1,  r2,  #16*32        @ r1 = coefficient block 16 (callee arg)
        mov             r2,  r3                 @ r2 = row distance (callee arg)
        ldr             r6,  [sp, #32]          @ first stack argument, above the 8 pushed words
        movrel          r7,  scan8+16
        mov             ip,  #7                 @ counts 7..0: eight blocks
1:      ldrb            r8,  [r7], #1           @ r8 = next scan8 entry
        ldr             r0,  [r5], #4           @ r0 = offset of this block
        ldrb            r8,  [r6, r8]           @ r8 = count byte for this block
        tst             ip,  #4                 @ bit 2 set for ip = 7..4, clear for 3..0
        addne           r0,  r0,  r4            @ first four blocks: first destination
        addeq           r0,  r0,  r9            @ last four blocks: second destination
        cmp             r8,  #0
        ldrsh           r8,  [r1]               @ r8 = first coefficient (flags untouched)
        adrne           lr,  ff_h264_idct_add_neon
        adreq           lr,  ff_h264_idct_dc_add_neon
        cmpeq           r8,  #0                 @ count == 0: decide on the first coefficient
        blxne           lr                      @ skipped only if count == 0 and coefficient 0 == 0
        subs            ip,  ip,  #1
        add             r1,  r1,  #32           @ next coefficient block
        bge             1b                      @ loop until ip goes negative
        pop             {r4-r10,pc}
endfunc
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
171
12368
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/*
 * idct8x8_cols pass
 *
 * One one-dimensional pass of the 8x8 inverse transform over eight
 * vectors s0..s7 of eight 16-bit values each.
 *
 * pass 0: s0..s5 are expected in q8..q13; s6 and s7 are loaded here into
 *         q14/q15 from [r1], which is post-incremented by 32 bytes.
 *         Results: vectors 0..5 in q8..q13, vector 6 in q2, vector 7 in
 *         q15, with the vtrn.16 step of the 8x8 transpose already applied
 *         to the pairs q8/q9, q10/q11 and q2/q15.
 * pass 1: finishes the 8x8 transpose of that layout, runs the same
 *         butterflies and leaves vectors 0..7 in q8..q15.
 *
 * Butterflies (both passes):
 *   a0 = s0 + s4                 a1 = s5 - s3 - s7 - (s7 >> 1)
 *   a2 = s0 - s4                 a3 = s1 + s7 - s3 - (s3 >> 1)
 *   a4 = (s2 >> 1) - s6          a5 = s7 - s1 + s5 + (s5 >> 1)
 *   a6 = (s6 >> 1) + s2          a7 = s3 + s5 + s1 + (s1 >> 1)
 *   b0 = a0 + a6                 b1 = a1 + (a7 >> 2)
 *   b2 = a2 + a4                 b3 = a3 + (a5 >> 2)
 *   b4 = a2 - a4                 b5 = (a3 >> 2) - a5
 *   b6 = a0 - a6                 b7 = a7 - (a1 >> 2)
 *   out0 = b0 + b7   out1 = b2 + b5   out2 = b4 + b3   out3 = b6 + b1
 *   out7 = b0 - b7   out6 = b2 - b5   out5 = b4 - b3   out4 = b6 - b1
 *
 * qa and qb are register aliases that differ per pass; both are released
 * before the macro ends.  q0-q3 are used as scratch.
 */
.macro  idct8x8_cols    pass
  .if \pass == 0
        qa      .req    q2
        qb      .req    q14
        vshr.s16        q2,  q10, #1            @ s2 >> 1
        vadd.i16        q0,  q8,  q12           @ a0
        vld1.16         {q14-q15},[r1,:128]!    @ q14 = s6, q15 = s7
        vsub.i16        q1,  q8,  q12           @ a2
        vshr.s16        q3,  q14, #1            @ s6 >> 1
        vsub.i16        q2,  q2,  q14           @ a4 (qa)
        vadd.i16        q3,  q3,  q10           @ a6
  .else
        qa      .req    q14
        qb      .req    q2
        @ remaining transpose steps, interleaved with the even-part setup
        vtrn.32         q8,  q10
        vtrn.16         q12, q13
        vtrn.32         q9,  q11
        vtrn.32         q12, q2
        vtrn.32         q13, q15
        vswp            d21, d4
        vshr.s16        q14, q10, #1            @ s2 >> 1
        vswp            d17, d24
        vshr.s16        q3,  q2,  #1            @ s6 >> 1 (s6 lives in q2 here)
        vswp            d19, d26
        vadd.i16        q0,  q8,  q12           @ a0
        vswp            d23, d30
        vsub.i16        q1,  q8,  q12           @ a2
        vsub.i16        q14, q14, q2            @ a4 (qa)
        vadd.i16        q3,  q3,  q10           @ a6
  .endif
        vadd.i16        q10, q1,  qa            @ b2
        vsub.i16        q12, q1,  qa            @ b4
        vadd.i16        q8,  q0,  q3            @ b0
        vsub.i16        qb,  q0,  q3            @ b6
        vsub.i16        q0,  q13, q11           @ s5 - s3
        vadd.i16        q1,  q15, q9            @ s7 + s1
        vsub.i16        qa,  q15, q9            @ s7 - s1
        vadd.i16        q3,  q13, q11           @ s5 + s3
        vsub.i16        q0,  q0,  q15           @ ... - s7
        vsub.i16        q1,  q1,  q11           @ ... - s3
        vadd.i16        qa,  qa,  q13           @ ... + s5
        vadd.i16        q3,  q3,  q9            @ ... + s1
        vshr.s16        q9,  q9,  #1            @ s1 >> 1
        vshr.s16        q11, q11, #1            @ s3 >> 1
        vshr.s16        q13, q13, #1            @ s5 >> 1
        vshr.s16        q15, q15, #1            @ s7 >> 1
        vsub.i16        q0,  q0,  q15           @ a1
        vsub.i16        q1,  q1,  q11           @ a3
        vadd.i16        qa,  qa,  q13           @ a5
        vadd.i16        q3,  q3,  q9            @ a7
        vshr.s16        q9,  q0,  #2            @ a1 >> 2
        vshr.s16        q11, q1,  #2            @ a3 >> 2
        vshr.s16        q13, qa,  #2            @ a5 >> 2
        vshr.s16        q15, q3,  #2            @ a7 >> 2
        vsub.i16        q3,  q3,  q9            @ b7
        vsub.i16        qa,  q11, qa            @ b5
        vadd.i16        q1,  q1,  q13           @ b3
        vadd.i16        q0,  q0,  q15           @ b1
  .if \pass == 0
        vsub.i16        q15, q8,  q3            @ out7
        vadd.i16        q8,  q8,  q3            @ out0
        vadd.i16        q9,  q10, q2            @ out1
        vsub.i16        q2,  q10, q2            @ out6, kept in q2
        vtrn.16         q8,  q9                 @ transpose step for out0/out1
        vadd.i16        q10, q12, q1            @ out2
        vtrn.16         q2,  q15                @ transpose step for out6/out7
        vadd.i16        q11, q14, q0            @ out3
        vsub.i16        q13, q12, q1            @ out5
        vtrn.16         q10, q11                @ transpose step for out2/out3
        vsub.i16        q12, q14, q0            @ out4
  .else
        vsub.i16        q15, q8,  q3            @ out7
        vadd.i16        q8,  q8,  q3            @ out0
        vadd.i16        q9,  q10, q14           @ out1
        vsub.i16        q14, q10, q14           @ out6
        vadd.i16        q10, q12, q1            @ out2
        vsub.i16        q13, q12, q1            @ out5
        vadd.i16        q11, q2,  q0            @ out3
        vsub.i16        q12, q2,  q0            @ out4
  .endif
        .unreq          qa
        .unreq          qb
.endm
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
255
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/*
 * ff_h264_idct8_add_neon
 *
 * 8x8 inverse transform (two idct8x8_cols passes) of sixty-four 16-bit
 * coefficients; the result is rounded, shifted right by 6, added to an
 * 8x8 block of 8-bit pixels and stored back with unsigned saturation.
 *
 * In:    r0 = pixel block, eight rows of 8 bytes (64-bit alignment hint)
 *        r1 = sixty-four 16-bit coefficients (128-bit alignment hint)
 *        r2 = byte distance between pixel rows
 * Out:   r0 = r3 = entry r0 + 8 * r2
 *        r1 = entry value (advanced by 128 while loading, restored at the end)
 * Clobb: r3, q0-q3, q8-q15
 */
function ff_h264_idct8_add_neon, export=1
        vld1.16         {q8-q9},  [r1,:128]!    @ vectors 0-1
        vld1.16         {q10-q11},[r1,:128]!    @ vectors 2-3
        vld1.16         {q12-q13},[r1,:128]!    @ vectors 4-5; 6-7 are loaded by pass 0

        idct8x8_cols    0
        idct8x8_cols    1

        @ r0 walks the rows for loading, r3 follows for storing
        mov             r3,  r0
        vrshr.s16       q8,  q8,  #6            @ (x + 32) >> 6, row 0
        vld1.8          {d0},     [r0,:64], r2  @ pixel row 0
        vrshr.s16       q9,  q9,  #6
        vld1.8          {d1},     [r0,:64], r2  @ pixel row 1
        vrshr.s16       q10, q10, #6
        vld1.8          {d2},     [r0,:64], r2  @ pixel row 2
        vrshr.s16       q11, q11, #6
        vld1.8          {d3},     [r0,:64], r2  @ pixel row 3
        vrshr.s16       q12, q12, #6
        vld1.8          {d4},     [r0,:64], r2  @ pixel row 4
        vrshr.s16       q13, q13, #6
        vld1.8          {d5},     [r0,:64], r2  @ pixel row 5
        vrshr.s16       q14, q14, #6
        vld1.8          {d6},     [r0,:64], r2  @ pixel row 6
        vrshr.s16       q15, q15, #6
        vld1.8          {d7},     [r0,:64], r2  @ pixel row 7
        @ widen-add the pixels, saturate to unsigned 8 bit, store
        vaddw.u8        q8,  q8,  d0
        vaddw.u8        q9,  q9,  d1
        vaddw.u8        q10, q10, d2
        vqmovun.s16     d0,  q8
        vaddw.u8        q11, q11, d3
        vqmovun.s16     d1,  q9
        vaddw.u8        q12, q12, d4
        vqmovun.s16     d2,  q10
        vst1.8          {d0},     [r3,:64], r2  @ row 0
        vaddw.u8        q13, q13, d5
        vqmovun.s16     d3,  q11
        vst1.8          {d1},     [r3,:64], r2  @ row 1
        vaddw.u8        q14, q14, d6
        vqmovun.s16     d4,  q12
        vst1.8          {d2},     [r3,:64], r2  @ row 2
        vaddw.u8        q15, q15, d7
        vqmovun.s16     d5,  q13
        vst1.8          {d3},     [r3,:64], r2  @ row 3
        vqmovun.s16     d6,  q14
        vqmovun.s16     d7,  q15
        vst1.8          {d4},     [r3,:64], r2  @ row 4
        vst1.8          {d5},     [r3,:64], r2  @ row 5
        vst1.8          {d6},     [r3,:64], r2  @ row 6
        vst1.8          {d7},     [r3,:64], r2  @ row 7

        sub             r1,  r1,  #128          @ undo the four 32-byte post-increments
        bx              lr
endfunc
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
309
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/*
 * ff_h264_idct8_dc_add_neon: add one rounded 16-bit value to every byte of
 * an 8x8 block, saturating each result to the 0..255 range.
 *
 * In:   r0 = address of the first 8-byte row (every row access carries a
 *            :64 alignment hint)
 *       r1 = address of the 16-bit value (:16 alignment hint), read only
 *       r2 = byte step from one row to the next
 * Out:  the eight rows are rewritten in place; r0 is left at r0 + 8*r2.
 * Uses: d0-d7 and q8-q15; r0 is the only core register written.
 * NOTE(review): by name, r0/r1/r2 look like dst / DC coefficient block /
 * line stride - confirm against the C prototype.
 */
310 function ff_h264_idct8_dc_add_neon, export=1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Load the 16-bit value at [r1] and replicate it into all eight lanes of q15. */
311 vld1.16 {d30[],d31[]},[r1,:16]
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Rows 0..7 go to d0..d7, r0 stepping by r2 after each load. The loads are
   interleaved with the arithmetic below (presumably to hide load latency). */
312 vld1.32 {d0}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* q15 = (q15 + 32) >> 6 per signed 16-bit lane (rounding shift right). */
313 vrshr.s16 q15, q15, #6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
314 vld1.32 {d1}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
315 vld1.32 {d2}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Widening add: q(8+n) = q15 + zero-extended bytes of row n. */
316 vaddw.u8 q8, q15, d0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
317 vld1.32 {d3}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
318 vaddw.u8 q9, q15, d1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
319 vld1.32 {d4}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
320 vaddw.u8 q10, q15, d2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
321 vld1.32 {d5}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
322 vaddw.u8 q11, q15, d3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
323 vld1.32 {d6}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
324 vaddw.u8 q12, q15, d4
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
325 vld1.32 {d7}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
326 vaddw.u8 q13, q15, d5
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
327 vaddw.u8 q14, q15, d6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Last use of the replicated value: q15 is overwritten with the row 7 sum. */
328 vaddw.u8 q15, q15, d7
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Narrow each signed 16-bit sum to an unsigned byte with saturation. */
329 vqmovun.s16 d0, q8
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
330 vqmovun.s16 d1, q9
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
331 vqmovun.s16 d2, q10
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
332 vqmovun.s16 d3, q11
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Rewind r0 by 8*r2 so the stores start again at the first row. */
333 sub r0, r0, r2, lsl #3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Write the rows back, interleaved with the remaining narrowing steps. */
334 vst1.32 {d0}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
335 vqmovun.s16 d4, q12
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
336 vst1.32 {d1}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
337 vqmovun.s16 d5, q13
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
338 vst1.32 {d2}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
339 vqmovun.s16 d6, q14
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
340 vst1.32 {d3}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
341 vqmovun.s16 d7, q15
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
342 vst1.32 {d4}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
343 vst1.32 {d5}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
344 vst1.32 {d6}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
345 vst1.32 {d7}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
346 bx lr
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
347 endfunc
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
348
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/*
 * ff_h264_idct8_add4_neon: visit four blocks and, for each one whose count
 * byte is non-zero, call either ff_h264_idct8_dc_add_neon or
 * ff_h264_idct8_add_neon on it.
 *
 * In:   r0   = base address; each per-block offset is added to it
 *       r1   = array of 32-bit offsets, of which every fourth entry is read
 *       r2   = pointer to 16-bit data, advanced by 128 bytes per block
 *       r3   = value handed to the callee in r2
 *       [sp] = byte array indexed through the scan8 table
 * Saves and restores r4-r8 and returns by popping the saved lr into pc.
 * NOTE(review): by name these look like dst, block_offset, block, stride
 * and a non-zero-coefficient count array - confirm against the C prototype.
 */
349 function ff_h264_idct8_add4_neon, export=1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
350 push {r4-r8,lr}
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Move the arguments so that r0-r2 can serve as the callee's arguments. */
351 mov r4, r0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
352 mov r5, r1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
353 mov r1, r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
354 mov r2, r3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* First stack argument: it now sits 24 bytes up because six registers were pushed. */
355 ldr r6, [sp, #24]
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
356 movrel r7, scan8
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Loop counter: 16 counted down in steps of 4, i.e. four iterations. */
357 mov r12, #16
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* r8 = current scan8 entry; the table pointer then skips four entries ahead. */
358 1: ldrb r8, [r7], #4
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* r0 = offset for this block; the offset pointer then skips four words ahead. */
359 ldr r0, [r5], #16
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* r8 = count byte for this block, looked up through the scan8 entry. */
360 ldrb r8, [r6, r8]
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* r8 = count - 1; the flags set here feed both the blt and the movne below. */
361 subs r8, r8, #1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Count was 0: skip this block. */
362 blt 2f
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* lr = first 16-bit value of the block (sign-extended); flags are untouched. */
363 ldrsh lr, [r1]
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
364 add r0, r0, r4
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Count was not exactly 1: clear lr so the full routine is chosen below. */
365 movne lr, #0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* lr != 0 (count == 1 and a non-zero first value) selects the DC-only routine;
   anything else selects ff_h264_idct8_add_neon. */
366 cmp lr, #0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
367 adrne lr, ff_h264_idct8_dc_add_neon
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
368 adreq lr, ff_h264_idct8_add_neon
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Call the selected routine. This loop relies on r1, r2, r12 and r4-r8
   holding the same values after the call returns. */
369 blx lr
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
370 2: subs r12, r12, #4
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Step to the next block's data, 128 bytes on; this add leaves the flags
   alone, so the bne below still tests the subs result. */
371 add r1, r1, #128
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
372 bne 1b
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
373 pop {r4-r8,pc}
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
374 endfunc
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
375
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/*
 * scan8: read-only byte lookup table, each entry written as a + b*8.
 * ff_h264_idct8_add4_neon reads entries 0, 4, 8 and 12 and uses each one as
 * an index into the byte array it receives as its stack argument.
 * NOTE(review): the a + b*8 form presumably encodes x + y*8 in an 8-wide
 * grid, mirroring the C decoder's scan8[] - confirm.
 */
376 .section .rodata
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* Four rows of four entries (indices 0..15). */
377 scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
378 .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
379 .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
380 .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* Four rows of two entries (indices 16..23); the loop in
   ff_h264_idct8_add4_neon never reaches these. */
381 .byte 1+1*8, 2+1*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
382 .byte 1+2*8, 2+2*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
383 .byte 1+4*8, 2+4*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
384 .byte 1+5*8, 2+5*8