annotate arm/h264idct_neon.S @ 12493:8e7fd2d2193f libavcodec

ARM: fix NEON h264_idct_add8
author mru
date Tue, 14 Sep 2010 17:11:51 +0000
parents ba14e3adeccd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8339
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
1 /*
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
3 *
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
4 * This file is part of FFmpeg.
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
5 *
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
10 *
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
14 * Lesser General Public License for more details.
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
15 *
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
19 */
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
20
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
21 #include "asm.S"
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
22
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
23 preserve8
8339
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
24 .text
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
25
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
/*
 * ff_h264_idct_add_neon
 *
 * Runs two 1-D butterfly passes, with a 4x4 transpose in between, over the
 * sixteen 16-bit coefficients at r1, then adds the rounded result to a 4x4
 * block of bytes at r0.
 *
 * In:  r0 = destination pixels, four bytes per row (:32 alignment hint)
 *      r1 = coefficient block, only read here (:128 alignment hint)
 *      r2 = byte distance between destination rows
 * Out: r0 has been advanced by 4*r2
 * Writes d0-d7 and d16-d19; r0 is the only core register written.
 */
26 function ff_h264_idct_add_neon, export=1
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
27 vld1.64 {d0-d3}, [r1,:128]
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
28
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
/* first 1-D pass; the swap pairs the inputs so that q1 can be halved in one vshr */
29 vswp d1, d2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
30 vadd.i16 d4, d0, d1
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
31 vshr.s16 q8, q1, #1
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
32 vsub.i16 d5, d0, d1
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
33 vadd.i16 d6, d2, d17
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
34 vsub.i16 d7, d16, d3
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
35 vadd.i16 q0, q2, q3
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
36 vsub.i16 q1, q2, q3
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
37
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
/* 4x4 transpose of the matrix whose rows are d0, d1, d3, d2 */
38 vtrn.16 d0, d1
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
39 vtrn.16 d3, d2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
40 vtrn.32 d0, d3
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
41 vtrn.32 d1, d2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
42
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
/* second 1-D pass, interleaved with the loads of the four destination rows */
43 vadd.i16 d4, d0, d3
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
/* rows 0-3 go to d18[0], d19[1], d18[1], d19[0]: the same lane order the stores at the end use */
44 vld1.32 {d18[0]}, [r0,:32], r2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
45 vswp d1, d3
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
46 vshr.s16 q8, q1, #1
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
47 vld1.32 {d19[1]}, [r0,:32], r2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
48 vsub.i16 d5, d0, d1
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
49 vld1.32 {d18[1]}, [r0,:32], r2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
50 vadd.i16 d6, d16, d3
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
51 vld1.32 {d19[0]}, [r0,:32], r2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
52 vsub.i16 d7, d2, d17
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
/* step r0 back over the four rows just loaded so the stores start at row 0 */
53 sub r0, r0, r2, lsl #2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
54 vadd.i16 q0, q2, q3
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
55 vsub.i16 q1, q2, q3
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
56
10618
9cea4112ffaf ARM: small tweak of NEON H264 IDCT
mru
parents: 10349
diff changeset
/* rounding shift right by 6, i.e. (x + 32) >> 6 on every lane */
57 vrshr.s16 q0, q0, #6
9cea4112ffaf ARM: small tweak of NEON H264 IDCT
mru
parents: 10349
diff changeset
58 vrshr.s16 q1, q1, #6
8339
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
59
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
/* add the destination bytes, zero-extended to 16 bits */
60 vaddw.u8 q0, q0, d18
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
61 vaddw.u8 q1, q1, d19
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
62
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
/* narrow with saturation to unsigned 8 bits */
63 vqmovun.s16 d0, q0
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
64 vqmovun.s16 d1, q1
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
65
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
66 vst1.32 {d0[0]}, [r0,:32], r2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
67 vst1.32 {d1[1]}, [r0,:32], r2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
68 vst1.32 {d0[1]}, [r0,:32], r2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
69 vst1.32 {d1[0]}, [r0,:32], r2
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
70
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents:
diff changeset
71 bx lr
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10618
diff changeset
72 endfunc
8340
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
73
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
/*
 * ff_h264_idct_dc_add_neon
 *
 * Adds one value, (x + 32) >> 6 with x the signed 16-bit word at r1, to
 * every byte of a 4x4 block at r0, saturating to 0..255.
 *
 * In:  r0 = destination pixels, four bytes per row (:32 alignment hint)
 *      r1 = address of the 16-bit value, only read here
 *      r2 = byte distance between destination rows
 * Out: r0 has been advanced by 4*r2
 * Writes q0-q2; r0 is the only core register written.
 */
74 function ff_h264_idct_dc_add_neon, export=1
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
/* load the single 16-bit value and replicate it into all eight lanes of q1 */
75 vld1.16 {d2[],d3[]}, [r1,:16]
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
76 vrshr.s16 q1, q1, #6
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
/* rows 0 and 1 into d0, rows 2 and 3 into d1 */
77 vld1.32 {d0[0]}, [r0,:32], r2
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
78 vld1.32 {d0[1]}, [r0,:32], r2
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
79 vaddw.u8 q2, q1, d0
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
80 vld1.32 {d1[0]}, [r0,:32], r2
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
81 vld1.32 {d1[1]}, [r0,:32], r2
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
82 vaddw.u8 q1, q1, d1
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
/* narrow with saturation to unsigned 8 bits */
83 vqmovun.s16 d0, q2
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
84 vqmovun.s16 d1, q1
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
/* step r0 back over the four rows just loaded so the stores start at row 0 */
85 sub r0, r0, r2, lsl #2
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
86 vst1.32 {d0[0]}, [r0,:32], r2
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
87 vst1.32 {d0[1]}, [r0,:32], r2
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
88 vst1.32 {d1[0]}, [r0,:32], r2
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
89 vst1.32 {d1[1]}, [r0,:32], r2
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
90 bx lr
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10618
diff changeset
91 endfunc
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
92
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/*
 * ff_h264_idct_add16_neon
 *
 * Walks 16 consecutive 32-byte coefficient blocks and, for each one whose
 * table entry is nonzero, calls ff_h264_idct_dc_add_neon or
 * ff_h264_idct_add_neon on it.
 *
 * In:  r0   = destination base address
 *      r1   = array of 16 32-bit offsets; one is added to r0 per block
 *      r2   = first coefficient block
 *      r3   = handed to the callee in r2 (its row stride)
 *      [sp] = byte table indexed through scan8; presumably the per-block
 *             nonzero-coefficient counts -- confirm against the caller
 *
 * r1, r2 and ip stay live across the blx; this relies on neither callee
 * writing them.
 */
93 function ff_h264_idct_add16_neon, export=1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
94 push {r4-r8,lr}
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
95 mov r4, r0
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
96 mov r5, r1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
97 mov r1, r2
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
98 mov r2, r3
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* first stack argument; the six registers pushed above occupy 24 bytes */
99 ldr r6, [sp, #24]
8507
779a9c93bf61 ARM: work around linker bug with movw/movt relocations in shared libs
mru
parents: 8462
diff changeset
100 movrel r7, scan8
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* ip = number of blocks left */
101 mov ip, #16
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* r8 = next scan8 byte, r0 = next offset, then r8 = table byte at r6[r8] */
102 1: ldrb r8, [r7], #1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
103 ldr r0, [r5], #4
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
104 ldrb r8, [r6, r8]
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* a table byte of 0 goes negative here and the block is skipped */
105 subs r8, r8, #1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
106 blt 2f
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* lr = first coefficient of the block */
107 ldrsh lr, [r1]
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
108 add r0, r0, r4
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* flags are still those of the subs: discard the coefficient unless the table byte was exactly 1 */
109 movne lr, #0
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
110 cmp lr, #0
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* table byte 1 with a nonzero first coefficient: DC-only add; every other case: full transform */
111 adrne lr, ff_h264_idct_dc_add_neon
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
112 adreq lr, ff_h264_idct_add_neon
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
113 blx lr
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* the add below does not set flags, so the bne tests the result of this subs */
114 2: subs ip, ip, #1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
115 add r1, r1, #32
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
116 bne 1b
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
117 pop {r4-r8,pc}
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10618
diff changeset
118 endfunc
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
119
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/*
 * ff_h264_idct_add16intra_neon
 *
 * Walks 16 consecutive 32-byte coefficient blocks. A block with a nonzero
 * table entry gets ff_h264_idct_add_neon; a block with a zero table entry
 * but a nonzero first coefficient gets ff_h264_idct_dc_add_neon; a block
 * with both zero is skipped.
 *
 * In:  r0   = destination base address
 *      r1   = array of 16 32-bit offsets; one is added to r0 per block
 *      r2   = first coefficient block
 *      r3   = handed to the callee in r2 (its row stride)
 *      [sp] = byte table indexed through scan8; presumably the per-block
 *             nonzero-coefficient counts -- confirm against the caller
 *
 * r1, r2 and ip stay live across the blxne; this relies on neither callee
 * writing them.
 */
120 function ff_h264_idct_add16intra_neon, export=1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
121 push {r4-r8,lr}
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
122 mov r4, r0
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
123 mov r5, r1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
124 mov r1, r2
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
125 mov r2, r3
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* first stack argument; the six registers pushed above occupy 24 bytes */
126 ldr r6, [sp, #24]
8507
779a9c93bf61 ARM: work around linker bug with movw/movt relocations in shared libs
mru
parents: 8462
diff changeset
127 movrel r7, scan8
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* ip = number of blocks left */
128 mov ip, #16
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* r8 = next scan8 byte, r0 = next offset, then r8 = table byte at r6[r8] */
129 1: ldrb r8, [r7], #1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
130 ldr r0, [r5], #4
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
131 ldrb r8, [r6, r8]
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
132 add r0, r0, r4
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* test the table byte, then reuse r8 for the first coefficient (ldrsh leaves the flags alone) */
133 cmp r8, #0
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
134 ldrsh r8, [r1]
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
135 adrne lr, ff_h264_idct_add_neon
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
136 adreq lr, ff_h264_idct_dc_add_neon
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* only when the table byte was 0: let the first coefficient decide whether to call at all */
137 cmpeq r8, #0
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
138 blxne lr
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* the add below does not set flags, so the bne tests the result of this subs */
139 subs ip, ip, #1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
140 add r1, r1, #32
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
141 bne 1b
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
142 pop {r4-r8,pc}
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10618
diff changeset
143 endfunc
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
144
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/*
 * ff_h264_idct_add8_neon
 *
 * Processes eight 32-byte coefficient blocks, starting 16 blocks past r2,
 * four for each of two destination pointers. Per block the dispatch is:
 * nonzero table entry -> ff_h264_idct_add_neon; zero table entry with a
 * nonzero first coefficient -> ff_h264_idct_dc_add_neon; otherwise skip.
 *
 * In:  r0   = address of two consecutive destination pointers
 *      r1   = array of 32-bit offsets; entries 16 to 23 are used
 *      r2   = coefficient blocks; blocks 16 to 23 are used
 *      r3   = handed to the callee in r2 (its row stride)
 *      [sp] = byte table indexed through scan8[16..23]; presumably the
 *             per-block nonzero-coefficient counts -- confirm against caller
 *
 * r10 is pushed but never used; presumably it is there to keep the stack
 * 8-byte aligned -- TODO confirm.
 */
145 function ff_h264_idct_add8_neon, export=1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
146 push {r4-r10,lr}
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* r4 = first destination pointer, r9 = second */
147 ldm r0, {r4,r9}
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* skip the first 16 offsets and the first 16 coefficient blocks */
148 add r5, r1, #16*4
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
149 add r1, r2, #16*32
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
150 mov r2, r3
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* first stack argument; the eight registers pushed above occupy 32 bytes */
151 ldr r6, [sp, #32]
8507
779a9c93bf61 ARM: work around linker bug with movw/movt relocations in shared libs
mru
parents: 8462
diff changeset
152 movrel r7, scan8+16
12493
8e7fd2d2193f ARM: fix NEON h264_idct_add8
mru
parents: 12368
diff changeset
/* ip counts 7 down to 0 (the loop ends on bge), giving eight iterations */
153 mov ip, #7
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
154 1: ldrb r8, [r7], #1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
155 ldr r0, [r5], #4
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
156 ldrb r8, [r6, r8]
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* bit 2 of ip is set for ip = 7..4 and clear for 3..0: first four blocks use r4, last four use r9 */
157 tst ip, #4
12493
8e7fd2d2193f ARM: fix NEON h264_idct_add8
mru
parents: 12368
diff changeset
158 addne r0, r0, r4
8e7fd2d2193f ARM: fix NEON h264_idct_add8
mru
parents: 12368
diff changeset
159 addeq r0, r0, r9
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* test the table byte, then reuse r8 for the first coefficient (ldrsh leaves the flags alone) */
160 cmp r8, #0
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
161 ldrsh r8, [r1]
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
162 adrne lr, ff_h264_idct_add_neon
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
163 adreq lr, ff_h264_idct_dc_add_neon
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* only when the table byte was 0: let the first coefficient decide whether to call at all */
164 cmpeq r8, #0
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
165 blxne lr
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/* the add below does not set flags, so the bge tests the result of this subs */
166 subs ip, ip, #1
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
167 add r1, r1, #32
12493
8e7fd2d2193f ARM: fix NEON h264_idct_add8
mru
parents: 12368
diff changeset
168 bge 1b
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
169 pop {r4-r10,pc}
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10618
diff changeset
170 endfunc
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
171
12368
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/*
 * idct8x8_cols pass
 *
 * One 1-D pass of the 8x8 transform over eight vectors of 16-bit values.
 * pass is 0 or 1; it selects the prologue, the epilogue and which of
 * q2/q14 the temporary aliases qa and qb name.
 *
 * pass 0: expects q8-q13 to be loaded already and itself loads q14-q15
 *         from r1 (post-incremented by 32 bytes). Its results end up in
 *         q8-q13, q2 and q15, with three vtrn.16 steps already applied.
 * pass 1: starts with the remaining vtrn/vswp steps -- together with the
 *         ones in pass 0 these appear to form the 8x8 transpose between
 *         the passes, TODO confirm -- and leaves its results in q8-q15.
 *
 * q0, q1 and q3 are scratch in both passes. qa and qb are released with
 * .unreq at the end so the macro can be expanded again.
 */
172 .macro idct8x8_cols pass
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
173 .if \pass == 0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
174 qa .req q2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
175 qb .req q14
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* even-input butterflies for pass 0; the last two input vectors are loaded in between */
176 vshr.s16 q2, q10, #1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
177 vadd.i16 q0, q8, q12
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
178 vld1.16 {q14-q15},[r1,:128]!
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
179 vsub.i16 q1, q8, q12
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
180 vshr.s16 q3, q14, #1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
181 vsub.i16 q2, q2, q14
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
182 vadd.i16 q3, q3, q10
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
183 .else
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
184 qa .req q14
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
185 qb .req q2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* remaining transpose steps, interleaved with the even-input butterflies for pass 1 */
186 vtrn.32 q8, q10
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
187 vtrn.16 q12, q13
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
188 vtrn.32 q9, q11
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
189 vtrn.32 q12, q2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
190 vtrn.32 q13, q15
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
191 vswp d21, d4
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
192 vshr.s16 q14, q10, #1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
193 vswp d17, d24
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
194 vshr.s16 q3, q2, #1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
195 vswp d19, d26
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
196 vadd.i16 q0, q8, q12
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
197 vswp d23, d30
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
198 vsub.i16 q1, q8, q12
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
199 vsub.i16 q14, q14, q2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
200 vadd.i16 q3, q3, q10
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
201 .endif
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* common part: second stage of the even half into q8, q10, q12 and qb */
202 vadd.i16 q10, q1, qa
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
203 vsub.i16 q12, q1, qa
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
204 vadd.i16 q8, q0, q3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
205 vsub.i16 qb, q0, q3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* odd half: computed from q9, q11, q13, q15 into q0, q1, qa, q3 */
206 vsub.i16 q0, q13, q11
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
207 vadd.i16 q1, q15, q9
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
208 vsub.i16 qa, q15, q9
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
209 vadd.i16 q3, q13, q11
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
210 vsub.i16 q0, q0, q15
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
211 vsub.i16 q1, q1, q11
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
212 vadd.i16 qa, qa, q13
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
213 vadd.i16 q3, q3, q9
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* the odd inputs are halved in place; their original values are not needed after this point */
214 vshr.s16 q9, q9, #1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
215 vshr.s16 q11, q11, #1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
216 vshr.s16 q13, q13, #1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
217 vshr.s16 q15, q15, #1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
218 vsub.i16 q0, q0, q15
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
219 vsub.i16 q1, q1, q11
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
220 vadd.i16 qa, qa, q13
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
221 vadd.i16 q3, q3, q9
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* q9, q11, q13, q15 now hold quarter-scaled copies of the odd-half terms */
222 vshr.s16 q9, q0, #2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
223 vshr.s16 q11, q1, #2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
224 vshr.s16 q13, qa, #2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
225 vshr.s16 q15, q3, #2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
226 vsub.i16 q3, q3, q9
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
227 vsub.i16 qa, q11, qa
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
228 vadd.i16 q1, q1, q13
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
229 vadd.i16 q0, q0, q15
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
230 .if \pass == 0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* pass 0 output stage (qa is q2, qb is q14 here), interleaved with the vtrn.16 transpose steps */
231 vsub.i16 q15, q8, q3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
232 vadd.i16 q8, q8, q3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
233 vadd.i16 q9, q10, q2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
234 vsub.i16 q2, q10, q2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
235 vtrn.16 q8, q9
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
236 vadd.i16 q10, q12, q1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
237 vtrn.16 q2, q15
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
238 vadd.i16 q11, q14, q0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
239 vsub.i16 q13, q12, q1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
240 vtrn.16 q10, q11
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
241 vsub.i16 q12, q14, q0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
242 .else
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* pass 1 output stage (qa is q14, qb is q2 here); results land in q8-q15 */
243 vsub.i16 q15, q8, q3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
244 vadd.i16 q8, q8, q3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
245 vadd.i16 q9, q10, q14
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
246 vsub.i16 q14, q10, q14
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
247 vadd.i16 q10, q12, q1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
248 vsub.i16 q13, q12, q1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
249 vadd.i16 q11, q2, q0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
250 vsub.i16 q12, q2, q0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
251 .endif
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
252 .unreq qa
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
253 .unreq qb
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
254 .endm
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
255
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/*
 * ff_h264_idct8_add_neon
 *
 * Applies both idct8x8_cols passes to the 64 16-bit coefficients at r1 and
 * adds the rounded result to an 8x8 block of bytes at r0, saturating to
 * 0..255.
 *
 * In:  r0 = destination pixels, eight bytes per row (:64 alignment hint)
 *      r1 = coefficient block, only read here (:128 alignment hint)
 *      r2 = byte distance between destination rows
 * Out: r0 and r3 have been advanced by 8*r2; r1 holds its entry value again
 * Writes q0-q3 and q8-q15.
 */
256 function ff_h264_idct8_add_neon, export=1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* first six of the eight input vectors; pass 0 of the macro loads the last two */
257 vld1.16 {q8-q9}, [r1,:128]!
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
258 vld1.16 {q10-q11},[r1,:128]!
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
259 vld1.16 {q12-q13},[r1,:128]!
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
260
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
261 idct8x8_cols 0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
262 idct8x8_cols 1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
263
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* r3 = second copy of the destination pointer, used for the stores while r0 walks the loads */
264 mov r3, r0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* rounding shift right by 6 of q8-q15, interleaved with loading destination rows 0-7 into d0-d7 */
265 vrshr.s16 q8, q8, #6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
266 vld1.8 {d0}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
267 vrshr.s16 q9, q9, #6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
268 vld1.8 {d1}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
269 vrshr.s16 q10, q10, #6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
270 vld1.8 {d2}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
271 vrshr.s16 q11, q11, #6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
272 vld1.8 {d3}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
273 vrshr.s16 q12, q12, #6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
274 vld1.8 {d4}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
275 vrshr.s16 q13, q13, #6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
276 vld1.8 {d5}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
277 vrshr.s16 q14, q14, #6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
278 vld1.8 {d6}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
279 vrshr.s16 q15, q15, #6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
280 vld1.8 {d7}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* per row: widen-add the destination bytes, narrow with unsigned saturation, store through r3 */
281 vaddw.u8 q8, q8, d0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
282 vaddw.u8 q9, q9, d1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
283 vaddw.u8 q10, q10, d2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
284 vqmovun.s16 d0, q8
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
285 vaddw.u8 q11, q11, d3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
286 vqmovun.s16 d1, q9
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
287 vaddw.u8 q12, q12, d4
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
288 vqmovun.s16 d2, q10
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
289 vst1.8 {d0}, [r3,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
290 vaddw.u8 q13, q13, d5
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
291 vqmovun.s16 d3, q11
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
292 vst1.8 {d1}, [r3,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
293 vaddw.u8 q14, q14, d6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
294 vqmovun.s16 d4, q12
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
295 vst1.8 {d2}, [r3,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
296 vaddw.u8 q15, q15, d7
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
297 vqmovun.s16 d5, q13
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
298 vst1.8 {d3}, [r3,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
299 vqmovun.s16 d6, q14
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
300 vqmovun.s16 d7, q15
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
301 vst1.8 {d4}, [r3,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
302 vst1.8 {d5}, [r3,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
303 vst1.8 {d6}, [r3,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
304 vst1.8 {d7}, [r3,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
305
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* undo the four 32-byte post-increments (three loads above plus the one inside pass 0) */
306 sub r1, r1, #128
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
307 bx lr
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
308 endfunc
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
309
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/*
 * ff_h264_idct8_dc_add_neon: add one rounded DC value to every byte of an
 * 8x8 block and write the block back in place, saturating to 0..255.
 *
 * In:  r0 = address of the first 8-byte row; the :64 qualifiers assert that
 *           every row address is 8-byte aligned
 *      r1 = address of the 16-bit DC value (only read here)
 *      r2 = byte distance from one row to the next
 * Out: r0 = entry value + 8 * r2; r1 and r2 are left unchanged
 * Scratch: d0-d7, q8-q15
 * NOTE(review): presumably called from C as
 * (uint8_t *dst, DCTELEM *block, int stride) - confirm against the header.
 */
310 function ff_h264_idct8_dc_add_neon, export=1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* q15 = the 16-bit value at [r1], replicated into all eight lanes */
311 vld1.16 {d30[],d31[]},[r1,:16]
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Rows 0-7 are loaded into d0-d7; each load post-increments r0 by r2. */
312 vld1.32 {d0}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* q15 = (q15 + 32) >> 6 per lane (signed rounding shift right) */
313 vrshr.s16 q15, q15, #6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
314 vld1.32 {d1}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
315 vld1.32 {d2}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* q8..q15 = zero-extended row bytes + q15, kept as 16-bit lanes */
316 vaddw.u8 q8, q15, d0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
317 vld1.32 {d3}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
318 vaddw.u8 q9, q15, d1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
319 vld1.32 {d4}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
320 vaddw.u8 q10, q15, d2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
321 vld1.32 {d5}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
322 vaddw.u8 q11, q15, d3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
323 vld1.32 {d6}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
324 vaddw.u8 q12, q15, d4
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
325 vld1.32 {d7}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
326 vaddw.u8 q13, q15, d5
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
327 vaddw.u8 q14, q15, d6
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Final use of the DC vector: q15 is overwritten with the row-7 sum. */
328 vaddw.u8 q15, q15, d7
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Narrow each 16-bit sum to a byte with unsigned saturation (0..255). */
329 vqmovun.s16 d0, q8
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
330 vqmovun.s16 d1, q9
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
331 vqmovun.s16 d2, q10
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
332 vqmovun.s16 d3, q11
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Step r0 back by 8 * r2 so that it addresses row 0 again. */
333 sub r0, r0, r2, lsl #3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Store the results over the rows that were loaded; the narrowing of */
/* rows 4-7 is interleaved with the first four stores. */
334 vst1.32 {d0}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
335 vqmovun.s16 d4, q12
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
336 vst1.32 {d1}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
337 vqmovun.s16 d5, q13
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
338 vst1.32 {d2}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
339 vqmovun.s16 d6, q14
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
340 vst1.32 {d3}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
341 vqmovun.s16 d7, q15
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
342 vst1.32 {d4}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
343 vst1.32 {d5}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
344 vst1.32 {d6}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
345 vst1.32 {d7}, [r0,:64], r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
346 bx lr
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
347 endfunc
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
348
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/*
 * ff_h264_idct8_add4_neon: process four 8x8 blocks, calling either the
 * DC-only or the full 8x8 IDCT-and-add routine for each one.
 *
 * In:  r0   = base address that each per-block offset is added to
 *      r1   = array of 32-bit offsets; entries 0, 4, 8 and 12 are read
 *      r2   = 16-bit coefficients, 128 bytes per block
 *      r3   = value passed on to the callees in r2 (the row distance in
 *             ff_h264_idct8_dc_add_neon)
 *      [sp] = first stack argument: byte table indexed through scan8
 *
 * For block i, with n = table[scan8[4 * i]]:
 *      n == 0                            block is skipped
 *      n == 1 and first coefficient != 0 ff_h264_idct8_dc_add_neon
 *      anything else                     ff_h264_idct8_add_neon
 * Each callee is entered with r0 = base + offset, r1 = the block's
 * coefficients and r2 = the value received in r3.
 *
 * The loop depends on the callees returning with r1, r2 and r12 intact.
 * NOTE(review): presumably called from C as (uint8_t *dst,
 * const int *block_offset, DCTELEM *block, int stride,
 * const uint8_t nnzc[]) - confirm against the header.
 */
349 function ff_h264_idct8_add4_neon, export=1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
350 push {r4-r8,lr}
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Move the arguments out of the way: r4 = base, r5 = offset array, */
/* r1 = coefficients and r2 = r3, i.e. where the callees read them. */
351 mov r4, r0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
352 mov r5, r1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
353 mov r1, r2
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
354 mov r2, r3
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* The push above stored 6 registers (24 bytes), so the first stack */
/* argument now lives at [sp, #24]. */
355 ldr r6, [sp, #24]
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
356 movrel r7, scan8
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* r12 = loop counter; it drops by 4 per block, giving 4 iterations. */
357 mov r12, #16
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* r8 = scan8 entry for this block; r7 then skips ahead 4 entries. */
358 1: ldrb r8, [r7], #4
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* r0 = offset for this block; r5 then skips ahead 4 offsets. */
359 ldr r0, [r5], #16
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* r8 = table byte selected by the scan8 entry */
360 ldrb r8, [r6, r8]
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* The flags set here are consumed by both blt and movne below; the */
/* ldrsh and add in between do not modify them. */
361 subs r8, r8, #1
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Table byte was 0: nothing to do for this block. */
362 blt 2f
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* lr = first coefficient of the block, sign-extended */
363 ldrsh lr, [r1]
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
364 add r0, r0, r4
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Table byte was not 1: force lr = 0 so the full routine is picked. */
365 movne lr, #0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* lr != 0 selects the DC-only routine, lr == 0 the full one. */
366 cmp lr, #0
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
367 adrne lr, ff_h264_idct8_dc_add_neon
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
368 adreq lr, ff_h264_idct8_add_neon
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Call through lr; blx replaces lr with the return address. */
369 blx lr
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
370 2: subs r12, r12, #4
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Advance to the next block's coefficients (add leaves the flags */
/* from the subs above untouched for the bne). */
371 add r1, r1, #128
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
372 bne 1b
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
/* Restore r4-r8 and return by popping the saved lr into pc. */
373 pop {r4-r8,pc}
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
374 endfunc
ba14e3adeccd ARM: NEON H264 8x8 IDCT
mru
parents: 11443
diff changeset
375
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
/*
 * scan8: read-only byte table; every entry is written as x + y*8.
 * ff_h264_idct8_add4_neon reads entries 0, 4, 8 and 12 and uses each one
 * as a byte index into the table it receives as its stack argument.
 * NOTE(review): presumably mirrors the scan8[] table of the C H.264
 * decoder (positions in an 8-entry-wide cache) - confirm.
 */
376 .section .rodata
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
377 scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
378 .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
379 .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
380 .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
381 .byte 1+1*8, 2+1*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
382 .byte 1+2*8, 2+2*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
383 .byte 1+4*8, 2+4*8
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
384 .byte 1+5*8, 2+5*8