Mercurial > libavcodec.hg
comparison arm/h264idct_neon.S @ 8462:0ca0e3c98ed5 libavcodec
ARM: add new h264 idct functions
author | mru |
---|---|
date | Thu, 25 Dec 2008 23:13:43 +0000 |
parents | 9281a8a9387a |
children | 779a9c93bf61 |
comparison
equal
deleted
inserted
replaced
8461:11307ea31e57 | 8462:0ca0e3c98ed5 |
---|---|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 */ | 19 */ |
20 | 20 |
21 #include "asm.S" | 21 #include "asm.S" |
22 | 22 |
23 preserve8 | |
23 .fpu neon | 24 .fpu neon |
24 | 25 |
25 .text | 26 .text |
26 | 27 |
27 function ff_h264_idct_add_neon, export=1 | 28 function ff_h264_idct_add_neon, export=1 |
92 vst1.32 {d0[1]}, [r0,:32], r2 | 93 vst1.32 {d0[1]}, [r0,:32], r2 |
93 vst1.32 {d1[0]}, [r0,:32], r2 | 94 vst1.32 {d1[0]}, [r0,:32], r2 |
94 vst1.32 {d1[1]}, [r0,:32], r2 | 95 vst1.32 {d1[1]}, [r0,:32], r2 |
95 bx lr | 96 bx lr |
96 .endfunc | 97 .endfunc |
98 | |
@ ff_h264_idct_add16_neon: walk 16 consecutive 32-byte coefficient blocks and
@ hand each one to ff_h264_idct_add_neon or ff_h264_idct_dc_add_neon.
@ In:  r0   = base pointer; a per-block 32-bit offset is added to it
@      r1   = table of 16 32-bit offsets, read in order
@      r2   = pointer to the first coefficient block
@      r3   = forwarded to the callee in r2 (ff_h264_idct_add_neon uses r2 as
@             the post-increment step of its stores through r0)
@      [sp] = byte table, indexed with the values stored in scan8
@             (presumably per-block non-zero coefficient counts — confirm)
@ NOTE(review): the loop keeps r1, r2 and ip live across the blx, so it relies
@ on both callees leaving those registers intact — confirm against their bodies.
99 function ff_h264_idct_add16_neon, export=1 | |
100 push {r4-r8,lr} | |
101 mov r4, r0 | |
102 mov r5, r1 | |
103 mov r1, r2 | |
104 mov r2, r3 | |
@ Six registers were pushed (24 bytes), so this is the word that was at [sp]
@ on entry.
105 ldr r6, [sp, #24] | |
106 movw r7, #:lower16:scan8 | |
107 movt r7, #:upper16:scan8 | |
@ ip = number of blocks still to process.
108 mov ip, #16 | |
@ r8 = next scan8 entry, r0 = next offset, then r8 = table byte for the block.
109 1: ldrb r8, [r7], #1 | |
110 ldr r0, [r5], #4 | |
111 ldrb r8, [r6, r8] | |
@ Table byte == 0: nothing to do for this block. The flags from this subs are
@ still valid at the movne below (ldrsh and add do not touch them).
112 subs r8, r8, #1 | |
113 blt 2f | |
@ lr = first 16-bit coefficient of the block, sign-extended.
114 ldrsh lr, [r1] | |
115 add r0, r0, r4 | |
@ Table byte != 1: force lr to 0 so the full transform is chosen.
116 movne lr, #0 | |
@ lr != 0 only when the table byte is 1 and the first coefficient is non-zero:
@ use the DC-only routine in that case, the full routine otherwise.
117 cmp lr, #0 | |
118 adrne lr, ff_h264_idct_dc_add_neon | |
119 adreq lr, ff_h264_idct_add_neon | |
120 blx lr | |
@ Advance to the next 32-byte coefficient block; add does not disturb the
@ flags set by subs, so bne still tests the block counter.
121 2: subs ip, ip, #1 | |
122 add r1, r1, #32 | |
123 bne 1b | |
124 pop {r4-r8,pc} | |
125 .endfunc | |
126 | |
@ ff_h264_idct_add16intra_neon: walk 16 consecutive 32-byte coefficient blocks.
@ A block whose table byte is non-zero goes to ff_h264_idct_add_neon; a block
@ whose table byte is zero but whose first coefficient is non-zero goes to
@ ff_h264_idct_dc_add_neon; a block with both zero is skipped.
@ In:  r0   = base pointer; a per-block 32-bit offset is added to it
@      r1   = table of 16 32-bit offsets, read in order
@      r2   = pointer to the first coefficient block
@      r3   = forwarded to the callee in r2 (ff_h264_idct_add_neon uses r2 as
@             the post-increment step of its stores through r0)
@      [sp] = byte table, indexed with the values stored in scan8
@             (presumably per-block non-zero coefficient counts — confirm)
@ NOTE(review): the loop keeps r1, r2 and ip live across the blx, so it relies
@ on both callees leaving those registers intact — confirm against their bodies.
127 function ff_h264_idct_add16intra_neon, export=1 | |
128 push {r4-r8,lr} | |
129 mov r4, r0 | |
130 mov r5, r1 | |
131 mov r1, r2 | |
132 mov r2, r3 | |
@ Six registers were pushed (24 bytes), so this is the word that was at [sp]
@ on entry.
133 ldr r6, [sp, #24] | |
134 movw r7, #:lower16:scan8 | |
135 movt r7, #:upper16:scan8 | |
@ ip = number of blocks still to process.
136 mov ip, #16 | |
@ r8 = next scan8 entry, r0 = next offset, then r8 = table byte for the block.
137 1: ldrb r8, [r7], #1 | |
138 ldr r0, [r5], #4 | |
139 ldrb r8, [r6, r8] | |
140 add r0, r0, r4 | |
@ Flags now reflect "table byte != 0"; the ldrsh below reuses r8 for the first
@ coefficient without changing them.
141 cmp r8, #0 | |
142 ldrsh r8, [r1] | |
143 adrne lr, ff_h264_idct_add_neon | |
144 adreq lr, ff_h264_idct_dc_add_neon | |
@ Only when the table byte was zero: re-test on the first coefficient, so the
@ DC-only routine is called just when that coefficient is non-zero.
145 cmpeq r8, #0 | |
146 blxne lr | |
@ Advance to the next 32-byte coefficient block; add does not disturb the
@ flags set by subs, so bne still tests the block counter.
147 subs ip, ip, #1 | |
148 add r1, r1, #32 | |
149 bne 1b | |
150 pop {r4-r8,pc} | |
151 .endfunc | |
152 | |
@ ff_h264_idct_add8_neon: process 8 consecutive 32-byte coefficient blocks
@ split across two destination pointers: the first four blocks are added to
@ the first pointer, the last four to the second.
@ A block whose table byte is non-zero goes to ff_h264_idct_add_neon; a block
@ whose table byte is zero but whose first coefficient is non-zero goes to
@ ff_h264_idct_dc_add_neon; a block with both zero is skipped.
@ In:  r0   = pointer to two destination base pointers (loaded into r4, r9)
@      r1   = table of 32-bit offsets; entries 16..23 are used
@      r2   = coefficient pointer; the blocks starting at byte 16*32 are used
@      r3   = forwarded to the callee in r2 (ff_h264_idct_add_neon uses r2 as
@             the post-increment step of its stores through r0)
@      [sp] = byte table, indexed with scan8[16..23]
@             (presumably per-block non-zero coefficient counts — confirm)
@ NOTE(review): the loop keeps r1, r2 and ip live across the blx, so it relies
@ on both callees leaving those registers intact — confirm against their bodies.
153 function ff_h264_idct_add8_neon, export=1 | |
154 push {r4-r10,lr} | |
155 ldm r0, {r4,r9} | |
156 add r5, r1, #16*4 | |
157 add r1, r2, #16*32 | |
158 mov r2, r3 | |
@ Eight registers were pushed (32 bytes), so this is the word that was at [sp]
@ on entry.
159 ldr r6, [sp, #32] | |
160 movw r7, #:lower16:scan8+16 | |
161 movt r7, #:upper16:scan8+16 | |
@ ip = number of blocks still to process.
162 mov ip, #8 | |
@ r8 = next scan8 entry, r0 = next offset, then r8 = table byte for the block.
163 1: ldrb r8, [r7], #1 | |
164 ldr r0, [r5], #4 | |
165 ldrb r8, [r6, r8] | |
@ ip counts 8..1, so ip > 4 identifies blocks 0-3 (first destination) and
@ ip <= 4 identifies blocks 4-7 (second destination). Testing bit 2 of the
@ down-counter instead would split the blocks 1/4/3 between the two pointers.
166 cmp ip, #4 | |
167 addhi r0, r0, r4 | |
168 addls r0, r0, r9 | |
@ Flags now reflect "table byte != 0"; the ldrsh below reuses r8 for the first
@ coefficient without changing them.
169 cmp r8, #0 | |
170 ldrsh r8, [r1] | |
171 adrne lr, ff_h264_idct_add_neon | |
172 adreq lr, ff_h264_idct_dc_add_neon | |
@ Only when the table byte was zero: re-test on the first coefficient, so the
@ DC-only routine is called just when that coefficient is non-zero.
173 cmpeq r8, #0 | |
174 blxne lr | |
@ Advance to the next 32-byte coefficient block; add does not disturb the
@ flags set by subs, so bne still tests the block counter.
175 subs ip, ip, #1 | |
176 add r1, r1, #32 | |
177 bne 1b | |
178 pop {r4-r10,pc} | |
179 .endfunc | |
180 | |
@ scan8: 24 byte-sized indices, each written as column + row*8. The IDCT
@ wrappers in this file use each entry as an index into the byte table they
@ receive as their stack argument.
181 .section .rodata | |
@ Entries 0..15: read by ff_h264_idct_add16_neon and
@ ff_h264_idct_add16intra_neon.
182 scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8 | |
183 .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8 | |
184 .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8 | |
185 .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8 | |
@ Entries 16..23: read by ff_h264_idct_add8_neon (it starts at scan8+16).
186 .byte 1+1*8, 2+1*8 | |
187 .byte 1+2*8, 2+2*8 | |
188 .byte 1+4*8, 2+4*8 | |
189 .byte 1+5*8, 2+5*8 |