Mercurial > libavcodec.hg
annotate arm/h264idct_neon.S @ 12043:f9a0bd0888a4 libavcodec
mpegaudio: call ff_mpegaudiodec_init_mmx() only from float decoder
The mmx code is floating-point only, and this function does not know
from which decoder it is called. Without this change, the integer
decoder only "works" because the size of the context struct is smaller
in this case, and the mmx init function writes the function pointer
outside the allocated context.
author | mru |
---|---|
date | Thu, 01 Jul 2010 23:21:17 +0000 |
parents | 361a5fcb4393 |
children | ba14e3adeccd |
rev | line source |
---|---|
8339 | 1 /* |
2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |
3 * | |
4 * This file is part of FFmpeg. | |
5 * | |
6 * FFmpeg is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2.1 of the License, or (at your option) any later version. | |
10 * | |
11 * FFmpeg is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with FFmpeg; if not, write to the Free Software | |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 */ | |
20 | |
21 #include "asm.S" | |
22 | |
8462 | 23 preserve8 |
8339 | 24 .text |
25 | |
/*
 * ff_h264_idct_add_neon
 *
 * Runs a two-pass 4x4 butterfly transform on sixteen 16-bit coefficients,
 * rounds the result with a shift by 6 and adds it, with unsigned 8-bit
 * saturation, to a 4x4 block of bytes in memory.
 *
 * Register use, as read from the code below:
 *   r0  pointer to the first of four 4-byte rows; each row access carries a
 *       32-bit alignment hint
 *   r1  pointer to the 16 coefficients (32 bytes, 128-bit alignment hint);
 *       only read, never written back
 *   r2  byte distance between consecutive rows
 *
 * On return r0 has advanced by 4*r2. q0-q3, q8 and q9 are overwritten.
 * No core register other than r0 is modified and the flags are untouched.
 */
26 function ff_h264_idct_add_neon, export=1 | |
27 vld1.64 {d0-d3}, [r1,:128] | |
28 | |
/*
 * First pass. Call the four loaded vectors v0..v3. After the swap d1 holds v2
 * and d2 holds v1, so q1 = (v1, v3) and a single shift halves both:
 *   d4 = v0 + v2          d5 = v0 - v2
 *   d6 = v1 + (v3 >> 1)   d7 = (v1 >> 1) - v3
 * The sums and differences of q2 and q3 leave the four results in the
 * register order d0, d1, d3, d2.
 */
29 vswp d1, d2 | |
30 vadd.i16 d4, d0, d1 | |
31 vshr.s16 q8, q1, #1 | |
32 vsub.i16 d5, d0, d1 | |
33 vadd.i16 d6, d2, d17 | |
34 vsub.i16 d7, d16, d3 | |
35 vadd.i16 q0, q2, q3 | |
36 vsub.i16 q1, q2, q3 | |
37 | |
/*
 * Transpose the 4x4 matrix of 16-bit lanes whose rows are d0, d1, d3, d2
 * (in that order); the transposed rows end up in the same register order.
 */
38 vtrn.16 d0, d1 | |
39 vtrn.16 d3, d2 | |
40 vtrn.32 d0, d3 | |
41 vtrn.32 d1, d2 | |
42 | |
/*
 * Second pass on the transposed vectors c0..c3 (held in d0, d1, d3, d2),
 * interleaved with loading the four destination rows:
 *   d4 = c0 + c2          d5 = c0 - c2
 *   d6 = c1 + (c3 >> 1)   d7 = c3 - (c1 >> 1)
 * Rows 0 and 2 are loaded into d18, rows 3 and 1 into d19. That matches the
 * layout produced below: q0 holds output rows (0, 2), q1 holds rows (3, 1).
 */
43 vadd.i16 d4, d0, d3 | |
44 vld1.32 {d18[0]}, [r0,:32], r2 | |
45 vswp d1, d3 | |
46 vshr.s16 q8, q1, #1 | |
47 vld1.32 {d19[1]}, [r0,:32], r2 | |
48 vsub.i16 d5, d0, d1 | |
49 vld1.32 {d18[1]}, [r0,:32], r2 | |
50 vadd.i16 d6, d16, d3 | |
51 vld1.32 {d19[0]}, [r0,:32], r2 | |
52 vsub.i16 d7, d2, d17 | |
/* Rewind r0 by four rows so the stores below hit the rows just loaded. */
53 sub r0, r0, r2, lsl #2 | |
54 vadd.i16 q0, q2, q3 | |
55 vsub.i16 q1, q2, q3 | |
56 | |
/* Rounding arithmetic shift right by 6 of every residual. */
10618 | 57 vrshr.s16 q0, q0, #6 |
58 vrshr.s16 q1, q1, #6 | |
8339 | 59 |
/* Widen the loaded bytes to 16 bits and add them to the residuals. */
60 vaddw.u8 q0, q0, d18 | |
61 vaddw.u8 q1, q1, d19 | |
62 | |
/*
 * Saturate to unsigned 8 bits. The order matters: d0 and d1 are the two
 * halves of q0, so q0 has to be narrowed before d1 is overwritten.
 */
63 vqmovun.s16 d0, q0 | |
64 vqmovun.s16 d1, q1 | |
65 | |
/* Store rows 0..3; the lane order mirrors the (0, 2) / (3, 1) packing. */
66 vst1.32 {d0[0]}, [r0,:32], r2 | |
67 vst1.32 {d1[1]}, [r0,:32], r2 | |
68 vst1.32 {d0[1]}, [r0,:32], r2 | |
69 vst1.32 {d1[0]}, [r0,:32], r2 | |
70 | |
71 bx lr | |
11443 | 72 endfunc |
8340 | 73 |
/*
 * ff_h264_idct_dc_add_neon
 *
 * Adds one rounded 16-bit value to every byte of a 4x4 block in memory,
 * saturating each result to the unsigned 8-bit range.
 *
 * Register use, as read from the code below:
 *   r0  pointer to the first of four 4-byte rows; each row access carries a
 *       32-bit alignment hint
 *   r1  pointer to the 16-bit value (16-bit alignment hint); only the first
 *       halfword is read
 *   r2  byte distance between consecutive rows
 *
 * On return r0 has advanced by 4*r2. q0-q2 are overwritten. No core register
 * other than r0 is modified and the flags are untouched.
 */
74 function ff_h264_idct_dc_add_neon, export=1 | |
/* Replicate the halfword at [r1] into all eight lanes of q1 (d2, d3). */
75 vld1.16 {d2[],d3[]}, [r1,:16] | |
/* Rounding arithmetic shift right by 6. */
76 vrshr.s16 q1, q1, #6 | |
/* Rows 0 and 1 go to d0; widen them and add the value, result in q2. */
77 vld1.32 {d0[0]}, [r0,:32], r2 | |
78 vld1.32 {d0[1]}, [r0,:32], r2 | |
79 vaddw.u8 q2, q1, d0 | |
/*
 * Rows 2 and 3 go to d1. q1 is reused as the accumulator here, which is safe
 * only because the sum for rows 0 and 1 has already been formed in q2.
 */
80 vld1.32 {d1[0]}, [r0,:32], r2 | |
81 vld1.32 {d1[1]}, [r0,:32], r2 | |
82 vaddw.u8 q1, q1, d1 | |
/* Saturate both halves to unsigned 8 bits: d0 = rows 0-1, d1 = rows 2-3. */
83 vqmovun.s16 d0, q2 | |
84 vqmovun.s16 d1, q1 | |
/* Rewind r0 by four rows so the stores hit the rows that were loaded. */
85 sub r0, r0, r2, lsl #2 | |
86 vst1.32 {d0[0]}, [r0,:32], r2 | |
87 vst1.32 {d0[1]}, [r0,:32], r2 | |
88 vst1.32 {d1[0]}, [r0,:32], r2 | |
89 vst1.32 {d1[1]}, [r0,:32], r2 | |
90 bx lr | |
11443 | 91 endfunc |
8462 | 92 |
/*
 * ff_h264_idct_add16_neon
 *
 * Visits 16 consecutive blocks and hands every non-empty one to one of the
 * two 4x4 kernels defined in this file.
 *
 * Inputs, as consumed by the code below:
 *   r0    base destination pointer
 *   r1    array of 32-bit offsets, one per block, added to the base pointer
 *   r2    coefficient buffer, 32 bytes per block
 *   r3    row stride, forwarded to the kernels in r2
 *   [sp]  byte table indexed through scan8 (presumably the per-block
 *         non-zero coefficient counts; confirm against the caller)
 *
 * Per block, with n = table[scan8[i]]:
 *   n == 0                            block is skipped
 *   n == 1 and first coefficient != 0 ff_h264_idct_dc_add_neon
 *   anything else                     ff_h264_idct_add_neon
 *
 * Live across the kernel calls: r4 base pointer, r5 offset cursor, r6 table,
 * r7 scan8 cursor, r1 coefficient cursor, r2 stride, ip remaining blocks.
 * This relies on the kernels leaving r1, r2 and ip untouched.
 */
function ff_h264_idct_add16_neon, export=1
        push            {r4-r8,lr}
        ldr             r6,  [sp, #24]          @ first stack argument, sits above the six saved words
        mov             r4,  r0
        mov             r5,  r1
        mov             r1,  r2
        mov             r2,  r3
        movrel          r7,  scan8
        mov             ip,  #16
1:      ldr             r0,  [r5], #4           @ offset of this block
        ldrb            r8,  [r7], #1           @ scan8 index of this block
        ldrb            r8,  [r6, r8]           @ n = table[scan8[i]]
        subs            r8,  r8,  #1            @ Z set exactly when n == 1
        blt             2f                      @ n == 0: nothing to add
        add             r0,  r0,  r4            @ absolute destination (flags unchanged)
        ldrsh           lr,  [r1]               @ first coefficient
        movne           lr,  #0                 @ n > 1: force the full transform
        cmp             lr,  #0
        adreq           lr,  ff_h264_idct_add_neon
        adrne           lr,  ff_h264_idct_dc_add_neon
        blx             lr
2:      add             r1,  r1,  #32           @ next block of coefficients
        subs            ip,  ip,  #1
        bne             1b
        pop             {r4-r8,pc}
endfunc
8462 | 119 |
/*
 * ff_h264_idct_add16intra_neon
 *
 * Visits 16 consecutive blocks and adds each one that carries data to the
 * destination through one of the two 4x4 kernels defined in this file.
 *
 * Inputs, as consumed by the code below:
 *   r0    base destination pointer
 *   r1    array of 32-bit offsets, one per block, added to the base pointer
 *   r2    coefficient buffer, 32 bytes per block
 *   r3    row stride, forwarded to the kernels in r2
 *   [sp]  byte table indexed through scan8 (presumably the per-block
 *         non-zero coefficient counts; confirm against the caller)
 *
 * Per block, with n = table[scan8[i]]:
 *   n != 0                            ff_h264_idct_add_neon
 *   n == 0 and first coefficient != 0 ff_h264_idct_dc_add_neon
 *   n == 0 and first coefficient == 0 block is skipped
 *
 * Live across the kernel calls: r4 base pointer, r5 offset cursor, r6 table,
 * r7 scan8 cursor, r1 coefficient cursor, r2 stride, ip remaining blocks.
 * This relies on the kernels leaving r1, r2 and ip untouched.
 */
function ff_h264_idct_add16intra_neon, export=1
        push            {r4-r8,lr}
        ldr             r6,  [sp, #24]          @ first stack argument, sits above the six saved words
        mov             r4,  r0
        mov             r5,  r1
        mov             r1,  r2
        mov             r2,  r3
        movrel          r7,  scan8
        mov             ip,  #16
1:      ldr             r0,  [r5], #4           @ offset of this block
        ldrb            r8,  [r7], #1           @ scan8 index of this block
        add             r0,  r0,  r4            @ absolute destination
        ldrb            r8,  [r6, r8]           @ n = table[scan8[i]]
        cmp             r8,  #0
        ldrsh           r8,  [r1]               @ first coefficient (load keeps the flags)
        adreq           lr,  ff_h264_idct_dc_add_neon
        adrne           lr,  ff_h264_idct_add_neon
        cmpeq           r8,  #0                 @ n == 0: call only if the first coefficient is set
        blxne           lr
        add             r1,  r1,  #32           @ next block of coefficients
        subs            ip,  ip,  #1
        bne             1b
        pop             {r4-r8,pc}
endfunc
8462 | 144 |
/*
 * ff_h264_idct_add8_neon
 *
 * Visits the eight blocks that follow the first sixteen (offset entries
 * 16..23, coefficient blocks 16..23, scan8 entries 16..23) and adds each one
 * that carries data to one of two destination buffers.
 *
 * Inputs, as consumed by the code below:
 *   r0    pointer to two destination pointers, loaded into r4 and r9
 *   r1    array of 32-bit offsets; entries from index 16 on are used
 *   r2    coefficient buffer, 32 bytes per block; blocks from index 16 on
 *         are used
 *   r3    row stride, forwarded to the kernels in r2
 *   [sp]  byte table indexed through scan8 (presumably the per-block
 *         non-zero coefficient counts; confirm against the caller)
 *
 * Blocks 0..3 of the eight are added to the first destination pointer and
 * blocks 4..7 to the second. The loop counter runs from 7 down to 0 so that
 * bit 2 of ip is set for exactly the first four iterations and clear for the
 * last four. A counter running 8..1 does not have that property, because
 * bit 2 is clear for 8 and set for 4, which would assign one block, then
 * four, then three.
 *
 * Per block, with n = table[scan8[16 + i]]:
 *   n != 0                            ff_h264_idct_add_neon
 *   n == 0 and first coefficient != 0 ff_h264_idct_dc_add_neon
 *   n == 0 and first coefficient == 0 block is skipped
 *
 * Live across the kernel calls: r4/r9 destination pointers, r5 offset
 * cursor, r6 table, r7 scan8 cursor, r1 coefficient cursor, r2 stride,
 * ip loop counter. r10 is saved but not otherwise used; with it the push
 * covers eight words, which keeps sp 8-byte aligned if it was on entry.
 */
function ff_h264_idct_add8_neon, export=1
        push            {r4-r10,lr}
        ldm             r0,  {r4,r9}            @ r4 = first, r9 = second destination pointer
        add             r5,  r1,  #16*4         @ skip 16 offset words
        add             r1,  r2,  #16*32        @ skip 16 coefficient blocks
        mov             r2,  r3
        ldr             r6,  [sp, #32]          @ first stack argument, sits above the eight saved words
        movrel          r7,  scan8+16
        mov             ip,  #7                 @ counts 7..0, eight iterations
1:      ldrb            r8,  [r7], #1           @ scan8 index of this block
        ldr             r0,  [r5], #4           @ offset of this block
        ldrb            r8,  [r6, r8]           @ n = table[scan8[16 + i]]
        tst             ip,  #4                 @ bit 2 set: ip in 7..4, first four blocks
        addne           r0,  r0,  r4            @ first four blocks use the first pointer
        addeq           r0,  r0,  r9            @ last four blocks use the second pointer
        cmp             r8,  #0
        ldrsh           r8,  [r1]               @ first coefficient (load keeps the flags)
        adrne           lr,  ff_h264_idct_add_neon
        adreq           lr,  ff_h264_idct_dc_add_neon
        cmpeq           r8,  #0                 @ n == 0: call only if the first coefficient is set
        blxne           lr
        subs            ip,  ip,  #1
        add             r1,  r1,  #32           @ next block of coefficients (flags unchanged)
        bge             1b                      @ loop until ip drops below zero
        pop             {r4-r10,pc}
endfunc
8462 | 171 |
/*
 * scan8: 24-entry byte lookup table.
 *
 * Every entry is written as column + row*8, i.e. a position in a byte array
 * laid out with eight entries per row. The functions in this file read the
 * entries in order and use each value as an index into the byte table they
 * receive on the stack:
 *   entries  0..15  used by the two 16-block loops
 *   entries 16..23  used by the 8-block loop, which starts at scan8+16
 */
        .section .rodata
scan8:
        .byte           4+1*8, 5+1*8, 4+2*8, 5+2*8
        .byte           6+1*8, 7+1*8, 6+2*8, 7+2*8
        .byte           4+3*8, 5+3*8, 4+4*8, 5+4*8
        .byte           6+3*8, 7+3*8, 6+4*8, 7+4*8
        .byte           1+1*8, 2+1*8, 1+2*8, 2+2*8
        .byte           1+4*8, 2+4*8, 1+5*8, 2+5*8