Mercurial > mplayer.hg
comparison libmpeg2/motion_comp_arm_s.S @ 23236:f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
Ported to SVN by David Bateman % adb014 A gmail P com % from www.mkezx.org
Originally written for Zaurus port http://atty.skr.jp/zplayer/ by AGAWA Koji
Original thread:
Date: Apr 5, 2007 1:11 AM
Subject: [MPlayer-dev-eng] mkezx patches (Was: mplayer zaurus patches)
author | gpoirier |
---|---|
date | Mon, 07 May 2007 19:11:56 +0000 |
parents | |
children | 62abac0c8637 |
comparison
equal
deleted
inserted
replaced
23235:0e8285c7b2fa | 23236:f0ddd02aec27 |
---|---|
1 @ motion_comp_arm_s.S | |
2 @ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> | |
3 @ | |
4 @ This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
5 @ See http://libmpeg2.sourceforge.net/ for updates. | |
6 @ | |
7 @ mpeg2dec is free software; you can redistribute it and/or modify | |
8 @ it under the terms of the GNU General Public License as published by | |
9 @ the Free Software Foundation; either version 2 of the License, or | |
10 @ (at your option) any later version. | |
11 @ | |
12 @ mpeg2dec is distributed in the hope that it will be useful, | |
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 @ GNU General Public License for more details. | |
16 @ | |
17 @ You should have received a copy of the GNU General Public License | |
18 @ along with this program; if not, write to the Free Software | |
19 @ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 | |
@ The code below is placed in the .text section.
        .text

@ ----------------------------------------------------------------
@ void MC_put_o_16_arm(uint8_t * dest, const uint8_t * ref, int stride, int height)
@
@ Copy `height` rows of 16 bytes from ref to dest; both pointers are
@ advanced by `stride` after every row.
@
@   r0 = dest, r1 = ref, r2 = stride, r3 = rows still to do
@
@ The two low bits of ref pick one of four row loops through the jump
@ table at the end of the routine.  The three unaligned loops round ref
@ down to a word boundary, load five words per row and shift the wanted
@ 16 bytes into place (the shift directions assume that the
@ lowest-addressed byte sits in the low bits of a word, i.e.
@ little-endian data).  dest is only ever written with a word
@ store-multiple.
@ Every loop counts r3 down after a row has been copied, so height is
@ presumably always at least 1 -- TODO confirm against the callers.
        .align
        .global MC_put_o_16_arm
MC_put_o_16_arm:
        pld     [r1]                            @ start fetching the first source row
        stmdb   sp!, {r4-r11, lr}
        and     r4, r1, #3                      @ r4 = byte offset of ref inside its word
        adr     r5, MC_put_o_16_arm_align_jt
        ldr     pc, [r5, r4, lsl #2]            @ continue at table entry number r4

@ ref is word aligned: four words in, four words out.
MC_put_o_16_arm_align0:
        ldmia   r1, {r4-r7}
        add     r1, r1, r2
        pld     [r1]                            @ prefetch the next source row
        stmia   r0, {r4-r7}
        add     r0, r0, r2
        subs    r3, r3, #1
        bne     MC_put_o_16_arm_align0
        ldmia   sp!, {r4-r11, pc}               @ restore registers; saved lr goes to pc

@ One row for a source that starts \bits/8 bytes into its word.
@ Expects r1 already rounded down to a word boundary.  Loads five words
@ into r4-r8, builds the four output words in r9-r12, stores them and
@ steps both pointers.  Leaves the flags of the row-counter decrement
@ for the caller's loop branch.
        .macro  PUT16_SHIFTED_ROW bits
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        pld     [r1]
        mov     r9,  r4, lsr #(\bits)
        orr     r9,  r9,  r5, lsl #(32-\bits)
        mov     r10, r5, lsr #(\bits)
        orr     r10, r10, r6, lsl #(32-\bits)
        mov     r11, r6, lsr #(\bits)
        orr     r11, r11, r7, lsl #(32-\bits)
        mov     r12, r7, lsr #(\bits)
        orr     r12, r12, r8, lsl #(32-\bits)
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        subs    r3, r3, #1
        .endm

@ ref is 1 byte past a word boundary.
MC_put_o_16_arm_align1:
        bic     r1, r1, #3                      @ word-align the source pointer
1:
        PUT16_SHIFTED_ROW 8
        bne     1b
        ldmia   sp!, {r4-r11, pc}

@ ref is 2 bytes past a word boundary.
MC_put_o_16_arm_align2:
        bic     r1, r1, #3
1:
        PUT16_SHIFTED_ROW 16
        bne     1b
        ldmia   sp!, {r4-r11, pc}

@ ref is 3 bytes past a word boundary.
MC_put_o_16_arm_align3:
        bic     r1, r1, #3
1:
        PUT16_SHIFTED_ROW 24
        bne     1b
        ldmia   sp!, {r4-r11, pc}

@ Row-loop addresses, indexed by (ref & 3).
MC_put_o_16_arm_align_jt:
        .word   MC_put_o_16_arm_align0
        .word   MC_put_o_16_arm_align1
        .word   MC_put_o_16_arm_align2
        .word   MC_put_o_16_arm_align3
82 | |
@ ----------------------------------------------------------------
@ void MC_put_o_8_arm(uint8_t * dest, const uint8_t * ref, int stride, int height)
@
@ Copy `height` rows of 8 bytes from ref to dest; both pointers are
@ advanced by `stride` after every row.
@
@   r0 = dest, r1 = ref, r2 = stride, r3 = rows still to do
@
@ The two low bits of ref pick one of four row loops through the jump
@ table at the end of the routine.  The unaligned loops round ref down
@ to a word boundary, load three words per row and shift the wanted
@ 8 bytes into place (shift directions assume little-endian data).
@ Every loop counts r3 down after a row has been copied, so height is
@ presumably always at least 1 -- TODO confirm against the callers.
        .align
        .global MC_put_o_8_arm
MC_put_o_8_arm:
        pld     [r1]                            @ start fetching the first source row
        stmdb   sp!, {r4-r10, lr}
        and     r4, r1, #3                      @ r4 = byte offset of ref inside its word
        adr     r5, MC_put_o_8_arm_align_jt
        ldr     pc, [r5, r4, lsl #2]            @ continue at table entry number r4

@ ref is word aligned: two words in, two words out.
MC_put_o_8_arm_align0:
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
        pld     [r1]                            @ prefetch the next source row
        stmia   r0, {r4-r5}
        subs    r3, r3, #1
        add     r0, r0, r2                      @ add leaves the flags from subs intact
        bne     MC_put_o_8_arm_align0
        ldmia   sp!, {r4-r10, pc}               @ restore registers; saved lr goes to pc

@ One row for a source that starts \bits/8 bytes into its word.
@ Expects r1 already rounded down to a word boundary.  Loads three
@ words into r4-r6, builds the two output words in r9-r10, stores them
@ and steps both pointers.  Leaves the flags of the row-counter
@ decrement for the caller's loop branch.
        .macro  PUT8_SHIFTED_ROW bits
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        mov     r9,  r4, lsr #(\bits)
        orr     r9,  r9,  r5, lsl #(32-\bits)
        mov     r10, r5, lsr #(\bits)
        orr     r10, r10, r6, lsl #(32-\bits)
        stmia   r0, {r9-r10}
        add     r0, r0, r2
        subs    r3, r3, #1
        .endm

@ ref is 1 byte past a word boundary.
MC_put_o_8_arm_align1:
        bic     r1, r1, #3                      @ word-align the source pointer
1:
        PUT8_SHIFTED_ROW 8
        bne     1b
        ldmia   sp!, {r4-r10, pc}

@ ref is 2 bytes past a word boundary.
MC_put_o_8_arm_align2:
        bic     r1, r1, #3
1:
        PUT8_SHIFTED_ROW 16
        bne     1b
        ldmia   sp!, {r4-r10, pc}

@ ref is 3 bytes past a word boundary.
MC_put_o_8_arm_align3:
        bic     r1, r1, #3
1:
        PUT8_SHIFTED_ROW 24
        bne     1b
        ldmia   sp!, {r4-r10, pc}

@ Row-loop addresses, indexed by (ref & 3).
MC_put_o_8_arm_align_jt:
        .word   MC_put_o_8_arm_align0
        .word   MC_put_o_8_arm_align1
        .word   MC_put_o_8_arm_align2
        .word   MC_put_o_8_arm_align3
140 | |
@ ----------------------------------------------------------------
@ AVG_PW rW1, rW2
@
@ Per-byte rounded-up average of a word with the same data moved on by
@ one byte.
@
@ In:   \rW1 = word of four source bytes
@       \rW2 = the word that follows it; only its low byte is used
@       r11  = 0x01010101 and r12 = ~r11 (the routines using this macro
@              load both before entering their row loops)
@ Out:  \rW2 = for each byte lane, (a + b + 1) >> 1, where a is the lane
@              of \rW1 and b the lane of (\rW1 >> 8) with the low byte
@              of \rW2 placed in the top lane
@       \rW1 is left unchanged
@ Clobbers r9 and r10, so neither may be passed as an argument.
@ The condition flags are not modified.
        .macro  AVG_PW rW1, rW2
        mov     r9, \rW1, lsr #8
        orr     \rW2, r9, \rW2, lsl #24         @ rW2 = neighbour bytes, one lane along
        eor     r9, \rW2, \rW1                  @ r9 = bits in which the two words differ
        and     \rW2, \rW2, \rW1                @ bits the two words have in common
        and     r10, r12, r9                    @ drop bit 0 of every lane before halving
        add     \rW2, \rW2, r10, lsr #1         @ so the shift cannot leak between lanes
        and     r10, r11, r9                    @ bit 0 of every differing lane
        add     \rW2, \rW2, r10                 @ round the average upwards
        .endm
152 | |
@ void MC_put_x_16_arm(uint8_t * dest, const uint8_t * ref, int stride, int height)
@
@ For `height` rows, write 16 bytes to dest where output byte i is the
@ rounded-up average of source bytes i and i+1 (built with AVG_PW, four
@ bytes at a time).  Both pointers are advanced by `stride` per row.
@
@   r0 = dest, r1 = ref, r2 = stride, r3 = rows still to do
@   r11 = 0x01010101, r12 = ~r11   (byte-lane masks consumed by AVG_PW)
@
@ The two low bits of ref pick one of four row loops through the table
@ at the end of the routine; the first word of that table holds the
@ mask constant, the loop addresses follow it.  The unaligned loops
@ round ref down to a word boundary and shift the five loaded words so
@ that the first wanted byte ends up in the low byte of r4 (shift
@ directions assume little-endian data).
@ Every loop counts r3 down after a row has been written, so height is
@ presumably always at least 1 -- TODO confirm against the callers.
        .align
        .global MC_put_x_16_arm
MC_put_x_16_arm:
        pld     [r1]                            @ start fetching the first source row
        stmdb   sp!, {r4-r11, lr}
        and     r4, r1, #3                      @ r4 = byte offset of ref inside its word
        adr     r5, MC_put_x_16_arm_align_jt
        ldr     r11, [r5], #4                   @ r11 = 0x01010101, r5 -> first loop address
        mvn     r12, r11                        @ r12 = 0xFEFEFEFE
        ldr     pc, [r5, r4, lsl #2]            @ continue at loop number r4

@ Load five source words into r4-r8 and move on to the next source row.
        .macro  X16_FETCH_ROW
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        pld     [r1]
        .endm

@ Shift the five-word window \w0..\w4 down by \bits bits, pulling the
@ low bits of each following word into the top of the one before it.
@ \w4 has nothing after it and is simply shifted; AVG_PW only looks at
@ its low byte.
        .macro  X16_REALIGN bits, w0, w1, w2, w3, w4
        mov     \w0, \w0, lsr #(\bits)
        orr     \w0, \w0, \w1, lsl #(32 - \bits)
        mov     \w1, \w1, lsr #(\bits)
        orr     \w1, \w1, \w2, lsl #(32 - \bits)
        mov     \w2, \w2, lsr #(\bits)
        orr     \w2, \w2, \w3, lsl #(32 - \bits)
        mov     \w3, \w3, lsr #(\bits)
        orr     \w3, \w3, \w4, lsl #(32 - \bits)
        mov     \w4, \w4, lsr #(\bits)
        .endm

@ Average r4-r8 into r5-r8, store the row and step dest.
@ AVG_PW overwrites its second operand, so the words are handled from
@ the highest down: each one is still unmodified when it serves as the
@ first operand of the next step.  Leaves the flags of the row-counter
@ decrement for the caller's loop branch.
        .macro  X16_AVG_STORE
        AVG_PW  r7, r8
        AVG_PW  r6, r7
        AVG_PW  r5, r6
        AVG_PW  r4, r5
        stmia   r0, {r5-r8}
        add     r0, r0, r2
        subs    r3, r3, #1
        .endm

@ ref is word aligned.
MC_put_x_16_arm_align0:
        X16_FETCH_ROW
        X16_AVG_STORE
        bne     MC_put_x_16_arm_align0
        ldmia   sp!, {r4-r11, pc}               @ restore registers; saved lr goes to pc

@ ref is 1 byte past a word boundary.
MC_put_x_16_arm_align1:
        bic     r1, r1, #3                      @ word-align the source pointer
1:
        X16_FETCH_ROW
        X16_REALIGN 8, r4, r5, r6, r7, r8
        X16_AVG_STORE
        bne     1b
        ldmia   sp!, {r4-r11, pc}

@ ref is 2 bytes past a word boundary.
MC_put_x_16_arm_align2:
        bic     r1, r1, #3
1:
        X16_FETCH_ROW
        X16_REALIGN 16, r4, r5, r6, r7, r8
        X16_AVG_STORE
        bne     1b
        ldmia   sp!, {r4-r11, pc}

@ ref is 3 bytes past a word boundary.
MC_put_x_16_arm_align3:
        bic     r1, r1, #3
1:
        X16_FETCH_ROW
        X16_REALIGN 24, r4, r5, r6, r7, r8
        X16_AVG_STORE
        bne     1b
        ldmia   sp!, {r4-r11, pc}

@ Mask constant for AVG_PW, then the row-loop addresses indexed by
@ (ref & 3).
MC_put_x_16_arm_align_jt:
        .word   0x01010101
        .word   MC_put_x_16_arm_align0
        .word   MC_put_x_16_arm_align1
        .word   MC_put_x_16_arm_align2
        .word   MC_put_x_16_arm_align3
243 | |
@ ----------------------------------------------------------------
@ void MC_put_x_8_arm(uint8_t * dest, const uint8_t * ref, int stride, int height)
@
@ For `height` rows, write 8 bytes to dest where output byte i is the
@ rounded-up average of source bytes i and i+1 (built with AVG_PW, four
@ bytes at a time).  Both pointers are advanced by `stride` per row.
@
@   r0 = dest, r1 = ref, r2 = stride, r3 = rows still to do
@   r11 = 0x01010101, r12 = ~r11   (byte-lane masks consumed by AVG_PW)
@
@ The two low bits of ref pick one of four row loops through the table
@ at the end of the routine; the first word of that table holds the
@ mask constant, the loop addresses follow it.  The unaligned loops
@ round ref down to a word boundary and shift the three loaded words so
@ that the first wanted byte ends up in the low byte of r4 (shift
@ directions assume little-endian data).
@ Every loop counts r3 down after a row has been written, so height is
@ presumably always at least 1 -- TODO confirm against the callers.
        .align
        .global MC_put_x_8_arm
MC_put_x_8_arm:
        pld     [r1]                            @ start fetching the first source row
        stmdb   sp!, {r4-r11, lr}
        and     r4, r1, #3                      @ r4 = byte offset of ref inside its word
        adr     r5, MC_put_x_8_arm_align_jt
        ldr     r11, [r5], #4                   @ r11 = 0x01010101, r5 -> first loop address
        mvn     r12, r11                        @ r12 = 0xFEFEFEFE
        ldr     pc, [r5, r4, lsl #2]            @ continue at loop number r4

@ Load three source words into r4-r6 and move on to the next source row.
        .macro  X8_FETCH_ROW
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        .endm

@ Shift the three-word window \w0..\w2 down by \bits bits, pulling the
@ low bits of each following word into the top of the one before it.
@ \w2 has nothing after it and is simply shifted; AVG_PW only looks at
@ its low byte.
        .macro  X8_REALIGN bits, w0, w1, w2
        mov     \w0, \w0, lsr #(\bits)
        orr     \w0, \w0, \w1, lsl #(32 - \bits)
        mov     \w1, \w1, lsr #(\bits)
        orr     \w1, \w1, \w2, lsl #(32 - \bits)
        mov     \w2, \w2, lsr #(\bits)
        .endm

@ Average r4-r6 into r5-r6, store the row and step dest.
@ AVG_PW overwrites its second operand, so the upper pair goes first:
@ r5 is still unmodified when it is averaged with r6.  Leaves the flags
@ of the row-counter decrement for the caller's loop branch.
        .macro  X8_AVG_STORE
        AVG_PW  r5, r6
        AVG_PW  r4, r5
        stmia   r0, {r5-r6}
        add     r0, r0, r2
        subs    r3, r3, #1
        .endm

@ ref is word aligned.
MC_put_x_8_arm_align0:
        X8_FETCH_ROW
        X8_AVG_STORE
        bne     MC_put_x_8_arm_align0
        ldmia   sp!, {r4-r11, pc}               @ restore registers; saved lr goes to pc

@ ref is 1 byte past a word boundary.
MC_put_x_8_arm_align1:
        bic     r1, r1, #3                      @ word-align the source pointer
1:
        X8_FETCH_ROW
        X8_REALIGN 8, r4, r5, r6
        X8_AVG_STORE
        bne     1b
        ldmia   sp!, {r4-r11, pc}

@ ref is 2 bytes past a word boundary.
MC_put_x_8_arm_align2:
        bic     r1, r1, #3
1:
        X8_FETCH_ROW
        X8_REALIGN 16, r4, r5, r6
        X8_AVG_STORE
        bne     1b
        ldmia   sp!, {r4-r11, pc}

@ ref is 3 bytes past a word boundary.
MC_put_x_8_arm_align3:
        bic     r1, r1, #3
1:
        X8_FETCH_ROW
        X8_REALIGN 24, r4, r5, r6
        X8_AVG_STORE
        bne     1b
        ldmia   sp!, {r4-r11, pc}

@ Mask constant for AVG_PW, then the row-loop addresses indexed by
@ (ref & 3).
MC_put_x_8_arm_align_jt:
        .word   0x01010101
        .word   MC_put_x_8_arm_align0
        .word   MC_put_x_8_arm_align1
        .word   MC_put_x_8_arm_align2
        .word   MC_put_x_8_arm_align3