Mercurial > mplayer.hg
annotate libmpeg2/motion_comp_arm_s.S @ 29214:a1abd8d51b81
Change VOFW for x86 to 5120, it allows larger images to be scaled and was
not slower. Other archs are not changed as the larger VOFW was slower on PPC.
author | michael |
---|---|
date | Tue, 05 May 2009 01:34:16 +0000 |
parents | da2271c341ee |
children |
rev | line source |
---|---|
23236
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
1 @ motion_comp_arm_s.S |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
2 @ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
3 @ |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
4 @ This file is part of mpeg2dec, a free MPEG-2 video stream decoder. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
5 @ See http://libmpeg2.sourceforge.net/ for updates. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
6 @ |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
7 @ mpeg2dec is free software; you can redistribute it and/or modify |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
8 @ it under the terms of the GNU General Public License as published by |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
9 @ the Free Software Foundation; either version 2 of the License, or |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
10 @ (at your option) any later version. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
11 @ |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
12 @ mpeg2dec is distributed in the hope that it will be useful, |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
15 @ GNU General Public License for more details. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
16 @ |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
17 @ You should have received a copy of the GNU General Public License |
27572 | 18 @ along with mpeg2dec; if not, write to the Free Software |
23236
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
19 @ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
20 |
27572 | 21 |
23236
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
22 .text |
26283
62abac0c8637
cosmetics: Fix some typos and trailing whitespace in local changes.
diego
parents:
23236
diff
changeset
|
23 |
23236
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
24 @ ---------------------------------------------------------------- |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
25 .align |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
26 .global MC_put_o_16_arm |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
27 MC_put_o_16_arm: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
28 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
29 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
30 stmfd sp!, {r4-r11, lr} @ R14 is also called LR |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
31 and r4, r1, #3 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
32 adr r5, MC_put_o_16_arm_align_jt |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
33 add r5, r5, r4, lsl #2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
34 ldr pc, [r5] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
35 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
36 MC_put_o_16_arm_align0: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
37 ldmia r1, {r4-r7} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
38 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
39 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
40 stmia r0, {r4-r7} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
41 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
42 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
43 bne MC_put_o_16_arm_align0 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
44 ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
45 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
46 .macro PROC shift |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
47 ldmia r1, {r4-r8} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
48 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
49 mov r9, r4, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
50 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
51 mov r10, r5, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
52 orr r9, r9, r5, lsl #(32-\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
53 mov r11, r6, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
54 orr r10, r10, r6, lsl #(32-\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
55 mov r12, r7, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
56 orr r11, r11, r7, lsl #(32-\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
57 orr r12, r12, r8, lsl #(32-\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
58 stmia r0, {r9-r12} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
59 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
60 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
61 .endm |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
62 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
63 MC_put_o_16_arm_align1: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
64 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
65 1: PROC(8) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
66 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
67 ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
68 MC_put_o_16_arm_align2: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
69 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
70 1: PROC(16) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
71 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
72 ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
73 MC_put_o_16_arm_align3: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
74 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
75 1: PROC(24) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
76 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
77 ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
78 MC_put_o_16_arm_align_jt: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
79 .word MC_put_o_16_arm_align0 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
80 .word MC_put_o_16_arm_align1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
81 .word MC_put_o_16_arm_align2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
82 .word MC_put_o_16_arm_align3 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
83 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
84 @ ---------------------------------------------------------------- |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
85 .align |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
86 .global MC_put_o_8_arm |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
87 MC_put_o_8_arm: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
88 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
89 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
90 stmfd sp!, {r4-r10, lr} @ R14 is also called LR |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
91 and r4, r1, #3 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
92 adr r5, MC_put_o_8_arm_align_jt |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
93 add r5, r5, r4, lsl #2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
94 ldr pc, [r5] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
95 MC_put_o_8_arm_align0: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
96 ldmia r1, {r4-r5} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
97 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
98 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
99 stmia r0, {r4-r5} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
100 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
101 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
102 bne MC_put_o_8_arm_align0 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
103 ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
104 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
105 .macro PROC8 shift |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
106 ldmia r1, {r4-r6} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
107 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
108 mov r9, r4, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
109 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
110 mov r10, r5, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
111 orr r9, r9, r5, lsl #(32-\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
112 orr r10, r10, r6, lsl #(32-\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
113 stmia r0, {r9-r10} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
114 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
115 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
116 .endm |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
117 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
118 MC_put_o_8_arm_align1: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
119 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
120 1: PROC8(8) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
121 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
122 ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
123 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
124 MC_put_o_8_arm_align2: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
125 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
126 1: PROC8(16) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
127 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
128 ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
129 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
130 MC_put_o_8_arm_align3: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
131 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
132 1: PROC8(24) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
133 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
134 ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
135 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
136 MC_put_o_8_arm_align_jt: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
137 .word MC_put_o_8_arm_align0 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
138 .word MC_put_o_8_arm_align1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
139 .word MC_put_o_8_arm_align2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
140 .word MC_put_o_8_arm_align3 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
141 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
142 @ ---------------------------------------------------------------- |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
143 .macro AVG_PW rW1, rW2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
144 mov \rW2, \rW2, lsl #24 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
145 orr \rW2, \rW2, \rW1, lsr #8 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
146 eor r9, \rW1, \rW2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
147 and \rW2, \rW1, \rW2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
148 and r10, r9, r12 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
149 add \rW2, \rW2, r10, lsr #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
150 and r10, r9, r11 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
151 add \rW2, \rW2, r10 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
152 .endm |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
153 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
154 .align |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
155 .global MC_put_x_16_arm |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
156 MC_put_x_16_arm: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
157 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
158 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
159 stmfd sp!, {r4-r11,lr} @ R14 is also called LR |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
160 and r4, r1, #3 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
161 adr r5, MC_put_x_16_arm_align_jt |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
162 ldr r11, [r5] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
163 mvn r12, r11 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
164 add r5, r5, r4, lsl #2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
165 ldr pc, [r5, #4] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
166 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
167 .macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
168 mov \R0, \R0, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
169 orr \R0, \R0, \R1, lsl #(32 - \shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
170 mov \R1, \R1, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
171 orr \R1, \R1, \R2, lsl #(32 - \shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
172 mov \R2, \R2, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
173 orr \R2, \R2, \R3, lsl #(32 - \shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
174 mov \R3, \R3, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
175 orr \R3, \R3, \R4, lsl #(32 - \shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
176 mov \R4, \R4, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
177 @ and \R4, \R4, #0xFF |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
178 .endm |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
179 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
180 MC_put_x_16_arm_align0: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
181 ldmia r1, {r4-r8} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
182 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
183 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
184 AVG_PW r7, r8 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
185 AVG_PW r6, r7 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
186 AVG_PW r5, r6 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
187 AVG_PW r4, r5 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
188 stmia r0, {r5-r8} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
189 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
190 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
191 bne MC_put_x_16_arm_align0 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
192 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
193 MC_put_x_16_arm_align1: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
194 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
195 1: ldmia r1, {r4-r8} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
196 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
197 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
198 ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
199 AVG_PW r7, r8 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
200 AVG_PW r6, r7 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
201 AVG_PW r5, r6 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
202 AVG_PW r4, r5 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
203 stmia r0, {r5-r8} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
204 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
205 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
206 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
207 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
208 MC_put_x_16_arm_align2: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
209 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
210 1: ldmia r1, {r4-r8} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
211 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
212 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
213 ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
214 AVG_PW r7, r8 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
215 AVG_PW r6, r7 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
216 AVG_PW r5, r6 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
217 AVG_PW r4, r5 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
218 stmia r0, {r5-r8} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
219 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
220 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
221 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
222 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
223 MC_put_x_16_arm_align3: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
224 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
225 1: ldmia r1, {r4-r8} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
226 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
227 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
228 ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
229 AVG_PW r7, r8 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
230 AVG_PW r6, r7 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
231 AVG_PW r5, r6 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
232 AVG_PW r4, r5 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
233 stmia r0, {r5-r8} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
234 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
235 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
236 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
237 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
238 MC_put_x_16_arm_align_jt: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
239 .word 0x01010101 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
240 .word MC_put_x_16_arm_align0 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
241 .word MC_put_x_16_arm_align1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
242 .word MC_put_x_16_arm_align2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
243 .word MC_put_x_16_arm_align3 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
244 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
245 @ ---------------------------------------------------------------- |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
246 .align |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
247 .global MC_put_x_8_arm |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
248 MC_put_x_8_arm: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
249 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
250 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
251 stmfd sp!, {r4-r11,lr} @ R14 is also called LR |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
252 and r4, r1, #3 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
253 adr r5, MC_put_x_8_arm_align_jt |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
254 ldr r11, [r5] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
255 mvn r12, r11 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
256 add r5, r5, r4, lsl #2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
257 ldr pc, [r5, #4] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
258 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
259 .macro ADJ_ALIGN_DW shift, R0, R1, R2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
260 mov \R0, \R0, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
261 orr \R0, \R0, \R1, lsl #(32 - \shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
262 mov \R1, \R1, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
263 orr \R1, \R1, \R2, lsl #(32 - \shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
264 mov \R2, \R2, lsr #(\shift) |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
265 @ and \R4, \R4, #0xFF |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
266 .endm |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
267 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
268 MC_put_x_8_arm_align0: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
269 ldmia r1, {r4-r6} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
270 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
271 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
272 AVG_PW r5, r6 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
273 AVG_PW r4, r5 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
274 stmia r0, {r5-r6} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
275 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
276 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
277 bne MC_put_x_8_arm_align0 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
278 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
279 MC_put_x_8_arm_align1: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
280 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
281 1: ldmia r1, {r4-r6} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
282 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
283 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
284 ADJ_ALIGN_DW 8, r4, r5, r6 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
285 AVG_PW r5, r6 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
286 AVG_PW r4, r5 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
287 stmia r0, {r5-r6} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
288 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
289 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
290 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
291 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
292 MC_put_x_8_arm_align2: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
293 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
294 1: ldmia r1, {r4-r6} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
295 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
296 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
297 ADJ_ALIGN_DW 16, r4, r5, r6 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
298 AVG_PW r5, r6 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
299 AVG_PW r4, r5 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
300 stmia r0, {r5-r6} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
301 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
302 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
303 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
304 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
305 MC_put_x_8_arm_align3: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
306 and r1, r1, #0xFFFFFFFC |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
307 1: ldmia r1, {r4-r6} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
308 add r1, r1, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
309 pld [r1] |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
310 ADJ_ALIGN_DW 24, r4, r5, r6 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
311 AVG_PW r5, r6 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
312 AVG_PW r4, r5 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
313 stmia r0, {r5-r6} |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
314 subs r3, r3, #1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
315 add r0, r0, r2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
316 bne 1b |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
317 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
318 MC_put_x_8_arm_align_jt: |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
319 .word 0x01010101 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
320 .word MC_put_x_8_arm_align0 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
321 .word MC_put_x_8_arm_align1 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
322 .word MC_put_x_8_arm_align2 |
f0ddd02aec27
iWMMXt-accelerated DCT and motion compensation for ARM processors
gpoirier
parents:
diff
changeset
|
323 .word MC_put_x_8_arm_align3 |