Mercurial > libavcodec.hg
annotate arm/dsputil_armv6.S @ 11305:1a349d669184 libavcodec
Optimize (amvd>2)+(amvd>32), about 1 CPU cycle faster.
patch by Zhou Zongyi @ zhouzy () os punkt pku dot edu speck cn
author | michael |
---|---|
date | Fri, 26 Feb 2010 22:45:35 +0000 |
parents | cbf3161706f4 |
children | 361a5fcb4393 |
rev | line source |
---|---|
10372 | 1 /* |
2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |
3 * | |
4 * This file is part of FFmpeg. | |
5 * | |
6 * FFmpeg is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2.1 of the License, or (at your option) any later version. | |
10 * | |
11 * FFmpeg is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with FFmpeg; if not, write to the Free Software | |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 */ | |
20 | |
21 #include "asm.S" | |
22 | |
11241 | 23 preserve8 |
24 | |
10372 | 25 .text |
26 | |
11108
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/*
 * call_2x_pixels type, subp
 *
 * Emits the exported function ff_<type>_pixels16<subp>_armv6. It handles a
 * 16-byte-wide block by running the matching 8-byte-wide routine
 * ff_<type>_pixels8<subp>_armv6 twice: first with the caller's r0-r3, then
 * with the same r2/r3 and with r0 and r1 each advanced by 8 bytes.
 *   type - name fragment substituted after "ff_"
 *   subp - suffix substituted after "pixels16" / "pixels8" (may be omitted)
 */
27 .macro call_2x_pixels type, subp |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
28 function ff_\type\()_pixels16\subp\()_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/*
 * Keep the four argument registers and the return address across the first
 * call. NOTE(review): five words are pushed, so sp is not a multiple of 8
 * at the bl below - confirm the 8-wide routines do not rely on that.
 */
29 push {r0-r3, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
30 bl ff_\type\()_pixels8\subp\()_armv6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
31 pop {r0-r3, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Move both pointers to the right-hand 8-byte half of the block. */
32 add r0, r0, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
33 add r1, r1, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Plain branch: lr was restored above, so the second run returns to our caller. */
34 b ff_\type\()_pixels8\subp\()_armv6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
35 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
36 .endm |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
37 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/*
 * Instantiate the 16-wide wrappers from call_2x_pixels. In order, these
 * define ff_avg_pixels16_armv6, ff_put_pixels16_x2_armv6,
 * ff_put_pixels16_y2_armv6, ff_put_pixels16_x2_no_rnd_armv6 and
 * ff_put_pixels16_y2_no_rnd_armv6, each built on the pixels8 routine of the
 * same name.
 */
38 call_2x_pixels avg |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
39 call_2x_pixels put, _x2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
40 call_2x_pixels put, _y2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
41 call_2x_pixels put, _x2_no_rnd |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
42 call_2x_pixels put, _y2_no_rnd |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
43 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/*
 * ff_put_pixels16_armv6
 *
 * Copies rows of 16 bytes from the buffer at r1 to the buffer at r0.
 *   r0 - destination pointer, stepped by r2 after each row
 *   r1 - source pointer, stepped by r2 after each row
 *   r2 - byte offset between consecutive rows (shared by source and
 *        destination)
 *   r3 - row count; two rows are copied per pass and the loop only ends
 *        when the count reaches exactly zero, so it has to be a positive
 *        even number
 * r4-r11 are saved on entry and restored before returning.
 */
44 function ff_put_pixels16_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
45 push {r4-r11} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
46 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
47 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
48 ldr r6, [r1, #8] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
49 ldr r7, [r1, #12] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Word 0 is fetched last because this load also steps r1 to the next row. */
50 ldr r4, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
51 strd r6, r7, [r0, #8] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* r1 already points at the second row here; its loads are interleaved with the first row's stores. */
52 ldr r9, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
53 strd r4, r5, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
54 ldr r10, [r1, #8] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
55 ldr r11, [r1, #12] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
56 ldr r8, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
57 strd r10, r11, [r0, #8] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Sets the flags for the bne below; the strd in between does not change them. */
58 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
59 strd r8, r9, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
60 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
61 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
62 pop {r4-r11} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
63 bx lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
64 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
65 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/*
 * ff_put_pixels8_armv6
 *
 * Copies rows of 8 bytes from the buffer at r1 to the buffer at r0.
 *   r0 - destination pointer, stepped by r2 after each row
 *   r1 - source pointer, stepped by r2 after each row
 *   r2 - byte offset between consecutive rows
 *   r3 - row count; decremented by 2 per pass and tested for exactly zero,
 *        so it has to be a positive even number
 * r4-r7 are saved on entry and restored before returning.
 */
66 function ff_put_pixels8_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
67 push {r4-r7} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
68 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Upper word first: the load of the lower word is the one that steps r1. */
69 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
70 ldr r4, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
71 ldr r7, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
72 strd r4, r5, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
73 ldr r6, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Flags from this subs are consumed by the bne after the final store. */
74 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
75 strd r6, r7, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
76 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
77 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
78 pop {r4-r7} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
79 bx lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
80 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
81 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/*
 * ff_put_pixels8_x2_armv6
 *
 * For every row, writes 8 bytes to r0 in which output byte i is the
 * rounded-up average (a + b + 1) >> 1 of source bytes i and i+1. Nine
 * source bytes are therefore read per row (offsets 0..8 from r1).
 *   r0 - destination pointer, stepped by r2 after each row
 *   r1 - source pointer, stepped by r2 after each row
 *   r2 - byte offset between consecutive rows
 *   r3 - row count; decremented by 2 per pass and tested for exactly zero,
 *        so it has to be a positive even number
 * r12 holds 0x01010101 (a 1 in every byte lane). r4-r11 and lr are saved on
 * entry; the function returns by popping the saved lr into pc.
 *
 * Rounding: uhadd8 yields the per-byte truncating average, (a ^ b) & 1 is
 * the bit that truncation dropped, and uadd8 adds it back.
 */
82 function ff_put_pixels8_x2_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
83 push {r4-r11, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Build r12 = 0x01010101. */
84 mov r12, #1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
85 orr r12, r12, r12, lsl #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
86 orr r12, r12, r12, lsl #16 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
87 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
88 ldr r4, [r1] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Loop flags are set here; no later instruction in the pass writes Z before the bne. */
89 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
90 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Source bytes 5..8, loaded directly from an offset that is not a multiple of 4. */
91 ldr r7, [r1, #5] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* r6 = (r4 >> 8) | (r5 << 24): source bytes 1..4, assuming little-endian byte order - confirm. */
92 lsr r6, r4, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Second row: pre-indexed with writeback, so r1 now points at it. */
93 ldr r8, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
94 orr r6, r6, r5, lsl #24 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
95 ldr r9, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
96 ldr r11, [r1, #5] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
97 lsr r10, r8, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
98 add r1, r1, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
99 orr r10, r10, r9, lsl #24 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* First row: r14/r6 collect the rounding bits, r4/r5 the truncated averages. */
100 eor r14, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
101 uhadd8 r4, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
102 eor r6, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
103 uhadd8 r5, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
104 and r14, r14, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
105 and r6, r6, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
106 uadd8 r4, r4, r14 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Second row, same scheme, reusing r14 and r6 as scratch. */
107 eor r14, r8, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
108 uadd8 r5, r5, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
109 eor r6, r9, r11 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
110 uhadd8 r8, r8, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
111 and r14, r14, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
112 uhadd8 r9, r9, r11 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
113 and r6, r6, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
114 uadd8 r8, r8, r14 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
115 strd r4, r5, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
116 uadd8 r9, r9, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
117 strd r8, r9, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
118 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
119 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
120 pop {r4-r11, pc} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
121 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
122 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/*
 * ff_put_pixels8_y2_armv6
 *
 * Writes rows of 8 bytes to r0 in which output row n is the per-byte
 * rounded-up average (a + b + 1) >> 1 of source rows n and n+1.
 *   r0 - destination pointer, stepped by r2 after each row
 *   r1 - source pointer; rows are r2 bytes apart
 *   r2 - byte offset between consecutive rows
 *   r3 - row count; decremented by 2 per pass and tested for exactly zero,
 *        so it has to be a positive even number
 * r12 holds 0x01010101. r4-r11 are saved on entry and restored on exit.
 *
 * Two source rows are loaded before the loop and every pass loads two more,
 * so the last pass also loads source row (row count + 1), whose value is
 * never used.
 */
123 function ff_put_pixels8_y2_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
124 push {r4-r11} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Build r12 = 0x01010101. */
125 mov r12, #1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
126 orr r12, r12, r12, lsl #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
127 orr r12, r12, r12, lsl #16 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Prime the pipeline: r4/r5 = first source row, r6/r7 = second source row. */
128 ldr r4, [r1] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
129 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
130 ldr r6, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
131 ldr r7, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
132 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Loop flags are set here; no later instruction in the pass writes Z before the bne. */
133 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* First output row (r8/r9): average of r4/r5 with r6/r7. */
134 uhadd8 r8, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
135 eor r10, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
136 uhadd8 r9, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
137 eor r11, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
138 and r10, r10, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* r4/r5 are free again: refill them with the next source row. */
139 ldr r4, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
140 uadd8 r8, r8, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
141 and r11, r11, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
142 uadd8 r9, r9, r11 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
143 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Second output row (r10/r11): average of the new r4/r5 with r6/r7. */
144 uhadd8 r10, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
145 eor r6, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
146 uhadd8 r11, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
147 and r6, r6, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
148 eor r7, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
149 uadd8 r10, r10, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
150 and r7, r7, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Load the row the next pass pairs with r4/r5; on the last pass this value is unused. */
151 ldr r6, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
152 uadd8 r11, r11, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
153 strd r8, r9, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
154 ldr r7, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
155 strd r10, r11, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
156 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
157 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
158 pop {r4-r11} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
159 bx lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
160 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
161 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/*
 * ff_put_pixels8_x2_no_rnd_armv6
 *
 * For every row, writes 8 bytes to r0 in which output byte i is the
 * truncating average (a + b) >> 1 of source bytes i and i+1 (uhadd8 with no
 * rounding correction). Nine source bytes are read per row.
 *   r0 - destination pointer, stepped by r2 after each row
 *   r1 - source pointer, stepped by r2 after each row
 *   r2 - byte offset between consecutive rows
 *   r3 - row count; decremented by 2 per pass and tested for exactly zero,
 *        so it has to be a positive even number
 * r12 and r14 are used as scratch; r4-r9 and lr are saved on entry and the
 * function returns by popping the saved lr into pc.
 */
162 function ff_put_pixels8_x2_no_rnd_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
163 push {r4-r9, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
164 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Loop flags are set here; the plain add instructions below leave them intact for the bne. */
165 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* First row: r4 = bytes 0..3, r5 = bytes 4..7, r7 = bytes 5..8. */
166 ldr r4, [r1] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
167 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
168 ldr r7, [r1, #5] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Second row, after stepping r1 with pre-indexed writeback. */
169 ldr r8, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
170 ldr r9, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
171 ldr r14, [r1, #5] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
172 add r1, r1, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* r6 and r12 = bytes 1..4 of each row, built by shifting (assumes little-endian byte order - confirm). */
173 lsr r6, r4, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
174 orr r6, r6, r5, lsl #24 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
175 lsr r12, r8, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
176 orr r12, r12, r9, lsl #24 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
177 uhadd8 r4, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
178 uhadd8 r5, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
179 uhadd8 r8, r8, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
180 uhadd8 r9, r9, r14 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
181 stm r0, {r4,r5} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
182 add r0, r0, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
183 stm r0, {r8,r9} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
184 add r0, r0, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
185 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
186 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
187 pop {r4-r9, pc} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
188 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
189 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/*
 * ff_put_pixels8_y2_no_rnd_armv6
 *
 * Writes rows of 8 bytes to r0 in which output row n is the per-byte
 * truncating average (a + b) >> 1 of source rows n and n+1.
 *   r0 - destination pointer, stepped by r2 after each row
 *   r1 - source pointer; rows are r2 bytes apart
 *   r2 - byte offset between consecutive rows
 *   r3 - row count; decremented by 2 per pass and tested for exactly zero,
 *        so it has to be a positive even number
 * r12 and r14 hold the second output row; r4-r9 and lr are saved on entry
 * and the function returns by popping the saved lr into pc.
 *
 * Two source rows are loaded before the loop and every pass loads two more,
 * so the last pass also loads source row (row count + 1), whose value is
 * never used.
 */
190 function ff_put_pixels8_y2_no_rnd_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
191 push {r4-r9, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Prime the pipeline: r4/r5 = first source row, r6/r7 = second source row. */
192 ldr r4, [r1] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
193 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
194 ldr r6, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
195 ldr r7, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
196 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Loop flags are set here; the plain add instructions below leave them intact for the bne. */
197 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Each average is followed by a reload of the register it has just finished with. */
198 uhadd8 r8, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
199 ldr r4, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
200 uhadd8 r9, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
201 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
202 uhadd8 r12, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Row for the next pass; on the last pass this value is unused. */
203 ldr r6, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
204 uhadd8 r14, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
205 ldr r7, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
206 stm r0, {r8,r9} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
207 add r0, r0, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
208 stm r0, {r12,r14} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
209 add r0, r0, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
210 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
211 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
212 pop {r4-r9, pc} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
213 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
214 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/*
 * ff_avg_pixels8_armv6
 *
 * For every row, replaces the 8 bytes at r0 with the per-byte rounded-up
 * average (a + b + 1) >> 1 of those bytes and the 8 bytes at r1.
 *   r0 - destination pointer (read and written), stepped by r2 per row
 *   r1 - source pointer, stepped by r2 per row
 *   r2 - byte offset between consecutive rows
 *   r3 - row count; decremented by 2 and tested for exactly zero, so it has
 *        to be a positive even number
 * lr holds 0x01010101 during the loop. r4-r10 and lr are saved on entry and
 * the function returns by popping the saved lr into pc.
 *
 * The loop is software-pipelined: each pass finishes the first row of a
 * pair, starts the second, and then either prefetches the next pair
 * (fall-through path) or drains the second row at label 2 without loading
 * anything further.
 */
215 function ff_avg_pixels8_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
216 pld [r1, r2] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
217 push {r4-r10, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Build lr = 0x01010101 (the return address is already on the stack). */
218 mov lr, #1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
219 orr lr, lr, lr, lsl #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
220 orr lr, lr, lr, lsl #16 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Prime: r4/r5 = first destination row, r9/r10 = first source row. */
221 ldrd r4, r5, [r0] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
222 ldr r10, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
223 ldr r9, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Flags from this subs (or the one near the end of the loop) decide the beq 2f below. */
224 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
225 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
226 pld [r1, r2] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* First row of the pair: r8/r12 = rounding bits, r4/r5 = truncated averages. */
227 eor r8, r4, r9 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
228 uhadd8 r4, r4, r9 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
229 eor r12, r5, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* r6/r7 = destination row one stride below r0 (second row of the pair). */
230 ldrd r6, r7, [r0, r2] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
231 uhadd8 r5, r5, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
232 and r8, r8, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
233 ldr r10, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
234 and r12, r12, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
235 uadd8 r4, r4, r8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
236 ldr r9, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Second row of the pair starts here, reusing r8/r12 for its rounding bits. */
237 eor r8, r6, r9 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
238 uadd8 r5, r5, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
239 pld [r1, r2, lsl #1] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
240 eor r12, r7, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
241 uhadd8 r6, r6, r9 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
242 strd r4, r5, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
243 uhadd8 r7, r7, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Row count exhausted: finish the second row at 2 without prefetching another pair. */
244 beq 2f |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
245 and r8, r8, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* r0 now points at the second row, so [r0, r2] is the first row of the next pair. */
246 ldrd r4, r5, [r0, r2] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
247 uadd8 r6, r6, r8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
248 ldr r10, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
249 and r12, r12, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
250 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
251 uadd8 r7, r7, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
252 ldr r9, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
253 strd r6, r7, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
254 b 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
255 2: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
/* Drain: add the rounding bits to the last row and store it. */
256 and r8, r8, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
257 and r12, r12, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
258 uadd8 r6, r6, r8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
259 uadd8 r7, r7, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
260 strd r6, r7, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
261 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
262 pop {r4-r10, pc} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
263 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
264 |
/*
 * ff_add_pixels_clamped_armv6
 *
 * Processes 8 rows. For each row it reads eight 16-bit values from r0 and
 * eight bytes from r1, adds them lane by lane, clamps every sum to 0..255
 * and writes the eight result bytes back to r1.
 *   r0 - pointer to the 16-bit addends; read sequentially, 16 bytes per row
 *   r1 - pointer to the 8-byte pixel row, updated in place and stepped by
 *        r2 after each row
 *   r2 - byte offset between consecutive pixel rows
 * r3 is overwritten with the row counter. r4-r8 and lr are saved on entry
 * and the function returns by popping the saved lr into pc.
 */
10372 | 265 function ff_add_pixels_clamped_armv6, export=1 |
266 push {r4-r8,lr} | |
267 mov r3, #8 | |
268 1: | |
269 ldm r0!, {r4,r5,r12,lr} | |
270 ldrd r6, r7, [r1] | |
/*
 * Regroup the 16-bit addends so the even-numbered lanes share one register
 * and the odd-numbered lanes another, matching the byte lanes that uxtab16
 * extracts (bytes 0 and 2, or bytes 1 and 3 after ror #8).
 */
271 pkhbt r8, r4, r5, lsl #16 | |
272 pkhtb r5, r5, r4, asr #16 | |
273 pkhbt r4, r12, lr, lsl #16 | |
274 pkhtb lr, lr, r12, asr #16 | |
275 pld [r1, r2] | |
/* Add the zero-extended pixel bytes to the 16-bit lanes. */
276 uxtab16 r8, r8, r6 | |
277 uxtab16 r5, r5, r6, ror #8 | |
278 uxtab16 r4, r4, r7 | |
279 uxtab16 lr, lr, r7, ror #8 | |
/* Clamp every signed 16-bit lane to the unsigned 8-bit range. */
280 usat16 r8, #8, r8 | |
281 usat16 r5, #8, r5 | |
282 usat16 r4, #8, r4 | |
283 usat16 lr, #8, lr | |
/* Interleave the even and odd lanes back into packed bytes. */
284 orr r6, r8, r5, lsl #8 | |
285 orr r7, r4, lr, lsl #8 | |
/* Flags from this subs feed the bgt; the strd in between leaves them alone. */
286 subs r3, r3, #1 | |
287 strd r6, r7, [r1], r2 | |
288 bgt 1b | |
289 pop {r4-r8,pc} | |
290 .endfunc | |
11109 | 291 |
/*
 * ff_get_pixels_armv6
 *
 * In:  r0 = destination; 16 bytes (8 halfwords) are written per row and
 *           r0 is advanced by the stm writeback
 *      r1 = source rows of 8 bytes
 *      r2 = byte offset from one source row to the next
 *
 * For each of 8 rows, zero-extends the eight source bytes to halfwords
 * and stores them to r0 in the same order.
 *
 * r4-r8 and lr are saved/restored; r12 is used as scratch.
 * NOTE(review): ldrd on r1 presumably needs suitably aligned rows -
 * confirm against the callers.
 */
11113 | 292 function ff_get_pixels_armv6, export=1 |
293 pld [r1, r2] | |
294 push {r4-r8, lr} | |
/* lr = rows left to process (the return address is already on the stack) */
295 mov lr, #8 | |
296 1: | |
297 ldrd r4, r5, [r1], r2 | |
/* flags set here are consumed by the bgt at the end of the loop body */
298 subs lr, lr, #1 | |
/* split each word into bytes 0,2 (no rotation) and bytes 1,3 (ror #8) */
299 uxtb16 r6, r4 | |
300 uxtb16 r4, r4, ror #8 | |
301 uxtb16 r12, r5 | |
302 uxtb16 r8, r5, ror #8 | |
303 pld [r1, r2] | |
/* recombine into byte pairs (0,1) (2,3) (4,5) (6,7) as halfwords */
304 pkhbt r5, r6, r4, lsl #16 | |
305 pkhtb r6, r4, r6, asr #16 | |
306 pkhbt r7, r12, r8, lsl #16 | |
307 pkhtb r12, r8, r12, asr #16 | |
308 stm r0!, {r5,r6,r7,r12} | |
309 bgt 1b | |
310 | |
311 pop {r4-r8, pc} | |
312 .endfunc | |
313 | |
/*
 * ff_diff_pixels_armv6
 *
 * In:  r0 = destination; 16 bytes (8 halfwords) are written per row and
 *           r0 is advanced by the stm writeback
 *      r1 = first source, rows of 8 bytes
 *      r2 = second source, rows of 8 bytes
 *      r3 = byte offset between rows, applied to both r1 and r2
 *
 * For each of 8 rows, stores the eight 16-bit differences
 * (byte from r1) - (byte from r2), both bytes zero-extended first.
 *
 * r4-r9 and lr are saved/restored.
 * NOTE(review): ldrd on r1/r2 presumably needs suitably aligned rows -
 * confirm against the callers.
 */
11114 | 314 function ff_diff_pixels_armv6, export=1 |
315 pld [r1, r3] | |
316 pld [r2, r3] | |
317 push {r4-r9, lr} | |
/* lr = rows left to process */
318 mov lr, #8 | |
319 1: | |
320 ldrd r4, r5, [r1], r3 | |
321 ldrd r6, r7, [r2], r3 | |
/* first word: widen bytes 0,2 and bytes 1,3 of both sources */
322 uxtb16 r8, r4 | |
323 uxtb16 r4, r4, ror #8 | |
324 uxtb16 r9, r6 | |
325 uxtb16 r6, r6, ror #8 | |
326 pld [r1, r3] | |
/* r9 = differences of bytes 0,2; r6 = differences of bytes 1,3 */
327 ssub16 r9, r8, r9 | |
328 ssub16 r6, r4, r6 | |
/* start widening the second word of the r1 row */
329 uxtb16 r8, r5 | |
330 uxtb16 r5, r5, ror #8 | |
331 pld [r2, r3] | |
/* reorder the first four differences into pairs (0,1) and (2,3) */
332 pkhbt r4, r9, r6, lsl #16 | |
333 pkhtb r6, r6, r9, asr #16 | |
334 uxtb16 r9, r7 | |
335 uxtb16 r7, r7, ror #8 | |
/* r9 = differences of bytes 4,6; r5 = differences of bytes 5,7 */
336 ssub16 r9, r8, r9 | |
337 ssub16 r5, r5, r7 | |
/* the pkh and stm instructions below leave the flags alone for bgt */
338 subs lr, lr, #1 | |
339 pkhbt r8, r9, r5, lsl #16 | |
340 pkhtb r9, r5, r9, asr #16 | |
341 stm r0!, {r4,r6,r8,r9} | |
342 bgt 1b | |
343 | |
344 pop {r4-r9, pc} | |
345 .endfunc | |
346 | |
/*
 * ff_pix_abs16_armv6
 *
 * In:  r0   = not read; overwritten with the row count
 *      r1   = first block, rows of 16 bytes (read with ldm)
 *      r2   = second block, rows of 16 bytes (read with single ldr)
 *      r3   = byte offset between rows, applied to both blocks
 *      [sp] = row count (word at the top of the stack on entry)
 * Out: r0   = sum of absolute byte differences over all rows
 *
 * Two accumulators (r12 and lr) are used and added together at the end.
 * The loop only exits when the count reaches exactly zero, so the count
 * must be at least 1.
 * NOTE(review): presumably r1 is word-aligned (ldm) while r2 may be
 * unaligned (individual ldr) - confirm against the callers.
 */
11109 | 347 function ff_pix_abs16_armv6, export=1 |
/* fetch the stack argument before push moves sp */
348 ldr r0, [sp] | |
349 push {r4-r9, lr} | |
350 mov r12, #0 | |
351 mov lr, #0 | |
/* load the first r1 row and the first word of the r2 row ahead of the loop */
352 ldm r1, {r4-r7} | |
353 ldr r8, [r2] | |
354 1: | |
355 ldr r9, [r2, #4] | |
356 pld [r1, r3] | |
357 usada8 r12, r4, r8, r12 | |
358 ldr r8, [r2, #8] | |
359 pld [r2, r3] | |
360 usada8 lr, r5, r9, lr | |
361 ldr r9, [r2, #12] | |
362 usada8 r12, r6, r8, r12 | |
/* usada8 leaves the flags alone, so beq tests the result of this subs */
363 subs r0, r0, #1 | |
364 usada8 lr, r7, r9, lr | |
365 beq 2f | |
/* more rows: advance both pointers and load the next row's data */
366 add r1, r1, r3 | |
367 ldm r1, {r4-r7} | |
368 add r2, r2, r3 | |
369 ldr r8, [r2] | |
370 b 1b | |
371 2: | |
372 add r0, r12, lr | |
373 pop {r4-r9, pc} | |
374 .endfunc | |
11110 | 375 |
/*
 * ff_pix_abs16_x2_armv6
 *
 * In:  r0   = not read; used as the accumulator
 *      r1   = first block, rows of 16 bytes
 *      r2   = second block; 17 bytes are read per row (offsets 0..16)
 *      r3   = byte offset between rows, applied to both blocks
 *      [sp] = row count (word at the top of the stack on entry)
 * Out: r0   = sum over all rows of |a[i] - avg(b[i], b[i+1])|, i = 0..15,
 *             where a is the r1 row, b is the r2 row, and
 *             avg(x, y) = (x + y + 1) >> 1
 *
 * The average is built per byte lane as uhadd8(x, y) + ((x ^ y) & 1):
 * uhadd8 truncates the halved sum and the masked eor restores the
 * rounding bit.
 * The "(word >> 8) | (next << 24)" sequences form the row shifted by one
 * byte; this matches memory order for little-endian word loads.
 * The loop body runs at least once (the count is tested after the body).
 */
376 function ff_pix_abs16_x2_armv6, export=1 | |
/* fetch the stack argument before push moves sp */
377 ldr r12, [sp] | |
378 push {r4-r11, lr} | |
379 mov r0, #0 | |
/* lr = 0x01010101, mask selecting bit 0 of every byte lane */
380 mov lr, #1 | |
381 orr lr, lr, lr, lsl #8 | |
382 orr lr, lr, lr, lsl #16 | |
383 1: | |
/* bytes 0..3: r8 = b[0..3], r10 = b[1..4] */
384 ldr r8, [r2] | |
385 ldr r9, [r2, #4] | |
386 lsr r10, r8, #8 | |
387 ldr r4, [r1] | |
388 lsr r6, r9, #8 | |
389 orr r10, r10, r9, lsl #24 | |
390 ldr r5, [r2, #8] | |
391 eor r11, r8, r10 | |
392 uhadd8 r7, r8, r10 | |
393 orr r6, r6, r5, lsl #24 | |
394 and r11, r11, lr | |
395 uadd8 r7, r7, r11 | |
396 ldr r8, [r1, #4] | |
397 usada8 r0, r4, r7, r0 | |
/* bytes 4..7: r9 = b[4..7], r6 = b[5..8] */
398 eor r7, r9, r6 | |
399 lsr r10, r5, #8 | |
400 and r7, r7, lr | |
401 uhadd8 r4, r9, r6 | |
402 ldr r6, [r2, #12] | |
403 uadd8 r4, r4, r7 | |
404 pld [r1, r3] | |
405 orr r10, r10, r6, lsl #24 | |
406 usada8 r0, r8, r4, r0 | |
/* bytes 8..11: r5 = b[8..11], r10 = b[9..12] */
407 ldr r4, [r1, #8] | |
408 eor r11, r5, r10 | |
/* 17th byte of the r2 row, needed for the last shifted word */
409 ldrb r7, [r2, #16] | |
410 and r11, r11, lr | |
411 uhadd8 r8, r5, r10 | |
412 ldr r5, [r1, #12] | |
413 uadd8 r8, r8, r11 | |
414 pld [r2, r3] | |
415 lsr r10, r6, #8 | |
416 usada8 r0, r4, r8, r0 | |
/* bytes 12..15: r6 = b[12..15], r10 = b[13..16] */
417 orr r10, r10, r7, lsl #24 | |
/* nothing between here and bgt changes N/Z/C/V */
418 subs r12, r12, #1 | |
419 eor r11, r6, r10 | |
420 add r1, r1, r3 | |
421 uhadd8 r9, r6, r10 | |
422 and r11, r11, lr | |
423 uadd8 r9, r9, r11 | |
424 add r2, r2, r3 | |
425 usada8 r0, r5, r9, r0 | |
426 bgt 1b | |
427 | |
428 pop {r4-r11, pc} | |
429 .endfunc | |
11111 | 430 |
/*
 * usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
 *
 * Processes one 16-byte row.
 *   p0-p3 : registers holding the previously loaded 16 bytes from r2
 *           (clobbered)
 *   n0-n3 : registers that receive the 16 bytes loaded from r2 here;
 *           they still hold that row when the macro ends
 *
 * Uses from the enclosing code: r0 (accumulator), r1 and r2 (row
 * pointers, both advanced by r3 at the end), r3 (row offset) and lr,
 * which is and-ed with the eor of the two rows (the caller visible in
 * this file loads it with 0x01010101).
 *
 * For each byte lane it forms uhadd8(prev, cur) + ((prev ^ cur) & lr)
 * and accumulates the absolute difference against the r1 row into r0.
 * Some n/p registers double as temporaries before they receive their
 * final value.
 * No instruction in the macro changes N/Z/C/V.
 */
431 .macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3 | |
/* word 0 */
432 ldr \n0, [r2] | |
433 eor \n1, \p0, \n0 | |
434 uhadd8 \p0, \p0, \n0 | |
435 and \n1, \n1, lr | |
436 ldr \n2, [r1] | |
437 uadd8 \p0, \p0, \n1 | |
438 ldr \n1, [r2, #4] | |
439 usada8 r0, \p0, \n2, r0 | |
440 pld [r1, r3] | |
/* word 1 */
441 eor \n3, \p1, \n1 | |
442 uhadd8 \p1, \p1, \n1 | |
443 and \n3, \n3, lr | |
444 ldr \p0, [r1, #4] | |
445 uadd8 \p1, \p1, \n3 | |
446 ldr \n2, [r2, #8] | |
447 usada8 r0, \p1, \p0, r0 | |
448 pld [r2, r3] | |
/* word 2 */
449 eor \p0, \p2, \n2 | |
450 uhadd8 \p2, \p2, \n2 | |
451 and \p0, \p0, lr | |
452 ldr \p1, [r1, #8] | |
453 uadd8 \p2, \p2, \p0 | |
454 ldr \n3, [r2, #12] | |
455 usada8 r0, \p2, \p1, r0 | |
/* word 3 */
456 eor \p1, \p3, \n3 | |
457 uhadd8 \p3, \p3, \n3 | |
458 and \p1, \p1, lr | |
459 ldr \p0, [r1, #12] | |
460 uadd8 \p3, \p3, \p1 | |
461 add r1, r1, r3 | |
462 usada8 r0, \p3, \p0, r0 | |
463 add r2, r2, r3 | |
464 .endm | |
465 | |
/*
 * ff_pix_abs16_y2_armv6
 *
 * In:  r0   = not read; used as the accumulator
 *      r1   = first block, rows of 16 bytes
 *      r2   = second block, rows of 16 bytes
 *      r3   = byte offset between rows, applied to both blocks
 *      [sp] = row count (word at the top of the stack on entry)
 * Out: r0   = sum over all rows of |a - avg(b_row, b_next_row)| per byte,
 *             with avg(x, y) = (x + y + 1) >> 1 (computed by usad_y2)
 *
 * The first r2 row is loaded before the loop; each usad_y2 then loads
 * one further r2 row, so one more r2 row than r1 rows is read.
 * Two rows are handled per loop iteration with the register roles
 * swapped, so the row just loaded becomes the "previous" row of the next
 * invocation without any register moves. The count is decremented by 2
 * per iteration and the loop runs at least once.
 */
466 function ff_pix_abs16_y2_armv6, export=1 | |
467 pld [r1] | |
468 pld [r2] | |
/* fetch the stack argument before push moves sp */
469 ldr r12, [sp] | |
470 push {r4-r11, lr} | |
471 mov r0, #0 | |
/* lr = 0x01010101, mask selecting bit 0 of every byte lane */
472 mov lr, #1 | |
473 orr lr, lr, lr, lsl #8 | |
474 orr lr, lr, lr, lsl #16 | |
/* r4-r7 = first row of r2; r2 then moves on to the second row */
475 ldr r4, [r2] | |
476 ldr r5, [r2, #4] | |
477 ldr r6, [r2, #8] | |
478 ldr r7, [r2, #12] | |
479 add r2, r2, r3 | |
480 1: | |
481 usad_y2 r4, r5, r6, r7, r8, r9, r10, r11 | |
/* usad_y2 does not change N/Z/C/V, so bgt tests the result of this subs */
482 subs r12, r12, #2 | |
483 usad_y2 r8, r9, r10, r11, r4, r5, r6, r7 | |
484 bgt 1b | |
485 | |
486 pop {r4-r11, pc} | |
487 .endfunc | |
11112 | 488 |
/*
 * ff_pix_abs8_armv6
 *
 * In:  r0   = not read; used as an accumulator
 *      r1   = first block, rows of 8 bytes (read with ldrd)
 *      r2   = second block, rows of 8 bytes (read with two ldr)
 *      r3   = byte offset between rows, applied to both blocks
 *      [sp] = row count (word at the top of the stack on entry)
 * Out: r0   = sum of absolute byte differences over all rows
 *
 * Two rows are handled per iteration and the loop only exits when the
 * count reaches exactly zero, so the count must be a positive multiple
 * of 2. Two accumulators (r0 and lr) are added together at the end.
 * NOTE(review): ldrd on r1 presumably needs suitably aligned rows, while
 * r2 is only accessed with single-word loads - confirm against callers.
 */
489 function ff_pix_abs8_armv6, export=1 | |
490 pld [r2, r3] | |
/* fetch the stack argument before push moves sp */
491 ldr r12, [sp] | |
492 push {r4-r9, lr} | |
493 mov r0, #0 | |
494 mov lr, #0 | |
/* load the first r1 row ahead of the loop */
495 ldrd r4, r5, [r1], r3 | |
496 1: | |
/* the loads, usada8 and pld below leave the flags alone for the beq */
497 subs r12, r12, #2 | |
/* second word first, because the following ldr post-increments r2 */
498 ldr r7, [r2, #4] | |
499 ldr r6, [r2], r3 | |
500 ldrd r8, r9, [r1], r3 | |
501 usada8 r0, r4, r6, r0 | |
502 pld [r2, r3] | |
503 usada8 lr, r5, r7, lr | |
504 ldr r7, [r2, #4] | |
505 ldr r6, [r2], r3 | |
506 beq 2f | |
/* more rows follow: load the next r1 row, then finish the current one */
507 ldrd r4, r5, [r1], r3 | |
508 usada8 r0, r8, r6, r0 | |
509 pld [r2, r3] | |
510 usada8 lr, r9, r7, lr | |
511 b 1b | |
512 2: | |
/* last row: same accumulation, without loading another r1 row */
513 usada8 r0, r8, r6, r0 | |
514 usada8 lr, r9, r7, lr | |
515 add r0, r0, lr | |
516 pop {r4-r9, pc} | |
517 .endfunc | |
11115 | 518 |
/*
 * ff_sse16_armv6
 *
 * In:  r0   = not read; used as the accumulator
 *      r1   = first block, rows of 16 bytes (read with ldrd)
 *      r2   = second block, rows of 16 bytes (read with single ldr)
 *      r3   = byte offset between rows, applied to both blocks
 *      [sp] = row count (word at the top of the stack on entry)
 * Out: r0   = sum of squared byte differences over all rows
 *
 * Per word: uxtb16 widens bytes 0,2 and (with ror #8) bytes 1,3 to
 * halfwords, usub16 subtracts the two sources lane by lane, and
 * "smlad r0, x, x, r0" adds the squares of both halfwords of x to r0.
 * The loop body runs at least once (the count is tested after the body).
 * NOTE(review): ldrd on r1 presumably needs suitably aligned rows -
 * confirm against the callers.
 */
519 function ff_sse16_armv6, export=1 | |
/* fetch the stack argument before push moves sp */
520 ldr r12, [sp] | |
521 push {r4-r9, lr} | |
522 mov r0, #0 | |
523 1: | |
/* bytes 0..3 */
524 ldrd r4, r5, [r1] | |
525 ldr r8, [r2] | |
526 uxtb16 lr, r4 | |
527 uxtb16 r4, r4, ror #8 | |
528 uxtb16 r9, r8 | |
529 uxtb16 r8, r8, ror #8 | |
530 ldr r7, [r2, #4] | |
531 usub16 lr, lr, r9 | |
532 usub16 r4, r4, r8 | |
533 smlad r0, lr, lr, r0 | |
/* bytes 4..7 */
534 uxtb16 r6, r5 | |
535 uxtb16 lr, r5, ror #8 | |
536 uxtb16 r8, r7 | |
537 uxtb16 r9, r7, ror #8 | |
538 smlad r0, r4, r4, r0 | |
539 ldrd r4, r5, [r1, #8] | |
540 usub16 r6, r6, r8 | |
541 usub16 r8, lr, r9 | |
542 ldr r7, [r2, #8] | |
543 smlad r0, r6, r6, r0 | |
/* bytes 8..11 */
544 uxtb16 lr, r4 | |
545 uxtb16 r4, r4, ror #8 | |
546 uxtb16 r9, r7 | |
547 uxtb16 r7, r7, ror #8 | |
548 smlad r0, r8, r8, r0 | |
549 ldr r8, [r2, #12] | |
550 usub16 lr, lr, r9 | |
551 usub16 r4, r4, r7 | |
552 smlad r0, lr, lr, r0 | |
/* bytes 12..15 */
553 uxtb16 r6, r5 | |
554 uxtb16 r5, r5, ror #8 | |
555 uxtb16 r9, r8 | |
556 uxtb16 r8, r8, ror #8 | |
557 smlad r0, r4, r4, r0 | |
558 usub16 r6, r6, r9 | |
559 usub16 r5, r5, r8 | |
560 smlad r0, r6, r6, r0 | |
/* advance both blocks to the next row */
561 add r1, r1, r3 | |
562 add r2, r2, r3 | |
/* the final smlad does not change N/Z/C/V, so bgt tests this subs */
563 subs r12, r12, #1 | |
564 smlad r0, r5, r5, r0 | |
565 bgt 1b | |
566 | |
567 pop {r4-r9, pc} | |
568 .endfunc | |
11116 | 569 |
/*
 * ff_pix_norm1_armv6
 *
 * In:  r0 = block, rows of 16 bytes (read with ldm)
 *      r1 = byte offset from one row to the next
 * Out: r0 = sum of the squares of all bytes in 16 rows of 16 bytes
 *
 * Each word is split with uxtb16 into bytes 0,2 and (ror #8) bytes 1,3;
 * "smlad lr, x, x, lr" then adds the squares of both halfwords of x to
 * the accumulator in lr.
 *
 * r4-r6 and lr are saved/restored; r2, r3 and r12 are used as scratch.
 */
570 function ff_pix_norm1_armv6, export=1 | |
571 push {r4-r6, lr} | |
/* r12 = rows left to process, lr = running sum */
572 mov r12, #16 | |
573 mov lr, #0 | |
574 1: | |
575 ldm r0, {r2-r5} | |
576 uxtb16 r6, r2 | |
577 uxtb16 r2, r2, ror #8 | |
578 smlad lr, r6, r6, lr | |
579 uxtb16 r6, r3 | |
580 smlad lr, r2, r2, lr | |
581 uxtb16 r3, r3, ror #8 | |
582 smlad lr, r6, r6, lr | |
583 uxtb16 r6, r4 | |
584 smlad lr, r3, r3, lr | |
585 uxtb16 r4, r4, ror #8 | |
586 smlad lr, r6, r6, lr | |
587 uxtb16 r6, r5 | |
588 smlad lr, r4, r4, lr | |
589 uxtb16 r5, r5, ror #8 | |
590 smlad lr, r6, r6, lr | |
/* the add and smlad below do not change N/Z/C/V, so bgt tests this subs */
591 subs r12, r12, #1 | |
592 add r0, r0, r1 | |
593 smlad lr, r5, r5, lr | |
594 bgt 1b | |
595 | |
/* move the result into the return register */
596 mov r0, lr | |
597 pop {r4-r6, pc} | |
598 .endfunc | |
11117 | 599 |
/*
 * ff_pix_sum_armv6
 *
 * In:  r0 = block, rows of 16 bytes
 *      r1 = byte offset from one row to the next
 * Out: r0 = sum of all bytes in 16 rows of 16 bytes
 *
 * usada8 against a zero register (lr) adds the four bytes of a word to
 * an accumulator. Two accumulators (r2 and r3) are used and added
 * together at the end.
 *
 * r4-r7 and lr are saved/restored; r12 is used as scratch.
 */
600 function ff_pix_sum_armv6, export=1 | |
601 push {r4-r7, lr} | |
/* r12 = rows left, r2/r3 = partial sums, lr = 0 (second usada8 operand) */
602 mov r12, #16 | |
603 mov r2, #0 | |
604 mov r3, #0 | |
605 mov lr, #0 | |
/* first word of the first row is loaded ahead of the loop */
606 ldr r4, [r0] | |
607 1: | |
/* ldr and usada8 leave the flags alone; beq and bgt below test this subs */
608 subs r12, r12, #1 | |
609 ldr r5, [r0, #4] | |
610 usada8 r2, r4, lr, r2 | |
611 ldr r6, [r0, #8] | |
612 usada8 r3, r5, lr, r3 | |
613 ldr r7, [r0, #12] | |
614 usada8 r2, r6, lr, r2 | |
615 beq 2f | |
/* pre-indexed load with writeback: r0 moves to the next row */
616 ldr r4, [r0, r1]! | |
617 usada8 r3, r7, lr, r3 | |
/* always taken here: the count is non-zero (beq fell through) and positive */
618 bgt 1b | |
619 2: | |
/* last row: accumulate its fourth word without loading another row */
620 usada8 r3, r7, lr, r3 | |
621 add r0, r2, r3 | |
622 pop {r4-r7, pc} | |
623 .endfunc |