Mercurial > libavcodec.hg
annotate arm/dsputil_armv6.S @ 11229:d53e56a6228b libavcodec
Simplify determining whether fragments are coded
No measurable speed difference
author | conrad |
---|---|
date | Sun, 21 Feb 2010 00:11:01 +0000 |
parents | ad6d17b36a3a |
children | cbf3161706f4 |
rev | line source |
---|---|
10372 | 1 /* |
2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |
3 * | |
4 * This file is part of FFmpeg. | |
5 * | |
6 * FFmpeg is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2.1 of the License, or (at your option) any later version. | |
10 * | |
11 * FFmpeg is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with FFmpeg; if not, write to the Free Software | |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 */ | |
20 | |
21 #include "asm.S" | |
22 | |
23 .text | |
24 | |
11108
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
25 .macro call_2x_pixels type, subp |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
26 function ff_\type\()_pixels16\subp\()_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
27 push {r0-r3, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
28 bl ff_\type\()_pixels8\subp\()_armv6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
29 pop {r0-r3, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
30 add r0, r0, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
31 add r1, r1, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
32 b ff_\type\()_pixels8\subp\()_armv6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
33 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
34 .endm |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
35 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
36 call_2x_pixels avg |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
37 call_2x_pixels put, _x2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
38 call_2x_pixels put, _y2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
39 call_2x_pixels put, _x2_no_rnd |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
40 call_2x_pixels put, _y2_no_rnd |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
41 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
42 function ff_put_pixels16_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
43 push {r4-r11} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
44 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
45 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
46 ldr r6, [r1, #8] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
47 ldr r7, [r1, #12] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
48 ldr r4, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
49 strd r6, r7, [r0, #8] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
50 ldr r9, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
51 strd r4, r5, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
52 ldr r10, [r1, #8] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
53 ldr r11, [r1, #12] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
54 ldr r8, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
55 strd r10, r11, [r0, #8] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
56 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
57 strd r8, r9, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
58 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
59 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
60 pop {r4-r11} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
61 bx lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
62 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
63 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
64 function ff_put_pixels8_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
65 push {r4-r7} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
66 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
67 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
68 ldr r4, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
69 ldr r7, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
70 strd r4, r5, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
71 ldr r6, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
72 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
73 strd r6, r7, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
74 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
75 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
76 pop {r4-r7} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
77 bx lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
78 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
79 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
80 function ff_put_pixels8_x2_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
81 push {r4-r11, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
82 mov r12, #1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
83 orr r12, r12, r12, lsl #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
84 orr r12, r12, r12, lsl #16 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
85 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
86 ldr r4, [r1] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
87 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
88 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
89 ldr r7, [r1, #5] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
90 lsr r6, r4, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
91 ldr r8, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
92 orr r6, r6, r5, lsl #24 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
93 ldr r9, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
94 ldr r11, [r1, #5] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
95 lsr r10, r8, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
96 add r1, r1, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
97 orr r10, r10, r9, lsl #24 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
98 eor r14, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
99 uhadd8 r4, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
100 eor r6, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
101 uhadd8 r5, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
102 and r14, r14, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
103 and r6, r6, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
104 uadd8 r4, r4, r14 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
105 eor r14, r8, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
106 uadd8 r5, r5, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
107 eor r6, r9, r11 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
108 uhadd8 r8, r8, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
109 and r14, r14, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
110 uhadd8 r9, r9, r11 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
111 and r6, r6, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
112 uadd8 r8, r8, r14 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
113 strd r4, r5, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
114 uadd8 r9, r9, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
115 strd r8, r9, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
116 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
117 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
118 pop {r4-r11, pc} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
119 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
120 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
121 function ff_put_pixels8_y2_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
122 push {r4-r11} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
123 mov r12, #1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
124 orr r12, r12, r12, lsl #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
125 orr r12, r12, r12, lsl #16 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
126 ldr r4, [r1] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
127 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
128 ldr r6, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
129 ldr r7, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
130 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
131 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
132 uhadd8 r8, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
133 eor r10, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
134 uhadd8 r9, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
135 eor r11, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
136 and r10, r10, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
137 ldr r4, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
138 uadd8 r8, r8, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
139 and r11, r11, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
140 uadd8 r9, r9, r11 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
141 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
142 uhadd8 r10, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
143 eor r6, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
144 uhadd8 r11, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
145 and r6, r6, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
146 eor r7, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
147 uadd8 r10, r10, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
148 and r7, r7, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
149 ldr r6, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
150 uadd8 r11, r11, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
151 strd r8, r9, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
152 ldr r7, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
153 strd r10, r11, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
154 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
155 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
156 pop {r4-r11} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
157 bx lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
158 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
159 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
160 function ff_put_pixels8_x2_no_rnd_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
161 push {r4-r9, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
162 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
163 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
164 ldr r4, [r1] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
165 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
166 ldr r7, [r1, #5] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
167 ldr r8, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
168 ldr r9, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
169 ldr r14, [r1, #5] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
170 add r1, r1, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
171 lsr r6, r4, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
172 orr r6, r6, r5, lsl #24 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
173 lsr r12, r8, #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
174 orr r12, r12, r9, lsl #24 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
175 uhadd8 r4, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
176 uhadd8 r5, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
177 uhadd8 r8, r8, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
178 uhadd8 r9, r9, r14 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
179 stm r0, {r4,r5} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
180 add r0, r0, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
181 stm r0, {r8,r9} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
182 add r0, r0, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
183 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
184 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
185 pop {r4-r9, pc} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
186 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
187 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
188 function ff_put_pixels8_y2_no_rnd_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
189 push {r4-r9, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
190 ldr r4, [r1] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
191 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
192 ldr r6, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
193 ldr r7, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
194 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
195 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
196 uhadd8 r8, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
197 ldr r4, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
198 uhadd8 r9, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
199 ldr r5, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
200 uhadd8 r12, r4, r6 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
201 ldr r6, [r1, r2]! |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
202 uhadd8 r14, r5, r7 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
203 ldr r7, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
204 stm r0, {r8,r9} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
205 add r0, r0, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
206 stm r0, {r12,r14} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
207 add r0, r0, r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
208 bne 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
209 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
210 pop {r4-r9, pc} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
211 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
212 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
213 function ff_avg_pixels8_armv6, export=1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
214 pld [r1, r2] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
215 push {r4-r10, lr} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
216 mov lr, #1 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
217 orr lr, lr, lr, lsl #8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
218 orr lr, lr, lr, lsl #16 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
219 ldrd r4, r5, [r0] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
220 ldr r10, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
221 ldr r9, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
222 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
223 1: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
224 pld [r1, r2] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
225 eor r8, r4, r9 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
226 uhadd8 r4, r4, r9 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
227 eor r12, r5, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
228 ldrd r6, r7, [r0, r2] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
229 uhadd8 r5, r5, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
230 and r8, r8, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
231 ldr r10, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
232 and r12, r12, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
233 uadd8 r4, r4, r8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
234 ldr r9, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
235 eor r8, r6, r9 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
236 uadd8 r5, r5, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
237 pld [r1, r2, lsl #1] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
238 eor r12, r7, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
239 uhadd8 r6, r6, r9 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
240 strd r4, r5, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
241 uhadd8 r7, r7, r10 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
242 beq 2f |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
243 and r8, r8, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
244 ldrd r4, r5, [r0, r2] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
245 uadd8 r6, r6, r8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
246 ldr r10, [r1, #4] |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
247 and r12, r12, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
248 subs r3, r3, #2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
249 uadd8 r7, r7, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
250 ldr r9, [r1], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
251 strd r6, r7, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
252 b 1b |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
253 2: |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
254 and r8, r8, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
255 and r12, r12, lr |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
256 uadd8 r6, r6, r8 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
257 uadd8 r7, r7, r12 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
258 strd r6, r7, [r0], r2 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
259 |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
260 pop {r4-r10, pc} |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
261 .endfunc |
0f845e20982a
ARMv6 optimised put_pixels functions except xy2 variants
mru
parents:
10372
diff
changeset
|
262 |
/*
 * ff_add_pixels_clamped_armv6
 *
 * For each of 8 rows: reads eight 16-bit values through r0, adds them to
 * the eight bytes at r1, clamps every sum to 0..255 and writes the eight
 * result bytes back over the ones that were read.
 *
 * In:  r0 = 16-bit input values, 16 bytes consumed per row (presumably
 *           the DCT coefficient block -- confirm against the C prototype)
 *      r1 = byte rows that are read, updated and rewritten
 *      r2 = byte offset added to r1 after each row
 * r0 and r1 are advanced, r3 (row counter), r12 and the flags are
 * overwritten; r4-r8 are saved on entry and restored on return.
 */
10372 | 263 function ff_add_pixels_clamped_armv6, export=1 |
264 push {r4-r8,lr} | |
/* r3 = rows left to process */
265 mov r3, #8 | |
266 1: | |
/* Fetch one row: four words (eight halfwords) from r0, eight bytes from r1. */
267 ldm r0!, {r4,r5,r12,lr} | |
268 ldrd r6, r7, [r1] | |
/*
 * Regroup the halfwords so each register lines up with the byte lanes
 * that uxtab16 extracts: r8 = low halves of (r4,r5), r5 = high halves of
 * (r4,r5), r4 = low halves of (r12,lr), lr = high halves of (r12,lr).
 */
269 pkhbt r8, r4, r5, lsl #16 | |
270 pkhtb r5, r5, r4, asr #16 | |
271 pkhbt r4, r12, lr, lsl #16 | |
272 pkhtb lr, lr, r12, asr #16 | |
/* Prefetch hint for the row one stride ahead. */
273 pld [r1, r2] | |
/*
 * Add the zero-extended bytes: without rotation uxtab16 uses bytes 0 and 2
 * of the second operand, with "ror #8" it uses bytes 1 and 3.
 */
274 uxtab16 r8, r8, r6 | |
275 uxtab16 r5, r5, r6, ror #8 | |
276 uxtab16 r4, r4, r7 | |
277 uxtab16 lr, lr, r7, ror #8 | |
/* Clamp each signed 16-bit sum to the unsigned 8-bit range. */
278 usat16 r8, #8, r8 | |
279 usat16 r5, #8, r5 | |
280 usat16 r4, #8, r4 | |
281 usat16 lr, #8, lr | |
/*
 * Every halfword now fits in 8 bits, so shifting the bytes-1/3 registers
 * left by 8 and OR-ing interleaves the results back into byte order.
 */
282 orr r6, r8, r5, lsl #8 | |
283 orr r7, r4, lr, lsl #8 | |
284 subs r3, r3, #1 | |
/* Store the row and step r1 to the next one; strd leaves the flags from subs intact. */
285 strd r6, r7, [r1], r2 | |
286 bgt 1b | |
/* Restore saved registers; loading pc returns to the caller. */
287 pop {r4-r8,pc} | |
288 .endfunc | |
11109 | 289 |
/*
 * ff_get_pixels_armv6
 * Reads eight rows of eight bytes and stores them as eight rows of eight
 * zero-extended 16-bit values.
 *
 * In:  r0 = destination, written sequentially (16 bytes per row)
 *      r1 = source bytes, 8 read per row with ldrd
 *      r2 = byte offset added to r1 after each row
 * Modifies r0, r1, r12 and the flags; r4-r8 are saved and restored.
 * Presumably called from C as (block, pixels, line_size) - TODO confirm
 * against the caller.
 */
11113 | 290 function ff_get_pixels_armv6, export=1 |
291 pld [r1, r2] | |
292 push {r4-r8, lr} | |
/* lr = number of rows left */
293 mov lr, #8 | |
294 1: | |
295 ldrd r4, r5, [r1], r2 | |
296 subs lr, lr, #1 | |
/* split each word into even bytes and odd bytes, widened to halfwords */
297 uxtb16 r6, r4 | |
298 uxtb16 r4, r4, ror #8 | |
299 uxtb16 r12, r5 | |
300 uxtb16 r8, r5, ror #8 | |
301 pld [r1, r2] | |
/* re-pair into source order: r5={0,1} r6={2,3} r7={4,5} r12={6,7} */
302 pkhbt r5, r6, r4, lsl #16 | |
303 pkhtb r6, r4, r6, asr #16 | |
304 pkhbt r7, r12, r8, lsl #16 | |
305 pkhtb r12, r8, r12, asr #16 | |
306 stm r0!, {r5,r6,r7,r12} | |
/* nothing after the subs changes the flags, so bgt tests the row counter */
307 bgt 1b | |
308 | |
309 pop {r4-r8, pc} | |
310 .endfunc | |
311 | |
/*
 * ff_diff_pixels_armv6
 * For eight rows of eight bytes, stores the 16-bit differences
 * (byte from r1) - (byte from r2).
 *
 * In:  r0 = destination, written sequentially (16 bytes per row)
 *      r1 = first source (minuend), 8 bytes read per row with ldrd
 *      r2 = second source (subtrahend), 8 bytes read per row with ldrd
 *      r3 = byte offset added to both r1 and r2 after each row
 * Modifies r0, r1, r2 and the flags; r4-r9 are saved and restored.
 * Presumably called from C as (block, s1, s2, stride) - TODO confirm
 * against the caller.
 */
11114 | 312 function ff_diff_pixels_armv6, export=1 |
313 pld [r1, r3] | |
314 pld [r2, r3] | |
315 push {r4-r9, lr} | |
/* lr = number of rows left */
316 mov lr, #8 | |
317 1: | |
318 ldrd r4, r5, [r1], r3 | |
319 ldrd r6, r7, [r2], r3 | |
/* bytes 0-3: widen even/odd bytes of both sources and subtract */
320 uxtb16 r8, r4 | |
321 uxtb16 r4, r4, ror #8 | |
322 uxtb16 r9, r6 | |
323 uxtb16 r6, r6, ror #8 | |
324 pld [r1, r3] | |
325 ssub16 r9, r8, r9 | |
326 ssub16 r6, r4, r6 | |
327 uxtb16 r8, r5 | |
328 uxtb16 r5, r5, ror #8 | |
329 pld [r2, r3] | |
/* r4 = differences {0,1}, r6 = differences {2,3} */
330 pkhbt r4, r9, r6, lsl #16 | |
331 pkhtb r6, r6, r9, asr #16 | |
/* bytes 4-7: same scheme using the second word of each source */
332 uxtb16 r9, r7 | |
333 uxtb16 r7, r7, ror #8 | |
334 ssub16 r9, r8, r9 | |
335 ssub16 r5, r5, r7 | |
336 subs lr, lr, #1 | |
/* r8 = differences {4,5}, r9 = differences {6,7} */
337 pkhbt r8, r9, r5, lsl #16 | |
338 pkhtb r9, r5, r9, asr #16 | |
339 stm r0!, {r4,r6,r8,r9} | |
340 bgt 1b | |
341 | |
342 pop {r4-r9, pc} | |
343 .endfunc | |
344 | |
/*
 * ff_pix_abs16_armv6
 * Sum of absolute byte differences between two 16-byte-wide areas.
 *
 * In:  r0   = not used; it is overwritten before being read
 *      r1   = first area, each row read with ldm (16 bytes)
 *      r2   = second area, each row read with four ldr
 *      r3   = byte offset added to r1 and r2 after each row
 *      [sp] = number of rows (first stack word on entry; presumably the
 *             fifth C argument - TODO confirm against the caller)
 * Out: r0 = accumulated sum
 * The row count is decremented and then tested with beq, so the loop only
 * terminates as intended for a count of at least 1.
 * Modifies r1, r2, r12 and the flags; r4-r9 are saved and restored.
 */
11109 | 345 function ff_pix_abs16_armv6, export=1 |
/* fetch the row count before the push moves sp */
346 ldr r0, [sp] | |
347 push {r4-r9, lr} | |
/* r12 and lr are two independent accumulators, summed at the end */
348 mov r12, #0 | |
349 mov lr, #0 | |
/* preload the first row of r1 and the first word of r2 */
350 ldm r1, {r4-r7} | |
351 ldr r8, [r2] | |
352 1: | |
353 ldr r9, [r2, #4] | |
354 pld [r1, r3] | |
355 usada8 r12, r4, r8, r12 | |
356 ldr r8, [r2, #8] | |
357 pld [r2, r3] | |
358 usada8 lr, r5, r9, lr | |
359 ldr r9, [r2, #12] | |
360 usada8 r12, r6, r8, r12 | |
361 subs r0, r0, #1 | |
362 usada8 lr, r7, r9, lr | |
/* last row done: skip loading a further row */
363 beq 2f | |
364 add r1, r1, r3 | |
365 ldm r1, {r4-r7} | |
366 add r2, r2, r3 | |
367 ldr r8, [r2] | |
368 b 1b | |
369 2: | |
370 add r0, r12, lr | |
371 pop {r4-r9, pc} | |
372 .endfunc | |
11110 | 373 |
/*
 * ff_pix_abs16_x2_armv6
 * Sum of absolute byte differences between a 16-byte-wide area at r1 and
 * the rounded-up average of each r2 byte with its right-hand neighbour
 * (17 bytes of every r2 row are read).
 *
 * In:  r0   = not used; it is overwritten before being read
 *      r1   = first area, 16 bytes read per row
 *      r2   = second area, 17 bytes read per row
 *      r3   = byte offset added to r1 and r2 after each row
 *      [sp] = number of rows (first stack word on entry; presumably the
 *             fifth C argument - TODO confirm against the caller)
 * Out: r0 = accumulated sum
 * The "word shifted by one byte" is built with lsr #8 / lsl #24, which
 * selects the following byte only for little-endian word loads.
 * Rounded average of a and b: uhadd8 gives (a+b)>>1 per byte, then
 * (a^b)&1 is added per byte to round up.
 * The loop test is at the bottom, so the body runs at least once.
 * Modifies r1, r2, r12 and the flags; r4-r11 are saved and restored.
 */
374 function ff_pix_abs16_x2_armv6, export=1 | |
/* fetch the row count before the push moves sp */
375 ldr r12, [sp] | |
376 push {r4-r11, lr} | |
377 mov r0, #0 | |
/* lr = 0x01010101: mask selecting bit 0 of every byte */
378 mov lr, #1 | |
379 orr lr, lr, lr, lsl #8 | |
380 orr lr, lr, lr, lsl #16 | |
381 1: | |
/* bytes 0-3 */
382 ldr r8, [r2] | |
383 ldr r9, [r2, #4] | |
384 lsr r10, r8, #8 | |
385 ldr r4, [r1] | |
386 lsr r6, r9, #8 | |
387 orr r10, r10, r9, lsl #24 | |
388 ldr r5, [r2, #8] | |
389 eor r11, r8, r10 | |
390 uhadd8 r7, r8, r10 | |
391 orr r6, r6, r5, lsl #24 | |
392 and r11, r11, lr | |
393 uadd8 r7, r7, r11 | |
394 ldr r8, [r1, #4] | |
395 usada8 r0, r4, r7, r0 | |
/* bytes 4-7 */
396 eor r7, r9, r6 | |
397 lsr r10, r5, #8 | |
398 and r7, r7, lr | |
399 uhadd8 r4, r9, r6 | |
400 ldr r6, [r2, #12] | |
401 uadd8 r4, r4, r7 | |
402 pld [r1, r3] | |
403 orr r10, r10, r6, lsl #24 | |
404 usada8 r0, r8, r4, r0 | |
/* bytes 8-11 */
405 ldr r4, [r1, #8] | |
406 eor r11, r5, r10 | |
/* r7 = 17th byte of the r2 row, neighbour of byte 15 */
407 ldrb r7, [r2, #16] | |
408 and r11, r11, lr | |
409 uhadd8 r8, r5, r10 | |
410 ldr r5, [r1, #12] | |
411 uadd8 r8, r8, r11 | |
412 pld [r2, r3] | |
413 lsr r10, r6, #8 | |
414 usada8 r0, r4, r8, r0 | |
/* bytes 12-15 */
415 orr r10, r10, r7, lsl #24 | |
416 subs r12, r12, #1 | |
417 eor r11, r6, r10 | |
418 add r1, r1, r3 | |
419 uhadd8 r9, r6, r10 | |
420 and r11, r11, lr | |
421 uadd8 r9, r9, r11 | |
422 add r2, r2, r3 | |
423 usada8 r0, r5, r9, r0 | |
/* no instruction since the subs writes N/Z/C/V, so bgt tests the row count */
424 bgt 1b | |
425 | |
426 pop {r4-r11, pc} | |
427 .endfunc | |
11111 | 428 |
/*
 * usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
 * Processes one 16-byte row: adds to r0 the sum of absolute differences
 * between the row at r1 and the rounded-up byte average of p0-p3 with the
 * row at r2.
 *
 * In:   p0-p3 = four words to average with the row at r2 (the caller in
 *               this file passes the previous r2 row)
 *       r1, r2 = row pointers, r3 = byte offset added to both at the end
 *       lr    = mask used to keep bit 0 of each byte of (a ^ b); the
 *               rounding described here assumes 0x01010101
 *       r0    = running sum
 * Out:  n0-n3 = the four words just read from the r2 row, so the next
 *               invocation can use them as its p0-p3
 *       p0-p3 are overwritten (used as scratch)
 * No instruction in the macro writes the N/Z/C/V flags.
 */
429 .macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3 | |
/* word 0: n1 is scratch for the rounding bits, n2 for the r1 word */
430 ldr \n0, [r2] | |
431 eor \n1, \p0, \n0 | |
432 uhadd8 \p0, \p0, \n0 | |
433 and \n1, \n1, lr | |
434 ldr \n2, [r1] | |
435 uadd8 \p0, \p0, \n1 | |
436 ldr \n1, [r2, #4] | |
437 usada8 r0, \p0, \n2, r0 | |
438 pld [r1, r3] | |
/* word 1: n3 is scratch for the rounding bits, p0 for the r1 word */
439 eor \n3, \p1, \n1 | |
440 uhadd8 \p1, \p1, \n1 | |
441 and \n3, \n3, lr | |
442 ldr \p0, [r1, #4] | |
443 uadd8 \p1, \p1, \n3 | |
444 ldr \n2, [r2, #8] | |
445 usada8 r0, \p1, \p0, r0 | |
446 pld [r2, r3] | |
/* word 2: p0 is scratch for the rounding bits, p1 for the r1 word */
447 eor \p0, \p2, \n2 | |
448 uhadd8 \p2, \p2, \n2 | |
449 and \p0, \p0, lr | |
450 ldr \p1, [r1, #8] | |
451 uadd8 \p2, \p2, \p0 | |
452 ldr \n3, [r2, #12] | |
453 usada8 r0, \p2, \p1, r0 | |
/* word 3: p1 is scratch for the rounding bits, p0 for the r1 word */
454 eor \p1, \p3, \n3 | |
455 uhadd8 \p3, \p3, \n3 | |
456 and \p1, \p1, lr | |
457 ldr \p0, [r1, #12] | |
458 uadd8 \p3, \p3, \p1 | |
459 add r1, r1, r3 | |
460 usada8 r0, \p3, \p0, r0 | |
461 add r2, r2, r3 | |
462 .endm | |
463 | |
/*
 * ff_pix_abs16_y2_armv6
 * Sum of absolute byte differences between a 16-byte-wide area at r1 and
 * the rounded-up average of each r2 row with the r2 row below it.
 *
 * In:  r0   = not used; it is overwritten before being read
 *      r1   = first area, 16 bytes read per row
 *      r2   = second area; one more row is read from it than from r1
 *      r3   = byte offset between consecutive rows
 *      [sp] = number of rows (first stack word on entry; presumably the
 *             fifth C argument - TODO confirm against the caller)
 * Out: r0 = accumulated sum
 * Two rows are handled per loop iteration and the test is at the bottom,
 * so at least two rows are always processed.
 * Modifies r1, r2, r12 and the flags; r4-r11 are saved and restored.
 */
464 function ff_pix_abs16_y2_armv6, export=1 | |
465 pld [r1] | |
466 pld [r2] | |
/* fetch the row count before the push moves sp */
467 ldr r12, [sp] | |
468 push {r4-r11, lr} | |
469 mov r0, #0 | |
/* lr = 0x01010101: bit-0-per-byte mask used by usad_y2 */
470 mov lr, #1 | |
471 orr lr, lr, lr, lsl #8 | |
472 orr lr, lr, lr, lsl #16 | |
/* r4-r7 = first row of r2; r2 then points at the second row */
473 ldr r4, [r2] | |
474 ldr r5, [r2, #4] | |
475 ldr r6, [r2, #8] | |
476 ldr r7, [r2, #12] | |
477 add r2, r2, r3 | |
478 1: | |
/* the two invocations swap register sets so each reuses the row just loaded */
479 usad_y2 r4, r5, r6, r7, r8, r9, r10, r11 | |
480 subs r12, r12, #2 | |
/* usad_y2 does not write N/Z/C/V, so bgt still tests the subs result */
481 usad_y2 r8, r9, r10, r11, r4, r5, r6, r7 | |
482 bgt 1b | |
483 | |
484 pop {r4-r11, pc} | |
485 .endfunc | |
11112 | 486 |
/*
 * ff_pix_abs8_armv6
 * Sum of absolute byte differences between two 8-byte-wide areas.
 *
 * In:  r0   = not used; it is overwritten before being read
 *      r1   = first area, each row read with ldrd (8 bytes)
 *      r2   = second area, each row read with two ldr
 *      r3   = byte offset added to r1 and r2 after each row
 *      [sp] = number of rows (first stack word on entry; presumably the
 *             fifth C argument - TODO confirm against the caller)
 * Out: r0 = accumulated sum
 * Two rows are handled per iteration; the count is decremented by 2 and
 * the exit is taken with beq, so the loop only terminates as intended for
 * a positive even row count.
 * Modifies r1, r2, r12 and the flags; r4-r9 are saved and restored.
 */
487 function ff_pix_abs8_armv6, export=1 | |
488 pld [r2, r3] | |
/* fetch the row count before the push moves sp */
489 ldr r12, [sp] | |
490 push {r4-r9, lr} | |
/* r0 and lr are two independent accumulators, summed at the end */
491 mov r0, #0 | |
492 mov lr, #0 | |
/* preload the first row of r1 */
493 ldrd r4, r5, [r1], r3 | |
494 1: | |
495 subs r12, r12, #2 | |
/* first row of the pair: r4,r5 against r6,r7; r8,r9 = next r1 row */
496 ldr r7, [r2, #4] | |
497 ldr r6, [r2], r3 | |
498 ldrd r8, r9, [r1], r3 | |
499 usada8 r0, r4, r6, r0 | |
500 pld [r2, r3] | |
501 usada8 lr, r5, r7, lr | |
/* second row of the pair: r6,r7 = next r2 row */
502 ldr r7, [r2, #4] | |
503 ldr r6, [r2], r3 | |
/* final pair: finish at 2: without loading another r1 row */
504 beq 2f | |
505 ldrd r4, r5, [r1], r3 | |
506 usada8 r0, r8, r6, r0 | |
507 pld [r2, r3] | |
508 usada8 lr, r9, r7, lr | |
509 b 1b | |
510 2: | |
511 usada8 r0, r8, r6, r0 | |
512 usada8 lr, r9, r7, lr | |
513 add r0, r0, lr | |
514 pop {r4-r9, pc} | |
515 .endfunc | |
11115 | 516 |
/*
 * ff_sse16_armv6
 * Sum of squared byte differences between two 16-byte-wide areas.
 *
 * In:  r0   = not used; it is overwritten before being read
 *      r1   = first area, each row read with two ldrd
 *      r2   = second area, each row read with four ldr
 *      r3   = byte offset added to r1 and r2 after each row
 *      [sp] = number of rows (first stack word on entry; presumably the
 *             fifth C argument - TODO confirm against the caller)
 * Out: r0 = accumulated sum
 * Scheme per 4 bytes: uxtb16 widens the even and the odd bytes to
 * halfword pairs, usub16 forms the per-halfword differences, and
 * smlad x, x, acc adds the squares of both halfwords to the accumulator.
 * The loop test is at the bottom, so the body runs at least once.
 * Modifies r1, r2, r12 and the flags; r4-r9 are saved and restored.
 */
517 function ff_sse16_armv6, export=1 | |
/* fetch the row count before the push moves sp */
518 ldr r12, [sp] | |
519 push {r4-r9, lr} | |
520 mov r0, #0 | |
521 1: | |
/* bytes 0-3 */
522 ldrd r4, r5, [r1] | |
523 ldr r8, [r2] | |
524 uxtb16 lr, r4 | |
525 uxtb16 r4, r4, ror #8 | |
526 uxtb16 r9, r8 | |
527 uxtb16 r8, r8, ror #8 | |
528 ldr r7, [r2, #4] | |
529 usub16 lr, lr, r9 | |
530 usub16 r4, r4, r8 | |
531 smlad r0, lr, lr, r0 | |
/* bytes 4-7 */
532 uxtb16 r6, r5 | |
533 uxtb16 lr, r5, ror #8 | |
534 uxtb16 r8, r7 | |
535 uxtb16 r9, r7, ror #8 | |
536 smlad r0, r4, r4, r0 | |
537 ldrd r4, r5, [r1, #8] | |
538 usub16 r6, r6, r8 | |
539 usub16 r8, lr, r9 | |
540 ldr r7, [r2, #8] | |
541 smlad r0, r6, r6, r0 | |
/* bytes 8-11 */
542 uxtb16 lr, r4 | |
543 uxtb16 r4, r4, ror #8 | |
544 uxtb16 r9, r7 | |
545 uxtb16 r7, r7, ror #8 | |
546 smlad r0, r8, r8, r0 | |
547 ldr r8, [r2, #12] | |
548 usub16 lr, lr, r9 | |
549 usub16 r4, r4, r7 | |
550 smlad r0, lr, lr, r0 | |
/* bytes 12-15 */
551 uxtb16 r6, r5 | |
552 uxtb16 r5, r5, ror #8 | |
553 uxtb16 r9, r8 | |
554 uxtb16 r8, r8, ror #8 | |
555 smlad r0, r4, r4, r0 | |
556 usub16 r6, r6, r9 | |
557 usub16 r5, r5, r8 | |
558 smlad r0, r6, r6, r0 | |
559 add r1, r1, r3 | |
560 add r2, r2, r3 | |
561 subs r12, r12, #1 | |
/* smlad leaves N and Z alone, so bgt tests the subs above */
562 smlad r0, r5, r5, r0 | |
563 bgt 1b | |
564 | |
565 pop {r4-r9, pc} | |
566 .endfunc | |
11116 | 567 |
/*
 * ff_pix_norm1_armv6
 * Sum of the squares of every byte in a 16x16-byte area.
 *
 * In:  r0 = area, each row read with ldm (16 bytes)
 *      r1 = byte offset added to r0 after each row
 * Out: r0 = accumulated sum
 * Scheme per word: uxtb16 widens the even and the odd bytes to halfword
 * pairs, and smlad x, x, acc adds the squares of both halfwords.
 * Modifies r2, r3, r12 and the flags; r4-r6 are saved and restored.
 * Presumably called from C as (pix, line_size) - TODO confirm against the
 * caller.
 */
568 function ff_pix_norm1_armv6, export=1 | |
569 push {r4-r6, lr} | |
/* r12 = number of rows left, lr = accumulator */
570 mov r12, #16 | |
571 mov lr, #0 | |
572 1: | |
573 ldm r0, {r2-r5} | |
574 uxtb16 r6, r2 | |
575 uxtb16 r2, r2, ror #8 | |
576 smlad lr, r6, r6, lr | |
577 uxtb16 r6, r3 | |
578 smlad lr, r2, r2, lr | |
579 uxtb16 r3, r3, ror #8 | |
580 smlad lr, r6, r6, lr | |
581 uxtb16 r6, r4 | |
582 smlad lr, r3, r3, lr | |
583 uxtb16 r4, r4, ror #8 | |
584 smlad lr, r6, r6, lr | |
585 uxtb16 r6, r5 | |
586 smlad lr, r4, r4, lr | |
587 uxtb16 r5, r5, ror #8 | |
588 smlad lr, r6, r6, lr | |
589 subs r12, r12, #1 | |
590 add r0, r0, r1 | |
/* add and smlad leave N and Z alone, so bgt tests the subs above */
591 smlad lr, r5, r5, lr | |
592 bgt 1b | |
593 | |
594 mov r0, lr | |
595 pop {r4-r6, pc} | |
596 .endfunc | |
11117 | 597 |
/*
 * ff_pix_sum_armv6
 * Sum of every byte in a 16x16-byte area.
 *
 * In:  r0 = area, 16 bytes read per row with four ldr
 *      r1 = byte offset between consecutive rows
 * Out: r0 = accumulated sum
 * Bytes are summed with usada8 against a zero register (lr), i.e. the
 * sum of |byte - 0| over the four bytes of a word.
 * Modifies r2, r3, r12 and the flags; r4-r7 are saved and restored.
 * Presumably called from C as (pix, line_size) - TODO confirm against the
 * caller.
 */
598 function ff_pix_sum_armv6, export=1 | |
599 push {r4-r7, lr} | |
/* r12 = number of rows left; r2, r3 = two accumulators; lr = constant zero */
600 mov r12, #16 | |
601 mov r2, #0 | |
602 mov r3, #0 | |
603 mov lr, #0 | |
/* preload the first word of the first row */
604 ldr r4, [r0] | |
605 1: | |
606 subs r12, r12, #1 | |
607 ldr r5, [r0, #4] | |
608 usada8 r2, r4, lr, r2 | |
609 ldr r6, [r0, #8] | |
610 usada8 r3, r5, lr, r3 | |
611 ldr r7, [r0, #12] | |
612 usada8 r2, r6, lr, r2 | |
/* last row: skip loading a further row, the final word is added at 2: */
613 beq 2f | |
/* advance r0 to the next row and preload its first word */
614 ldr r4, [r0, r1]! | |
615 usada8 r3, r7, lr, r3 | |
/* flags are still those of the subs; after a not-taken beq this branch is taken while r12 > 0 */
616 bgt 1b | |
617 2: | |
618 usada8 r3, r7, lr, r3 | |
619 add r0, r2, r3 | |
620 pop {r4-r7, pc} | |
621 .endfunc | |