annotate armv4l/dsputil_neon_s.S @ 8334:6bdd6dfc3574 libavcodec

ARM: NEON optimised put_pixels functions
author mru
date Mon, 15 Dec 2008 22:12:32 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8334
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
1 /*
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
2 * ARM NEON optimised DSP functions
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
3 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
4 *
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
5 * This file is part of FFmpeg.
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
6 *
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
11 *
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
15 * Lesser General Public License for more details.
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
16 *
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
20 */
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
21
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
22 #include "asm.S"
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
23
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
24 preserve8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
25 .fpu neon
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
26 .text
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
27
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
28 .macro pixels16 avg=0
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
29 .if \avg
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
30 mov ip, r0
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
31 .endif
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
32 1: vld1.64 {d0, d1}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
33 vld1.64 {d2, d3}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
34 vld1.64 {d4, d5}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
35 pld [r1, r2, lsl #2]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
36 vld1.64 {d6, d7}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
37 pld [r1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
38 pld [r1, r2]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
39 pld [r1, r2, lsl #1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
40 .if \avg
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
41 vld1.64 {d16,d17}, [ip], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
42 vrhadd.u8 q0, q0, q8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
43 vld1.64 {d18,d19}, [ip], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
44 vrhadd.u8 q1, q1, q9
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
45 vld1.64 {d20,d21}, [ip], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
46 vrhadd.u8 q2, q2, q10
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
47 vld1.64 {d22,d23}, [ip], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
48 vrhadd.u8 q3, q3, q11
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
49 .endif
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
50 subs r3, r3, #4
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
51 vst1.64 {d0, d1}, [r0,:128], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
52 vst1.64 {d2, d3}, [r0,:128], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
53 vst1.64 {d4, d5}, [r0,:128], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
54 vst1.64 {d6, d7}, [r0,:128], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
55 bne 1b
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
56 bx lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
57 .endm
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
58
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
59 .macro pixels16_x2 vhadd=vrhadd.u8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
60 1: vld1.64 {d0-d2}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
61 vld1.64 {d4-d6}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
62 pld [r1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
63 pld [r1, r2]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
64 subs r3, r3, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
65 vext.8 q1, q0, q1, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
66 \vhadd q0, q0, q1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
67 vext.8 q3, q2, q3, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
68 \vhadd q2, q2, q3
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
69 vst1.64 {d0, d1}, [r0,:128], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
70 vst1.64 {d4, d5}, [r0,:128], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
71 bne 1b
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
72 bx lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
73 .endm
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
74
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
75 .macro pixels16_y2 vhadd=vrhadd.u8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
76 push {lr}
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
77 add ip, r1, r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
78 lsl lr, r2, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
79 vld1.64 {d0, d1}, [r1], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
80 vld1.64 {d2, d3}, [ip], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
81 1: subs r3, r3, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
82 \vhadd q2, q0, q1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
83 vld1.64 {d0, d1}, [r1], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
84 \vhadd q3, q0, q1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
85 vld1.64 {d2, d3}, [ip], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
86 pld [r1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
87 pld [ip]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
88 vst1.64 {d4, d5}, [r0,:128], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
89 vst1.64 {d6, d7}, [r0,:128], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
90 bne 1b
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
91 pop {pc}
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
92 .endm
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
93
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
94 .macro pixels16_xy2 vshrn=vrshrn.u16 no_rnd=0
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
95 push {lr}
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
96 lsl lr, r2, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
97 add ip, r1, r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
98 vld1.64 {d0-d2}, [r1], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
99 vld1.64 {d4-d6}, [ip], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
100 .if \no_rnd
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
101 vmov.i16 q13, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
102 .endif
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
103 pld [r1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
104 pld [ip]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
105 vext.8 q1, q0, q1, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
106 vext.8 q3, q2, q3, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
107 vaddl.u8 q8, d0, d2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
108 vaddl.u8 q10, d1, d3
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
109 vaddl.u8 q9, d4, d6
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
110 vaddl.u8 q11, d5, d7
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
111 1: subs r3, r3, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
112 vld1.64 {d0-d2}, [r1], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
113 vadd.u16 q12, q8, q9
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
114 pld [r1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
115 .if \no_rnd
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
116 vadd.u16 q12, q12, q13
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
117 .endif
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
118 vext.8 q15, q0, q1, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
119 vadd.u16 q1 , q10, q11
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
120 \vshrn d28, q12, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
121 .if \no_rnd
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
122 vadd.u16 q1, q1, q13
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
123 .endif
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
124 \vshrn d29, q1, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
125 vaddl.u8 q8, d0, d30
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
126 vld1.64 {d2-d4}, [ip], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
127 vaddl.u8 q10, d1, d31
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
128 vst1.64 {d28,d29}, [r0,:128], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
129 vadd.u16 q12, q8, q9
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
130 pld [ip]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
131 .if \no_rnd
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
132 vadd.u16 q12, q12, q13
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
133 .endif
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
134 vext.8 q2, q1, q2, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
135 vadd.u16 q0, q10, q11
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
136 \vshrn d30, q12, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
137 .if \no_rnd
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
138 vadd.u16 q0, q0, q13
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
139 .endif
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
140 \vshrn d31, q0, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
141 vaddl.u8 q9, d2, d4
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
142 vaddl.u8 q11, d3, d5
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
143 vst1.64 {d30,d31}, [r0,:128], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
144 bgt 1b
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
145 pop {pc}
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
146 .endm
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
147
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
148 .macro pixels8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
149 1: vld1.64 {d0}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
150 vld1.64 {d1}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
151 vld1.64 {d2}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
152 pld [r1, r2, lsl #2]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
153 vld1.64 {d3}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
154 pld [r1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
155 pld [r1, r2]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
156 pld [r1, r2, lsl #1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
157 subs r3, r3, #4
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
158 vst1.64 {d0}, [r0,:64], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
159 vst1.64 {d1}, [r0,:64], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
160 vst1.64 {d2}, [r0,:64], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
161 vst1.64 {d3}, [r0,:64], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
162 bne 1b
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
163 bx lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
164 .endm
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
165
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
166 .macro pixels8_x2 vhadd=vrhadd.u8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
167 1: vld1.64 {d0, d1}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
168 vext.8 d1, d0, d1, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
169 vld1.64 {d2, d3}, [r1], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
170 vext.8 d3, d2, d3, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
171 pld [r1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
172 pld [r1, r2]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
173 subs r3, r3, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
174 vswp d1, d2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
175 \vhadd q0, q0, q1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
176 vst1.64 {d0}, [r0,:64], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
177 vst1.64 {d1}, [r0,:64], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
178 bne 1b
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
179 bx lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
180 .endm
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
181
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
182 .macro pixels8_y2 vhadd=vrhadd.u8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
183 push {lr}
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
184 add ip, r1, r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
185 lsl lr, r2, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
186 vld1.64 {d0}, [r1], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
187 vld1.64 {d1}, [ip], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
188 1: subs r3, r3, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
189 \vhadd d4, d0, d1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
190 vld1.64 {d0}, [r1], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
191 \vhadd d5, d0, d1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
192 vld1.64 {d1}, [ip], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
193 pld [r1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
194 pld [ip]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
195 vst1.64 {d4}, [r0,:64], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
196 vst1.64 {d5}, [r0,:64], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
197 bne 1b
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
198 pop {pc}
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
199 .endm
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
200
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
201 .macro pixels8_xy2 vshrn=vrshrn.u16 no_rnd=0
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
202 push {lr}
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
203 lsl lr, r2, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
204 add ip, r1, r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
205 vld1.64 {d0, d1}, [r1], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
206 vld1.64 {d2, d3}, [ip], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
207 .if \no_rnd
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
208 vmov.i16 q11, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
209 .endif
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
210 pld [r1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
211 pld [ip]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
212 vext.8 d4, d0, d1, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
213 vext.8 d6, d2, d3, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
214 vaddl.u8 q8, d0, d4
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
215 vaddl.u8 q9, d2, d6
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
216 1: subs r3, r3, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
217 vld1.64 {d0, d1}, [r1], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
218 pld [r1]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
219 vadd.u16 q10, q8, q9
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
220 vext.8 d4, d0, d1, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
221 .if \no_rnd
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
222 vadd.u16 q10, q10, q11
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
223 .endif
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
224 vaddl.u8 q8, d0, d4
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
225 \vshrn d5, q10, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
226 vld1.64 {d2, d3}, [ip], lr
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
227 vadd.u16 q10, q8, q9
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
228 pld [ip]
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
229 .if \no_rnd
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
230 vadd.u16 q10, q10, q11
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
231 .endif
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
232 vst1.64 {d5}, [r0,:64], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
233 \vshrn d7, q10, #2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
234 vext.8 d6, d2, d3, #1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
235 vaddl.u8 q9, d2, d6
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
236 vst1.64 {d7}, [r0,:64], r2
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
237 bgt 1b
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
238 pop {pc}
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
239 .endm
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
240
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
241 .macro pixfunc pfx name suf rnd_op args:vararg
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
242 function ff_\pfx\name\suf\()_neon, export=1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
243 \name \rnd_op \args
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
244 .endfunc
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
245 .endm
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
246
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
247 .macro pixfunc2 pfx name args:vararg
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
248 pixfunc \pfx \name
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
249 pixfunc \pfx \name \args
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
250 .endm
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
251
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
252 function ff_put_h264_qpel16_mc00_neon, export=1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
253 mov r3, #16
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
254 .endfunc
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
255
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
256 pixfunc put_ pixels16
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
257 pixfunc2 put_ pixels16_x2, _no_rnd, vhadd.u8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
258 pixfunc2 put_ pixels16_y2, _no_rnd, vhadd.u8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
259 pixfunc2 put_ pixels16_xy2, _no_rnd, vshrn.u16, 1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
260
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
261 function ff_avg_h264_qpel16_mc00_neon, export=1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
262 mov r3, #16
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
263 .endfunc
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
264
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
265 pixfunc avg_ pixels16,, 1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
266
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
267 function ff_put_h264_qpel8_mc00_neon, export=1
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
268 mov r3, #8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
269 .endfunc
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
270
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
271 pixfunc put_ pixels8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
272 pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
273 pixfunc2 put_ pixels8_y2, _no_rnd, vhadd.u8
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
274 pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1