Mercurial > libavcodec.hg
annotate i386/dsputil_mmx_rnd.h @ 5410:1efd5aa20168 libavcodec
Rename h264dsp.c to h264dspenc.c
author | takis |
---|---|
date | Sat, 28 Jul 2007 18:23:35 +0000 |
parents | 470601203f44 |
children | 1876bc447aa4 |
rev | line source |
---|---|
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
1 /* |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
2 * DSP utils mmx functions are compiled twice for rnd/no_rnd |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
1739
07a484280a82
copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents:
1064
diff
changeset
|
4 * Copyright (c) 2003-2004 Michael Niedermayer <michaelni@gmx.at> |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
5 * |
5214 | 6 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
7 * mostly rewritten by Michael Niedermayer <michaelni@gmx.at> | |
8 * and improved by Zdenek Kabelac <kabi@users.sf.net> | |
9 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
10 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
11 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
12 * FFmpeg is free software; you can redistribute it and/or |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
13 * modify it under the terms of the GNU Lesser General Public |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
14 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
15 * version 2.1 of the License, or (at your option) any later version. |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
16 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
17 * FFmpeg is distributed in the hope that it will be useful, |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
20 * Lesser General Public License for more details. |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
21 * |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
22 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
23 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
25 */ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
26 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
27 // put_pixels |
/*
 * put, 8 bytes wide, "x2" variant: every output row is produced by PAVGBP from
 * the 8 source bytes at pixels and the 8 source bytes at pixels + 1.
 *
 * Asm operands: %0 = h, %1 = pixels (esi/rsi), %2 = block (edi/rdi),
 * %3 = line_size. REG_a is loaded with 2 * line_size by the leading "lea".
 * One pass through the loop body handles four rows (two PAVGBP pairs, each
 * followed by advancing both pointers by REG_a) and then subtracts 4 from h,
 * so h has to be a non-zero multiple of 4 for the "jnz" loop to terminate.
 *
 * NOTE(review): PAVGBP and MOVQ_BFE are defined outside this file; presumably
 * PAVGBP is a pairwise byte average (rounding or not depending on which of the
 * rnd/no_rnd builds is being compiled) that needs the constant MOVQ_BFE places
 * in mm6 - confirm against the macro definitions.
 */
1064 | 28 static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
29 { |
448 | 30 MOVQ_BFE(mm6); |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
31 __asm __volatile( |
2979 | 32 "lea (%3, %3), %%"REG_a" \n\t" |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
33 ASMALIGN(3) |
2979 | 34 "1: \n\t" |
35 "movq (%1), %%mm0 \n\t" | |
36 "movq 1(%1), %%mm1 \n\t" | |
37 "movq (%1, %3), %%mm2 \n\t" | |
38 "movq 1(%1, %3), %%mm3 \n\t" | |
39 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
40 "movq %%mm4, (%2) \n\t" | |
41 "movq %%mm5, (%2, %3) \n\t" | |
42 "add %%"REG_a", %1 \n\t" | |
43 "add %%"REG_a", %2 \n\t" | |
44 "movq (%1), %%mm0 \n\t" | |
45 "movq 1(%1), %%mm1 \n\t" | |
46 "movq (%1, %3), %%mm2 \n\t" | |
47 "movq 1(%1, %3), %%mm3 \n\t" | |
48 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
49 "movq %%mm4, (%2) \n\t" | |
50 "movq %%mm5, (%2, %3) \n\t" | |
51 "add %%"REG_a", %1 \n\t" | |
52 "add %%"REG_a", %2 \n\t" | |
53 "subl $4, %0 \n\t" | |
54 "jnz 1b \n\t" | |
55 :"+g"(h), "+S"(pixels), "+D"(block) | |
56 :"r"((long)line_size) | |
57 :REG_a, "memory"); | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
58 } |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
59 |
5083
ce36118abbbb
rename attribute_unused to av_unused and moves its declaration to common.h
benoit
parents:
5006
diff
changeset
|
/*
 * put, 8 bytes wide, "l2" variant: every dst row is produced from one 8-byte
 * row of src1 and one 8-byte row of src2 via PAVGB/PAVGBP.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst, %4 = src1Stride,
 * %5 = dstStride. src1 advances by src1Stride and dst by dstStride per row;
 * src2 is read at consecutive 8-byte offsets (0, 8, 16, 24, then +32), i.e.
 * it is treated as a buffer with a fixed row pitch of 8 bytes.
 *
 * If h is odd ("testl $1"), a single row is processed first and h is
 * decremented. The main loop then handles four rows per pass and subtracts 4
 * from h, so the count left after the optional single row has to be a
 * non-zero multiple of 4 for the "jnz" loop to terminate.
 *
 * Under PIC, h is kept in memory ("+m") instead of the "b" register used
 * otherwise; see the existing note on the #ifdef line.
 *
 * NOTE(review): PAVGB, PAVGBP and MOVQ_BFE are defined outside this file;
 * presumably they implement a byte average using the constant in mm6 -
 * confirm against the macro definitions.
 */
60 static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) |
954 | 61 { |
62 MOVQ_BFE(mm6); | |
63 __asm __volatile( | |
2979 | 64 "testl $1, %0 \n\t" |
65 " jz 1f \n\t" | |
66 "movq (%1), %%mm0 \n\t" | |
67 "movq (%2), %%mm1 \n\t" | |
68 "add %4, %1 \n\t" | |
69 "add $8, %2 \n\t" | |
70 PAVGB(%%mm0, %%mm1, %%mm4, %%mm6) | |
71 "movq %%mm4, (%3) \n\t" | |
72 "add %5, %3 \n\t" | |
73 "decl %0 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
74 ASMALIGN(3) |
2979 | 75 "1: \n\t" |
76 "movq (%1), %%mm0 \n\t" | |
77 "movq (%2), %%mm1 \n\t" | |
78 "add %4, %1 \n\t" | |
79 "movq (%1), %%mm2 \n\t" | |
80 "movq 8(%2), %%mm3 \n\t" | |
81 "add %4, %1 \n\t" | |
82 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
83 "movq %%mm4, (%3) \n\t" | |
84 "add %5, %3 \n\t" | |
85 "movq %%mm5, (%3) \n\t" | |
86 "add %5, %3 \n\t" | |
87 "movq (%1), %%mm0 \n\t" | |
88 "movq 16(%2), %%mm1 \n\t" | |
89 "add %4, %1 \n\t" | |
90 "movq (%1), %%mm2 \n\t" | |
91 "movq 24(%2), %%mm3 \n\t" | |
92 "add %4, %1 \n\t" | |
93 "add $32, %2 \n\t" | |
94 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
95 "movq %%mm4, (%3) \n\t" | |
96 "add %5, %3 \n\t" | |
97 "movq %%mm5, (%3) \n\t" | |
98 "add %5, %3 \n\t" | |
99 "subl $4, %0 \n\t" | |
100 "jnz 1b \n\t" | |
5127 | 101 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used |
967 | 102 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) |
103 #else | |
104 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |
105 #endif | |
2979 | 106 :"S"((long)src1Stride), "D"((long)dstStride) |
107 :"memory"); | |
954 | 108 } |
109 | |
/*
 * put, 16 bytes wide, "x2" variant: every output row is produced by PAVGBP
 * from the source bytes at pixels and at pixels + 1. Each row is handled as
 * two 8-byte halves (offsets 0/1 and 8/9).
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size; REG_a holds
 * 2 * line_size. One pass through the loop body covers four rows and then
 * subtracts 4 from h, so h has to be a non-zero multiple of 4 for the "jnz"
 * loop to terminate.
 *
 * NOTE(review): PAVGBP and MOVQ_BFE are defined outside this file; presumably
 * a pairwise byte average using the constant in mm6 - confirm against the
 * macro definitions.
 */
1064 | 110 static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
651 | 111 { |
112 MOVQ_BFE(mm6); | |
113 __asm __volatile( | |
2979 | 114 "lea (%3, %3), %%"REG_a" \n\t" |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
115 ASMALIGN(3) |
2979 | 116 "1: \n\t" |
117 "movq (%1), %%mm0 \n\t" | |
118 "movq 1(%1), %%mm1 \n\t" | |
119 "movq (%1, %3), %%mm2 \n\t" | |
120 "movq 1(%1, %3), %%mm3 \n\t" | |
121 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
122 "movq %%mm4, (%2) \n\t" | |
123 "movq %%mm5, (%2, %3) \n\t" | |
124 "movq 8(%1), %%mm0 \n\t" | |
125 "movq 9(%1), %%mm1 \n\t" | |
126 "movq 8(%1, %3), %%mm2 \n\t" | |
127 "movq 9(%1, %3), %%mm3 \n\t" | |
128 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
129 "movq %%mm4, 8(%2) \n\t" | |
130 "movq %%mm5, 8(%2, %3) \n\t" | |
131 "add %%"REG_a", %1 \n\t" | |
132 "add %%"REG_a", %2 \n\t" | |
133 "movq (%1), %%mm0 \n\t" | |
134 "movq 1(%1), %%mm1 \n\t" | |
135 "movq (%1, %3), %%mm2 \n\t" | |
136 "movq 1(%1, %3), %%mm3 \n\t" | |
137 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
138 "movq %%mm4, (%2) \n\t" | |
139 "movq %%mm5, (%2, %3) \n\t" | |
140 "movq 8(%1), %%mm0 \n\t" | |
141 "movq 9(%1), %%mm1 \n\t" | |
142 "movq 8(%1, %3), %%mm2 \n\t" | |
143 "movq 9(%1, %3), %%mm3 \n\t" | |
144 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
145 "movq %%mm4, 8(%2) \n\t" | |
146 "movq %%mm5, 8(%2, %3) \n\t" | |
147 "add %%"REG_a", %1 \n\t" | |
148 "add %%"REG_a", %2 \n\t" | |
149 "subl $4, %0 \n\t" | |
150 "jnz 1b \n\t" | |
151 :"+g"(h), "+S"(pixels), "+D"(block) | |
152 :"r"((long)line_size) | |
153 :REG_a, "memory"); | |
651 | 154 } |
155 | |
5083
ce36118abbbb
rename attribute_unused to av_unused and moves its declaration to common.h
benoit
parents:
5006
diff
changeset
|
/*
 * put, 16 bytes wide, "l2" variant: every dst row is produced from one
 * 16-byte row of src1 and one 16-byte row of src2 via PAVGBP (two 8-byte
 * halves per row).
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst, %4 = src1Stride,
 * %5 = dstStride. src1 advances by src1Stride and dst by dstStride per row;
 * src2 is read at consecutive 16-byte offsets, i.e. it is treated as a buffer
 * with a fixed row pitch of 16 bytes.
 *
 * If h is odd ("testl $1"), a single row is processed first and h is
 * decremented. The main loop then handles two rows per pass and subtracts 2
 * from h, so the count left after the optional single row has to be a
 * non-zero even number for the "jnz" loop to terminate.
 *
 * Under PIC, h is kept in memory ("+m") instead of the "b" register used
 * otherwise; see the existing note on the #ifdef line.
 *
 * NOTE(review): PAVGBP and MOVQ_BFE are defined outside this file; presumably
 * a pairwise byte average using the constant in mm6 - confirm against the
 * macro definitions.
 */
156 static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) |
954 | 157 { |
158 MOVQ_BFE(mm6); | |
159 __asm __volatile( | |
2979 | 160 "testl $1, %0 \n\t" |
161 " jz 1f \n\t" | |
162 "movq (%1), %%mm0 \n\t" | |
163 "movq (%2), %%mm1 \n\t" | |
164 "movq 8(%1), %%mm2 \n\t" | |
165 "movq 8(%2), %%mm3 \n\t" | |
166 "add %4, %1 \n\t" | |
167 "add $16, %2 \n\t" | |
168 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
169 "movq %%mm4, (%3) \n\t" | |
170 "movq %%mm5, 8(%3) \n\t" | |
171 "add %5, %3 \n\t" | |
172 "decl %0 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
173 ASMALIGN(3) |
2979 | 174 "1: \n\t" |
175 "movq (%1), %%mm0 \n\t" | |
176 "movq (%2), %%mm1 \n\t" | |
177 "movq 8(%1), %%mm2 \n\t" | |
178 "movq 8(%2), %%mm3 \n\t" | |
179 "add %4, %1 \n\t" | |
180 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
181 "movq %%mm4, (%3) \n\t" | |
182 "movq %%mm5, 8(%3) \n\t" | |
183 "add %5, %3 \n\t" | |
184 "movq (%1), %%mm0 \n\t" | |
185 "movq 16(%2), %%mm1 \n\t" | |
186 "movq 8(%1), %%mm2 \n\t" | |
187 "movq 24(%2), %%mm3 \n\t" | |
188 "add %4, %1 \n\t" | |
189 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
190 "movq %%mm4, (%3) \n\t" | |
191 "movq %%mm5, 8(%3) \n\t" | |
192 "add %5, %3 \n\t" | |
193 "add $32, %2 \n\t" | |
194 "subl $2, %0 \n\t" | |
195 "jnz 1b \n\t" | |
5127 | 196 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used |
2979 | 197 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) |
967 | 198 #else |
2979 | 199 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) |
967 | 200 #endif |
2979 | 201 :"S"((long)src1Stride), "D"((long)dstStride) |
202 :"memory"); | |
954 | 203 } |
204 | |
/*
 * put, 8 bytes wide, "y2" variant: every output row is produced by PAVGBP
 * from an 8-byte source row and the source row line_size bytes below it.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size; REG_a holds
 * 2 * line_size. The first source row is loaded into mm0 before the loop.
 * Inside the loop the most recently loaded row is kept in a register (mm0,
 * then mm2, then mm0 again) so each source row is read from memory only once.
 * One pass covers four output rows and subtracts 4 from h, so h has to be a
 * non-zero multiple of 4 for the "jnz" loop to terminate; h + 1 source rows
 * are read in total.
 *
 * NOTE(review): PAVGBP and MOVQ_BFE are defined outside this file; presumably
 * a pairwise byte average using the constant in mm6 - confirm against the
 * macro definitions.
 */
1064 | 205 static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
206 { |
448 | 207 MOVQ_BFE(mm6); |
208 __asm __volatile( | |
2979 | 209 "lea (%3, %3), %%"REG_a" \n\t" |
210 "movq (%1), %%mm0 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
211 ASMALIGN(3) |
2979 | 212 "1: \n\t" |
213 "movq (%1, %3), %%mm1 \n\t" | |
214 "movq (%1, %%"REG_a"),%%mm2 \n\t" | |
215 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |
216 "movq %%mm4, (%2) \n\t" | |
217 "movq %%mm5, (%2, %3) \n\t" | |
218 "add %%"REG_a", %1 \n\t" | |
219 "add %%"REG_a", %2 \n\t" | |
220 "movq (%1, %3), %%mm1 \n\t" | |
221 "movq (%1, %%"REG_a"),%%mm0 \n\t" | |
222 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |
223 "movq %%mm4, (%2) \n\t" | |
224 "movq %%mm5, (%2, %3) \n\t" | |
225 "add %%"REG_a", %1 \n\t" | |
226 "add %%"REG_a", %2 \n\t" | |
227 "subl $4, %0 \n\t" | |
228 "jnz 1b \n\t" | |
229 :"+g"(h), "+S"(pixels), "+D"(block) | |
230 :"r"((long)line_size) | |
231 :REG_a, "memory"); | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
232 } |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
233 |
/*
 * put, 8 bytes wide, "xy2" variant: every output byte is computed from four
 * source bytes (positions x and x + 1 on two vertically adjacent rows) as
 * (sum of the four + constant in mm6) >> 2.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size. mm7 is used
 * as the zero register for unpacking bytes to 16-bit words; mm6 holds the
 * constant set up by SET_RND (see the comment on that line). REG_a is a
 * running byte offset that starts at 0 and grows by line_size per row; pixels
 * is advanced by one row before the loop, so (%1, REG_a) addresses the lower
 * source row while (%2, REG_a) addresses the current output row.
 *
 * The horizontal pair sums of the previous source row are carried between
 * rows (alternately in mm4/mm5 and mm0/mm1), so each source row is unpacked
 * only once. One pass covers two output rows and subtracts 2 from h, so h
 * has to be a non-zero even number for the "jnz" loop to terminate.
 *
 * NOTE(review): MOVQ_ZERO and SET_RND are defined outside this file - confirm
 * their exact effect against the macro definitions.
 */
1064 | 234 static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
235 { |
448 | 236 MOVQ_ZERO(mm7); |
237 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
238 __asm __volatile( |
2979 | 239 "movq (%1), %%mm0 \n\t" |
240 "movq 1(%1), %%mm4 \n\t" | |
241 "movq %%mm0, %%mm1 \n\t" | |
242 "movq %%mm4, %%mm5 \n\t" | |
243 "punpcklbw %%mm7, %%mm0 \n\t" | |
244 "punpcklbw %%mm7, %%mm4 \n\t" | |
245 "punpckhbw %%mm7, %%mm1 \n\t" | |
246 "punpckhbw %%mm7, %%mm5 \n\t" | |
247 "paddusw %%mm0, %%mm4 \n\t" | |
248 "paddusw %%mm1, %%mm5 \n\t" | |
249 "xor %%"REG_a", %%"REG_a" \n\t" | |
250 "add %3, %1 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
251 ASMALIGN(3) |
2979 | 252 "1: \n\t" |
253 "movq (%1, %%"REG_a"), %%mm0 \n\t" | |
254 "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |
255 "movq %%mm0, %%mm1 \n\t" | |
256 "movq %%mm2, %%mm3 \n\t" | |
257 "punpcklbw %%mm7, %%mm0 \n\t" | |
258 "punpcklbw %%mm7, %%mm2 \n\t" | |
259 "punpckhbw %%mm7, %%mm1 \n\t" | |
260 "punpckhbw %%mm7, %%mm3 \n\t" | |
261 "paddusw %%mm2, %%mm0 \n\t" | |
262 "paddusw %%mm3, %%mm1 \n\t" | |
263 "paddusw %%mm6, %%mm4 \n\t" | |
264 "paddusw %%mm6, %%mm5 \n\t" | |
265 "paddusw %%mm0, %%mm4 \n\t" | |
266 "paddusw %%mm1, %%mm5 \n\t" | |
267 "psrlw $2, %%mm4 \n\t" | |
268 "psrlw $2, %%mm5 \n\t" | |
269 "packuswb %%mm5, %%mm4 \n\t" | |
270 "movq %%mm4, (%2, %%"REG_a") \n\t" | |
271 "add %3, %%"REG_a" \n\t" | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
272 |
2979 | 273 "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 |
274 "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |
275 "movq %%mm2, %%mm3 \n\t" | |
276 "movq %%mm4, %%mm5 \n\t" | |
277 "punpcklbw %%mm7, %%mm2 \n\t" | |
278 "punpcklbw %%mm7, %%mm4 \n\t" | |
279 "punpckhbw %%mm7, %%mm3 \n\t" | |
280 "punpckhbw %%mm7, %%mm5 \n\t" | |
281 "paddusw %%mm2, %%mm4 \n\t" | |
282 "paddusw %%mm3, %%mm5 \n\t" | |
283 "paddusw %%mm6, %%mm0 \n\t" | |
284 "paddusw %%mm6, %%mm1 \n\t" | |
285 "paddusw %%mm4, %%mm0 \n\t" | |
286 "paddusw %%mm5, %%mm1 \n\t" | |
287 "psrlw $2, %%mm0 \n\t" | |
288 "psrlw $2, %%mm1 \n\t" | |
289 "packuswb %%mm1, %%mm0 \n\t" | |
290 "movq %%mm0, (%2, %%"REG_a") \n\t" | |
291 "add %3, %%"REG_a" \n\t" | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
292 |
2979 | 293 "subl $2, %0 \n\t" |
294 "jnz 1b \n\t" | |
295 :"+g"(h), "+S"(pixels) | |
296 :"D"(block), "r"((long)line_size) | |
297 :REG_a, "memory"); | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
298 } |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
299 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
300 // avg_pixels |
5083
ce36118abbbb
rename attribute_unused to av_unused and moves its declaration to common.h
benoit
parents:
5006
diff
changeset
|
/*
 * avg, 4 bytes wide: for each of h rows, loads 4 bytes from block and 4 bytes
 * from pixels (movd), combines them with PAVGB and writes the 4 result bytes
 * back to block. Both pointers advance by line_size per row.
 *
 * The do/while(--h) loop runs the body before testing, so h must be >= 1.
 *
 * NOTE(review): PAVGB and MOVQ_BFE are defined outside this file; presumably
 * a byte average using the constant in mm6 - confirm against the macro
 * definitions.
 */
301 static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
2209 | 302 { |
303 MOVQ_BFE(mm6); | |
304 JUMPALIGN(); | |
305 do { | |
2979 | 306 __asm __volatile( |
307 "movd %0, %%mm0 \n\t" | |
308 "movd %1, %%mm1 \n\t" | |
309 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
310 "movd %%mm2, %0 \n\t" | |
311 :"+m"(*block) | |
312 :"m"(*pixels) | |
313 :"memory"); | |
314 pixels += line_size; | |
315 block += line_size; | |
2209 | 316 } |
317 while (--h); | |
318 } | |
319 | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
320 // in case more speed is needed - unrolling would certainly help |
/*
 * avg, 8 bytes wide: for each of h rows, loads 8 bytes from block and 8 bytes
 * from pixels (movq), combines them with PAVGB and writes the 8 result bytes
 * back to block. Both pointers advance by line_size per row.
 *
 * The do/while(--h) loop runs the body before testing, so h must be >= 1.
 *
 * NOTE(review): PAVGB and MOVQ_BFE are defined outside this file; presumably
 * a byte average using the constant in mm6 - confirm against the macro
 * definitions.
 */
1064 | 321 static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
322 { |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
323 MOVQ_BFE(mm6); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
324 JUMPALIGN(); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
325 do { |
2979 | 326 __asm __volatile( |
327 "movq %0, %%mm0 \n\t" | |
328 "movq %1, %%mm1 \n\t" | |
329 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
330 "movq %%mm2, %0 \n\t" | |
331 :"+m"(*block) | |
332 :"m"(*pixels) | |
333 :"memory"); | |
334 pixels += line_size; | |
335 block += line_size; | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
336 } |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
337 while (--h); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
338 } |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
339 |
/*
 * avg, 16 bytes wide: for each of h rows, combines 16 bytes of block with 16
 * bytes of pixels via PAVGB (as two 8-byte halves) and writes the result back
 * to block. Both pointers advance by line_size per row.
 *
 * "8%0" / "8%1" write the literal 8 directly in front of the memory operand
 * text substituted by the compiler, to address the second 8-byte half.
 * NOTE(review): this presumably relies on the operand being emitted in a form
 * that accepts a leading displacement - confirm for the targeted compilers.
 *
 * The do/while(--h) loop runs the body before testing, so h must be >= 1.
 *
 * NOTE(review): PAVGB and MOVQ_BFE are defined outside this file; presumably
 * a byte average using the constant in mm6 - confirm against the macro
 * definitions.
 */
1064 | 340 static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
651 | 341 { |
342 MOVQ_BFE(mm6); | |
343 JUMPALIGN(); | |
344 do { | |
2979 | 345 __asm __volatile( |
346 "movq %0, %%mm0 \n\t" | |
347 "movq %1, %%mm1 \n\t" | |
348 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
349 "movq %%mm2, %0 \n\t" | |
350 "movq 8%0, %%mm0 \n\t" | |
351 "movq 8%1, %%mm1 \n\t" | |
352 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
353 "movq %%mm2, 8%0 \n\t" | |
354 :"+m"(*block) | |
355 :"m"(*pixels) | |
356 :"memory"); | |
357 pixels += line_size; | |
358 block += line_size; | |
651 | 359 } |
360 while (--h); | |
361 } | |
362 | |
/*
 * avg, 8 bytes wide, "x2" variant: for each of h rows, first combines the 8
 * source bytes at pixels with the 8 bytes at pixels + 1 via PAVGB, then
 * combines that result with the 8 bytes already in block via a second PAVGB
 * and stores it back to block. Both pointers advance by line_size per row.
 *
 * "1%1" writes the literal 1 in front of the memory operand text to address
 * pixels + 1.
 *
 * The do/while(--h) loop runs the body before testing, so h must be >= 1.
 *
 * NOTE(review): PAVGB and MOVQ_BFE are defined outside this file; presumably
 * a byte average using the constant in mm6 - confirm against the macro
 * definitions.
 */
1064 | 363 static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
364 { |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
365 MOVQ_BFE(mm6); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
366 JUMPALIGN(); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
367 do { |
2979 | 368 __asm __volatile( |
369 "movq %1, %%mm0 \n\t" | |
370 "movq 1%1, %%mm1 \n\t" | |
371 "movq %0, %%mm3 \n\t" | |
372 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
373 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
374 "movq %%mm0, %0 \n\t" | |
375 :"+m"(*block) | |
376 :"m"(*pixels) | |
377 :"memory"); | |
378 pixels += line_size; | |
379 block += line_size; | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
380 } while (--h); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
381 } |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
382 |
5083
ce36118abbbb
rename attribute_unused to av_unused and moves its declaration to common.h
benoit
parents:
5006
diff
changeset
|
/*
 * avg, 8 bytes wide, "l2" variant: for each of h rows, first combines 8 bytes
 * of src1 with 8 bytes of src2 via PAVGB, then combines that result with the
 * 8 bytes already in dst via a second PAVGB and stores it back to dst.
 *
 * Per row, dst advances by dstStride, src1 by src1Stride and src2 by a fixed
 * 8 bytes (src2 is treated as a buffer with an 8-byte row pitch).
 *
 * The do/while(--h) loop runs the body before testing, so h must be >= 1.
 *
 * NOTE(review): PAVGB and MOVQ_BFE are defined outside this file; presumably
 * a byte average using the constant in mm6 - confirm against the macro
 * definitions.
 */
383 static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) |
954 | 384 { |
385 MOVQ_BFE(mm6); | |
386 JUMPALIGN(); | |
387 do { | |
2979 | 388 __asm __volatile( |
389 "movq %1, %%mm0 \n\t" | |
390 "movq %2, %%mm1 \n\t" | |
391 "movq %0, %%mm3 \n\t" | |
392 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
393 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
394 "movq %%mm0, %0 \n\t" | |
395 :"+m"(*dst) | |
396 :"m"(*src1), "m"(*src2) | |
397 :"memory"); | |
398 dst += dstStride; | |
954 | 399 src1 += src1Stride; |
400 src2 += 8; | |
401 } while (--h); | |
402 } | |
403 | |
/*
 * avg, 16 bytes wide, "x2" variant: for each of h rows and for each 8-byte
 * half, first combines the source bytes at pixels with those at pixels + 1
 * via PAVGB, then combines that result with the bytes already in block via a
 * second PAVGB and stores it back to block. Both pointers advance by
 * line_size per row.
 *
 * "1%1", "8%1", "9%1" and "8%0" write a literal displacement in front of the
 * memory operand text to reach the +1 neighbour and the second 8-byte half.
 *
 * The do/while(--h) loop runs the body before testing, so h must be >= 1.
 *
 * NOTE(review): PAVGB and MOVQ_BFE are defined outside this file; presumably
 * a byte average using the constant in mm6 - confirm against the macro
 * definitions.
 */
1064 | 404 static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
651 | 405 { |
406 MOVQ_BFE(mm6); | |
407 JUMPALIGN(); | |
408 do { | |
2979 | 409 __asm __volatile( |
410 "movq %1, %%mm0 \n\t" | |
411 "movq 1%1, %%mm1 \n\t" | |
412 "movq %0, %%mm3 \n\t" | |
413 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
414 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
415 "movq %%mm0, %0 \n\t" | |
416 "movq 8%1, %%mm0 \n\t" | |
417 "movq 9%1, %%mm1 \n\t" | |
418 "movq 8%0, %%mm3 \n\t" | |
419 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
420 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
421 "movq %%mm0, 8%0 \n\t" | |
422 :"+m"(*block) | |
423 :"m"(*pixels) | |
424 :"memory"); | |
425 pixels += line_size; | |
426 block += line_size; | |
651 | 427 } while (--h); |
428 } | |
429 | |
5083
ce36118abbbb
rename attribute_unused to av_unused and moves its declaration to common.h
benoit
parents:
5006
diff
changeset
|
/*
 * avg, 16 bytes wide, "l2" variant: for each of h rows and for each 8-byte
 * half, first combines src1 with src2 via PAVGB, then combines that result
 * with the bytes already in dst via a second PAVGB and stores it back to dst.
 *
 * Per row, dst advances by dstStride, src1 by src1Stride and src2 by a fixed
 * 16 bytes (src2 is treated as a buffer with a 16-byte row pitch).
 *
 * The do/while(--h) loop runs the body before testing, so h must be >= 1.
 *
 * NOTE(review): PAVGB and MOVQ_BFE are defined outside this file; presumably
 * a byte average using the constant in mm6 - confirm against the macro
 * definitions.
 */
430 static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) |
954 | 431 { |
432 MOVQ_BFE(mm6); | |
433 JUMPALIGN(); | |
434 do { | |
2979 | 435 __asm __volatile( |
436 "movq %1, %%mm0 \n\t" | |
437 "movq %2, %%mm1 \n\t" | |
438 "movq %0, %%mm3 \n\t" | |
439 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
440 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
441 "movq %%mm0, %0 \n\t" | |
442 "movq 8%1, %%mm0 \n\t" | |
443 "movq 8%2, %%mm1 \n\t" | |
444 "movq 8%0, %%mm3 \n\t" | |
445 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
446 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
447 "movq %%mm0, 8%0 \n\t" | |
448 :"+m"(*dst) | |
449 :"m"(*src1), "m"(*src2) | |
450 :"memory"); | |
451 dst += dstStride; | |
954 | 452 src1 += src1Stride; |
453 src2 += 16; | |
454 } while (--h); | |
455 } | |
456 | |
/*
 * avg, 8 bytes wide, "y2" variant: every row is first computed by PAVGBP from
 * an 8-byte source row and the source row line_size bytes below it, then
 * combined with the 8 bytes already in block via PAVGB and stored back.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size; REG_a holds
 * 2 * line_size. The first source row is loaded into mm0 before the loop and
 * the most recently loaded row is carried in a register between steps (mm0,
 * then mm2, then mm0 again). One pass covers four rows and subtracts 4 from
 * h, so h has to be a non-zero multiple of 4 for the "jnz" loop to terminate.
 *
 * NOTE(review): PAVGB, PAVGBP and MOVQ_BFE are defined outside this file;
 * presumably a byte average using the constant in mm6 - confirm against the
 * macro definitions.
 */
1064 | 457 static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
458 { |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
459 MOVQ_BFE(mm6); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
460 __asm __volatile( |
2979 | 461 "lea (%3, %3), %%"REG_a" \n\t" |
462 "movq (%1), %%mm0 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
463 ASMALIGN(3) |
2979 | 464 "1: \n\t" |
465 "movq (%1, %3), %%mm1 \n\t" | |
466 "movq (%1, %%"REG_a"), %%mm2 \n\t" | |
467 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |
468 "movq (%2), %%mm3 \n\t" | |
469 PAVGB(%%mm3, %%mm4, %%mm0, %%mm6) | |
470 "movq (%2, %3), %%mm3 \n\t" | |
471 PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |
472 "movq %%mm0, (%2) \n\t" | |
473 "movq %%mm1, (%2, %3) \n\t" | |
474 "add %%"REG_a", %1 \n\t" | |
475 "add %%"REG_a", %2 \n\t" | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
476 |
2979 | 477 "movq (%1, %3), %%mm1 \n\t" |
478 "movq (%1, %%"REG_a"), %%mm0 \n\t" | |
479 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |
480 "movq (%2), %%mm3 \n\t" | |
481 PAVGB(%%mm3, %%mm4, %%mm2, %%mm6) | |
482 "movq (%2, %3), %%mm3 \n\t" | |
483 PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |
484 "movq %%mm2, (%2) \n\t" | |
485 "movq %%mm1, (%2, %3) \n\t" | |
486 "add %%"REG_a", %1 \n\t" | |
487 "add %%"REG_a", %2 \n\t" | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
488 |
2979 | 489 "subl $4, %0 \n\t" |
490 "jnz 1b \n\t" | |
491 :"+g"(h), "+S"(pixels), "+D"(block) | |
492 :"r"((long)line_size) | |
493 :REG_a, "memory"); | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
494 } |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
495 |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
496 // this routine is 'slightly' suboptimal but mostly unused |
/*
 * avg, 8 bytes wide, "xy2" variant: every byte is first computed from four
 * source bytes (positions x and x + 1 on two vertically adjacent rows) as
 * (sum of the four + constant in mm6) >> 2, then combined with the byte
 * already in block via PAVGB and stored back.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size. mm7 is the
 * zero register for unpacking bytes to 16-bit words; mm6 holds the constant
 * set up by SET_RND. REG_a is a running byte offset that starts at 0 and
 * grows by line_size per row; pixels is advanced by one row before the loop.
 *
 * Because mm6 is occupied by the SET_RND constant, the register handed to
 * PAVGB as its last argument is rebuilt in mm2 for every row: "pcmpeqd" sets
 * all bits and "paddb mm2, mm2" turns each 0xFF byte into 0xFE.
 *
 * One pass covers two rows and subtracts 2 from h, so h has to be a non-zero
 * even number for the "jnz" loop to terminate.
 *
 * NOTE(review): MOVQ_ZERO, SET_RND and PAVGB are defined outside this file -
 * confirm their exact effect against the macro definitions.
 */
1064 | 497 static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
498 { |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
499 MOVQ_ZERO(mm7); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
500 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
501 __asm __volatile( |
2979 | 502 "movq (%1), %%mm0 \n\t" |
503 "movq 1(%1), %%mm4 \n\t" | |
504 "movq %%mm0, %%mm1 \n\t" | |
505 "movq %%mm4, %%mm5 \n\t" | |
506 "punpcklbw %%mm7, %%mm0 \n\t" | |
507 "punpcklbw %%mm7, %%mm4 \n\t" | |
508 "punpckhbw %%mm7, %%mm1 \n\t" | |
509 "punpckhbw %%mm7, %%mm5 \n\t" | |
510 "paddusw %%mm0, %%mm4 \n\t" | |
511 "paddusw %%mm1, %%mm5 \n\t" | |
512 "xor %%"REG_a", %%"REG_a" \n\t" | |
513 "add %3, %1 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
514 ASMALIGN(3) |
2979 | 515 "1: \n\t" |
516 "movq (%1, %%"REG_a"), %%mm0 \n\t" | |
517 "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |
518 "movq %%mm0, %%mm1 \n\t" | |
519 "movq %%mm2, %%mm3 \n\t" | |
520 "punpcklbw %%mm7, %%mm0 \n\t" | |
521 "punpcklbw %%mm7, %%mm2 \n\t" | |
522 "punpckhbw %%mm7, %%mm1 \n\t" | |
523 "punpckhbw %%mm7, %%mm3 \n\t" | |
524 "paddusw %%mm2, %%mm0 \n\t" | |
525 "paddusw %%mm3, %%mm1 \n\t" | |
526 "paddusw %%mm6, %%mm4 \n\t" | |
527 "paddusw %%mm6, %%mm5 \n\t" | |
528 "paddusw %%mm0, %%mm4 \n\t" | |
529 "paddusw %%mm1, %%mm5 \n\t" | |
530 "psrlw $2, %%mm4 \n\t" | |
531 "psrlw $2, %%mm5 \n\t" | |
532 "movq (%2, %%"REG_a"), %%mm3 \n\t" | |
533 "packuswb %%mm5, %%mm4 \n\t" | |
534 "pcmpeqd %%mm2, %%mm2 \n\t" | |
535 "paddb %%mm2, %%mm2 \n\t" | |
536 PAVGB(%%mm3, %%mm4, %%mm5, %%mm2) | |
537 "movq %%mm5, (%2, %%"REG_a") \n\t" | |
538 "add %3, %%"REG_a" \n\t" | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
539 |
2979 | 540 "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 |
541 "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |
542 "movq %%mm2, %%mm3 \n\t" | |
543 "movq %%mm4, %%mm5 \n\t" | |
544 "punpcklbw %%mm7, %%mm2 \n\t" | |
545 "punpcklbw %%mm7, %%mm4 \n\t" | |
546 "punpckhbw %%mm7, %%mm3 \n\t" | |
547 "punpckhbw %%mm7, %%mm5 \n\t" | |
548 "paddusw %%mm2, %%mm4 \n\t" | |
549 "paddusw %%mm3, %%mm5 \n\t" | |
550 "paddusw %%mm6, %%mm0 \n\t" | |
551 "paddusw %%mm6, %%mm1 \n\t" | |
552 "paddusw %%mm4, %%mm0 \n\t" | |
553 "paddusw %%mm5, %%mm1 \n\t" | |
554 "psrlw $2, %%mm0 \n\t" | |
555 "psrlw $2, %%mm1 \n\t" | |
556 "movq (%2, %%"REG_a"), %%mm3 \n\t" | |
557 "packuswb %%mm1, %%mm0 \n\t" | |
558 "pcmpeqd %%mm2, %%mm2 \n\t" | |
559 "paddb %%mm2, %%mm2 \n\t" | |
560 PAVGB(%%mm3, %%mm0, %%mm1, %%mm2) | |
561 "movq %%mm1, (%2, %%"REG_a") \n\t" | |
562 "add %3, %%"REG_a" \n\t" | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
563 |
2979 | 564 "subl $2, %0 \n\t" |
565 "jnz 1b \n\t" | |
566 :"+g"(h), "+S"(pixels) | |
567 :"D"(block), "r"((long)line_size) | |
568 :REG_a, "memory"); | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
569 } |
651 | 570 |
571 //FIXME optimize | |
1064 | 572 static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
651 | 573 DEF(put, pixels8_y2)(block , pixels , line_size, h); |
574 DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h); | |
575 } | |
576 | |
1064 | 577 static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
651 | 578 DEF(put, pixels8_xy2)(block , pixels , line_size, h); |
579 DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h); | |
580 } | |
581 | |
1064 | 582 static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
651 | 583 DEF(avg, pixels8_y2)(block , pixels , line_size, h); |
584 DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h); | |
585 } | |
586 | |
1064 | 587 static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
651 | 588 DEF(avg, pixels8_xy2)(block , pixels , line_size, h); |
589 DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h); | |
590 } | |
591 | |
592 |