Mercurial > libavcodec.hg
annotate x86/motion_est_mmx.c @ 10392:32ee88f14239 libavcodec
Fix w32thread implementation to handle job count > thread_count.
author | reimar |
---|---|
date | Tue, 13 Oct 2009 13:03:12 +0000 |
parents | 8b9fc0c8f1cc |
children | 34a65026fa06 |
rev | line source |
---|---|
8430 | 1 /* |
2 * MMX optimized motion estimation | |
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
8430
diff
changeset
|
3 * Copyright (c) 2001 Fabrice Bellard |
8430 | 4 * Copyright (c) 2002-2004 Michael Niedermayer |
5 * | |
6 * mostly by Michael Niedermayer <michaelni@gmx.at> | |
7 * | |
8 * This file is part of FFmpeg. | |
9 * | |
10 * FFmpeg is free software; you can redistribute it and/or | |
11 * modify it under the terms of the GNU Lesser General Public | |
12 * License as published by the Free Software Foundation; either | |
13 * version 2.1 of the License, or (at your option) any later version. | |
14 * | |
15 * FFmpeg is distributed in the hope that it will be useful, | |
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 * Lesser General Public License for more details. | |
19 * | |
20 * You should have received a copy of the GNU Lesser General Public | |
21 * License along with FFmpeg; if not, write to the Free Software | |
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
23 */ | |
24 | |
25 #include "libavutil/x86_cpu.h" | |
26 #include "libavcodec/dsputil.h" | |
10114
8b9fc0c8f1cc
Move declarations of some mmx functions to dsputil_mmx.h
mru
parents:
9378
diff
changeset
|
27 #include "dsputil_mmx.h" |
8430 | 28 |
29 DECLARE_ASM_CONST(8, uint64_t, round_tab[3])={ | |
30 0x0000000000000000ULL, | |
31 0x0001000100010001ULL, | |
32 0x0002000200020002ULL, | |
33 }; | |
34 | |
35 DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL; | |
36 | |
37 static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |
38 { | |
39 x86_reg len= -(stride*h); | |
40 __asm__ volatile( | |
41 ASMALIGN(4) | |
42 "1: \n\t" | |
43 "movq (%1, %%"REG_a"), %%mm0 \n\t" | |
44 "movq (%2, %%"REG_a"), %%mm2 \n\t" | |
45 "movq (%2, %%"REG_a"), %%mm4 \n\t" | |
46 "add %3, %%"REG_a" \n\t" | |
47 "psubusb %%mm0, %%mm2 \n\t" | |
48 "psubusb %%mm4, %%mm0 \n\t" | |
49 "movq (%1, %%"REG_a"), %%mm1 \n\t" | |
50 "movq (%2, %%"REG_a"), %%mm3 \n\t" | |
51 "movq (%2, %%"REG_a"), %%mm5 \n\t" | |
52 "psubusb %%mm1, %%mm3 \n\t" | |
53 "psubusb %%mm5, %%mm1 \n\t" | |
54 "por %%mm2, %%mm0 \n\t" | |
55 "por %%mm1, %%mm3 \n\t" | |
56 "movq %%mm0, %%mm1 \n\t" | |
57 "movq %%mm3, %%mm2 \n\t" | |
58 "punpcklbw %%mm7, %%mm0 \n\t" | |
59 "punpckhbw %%mm7, %%mm1 \n\t" | |
60 "punpcklbw %%mm7, %%mm3 \n\t" | |
61 "punpckhbw %%mm7, %%mm2 \n\t" | |
62 "paddw %%mm1, %%mm0 \n\t" | |
63 "paddw %%mm3, %%mm2 \n\t" | |
64 "paddw %%mm2, %%mm0 \n\t" | |
65 "paddw %%mm0, %%mm6 \n\t" | |
66 "add %3, %%"REG_a" \n\t" | |
67 " js 1b \n\t" | |
68 : "+a" (len) | |
69 : "r" (blk1 - len), "r" (blk2 - len), "r" ((x86_reg)stride) | |
70 ); | |
71 } | |
72 | |
73 static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |
74 { | |
75 __asm__ volatile( | |
76 ASMALIGN(4) | |
77 "1: \n\t" | |
78 "movq (%1), %%mm0 \n\t" | |
79 "movq (%1, %3), %%mm1 \n\t" | |
80 "psadbw (%2), %%mm0 \n\t" | |
81 "psadbw (%2, %3), %%mm1 \n\t" | |
82 "paddw %%mm0, %%mm6 \n\t" | |
83 "paddw %%mm1, %%mm6 \n\t" | |
84 "lea (%1,%3,2), %1 \n\t" | |
85 "lea (%2,%3,2), %2 \n\t" | |
86 "sub $2, %0 \n\t" | |
87 " jg 1b \n\t" | |
88 : "+r" (h), "+r" (blk1), "+r" (blk2) | |
89 : "r" ((x86_reg)stride) | |
90 ); | |
91 } | |
92 | |
93 static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) | |
94 { | |
95 int ret; | |
96 __asm__ volatile( | |
97 "pxor %%xmm6, %%xmm6 \n\t" | |
98 ASMALIGN(4) | |
99 "1: \n\t" | |
100 "movdqu (%1), %%xmm0 \n\t" | |
101 "movdqu (%1, %3), %%xmm1 \n\t" | |
102 "psadbw (%2), %%xmm0 \n\t" | |
103 "psadbw (%2, %3), %%xmm1 \n\t" | |
104 "paddw %%xmm0, %%xmm6 \n\t" | |
105 "paddw %%xmm1, %%xmm6 \n\t" | |
106 "lea (%1,%3,2), %1 \n\t" | |
107 "lea (%2,%3,2), %2 \n\t" | |
108 "sub $2, %0 \n\t" | |
109 " jg 1b \n\t" | |
110 : "+r" (h), "+r" (blk1), "+r" (blk2) | |
111 : "r" ((x86_reg)stride) | |
112 ); | |
113 __asm__ volatile( | |
114 "movhlps %%xmm6, %%xmm0 \n\t" | |
115 "paddw %%xmm0, %%xmm6 \n\t" | |
116 "movd %%xmm6, %0 \n\t" | |
117 : "=r"(ret) | |
118 ); | |
119 return ret; | |
120 } | |
121 | |
122 static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |
123 { | |
124 __asm__ volatile( | |
125 ASMALIGN(4) | |
126 "1: \n\t" | |
127 "movq (%1), %%mm0 \n\t" | |
128 "movq (%1, %3), %%mm1 \n\t" | |
129 "pavgb 1(%1), %%mm0 \n\t" | |
130 "pavgb 1(%1, %3), %%mm1 \n\t" | |
131 "psadbw (%2), %%mm0 \n\t" | |
132 "psadbw (%2, %3), %%mm1 \n\t" | |
133 "paddw %%mm0, %%mm6 \n\t" | |
134 "paddw %%mm1, %%mm6 \n\t" | |
135 "lea (%1,%3,2), %1 \n\t" | |
136 "lea (%2,%3,2), %2 \n\t" | |
137 "sub $2, %0 \n\t" | |
138 " jg 1b \n\t" | |
139 : "+r" (h), "+r" (blk1), "+r" (blk2) | |
140 : "r" ((x86_reg)stride) | |
141 ); | |
142 } | |
143 | |
144 static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |
145 { | |
146 __asm__ volatile( | |
147 "movq (%1), %%mm0 \n\t" | |
148 "add %3, %1 \n\t" | |
149 ASMALIGN(4) | |
150 "1: \n\t" | |
151 "movq (%1), %%mm1 \n\t" | |
152 "movq (%1, %3), %%mm2 \n\t" | |
153 "pavgb %%mm1, %%mm0 \n\t" | |
154 "pavgb %%mm2, %%mm1 \n\t" | |
155 "psadbw (%2), %%mm0 \n\t" | |
156 "psadbw (%2, %3), %%mm1 \n\t" | |
157 "paddw %%mm0, %%mm6 \n\t" | |
158 "paddw %%mm1, %%mm6 \n\t" | |
159 "movq %%mm2, %%mm0 \n\t" | |
160 "lea (%1,%3,2), %1 \n\t" | |
161 "lea (%2,%3,2), %2 \n\t" | |
162 "sub $2, %0 \n\t" | |
163 " jg 1b \n\t" | |
164 : "+r" (h), "+r" (blk1), "+r" (blk2) | |
165 : "r" ((x86_reg)stride) | |
166 ); | |
167 } | |
168 | |
169 static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |
170 { | |
171 __asm__ volatile( | |
172 "movq "MANGLE(bone)", %%mm5 \n\t" | |
173 "movq (%1), %%mm0 \n\t" | |
174 "pavgb 1(%1), %%mm0 \n\t" | |
175 "add %3, %1 \n\t" | |
176 ASMALIGN(4) | |
177 "1: \n\t" | |
178 "movq (%1), %%mm1 \n\t" | |
179 "movq (%1,%3), %%mm2 \n\t" | |
180 "pavgb 1(%1), %%mm1 \n\t" | |
181 "pavgb 1(%1,%3), %%mm2 \n\t" | |
182 "psubusb %%mm5, %%mm1 \n\t" | |
183 "pavgb %%mm1, %%mm0 \n\t" | |
184 "pavgb %%mm2, %%mm1 \n\t" | |
185 "psadbw (%2), %%mm0 \n\t" | |
186 "psadbw (%2,%3), %%mm1 \n\t" | |
187 "paddw %%mm0, %%mm6 \n\t" | |
188 "paddw %%mm1, %%mm6 \n\t" | |
189 "movq %%mm2, %%mm0 \n\t" | |
190 "lea (%1,%3,2), %1 \n\t" | |
191 "lea (%2,%3,2), %2 \n\t" | |
192 "sub $2, %0 \n\t" | |
193 " jg 1b \n\t" | |
194 : "+r" (h), "+r" (blk1), "+r" (blk2) | |
195 : "r" ((x86_reg)stride) | |
196 ); | |
197 } | |
198 | |
199 static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) | |
200 { | |
201 x86_reg len= -(stride*h); | |
202 __asm__ volatile( | |
203 ASMALIGN(4) | |
204 "1: \n\t" | |
205 "movq (%1, %%"REG_a"), %%mm0 \n\t" | |
206 "movq (%2, %%"REG_a"), %%mm1 \n\t" | |
207 "movq (%1, %%"REG_a"), %%mm2 \n\t" | |
208 "movq (%2, %%"REG_a"), %%mm3 \n\t" | |
209 "punpcklbw %%mm7, %%mm0 \n\t" | |
210 "punpcklbw %%mm7, %%mm1 \n\t" | |
211 "punpckhbw %%mm7, %%mm2 \n\t" | |
212 "punpckhbw %%mm7, %%mm3 \n\t" | |
213 "paddw %%mm0, %%mm1 \n\t" | |
214 "paddw %%mm2, %%mm3 \n\t" | |
215 "movq (%3, %%"REG_a"), %%mm4 \n\t" | |
216 "movq (%3, %%"REG_a"), %%mm2 \n\t" | |
217 "paddw %%mm5, %%mm1 \n\t" | |
218 "paddw %%mm5, %%mm3 \n\t" | |
219 "psrlw $1, %%mm1 \n\t" | |
220 "psrlw $1, %%mm3 \n\t" | |
221 "packuswb %%mm3, %%mm1 \n\t" | |
222 "psubusb %%mm1, %%mm4 \n\t" | |
223 "psubusb %%mm2, %%mm1 \n\t" | |
224 "por %%mm4, %%mm1 \n\t" | |
225 "movq %%mm1, %%mm0 \n\t" | |
226 "punpcklbw %%mm7, %%mm0 \n\t" | |
227 "punpckhbw %%mm7, %%mm1 \n\t" | |
228 "paddw %%mm1, %%mm0 \n\t" | |
229 "paddw %%mm0, %%mm6 \n\t" | |
230 "add %4, %%"REG_a" \n\t" | |
231 " js 1b \n\t" | |
232 : "+a" (len) | |
233 : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((x86_reg)stride) | |
234 ); | |
235 } | |
236 | |
237 static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |
238 { | |
239 x86_reg len= -(stride*h); | |
240 __asm__ volatile( | |
241 "movq (%1, %%"REG_a"), %%mm0 \n\t" | |
242 "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |
243 "movq %%mm0, %%mm1 \n\t" | |
244 "movq %%mm2, %%mm3 \n\t" | |
245 "punpcklbw %%mm7, %%mm0 \n\t" | |
246 "punpckhbw %%mm7, %%mm1 \n\t" | |
247 "punpcklbw %%mm7, %%mm2 \n\t" | |
248 "punpckhbw %%mm7, %%mm3 \n\t" | |
249 "paddw %%mm2, %%mm0 \n\t" | |
250 "paddw %%mm3, %%mm1 \n\t" | |
251 ASMALIGN(4) | |
252 "1: \n\t" | |
253 "movq (%2, %%"REG_a"), %%mm2 \n\t" | |
254 "movq 1(%2, %%"REG_a"), %%mm4 \n\t" | |
255 "movq %%mm2, %%mm3 \n\t" | |
256 "movq %%mm4, %%mm5 \n\t" | |
257 "punpcklbw %%mm7, %%mm2 \n\t" | |
258 "punpckhbw %%mm7, %%mm3 \n\t" | |
259 "punpcklbw %%mm7, %%mm4 \n\t" | |
260 "punpckhbw %%mm7, %%mm5 \n\t" | |
261 "paddw %%mm4, %%mm2 \n\t" | |
262 "paddw %%mm5, %%mm3 \n\t" | |
263 "movq 16+"MANGLE(round_tab)", %%mm5 \n\t" | |
264 "paddw %%mm2, %%mm0 \n\t" | |
265 "paddw %%mm3, %%mm1 \n\t" | |
266 "paddw %%mm5, %%mm0 \n\t" | |
267 "paddw %%mm5, %%mm1 \n\t" | |
268 "movq (%3, %%"REG_a"), %%mm4 \n\t" | |
269 "movq (%3, %%"REG_a"), %%mm5 \n\t" | |
270 "psrlw $2, %%mm0 \n\t" | |
271 "psrlw $2, %%mm1 \n\t" | |
272 "packuswb %%mm1, %%mm0 \n\t" | |
273 "psubusb %%mm0, %%mm4 \n\t" | |
274 "psubusb %%mm5, %%mm0 \n\t" | |
275 "por %%mm4, %%mm0 \n\t" | |
276 "movq %%mm0, %%mm4 \n\t" | |
277 "punpcklbw %%mm7, %%mm0 \n\t" | |
278 "punpckhbw %%mm7, %%mm4 \n\t" | |
279 "paddw %%mm0, %%mm6 \n\t" | |
280 "paddw %%mm4, %%mm6 \n\t" | |
281 "movq %%mm2, %%mm0 \n\t" | |
282 "movq %%mm3, %%mm1 \n\t" | |
283 "add %4, %%"REG_a" \n\t" | |
284 " js 1b \n\t" | |
285 : "+a" (len) | |
286 : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((x86_reg)stride) | |
287 ); | |
288 } | |
289 | |
/**
 * Horizontally add the four 16-bit partial sums held in mm6.
 *
 * mm0 and mm6 are overwritten. The addition is done in 16-bit lanes, so
 * only the low 16 bits of the extracted value are meaningful and the rest
 * is masked off.
 *
 * @return the sum of the four words of mm6, modulo 65536
 */
static inline int sum_mmx(void)
{
    int total;

    __asm__ volatile(
        /* fold the upper two words onto the lower two */
        "movq %%mm6, %%mm0 \n\t"
        "psrlq $32, %%mm6 \n\t"
        "paddw %%mm0, %%mm6 \n\t"
        /* fold word 1 onto word 0 */
        "movq %%mm6, %%mm0 \n\t"
        "psrlq $16, %%mm6 \n\t"
        "paddw %%mm0, %%mm6 \n\t"
        "movd %%mm6, %0 \n\t"
        : "=r" (total)
    );

    return total & 0xFFFF;
}
305 | |
/**
 * Read back the SAD accumulated in mm6 by the MMX2 (psadbw-based) kernels.
 *
 * No horizontal reduction is performed here: the low 32 bits of mm6 are
 * returned as they are.
 *
 * @return the low 32 bits of mm6
 */
static inline int sum_mmx2(void)
{
    int total;

    __asm__ volatile(
        "movd %%mm6, %0 \n\t"
        : "=r" (total)
    );

    return total;
}
315 | |
/**
 * Plain-MMX horizontal half-pel SAD: averages each pixel of blk1 with its
 * right neighbour by delegating to sad8_2_mmx() with blk1 and blk1 + 1.
 * The result is accumulated in mm6 like in sad8_2_mmx().
 */
static inline void sad8_x2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    uint8_t *right = blk1 + 1;

    sad8_2_mmx(blk1, right, blk2, stride, h);
}
/**
 * Plain-MMX vertical half-pel SAD: averages each row of blk1 with the row
 * below by delegating to sad8_2_mmx() with blk1 and blk1 + stride.
 * The result is accumulated in mm6 like in sad8_2_mmx().
 */
static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    uint8_t *below = blk1 + stride;

    sad8_2_mmx(blk1, below, blk2, stride, h);
}
324 | |
325 | |
326 #define PIX_SAD(suf)\ | |
327 static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |
328 {\ | |
329 assert(h==8);\ | |
330 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ | |
331 "pxor %%mm6, %%mm6 \n\t":);\ | |
332 \ | |
333 sad8_1_ ## suf(blk1, blk2, stride, 8);\ | |
334 \ | |
335 return sum_ ## suf();\ | |
336 }\ | |
337 static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |
338 {\ | |
339 assert(h==8);\ | |
340 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ | |
341 "pxor %%mm6, %%mm6 \n\t"\ | |
342 "movq %0, %%mm5 \n\t"\ | |
343 :: "m"(round_tab[1]) \ | |
344 );\ | |
345 \ | |
346 sad8_x2a_ ## suf(blk1, blk2, stride, 8);\ | |
347 \ | |
348 return sum_ ## suf();\ | |
349 }\ | |
350 \ | |
351 static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |
352 {\ | |
353 assert(h==8);\ | |
354 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ | |
355 "pxor %%mm6, %%mm6 \n\t"\ | |
356 "movq %0, %%mm5 \n\t"\ | |
357 :: "m"(round_tab[1]) \ | |
358 );\ | |
359 \ | |
360 sad8_y2a_ ## suf(blk1, blk2, stride, 8);\ | |
361 \ | |
362 return sum_ ## suf();\ | |
363 }\ | |
364 \ | |
365 static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |
366 {\ | |
367 assert(h==8);\ | |
368 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ | |
369 "pxor %%mm6, %%mm6 \n\t"\ | |
370 ::);\ | |
371 \ | |
372 sad8_4_ ## suf(blk1, blk2, stride, 8);\ | |
373 \ | |
374 return sum_ ## suf();\ | |
375 }\ | |
376 \ | |
377 static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |
378 {\ | |
379 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ | |
380 "pxor %%mm6, %%mm6 \n\t":);\ | |
381 \ | |
382 sad8_1_ ## suf(blk1 , blk2 , stride, h);\ | |
383 sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\ | |
384 \ | |
385 return sum_ ## suf();\ | |
386 }\ | |
387 static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |
388 {\ | |
389 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ | |
390 "pxor %%mm6, %%mm6 \n\t"\ | |
391 "movq %0, %%mm5 \n\t"\ | |
392 :: "m"(round_tab[1]) \ | |
393 );\ | |
394 \ | |
395 sad8_x2a_ ## suf(blk1 , blk2 , stride, h);\ | |
396 sad8_x2a_ ## suf(blk1+8, blk2+8, stride, h);\ | |
397 \ | |
398 return sum_ ## suf();\ | |
399 }\ | |
400 static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |
401 {\ | |
402 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ | |
403 "pxor %%mm6, %%mm6 \n\t"\ | |
404 "movq %0, %%mm5 \n\t"\ | |
405 :: "m"(round_tab[1]) \ | |
406 );\ | |
407 \ | |
408 sad8_y2a_ ## suf(blk1 , blk2 , stride, h);\ | |
409 sad8_y2a_ ## suf(blk1+8, blk2+8, stride, h);\ | |
410 \ | |
411 return sum_ ## suf();\ | |
412 }\ | |
413 static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |
414 {\ | |
415 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ | |
416 "pxor %%mm6, %%mm6 \n\t"\ | |
417 ::);\ | |
418 \ | |
419 sad8_4_ ## suf(blk1 , blk2 , stride, h);\ | |
420 sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\ | |
421 \ | |
422 return sum_ ## suf();\ | |
423 }\ | |
424 | |
425 PIX_SAD(mmx) | |
426 PIX_SAD(mmx2) | |
427 | |
428 void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx) | |
429 { | |
430 if (mm_flags & FF_MM_MMX) { | |
431 c->pix_abs[0][0] = sad16_mmx; | |
432 c->pix_abs[0][1] = sad16_x2_mmx; | |
433 c->pix_abs[0][2] = sad16_y2_mmx; | |
434 c->pix_abs[0][3] = sad16_xy2_mmx; | |
435 c->pix_abs[1][0] = sad8_mmx; | |
436 c->pix_abs[1][1] = sad8_x2_mmx; | |
437 c->pix_abs[1][2] = sad8_y2_mmx; | |
438 c->pix_abs[1][3] = sad8_xy2_mmx; | |
439 | |
440 c->sad[0]= sad16_mmx; | |
441 c->sad[1]= sad8_mmx; | |
442 } | |
9342
7f594601d5e9
Rename FF_MM_MMXEXT to FF_MM_MMX2, for both clarity and consistency
stefano
parents:
8629
diff
changeset
|
443 if (mm_flags & FF_MM_MMX2) { |
8430 | 444 c->pix_abs[0][0] = sad16_mmx2; |
445 c->pix_abs[1][0] = sad8_mmx2; | |
446 | |
447 c->sad[0]= sad16_mmx2; | |
448 c->sad[1]= sad8_mmx2; | |
449 | |
450 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | |
451 c->pix_abs[0][1] = sad16_x2_mmx2; | |
452 c->pix_abs[0][2] = sad16_y2_mmx2; | |
453 c->pix_abs[0][3] = sad16_xy2_mmx2; | |
454 c->pix_abs[1][1] = sad8_x2_mmx2; | |
455 c->pix_abs[1][2] = sad8_y2_mmx2; | |
456 c->pix_abs[1][3] = sad8_xy2_mmx2; | |
457 } | |
458 } | |
9378
52c348a0740f
Do not use SSE2 SAD for snow as it requires more alignment than can be
michael
parents:
9342
diff
changeset
|
459 if ((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW) && avctx->codec_id != CODEC_ID_SNOW) { |
8430 | 460 c->sad[0]= sad16_sse2; |
461 } | |
462 } |