comparison x86/dsputil_mmx.c @ 9337:a0d54042ea37 libavcodec

Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
author alexc
date Thu, 02 Apr 2009 21:02:42 +0000
parents 17cc6df384a6
children 5e020cbd1599
comparison
legend: equal, deleted, inserted, replaced
9336:62940d73e9fe 9337:a0d54042ea37
26 #include "libavcodec/dsputil.h" 26 #include "libavcodec/dsputil.h"
27 #include "libavcodec/h263.h" 27 #include "libavcodec/h263.h"
28 #include "libavcodec/mpegvideo.h" 28 #include "libavcodec/mpegvideo.h"
29 #include "libavcodec/simple_idct.h" 29 #include "libavcodec/simple_idct.h"
30 #include "dsputil_mmx.h" 30 #include "dsputil_mmx.h"
31 #include "mmx.h"
32 #include "vp3dsp_mmx.h" 31 #include "vp3dsp_mmx.h"
33 #include "vp3dsp_sse2.h" 32 #include "vp3dsp_sse2.h"
34 #include "vp6dsp_mmx.h" 33 #include "vp6dsp_mmx.h"
35 #include "vp6dsp_sse2.h" 34 #include "vp6dsp_sse2.h"
36 #include "idct_xvid.h" 35 #include "idct_xvid.h"
271 "movq %%mm6, (%0, %2) \n\t" 270 "movq %%mm6, (%0, %2) \n\t"
272 ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "r"(p) 271 ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "r"(p)
273 :"memory"); 272 :"memory");
274 } 273 }
275 274
276 static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) = 275 DECLARE_ALIGNED_8(const unsigned char, ff_vector128[8]) =
277 { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; 276 { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
277
278 #define put_signed_pixels_clamped_mmx_half(off) \
279 "movq "#off"(%2), %%mm1 \n\t"\
280 "movq 16+"#off"(%2), %%mm2 \n\t"\
281 "movq 32+"#off"(%2), %%mm3 \n\t"\
282 "movq 48+"#off"(%2), %%mm4 \n\t"\
283 "packsswb 8+"#off"(%2), %%mm1 \n\t"\
284 "packsswb 24+"#off"(%2), %%mm2 \n\t"\
285 "packsswb 40+"#off"(%2), %%mm3 \n\t"\
286 "packsswb 56+"#off"(%2), %%mm4 \n\t"\
287 "paddb %%mm0, %%mm1 \n\t"\
288 "paddb %%mm0, %%mm2 \n\t"\
289 "paddb %%mm0, %%mm3 \n\t"\
290 "paddb %%mm0, %%mm4 \n\t"\
291 "movq %%mm1, (%0) \n\t"\
292 "movq %%mm2, (%0, %3) \n\t"\
293 "movq %%mm3, (%0, %3, 2) \n\t"\
294 "movq %%mm4, (%0, %1) \n\t"
278 295
279 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) 296 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
280 { 297 {
281 int i; 298 x86_reg line_skip = line_size;
282 299 x86_reg av_uninit(line_skip3);
283 movq_m2r(*vector128, mm1); 300
284 for (i = 0; i < 8; i++) { 301 __asm__ volatile (
285 movq_m2r(*(block), mm0); 302 "movq "MANGLE(ff_vector128)", %%mm0 \n\t"
286 packsswb_m2r(*(block + 4), mm0); 303 "lea (%3, %3, 2), %1 \n\t"
287 block += 8; 304 put_signed_pixels_clamped_mmx_half(0)
288 paddb_r2r(mm1, mm0); 305 "lea (%0, %3, 4), %0 \n\t"
289 movq_r2m(mm0, *pixels); 306 put_signed_pixels_clamped_mmx_half(64)
290 pixels += line_size; 307 :"+r" (pixels), "+r" (line_skip3)
291 } 308 :"r" (block), "r"(line_skip)
309 :"memory");
292 } 310 }
293 311
294 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) 312 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
295 { 313 {
296 const DCTELEM *p; 314 const DCTELEM *p;