Mercurial > libavcodec.hg
comparison x86/dsputil_mmx.c @ 9337:a0d54042ea37 libavcodec
Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
author | alexc |
---|---|
date | Thu, 02 Apr 2009 21:02:42 +0000 |
parents | 17cc6df384a6 |
children | 5e020cbd1599 |
comparison legend: equal, deleted, inserted, replaced
9336:62940d73e9fe | 9337:a0d54042ea37 |
---|---|
26 #include "libavcodec/dsputil.h" | 26 #include "libavcodec/dsputil.h" |
27 #include "libavcodec/h263.h" | 27 #include "libavcodec/h263.h" |
28 #include "libavcodec/mpegvideo.h" | 28 #include "libavcodec/mpegvideo.h" |
29 #include "libavcodec/simple_idct.h" | 29 #include "libavcodec/simple_idct.h" |
30 #include "dsputil_mmx.h" | 30 #include "dsputil_mmx.h" |
31 #include "mmx.h" | |
32 #include "vp3dsp_mmx.h" | 31 #include "vp3dsp_mmx.h" |
33 #include "vp3dsp_sse2.h" | 32 #include "vp3dsp_sse2.h" |
34 #include "vp6dsp_mmx.h" | 33 #include "vp6dsp_mmx.h" |
35 #include "vp6dsp_sse2.h" | 34 #include "vp6dsp_sse2.h" |
36 #include "idct_xvid.h" | 35 #include "idct_xvid.h" |
271 "movq %%mm6, (%0, %2) \n\t" | 270 "movq %%mm6, (%0, %2) \n\t" |
272 ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "r"(p) | 271 ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "r"(p) |
273 :"memory"); | 272 :"memory"); |
274 } | 273 } |
275 | 274 |
276 static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) = | 275 DECLARE_ALIGNED_8(const unsigned char, ff_vector128[8]) = |
277 { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; | 276 { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; |
| 277 |
| 278 #define put_signed_pixels_clamped_mmx_half(off) \ |
| 279 "movq "#off"(%2), %%mm1 \n\t"\ |
| 280 "movq 16+"#off"(%2), %%mm2 \n\t"\ |
| 281 "movq 32+"#off"(%2), %%mm3 \n\t"\ |
| 282 "movq 48+"#off"(%2), %%mm4 \n\t"\ |
| 283 "packsswb 8+"#off"(%2), %%mm1 \n\t"\ |
| 284 "packsswb 24+"#off"(%2), %%mm2 \n\t"\ |
| 285 "packsswb 40+"#off"(%2), %%mm3 \n\t"\ |
| 286 "packsswb 56+"#off"(%2), %%mm4 \n\t"\ |
| 287 "paddb %%mm0, %%mm1 \n\t"\ |
| 288 "paddb %%mm0, %%mm2 \n\t"\ |
| 289 "paddb %%mm0, %%mm3 \n\t"\ |
| 290 "paddb %%mm0, %%mm4 \n\t"\ |
| 291 "movq %%mm1, (%0) \n\t"\ |
| 292 "movq %%mm2, (%0, %3) \n\t"\ |
| 293 "movq %%mm3, (%0, %3, 2) \n\t"\ |
| 294 "movq %%mm4, (%0, %1) \n\t" |
278 | 295 |
279 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | 296 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
280 { | 297 { |
281 int i; | 298 x86_reg line_skip = line_size; |
282 | 299 x86_reg av_uninit(line_skip3); |
283 movq_m2r(*vector128, mm1); | 300 |
284 for (i = 0; i < 8; i++) { | 301 __asm__ volatile ( |
285 movq_m2r(*(block), mm0); | 302 "movq "MANGLE(ff_vector128)", %%mm0 \n\t" |
286 packsswb_m2r(*(block + 4), mm0); | 303 "lea (%3, %3, 2), %1 \n\t" |
287 block += 8; | 304 put_signed_pixels_clamped_mmx_half(0) |
288 paddb_r2r(mm1, mm0); | 305 "lea (%0, %3, 4), %0 \n\t" |
289 movq_r2m(mm0, *pixels); | 306 put_signed_pixels_clamped_mmx_half(64) |
290 pixels += line_size; | 307 :"+r" (pixels), "+r" (line_skip3) |
291 } | 308 :"r" (block), "r"(line_skip) |
| 309 :"memory"); |
292 } | 310 } |
293 | 311 |
294 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | 312 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
295 { | 313 { |
296 const DCTELEM *p; | 314 const DCTELEM *p; |