Mercurial > libavcodec.hg
comparison i386/mpegvideo_mmx.c @ 6437:5154ab444372 libavcodec
move draw_edges() into dsputil
field | value |
---|---|
author | aurel |
date | Tue, 04 Mar 2008 00:07:41 +0000 |
parents | 80103098c797 |
children | 33896780c612 |
comparison (legend: equal, deleted, inserted, replaced)
6436:6491d3284f89 | 6437:5154ab444372 |
---|---|
471 "movd %%mm0, 124(%0, %3) \n\t" | 471 "movd %%mm0, 124(%0, %3) \n\t" |
472 | 472 |
473 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) | 473 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) |
474 : "%"REG_a, "memory" | 474 : "%"REG_a, "memory" |
475 ); | 475 ); |
476 } | |
477 | |
478 /* draw the edges of width 'w' of an image of size width, height | |
479 this mmx version can only handle w==8 || w==16 */ | |
480 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) | |
481 { | |
482 uint8_t *ptr, *last_line; | |
483 int i; | |
484 | |
485 last_line = buf + (height - 1) * wrap; | |
486 /* left and right */ | |
487 ptr = buf; | |
488 if(w==8) | |
489 { | |
490 asm volatile( | |
491 "1: \n\t" | |
492 "movd (%0), %%mm0 \n\t" | |
493 "punpcklbw %%mm0, %%mm0 \n\t" | |
494 "punpcklwd %%mm0, %%mm0 \n\t" | |
495 "punpckldq %%mm0, %%mm0 \n\t" | |
496 "movq %%mm0, -8(%0) \n\t" | |
497 "movq -8(%0, %2), %%mm1 \n\t" | |
498 "punpckhbw %%mm1, %%mm1 \n\t" | |
499 "punpckhwd %%mm1, %%mm1 \n\t" | |
500 "punpckhdq %%mm1, %%mm1 \n\t" | |
501 "movq %%mm1, (%0, %2) \n\t" | |
502 "add %1, %0 \n\t" | |
503 "cmp %3, %0 \n\t" | |
504 " jb 1b \n\t" | |
505 : "+r" (ptr) | |
506 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |
507 ); | |
508 } | |
509 else | |
510 { | |
511 asm volatile( | |
512 "1: \n\t" | |
513 "movd (%0), %%mm0 \n\t" | |
514 "punpcklbw %%mm0, %%mm0 \n\t" | |
515 "punpcklwd %%mm0, %%mm0 \n\t" | |
516 "punpckldq %%mm0, %%mm0 \n\t" | |
517 "movq %%mm0, -8(%0) \n\t" | |
518 "movq %%mm0, -16(%0) \n\t" | |
519 "movq -8(%0, %2), %%mm1 \n\t" | |
520 "punpckhbw %%mm1, %%mm1 \n\t" | |
521 "punpckhwd %%mm1, %%mm1 \n\t" | |
522 "punpckhdq %%mm1, %%mm1 \n\t" | |
523 "movq %%mm1, (%0, %2) \n\t" | |
524 "movq %%mm1, 8(%0, %2) \n\t" | |
525 "add %1, %0 \n\t" | |
526 "cmp %3, %0 \n\t" | |
527 " jb 1b \n\t" | |
528 : "+r" (ptr) | |
529 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |
530 ); | |
531 } | |
532 | |
533 for(i=0;i<w;i+=4) { | |
534 /* top and bottom (and hopefully also the corners) */ | |
535 ptr= buf - (i + 1) * wrap - w; | |
536 asm volatile( | |
537 "1: \n\t" | |
538 "movq (%1, %0), %%mm0 \n\t" | |
539 "movq %%mm0, (%0) \n\t" | |
540 "movq %%mm0, (%0, %2) \n\t" | |
541 "movq %%mm0, (%0, %2, 2) \n\t" | |
542 "movq %%mm0, (%0, %3) \n\t" | |
543 "add $8, %0 \n\t" | |
544 "cmp %4, %0 \n\t" | |
545 " jb 1b \n\t" | |
546 : "+r" (ptr) | |
547 : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w) | |
548 ); | |
549 ptr= last_line + (i + 1) * wrap - w; | |
550 asm volatile( | |
551 "1: \n\t" | |
552 "movq (%1, %0), %%mm0 \n\t" | |
553 "movq %%mm0, (%0) \n\t" | |
554 "movq %%mm0, (%0, %2) \n\t" | |
555 "movq %%mm0, (%0, %2, 2) \n\t" | |
556 "movq %%mm0, (%0, %3) \n\t" | |
557 "add $8, %0 \n\t" | |
558 "cmp %4, %0 \n\t" | |
559 " jb 1b \n\t" | |
560 : "+r" (ptr) | |
561 : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w) | |
562 ); | |
563 } | |
564 } | 476 } |
565 | 477 |
566 static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){ | 478 static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){ |
567 const int intra= s->mb_intra; | 479 const int intra= s->mb_intra; |
568 int *sum= s->dct_error_sum[intra]; | 480 int *sum= s->dct_error_sum[intra]; |
716 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx; | 628 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx; |
717 if(!(s->flags & CODEC_FLAG_BITEXACT)) | 629 if(!(s->flags & CODEC_FLAG_BITEXACT)) |
718 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx; | 630 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx; |
719 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx; | 631 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx; |
720 | 632 |
721 draw_edges = draw_edges_mmx; | |
722 | |
723 if (mm_flags & MM_SSE2) { | 633 if (mm_flags & MM_SSE2) { |
724 s->denoise_dct= denoise_dct_sse2; | 634 s->denoise_dct= denoise_dct_sse2; |
725 } else { | 635 } else { |
726 s->denoise_dct= denoise_dct_mmx; | 636 s->denoise_dct= denoise_dct_mmx; |
727 } | 637 } |