comparison i386/dsputil_mmx.c @ 1527:8ffd0c00e6df libavcodec

mmx2 optimization of huffyuv median encoding
author michael
date Mon, 13 Oct 2003 17:27:30 +0000
parents 7d328fd9d8a5
children 3b31998fe22f
comparison
equal deleted inserted replaced
1526:fcfa169fdbf8 1527:8ffd0c00e6df
581 : "r"(src1), "r"(src2), "r"(dst), "r"(w-15) 581 : "r"(src1), "r"(src2), "r"(dst), "r"(w-15)
582 ); 582 );
583 for(; i<w; i++) 583 for(; i<w; i++)
584 dst[i+0] = src1[i+0]-src2[i+0]; 584 dst[i+0] = src1[i+0]-src2[i+0];
585 } 585 }
586
587 static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
588 int i=0;
589 uint8_t l, lt;
590
591 asm volatile(
592 "1: \n\t"
593 "movq -1(%1, %0), %%mm0 \n\t" // LT
594 "movq (%1, %0), %%mm1 \n\t" // T
595 "movq -1(%2, %0), %%mm2 \n\t" // L
596 "movq (%2, %0), %%mm3 \n\t" // X
597 "movq %%mm2, %%mm4 \n\t" // L
598 "psubb %%mm0, %%mm2 \n\t"
599 "paddb %%mm1, %%mm2 \n\t" // L + T - LT
600 "movq %%mm4, %%mm5 \n\t" // L
601 "pmaxub %%mm1, %%mm4 \n\t" // max(T, L)
602 "pminub %%mm5, %%mm1 \n\t" // min(T, L)
603 "pminub %%mm2, %%mm4 \n\t"
604 "pmaxub %%mm1, %%mm4 \n\t"
605 "psubb %%mm4, %%mm3 \n\t" // dst - pred
606 "movq %%mm3, (%3, %0) \n\t"
607 "addl $8, %0 \n\t"
608 "cmpl %4, %0 \n\t"
609 " jb 1b \n\t"
610 : "+r" (i)
611 : "r"(src1), "r"(src2), "r"(dst), "r"(w)
612 );
613
614 l= *left;
615 lt= *left_top;
616
617 dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF);
618
619 *left_top= src1[w-1];
620 *left = src2[w-1];
621 }
622
586 #define LBUTTERFLY2(a1,b1,a2,b2)\ 623 #define LBUTTERFLY2(a1,b1,a2,b2)\
587 "paddw " #b1 ", " #a1 " \n\t"\ 624 "paddw " #b1 ", " #a1 " \n\t"\
588 "paddw " #b2 ", " #a2 " \n\t"\ 625 "paddw " #b2 ", " #a2 " \n\t"\
589 "paddw " #b1 ", " #b1 " \n\t"\ 626 "paddw " #b1 ", " #b1 " \n\t"\
590 "paddw " #b2 ", " #b2 " \n\t"\ 627 "paddw " #b2 ", " #b2 " \n\t"\
1697 SET_QPEL_FUNC(qpel_pixels_tab[1][12], qpel8_mc03_mmx2) 1734 SET_QPEL_FUNC(qpel_pixels_tab[1][12], qpel8_mc03_mmx2)
1698 SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_mmx2) 1735 SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_mmx2)
1699 SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_mmx2) 1736 SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_mmx2)
1700 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2) 1737 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2)
1701 #endif 1738 #endif
1739
1740 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
1702 } else if (mm_flags & MM_3DNOW) { 1741 } else if (mm_flags & MM_3DNOW) {
1703 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; 1742 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
1704 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; 1743 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
1705 1744
1706 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; 1745 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;