comparison i386/dsputil_mmx.c @ 1530:3b31998fe22f libavcodec

disable encoders where appropriate (patch courtesy of BERO <bero -at- geocities.co.jp>)
author melanson
date Tue, 14 Oct 2003 04:15:53 +0000
parents 8ffd0c00e6df
children 1a9a63f59849
comparison
equal deleted inserted replaced
1529:cb523a2ca00f 1530:3b31998fe22f
169 #undef PAVGB 169 #undef PAVGB
170 170
171 /***********************************/ 171 /***********************************/
172 /* standard MMX */ 172 /* standard MMX */
173 173
174 #ifdef CONFIG_ENCODERS
174 static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) 175 static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
175 { 176 {
176 asm volatile( 177 asm volatile(
177 "movl $-128, %%eax \n\t" 178 "movl $-128, %%eax \n\t"
178 "pxor %%mm7, %%mm7 \n\t" 179 "pxor %%mm7, %%mm7 \n\t"
225 : "+r" (s1), "+r" (s2) 226 : "+r" (s1), "+r" (s2)
226 : "r" (block+64), "r" (stride) 227 : "r" (block+64), "r" (stride)
227 : "%eax" 228 : "%eax"
228 ); 229 );
229 } 230 }
231 #endif //CONFIG_ENCODERS
230 232
231 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) 233 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
232 { 234 {
233 const DCTELEM *p; 235 const DCTELEM *p;
234 uint8_t *pix; 236 uint8_t *pix;
399 : : "r" (((int)blocks)+128*6) 401 : : "r" (((int)blocks)+128*6)
400 : "%eax" 402 : "%eax"
401 ); 403 );
402 } 404 }
403 405
406 #ifdef CONFIG_ENCODERS
404 static int pix_sum16_mmx(uint8_t * pix, int line_size){ 407 static int pix_sum16_mmx(uint8_t * pix, int line_size){
405 const int h=16; 408 const int h=16;
406 int sum; 409 int sum;
407 int index= -line_size*h; 410 int index= -line_size*h;
408 411
436 : "r" (pix - index), "r" (line_size) 439 : "r" (pix - index), "r" (line_size)
437 ); 440 );
438 441
439 return sum; 442 return sum;
440 } 443 }
444 #endif //CONFIG_ENCODERS
441 445
442 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ 446 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
443 int i=0; 447 int i=0;
444 asm volatile( 448 asm volatile(
445 "1: \n\t" 449 "1: \n\t"
459 ); 463 );
460 for(; i<w; i++) 464 for(; i<w; i++)
461 dst[i+0] += src[i+0]; 465 dst[i+0] += src[i+0];
462 } 466 }
463 467
468 #ifdef CONFIG_ENCODERS
464 static int pix_norm1_mmx(uint8_t *pix, int line_size) { 469 static int pix_norm1_mmx(uint8_t *pix, int line_size) {
465 int tmp; 470 int tmp;
466 asm volatile ( 471 asm volatile (
467 "movl $16,%%ecx\n" 472 "movl $16,%%ecx\n"
468 "pxor %%mm0,%%mm0\n" 473 "pxor %%mm0,%%mm0\n"
854 } 859 }
855 860
856 861
857 WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx) 862 WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx)
858 WARPER88_1616(hadamard8_diff_mmx2, hadamard8_diff16_mmx2) 863 WARPER88_1616(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
864 #endif //CONFIG_ENCODERS
859 865
860 #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d) 866 #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
861 #define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d) 867 #define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d)
862 868
863 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ 869 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
1617 c->idct = ff_mmx_idct; 1623 c->idct = ff_mmx_idct;
1618 } 1624 }
1619 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; 1625 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
1620 } 1626 }
1621 1627
1628 #ifdef CONFIG_ENCODERS
1622 c->get_pixels = get_pixels_mmx; 1629 c->get_pixels = get_pixels_mmx;
1623 c->diff_pixels = diff_pixels_mmx; 1630 c->diff_pixels = diff_pixels_mmx;
1631 #endif //CONFIG_ENCODERS
1624 c->put_pixels_clamped = put_pixels_clamped_mmx; 1632 c->put_pixels_clamped = put_pixels_clamped_mmx;
1625 c->add_pixels_clamped = add_pixels_clamped_mmx; 1633 c->add_pixels_clamped = add_pixels_clamped_mmx;
1626 c->clear_blocks = clear_blocks_mmx; 1634 c->clear_blocks = clear_blocks_mmx;
1635 #ifdef CONFIG_ENCODERS
1627 c->pix_sum = pix_sum16_mmx; 1636 c->pix_sum = pix_sum16_mmx;
1637 #endif //CONFIG_ENCODERS
1628 1638
1629 c->put_pixels_tab[0][0] = put_pixels16_mmx; 1639 c->put_pixels_tab[0][0] = put_pixels16_mmx;
1630 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; 1640 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx;
1631 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx; 1641 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx;
1632 c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx; 1642 c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
1665 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; 1675 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
1666 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; 1676 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
1667 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; 1677 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
1668 1678
1669 c->add_bytes= add_bytes_mmx; 1679 c->add_bytes= add_bytes_mmx;
1680 #ifdef CONFIG_ENCODERS
1670 c->diff_bytes= diff_bytes_mmx; 1681 c->diff_bytes= diff_bytes_mmx;
1671 1682
1672 c->hadamard8_diff[0]= hadamard8_diff16_mmx; 1683 c->hadamard8_diff[0]= hadamard8_diff16_mmx;
1673 c->hadamard8_diff[1]= hadamard8_diff_mmx; 1684 c->hadamard8_diff[1]= hadamard8_diff_mmx;
1674 1685
1675 c->pix_norm1 = pix_norm1_mmx; 1686 c->pix_norm1 = pix_norm1_mmx;
1676 c->sse[0] = sse16_mmx; 1687 c->sse[0] = sse16_mmx;
1688 #endif //CONFIG_ENCODERS
1677 1689
1678 if (mm_flags & MM_MMXEXT) { 1690 if (mm_flags & MM_MMXEXT) {
1679 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; 1691 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
1680 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; 1692 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
1681 1693
1688 1700
1689 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; 1701 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
1690 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; 1702 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
1691 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; 1703 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
1692 1704
1705 #ifdef CONFIG_ENCODERS
1693 c->hadamard8_diff[0]= hadamard8_diff16_mmx2; 1706 c->hadamard8_diff[0]= hadamard8_diff16_mmx2;
1694 c->hadamard8_diff[1]= hadamard8_diff_mmx2; 1707 c->hadamard8_diff[1]= hadamard8_diff_mmx2;
1708 #endif //CONFIG_ENCODERS
1695 1709
1696 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 1710 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
1697 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; 1711 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
1698 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; 1712 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
1699 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; 1713 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
1795 SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_3dnow) 1809 SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_3dnow)
1796 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow) 1810 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow)
1797 } 1811 }
1798 } 1812 }
1799 1813
1814 #ifdef CONFIG_ENCODERS
1800 dsputil_init_pix_mmx(c, avctx); 1815 dsputil_init_pix_mmx(c, avctx);
1816 #endif //CONFIG_ENCODERS
1801 #if 0 1817 #if 0
1802 // for speed testing 1818 // for speed testing
1803 get_pixels = just_return; 1819 get_pixels = just_return;
1804 put_pixels_clamped = just_return; 1820 put_pixels_clamped = just_return;
1805 add_pixels_clamped = just_return; 1821 add_pixels_clamped = just_return;