Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 1530:3b31998fe22f libavcodec
disable encoders where appropriate (patch courtesy of BERO <bero -at- geocities.co.jp>)
author | melanson |
---|---|
date | Tue, 14 Oct 2003 04:15:53 +0000 |
parents | 8ffd0c00e6df |
children | 1a9a63f59849 |
comparison
equal
deleted
inserted
replaced
1529:cb523a2ca00f | 1530:3b31998fe22f |
---|---|
169 #undef PAVGB | 169 #undef PAVGB |
170 | 170 |
171 /***********************************/ | 171 /***********************************/ |
172 /* standard MMX */ | 172 /* standard MMX */ |
173 | 173 |
174 #ifdef CONFIG_ENCODERS | |
174 static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) | 175 static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) |
175 { | 176 { |
176 asm volatile( | 177 asm volatile( |
177 "movl $-128, %%eax \n\t" | 178 "movl $-128, %%eax \n\t" |
178 "pxor %%mm7, %%mm7 \n\t" | 179 "pxor %%mm7, %%mm7 \n\t" |
225 : "+r" (s1), "+r" (s2) | 226 : "+r" (s1), "+r" (s2) |
226 : "r" (block+64), "r" (stride) | 227 : "r" (block+64), "r" (stride) |
227 : "%eax" | 228 : "%eax" |
228 ); | 229 ); |
229 } | 230 } |
231 #endif //CONFIG_ENCODERS | |
230 | 232 |
231 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | 233 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
232 { | 234 { |
233 const DCTELEM *p; | 235 const DCTELEM *p; |
234 uint8_t *pix; | 236 uint8_t *pix; |
399 : : "r" (((int)blocks)+128*6) | 401 : : "r" (((int)blocks)+128*6) |
400 : "%eax" | 402 : "%eax" |
401 ); | 403 ); |
402 } | 404 } |
403 | 405 |
406 #ifdef CONFIG_ENCODERS | |
404 static int pix_sum16_mmx(uint8_t * pix, int line_size){ | 407 static int pix_sum16_mmx(uint8_t * pix, int line_size){ |
405 const int h=16; | 408 const int h=16; |
406 int sum; | 409 int sum; |
407 int index= -line_size*h; | 410 int index= -line_size*h; |
408 | 411 |
436 : "r" (pix - index), "r" (line_size) | 439 : "r" (pix - index), "r" (line_size) |
437 ); | 440 ); |
438 | 441 |
439 return sum; | 442 return sum; |
440 } | 443 } |
444 #endif //CONFIG_ENCODERS | |
441 | 445 |
442 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ | 446 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ |
443 int i=0; | 447 int i=0; |
444 asm volatile( | 448 asm volatile( |
445 "1: \n\t" | 449 "1: \n\t" |
459 ); | 463 ); |
460 for(; i<w; i++) | 464 for(; i<w; i++) |
461 dst[i+0] += src[i+0]; | 465 dst[i+0] += src[i+0]; |
462 } | 466 } |
463 | 467 |
468 #ifdef CONFIG_ENCODERS | |
464 static int pix_norm1_mmx(uint8_t *pix, int line_size) { | 469 static int pix_norm1_mmx(uint8_t *pix, int line_size) { |
465 int tmp; | 470 int tmp; |
466 asm volatile ( | 471 asm volatile ( |
467 "movl $16,%%ecx\n" | 472 "movl $16,%%ecx\n" |
468 "pxor %%mm0,%%mm0\n" | 473 "pxor %%mm0,%%mm0\n" |
854 } | 859 } |
855 | 860 |
856 | 861 |
857 WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx) | 862 WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx) |
858 WARPER88_1616(hadamard8_diff_mmx2, hadamard8_diff16_mmx2) | 863 WARPER88_1616(hadamard8_diff_mmx2, hadamard8_diff16_mmx2) |
864 #endif //CONFIG_ENCODERS | |
859 | 865 |
860 #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d) | 866 #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d) |
861 #define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d) | 867 #define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d) |
862 | 868 |
863 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ | 869 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ |
1617 c->idct = ff_mmx_idct; | 1623 c->idct = ff_mmx_idct; |
1618 } | 1624 } |
1619 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | 1625 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; |
1620 } | 1626 } |
1621 | 1627 |
1628 #ifdef CONFIG_ENCODERS | |
1622 c->get_pixels = get_pixels_mmx; | 1629 c->get_pixels = get_pixels_mmx; |
1623 c->diff_pixels = diff_pixels_mmx; | 1630 c->diff_pixels = diff_pixels_mmx; |
1631 #endif //CONFIG_ENCODERS | |
1624 c->put_pixels_clamped = put_pixels_clamped_mmx; | 1632 c->put_pixels_clamped = put_pixels_clamped_mmx; |
1625 c->add_pixels_clamped = add_pixels_clamped_mmx; | 1633 c->add_pixels_clamped = add_pixels_clamped_mmx; |
1626 c->clear_blocks = clear_blocks_mmx; | 1634 c->clear_blocks = clear_blocks_mmx; |
1635 #ifdef CONFIG_ENCODERS | |
1627 c->pix_sum = pix_sum16_mmx; | 1636 c->pix_sum = pix_sum16_mmx; |
1637 #endif //CONFIG_ENCODERS | |
1628 | 1638 |
1629 c->put_pixels_tab[0][0] = put_pixels16_mmx; | 1639 c->put_pixels_tab[0][0] = put_pixels16_mmx; |
1630 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; | 1640 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; |
1631 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx; | 1641 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx; |
1632 c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx; | 1642 c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx; |
1665 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; | 1675 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; |
1666 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; | 1676 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; |
1667 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; | 1677 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; |
1668 | 1678 |
1669 c->add_bytes= add_bytes_mmx; | 1679 c->add_bytes= add_bytes_mmx; |
1680 #ifdef CONFIG_ENCODERS | |
1670 c->diff_bytes= diff_bytes_mmx; | 1681 c->diff_bytes= diff_bytes_mmx; |
1671 | 1682 |
1672 c->hadamard8_diff[0]= hadamard8_diff16_mmx; | 1683 c->hadamard8_diff[0]= hadamard8_diff16_mmx; |
1673 c->hadamard8_diff[1]= hadamard8_diff_mmx; | 1684 c->hadamard8_diff[1]= hadamard8_diff_mmx; |
1674 | 1685 |
1675 c->pix_norm1 = pix_norm1_mmx; | 1686 c->pix_norm1 = pix_norm1_mmx; |
1676 c->sse[0] = sse16_mmx; | 1687 c->sse[0] = sse16_mmx; |
1688 #endif //CONFIG_ENCODERS | |
1677 | 1689 |
1678 if (mm_flags & MM_MMXEXT) { | 1690 if (mm_flags & MM_MMXEXT) { |
1679 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | 1691 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
1680 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; | 1692 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; |
1681 | 1693 |
1688 | 1700 |
1689 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; | 1701 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; |
1690 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; | 1702 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; |
1691 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; | 1703 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; |
1692 | 1704 |
1705 #ifdef CONFIG_ENCODERS | |
1693 c->hadamard8_diff[0]= hadamard8_diff16_mmx2; | 1706 c->hadamard8_diff[0]= hadamard8_diff16_mmx2; |
1694 c->hadamard8_diff[1]= hadamard8_diff_mmx2; | 1707 c->hadamard8_diff[1]= hadamard8_diff_mmx2; |
1708 #endif //CONFIG_ENCODERS | |
1695 | 1709 |
1696 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 1710 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
1697 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; | 1711 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; |
1698 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; | 1712 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; |
1699 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; | 1713 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; |
1795 SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_3dnow) | 1809 SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_3dnow) |
1796 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow) | 1810 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow) |
1797 } | 1811 } |
1798 } | 1812 } |
1799 | 1813 |
1814 #ifdef CONFIG_ENCODERS | |
1800 dsputil_init_pix_mmx(c, avctx); | 1815 dsputil_init_pix_mmx(c, avctx); |
1816 #endif //CONFIG_ENCODERS | |
1801 #if 0 | 1817 #if 0 |
1802 // for speed testing | 1818 // for speed testing |
1803 get_pixels = just_return; | 1819 get_pixels = just_return; |
1804 put_pixels_clamped = just_return; | 1820 put_pixels_clamped = just_return; |
1805 add_pixels_clamped = just_return; | 1821 add_pixels_clamped = just_return; |