Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 1092:f59c3f66363b libavcodec
MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
bitexact cleanup
author | michaelni |
---|---|
date | Mon, 03 Mar 2003 14:54:00 +0000 |
parents | 92fb44eae6b6 |
children | 74a46d77e061 |
comparison
equal
deleted
inserted
replaced
1091:03df246fb06b | 1092:f59c3f66363b |
---|---|
18 * | 18 * |
19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | 19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
20 */ | 20 */ |
21 | 21 |
22 #include "../dsputil.h" | 22 #include "../dsputil.h" |
23 #include "../simple_idct.h" | |
23 | 24 |
24 int mm_flags; /* multimedia extension flags */ | 25 int mm_flags; /* multimedia extension flags */ |
25 | 26 |
26 /* pixel operations */ | 27 /* pixel operations */ |
27 static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; | 28 static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; |
1406 | 1407 |
1407 #define SET_QPEL_FUNC(postfix1, postfix2) \ | 1408 #define SET_QPEL_FUNC(postfix1, postfix2) \ |
1408 c->put_ ## postfix1 = put_ ## postfix2;\ | 1409 c->put_ ## postfix1 = put_ ## postfix2;\ |
1409 c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\ | 1410 c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\ |
1410 c->avg_ ## postfix1 = avg_ ## postfix2; | 1411 c->avg_ ## postfix1 = avg_ ## postfix2; |
1412 | |
1413 /* external functions, from idct_mmx.c */ | |
1414 void ff_mmx_idct(DCTELEM *block); | |
1415 void ff_mmxext_idct(DCTELEM *block); | |
1416 | |
1417 /* XXX: those functions should be suppressed ASAP when all IDCTs are | |
1418 converted */ | |
1419 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | |
1420 { | |
1421 ff_mmx_idct (block); | |
1422 put_pixels_clamped_mmx(block, dest, line_size); | |
1423 } | |
1424 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | |
1425 { | |
1426 ff_mmx_idct (block); | |
1427 add_pixels_clamped_mmx(block, dest, line_size); | |
1428 } | |
1429 static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | |
1430 { | |
1431 ff_mmxext_idct (block); | |
1432 put_pixels_clamped_mmx(block, dest, line_size); | |
1433 } | |
1434 static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | |
1435 { | |
1436 ff_mmxext_idct (block); | |
1437 add_pixels_clamped_mmx(block, dest, line_size); | |
1438 } | |
1411 | 1439 |
1412 void dsputil_init_mmx(DSPContext* c, unsigned mask) | 1440 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) |
1413 { | 1441 { |
1414 mm_flags = mm_support(); | 1442 mm_flags = mm_support(); |
1415 #if 0 | 1443 #if 0 |
1416 fprintf(stderr, "libavcodec: CPU flags:"); | 1444 fprintf(stderr, "libavcodec: CPU flags:"); |
1417 if (mm_flags & MM_MMX) | 1445 if (mm_flags & MM_MMX) |
1426 fprintf(stderr, " sse2"); | 1454 fprintf(stderr, " sse2"); |
1427 fprintf(stderr, "\n"); | 1455 fprintf(stderr, "\n"); |
1428 #endif | 1456 #endif |
1429 | 1457 |
1430 if (mm_flags & MM_MMX) { | 1458 if (mm_flags & MM_MMX) { |
1459 const int dct_algo = avctx->dct_algo; | |
1460 const int idct_algo= avctx->idct_algo; | |
1461 | |
1462 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX) | |
1463 c->fdct = ff_fdct_mmx; | |
1464 | |
1465 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ | |
1466 c->idct_put= ff_simple_idct_put_mmx; | |
1467 c->idct_add= ff_simple_idct_add_mmx; | |
1468 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; | |
1469 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ | |
1470 if(mm_flags & MM_MMXEXT){ | |
1471 c->idct_put= ff_libmpeg2mmx2_idct_put; | |
1472 c->idct_add= ff_libmpeg2mmx2_idct_add; | |
1473 }else{ | |
1474 c->idct_put= ff_libmpeg2mmx_idct_put; | |
1475 c->idct_add= ff_libmpeg2mmx_idct_add; | |
1476 } | |
1477 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | |
1478 } | |
1479 | |
1431 c->get_pixels = get_pixels_mmx; | 1480 c->get_pixels = get_pixels_mmx; |
1432 c->diff_pixels = diff_pixels_mmx; | 1481 c->diff_pixels = diff_pixels_mmx; |
1433 c->put_pixels_clamped = put_pixels_clamped_mmx; | 1482 c->put_pixels_clamped = put_pixels_clamped_mmx; |
1434 c->add_pixels_clamped = add_pixels_clamped_mmx; | 1483 c->add_pixels_clamped = add_pixels_clamped_mmx; |
1435 c->clear_blocks = clear_blocks_mmx; | 1484 c->clear_blocks = clear_blocks_mmx; |
1485 c->sse[0] = sse16_mmx; | 1534 c->sse[0] = sse16_mmx; |
1486 | 1535 |
1487 if (mm_flags & MM_MMXEXT) { | 1536 if (mm_flags & MM_MMXEXT) { |
1488 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | 1537 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
1489 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; | 1538 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; |
1490 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; | |
1491 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; | |
1492 | 1539 |
1493 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; | 1540 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; |
1494 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; | 1541 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; |
1495 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; | 1542 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; |
1496 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; | |
1497 | 1543 |
1498 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; | 1544 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; |
1499 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; | 1545 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; |
1500 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; | |
1501 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; | |
1502 | 1546 |
1503 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; | 1547 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; |
1504 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; | 1548 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; |
1505 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; | 1549 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; |
1506 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; | 1550 |
1551 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | |
1552 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; | |
1553 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; | |
1554 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; | |
1555 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; | |
1556 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; | |
1557 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; | |
1558 } | |
1507 | 1559 |
1508 #if 1 | 1560 #if 1 |
1509 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_mmx2) | 1561 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_mmx2) |
1510 SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_mmx2) | 1562 SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_mmx2) |
1511 SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_mmx2) | 1563 SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_mmx2) |
1540 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2) | 1592 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2) |
1541 #endif | 1593 #endif |
1542 } else if (mm_flags & MM_3DNOW) { | 1594 } else if (mm_flags & MM_3DNOW) { |
1543 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; | 1595 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; |
1544 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; | 1596 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; |
1545 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; | |
1546 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; | |
1547 | 1597 |
1548 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; | 1598 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; |
1549 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; | 1599 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; |
1550 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; | 1600 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; |
1551 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; | |
1552 | 1601 |
1553 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; | 1602 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; |
1554 c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; | 1603 c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; |
1555 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; | |
1556 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; | |
1557 | 1604 |
1558 c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; | 1605 c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; |
1559 c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; | 1606 c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; |
1560 c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; | 1607 c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; |
1561 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; | 1608 |
1609 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | |
1610 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; | |
1611 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; | |
1612 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; | |
1613 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; | |
1614 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; | |
1615 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; | |
1616 } | |
1562 | 1617 |
1563 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow) | 1618 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow) |
1564 SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow) | 1619 SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow) |
1565 SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_3dnow) | 1620 SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_3dnow) |
1566 SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_3dnow) | 1621 SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_3dnow) |
1592 SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_3dnow) | 1647 SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_3dnow) |
1593 SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_3dnow) | 1648 SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_3dnow) |
1594 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow) | 1649 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow) |
1595 } | 1650 } |
1596 } | 1651 } |
1597 dsputil_init_pix_mmx(c, mask); | 1652 |
1653 dsputil_init_pix_mmx(c, avctx); | |
1598 #if 0 | 1654 #if 0 |
1599 // for speed testing | 1655 // for speed testing |
1600 get_pixels = just_return; | 1656 get_pixels = just_return; |
1601 put_pixels_clamped = just_return; | 1657 put_pixels_clamped = just_return; |
1602 add_pixels_clamped = just_return; | 1658 add_pixels_clamped = just_return; |
1628 | 1684 |
1629 //av_fdct = just_return; | 1685 //av_fdct = just_return; |
1630 //ff_idct = just_return; | 1686 //ff_idct = just_return; |
1631 #endif | 1687 #endif |
1632 } | 1688 } |
1633 | |
1634 /* remove any non bit exact operation (testing purpose). NOTE that | |
1635 this function should be kept as small as possible because it is | |
1636 always difficult to test automatically non bit exact cases. */ | |
1637 void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask) | |
1638 { | |
1639 if (mm_flags & MM_MMX) { | |
1640 /* MMX2 & 3DNOW */ | |
1641 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; | |
1642 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; | |
1643 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; | |
1644 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; | |
1645 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; | |
1646 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; | |
1647 } | |
1648 dsputil_set_bit_exact_pix_mmx(c, mask); | |
1649 } |