comparison dsputil.c @ 706:e65798d228ea libavcodec

IDCT permutation cleanup; the IDCT can now be selected per context, which fixes some thread-unsafe code
author michaelni
date Sun, 29 Sep 2002 22:44:22 +0000
parents efcbfbd18864
children cbe316f082bc
comparison
equal deleted inserted replaced
705:107a56aa74f5 706:e65798d228ea
18 * 18 *
19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
20 */ 20 */
21 #include "avcodec.h" 21 #include "avcodec.h"
22 #include "dsputil.h" 22 #include "dsputil.h"
23 #include "simple_idct.h" 23
24
25 void (*ff_idct)(DCTELEM *block);
26 void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block);
27 void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block);
28 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); 24 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
29 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); 25 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
30 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); 26 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
31 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); 27 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
32 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); 28 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
47 int ff_bit_exact=0; 43 int ff_bit_exact=0;
48 44
49 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; 45 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
50 UINT32 squareTbl[512]; 46 UINT32 squareTbl[512];
51 47
52 extern INT16 ff_mpeg1_default_intra_matrix[64]; 48 const UINT8 ff_zigzag_direct[64] = {
53 extern INT16 ff_mpeg1_default_non_intra_matrix[64]; 49 0, 1, 8, 16, 9, 2, 3, 10,
54 extern INT16 ff_mpeg4_default_intra_matrix[64]; 50 17, 24, 32, 25, 18, 11, 4, 5,
55 extern INT16 ff_mpeg4_default_non_intra_matrix[64];
56
57 UINT8 zigzag_direct[64] = {
58 0, 1, 8, 16, 9, 2, 3, 10,
59 17, 24, 32, 25, 18, 11, 4, 5,
60 12, 19, 26, 33, 40, 48, 41, 34, 51 12, 19, 26, 33, 40, 48, 41, 34,
61 27, 20, 13, 6, 7, 14, 21, 28, 52 27, 20, 13, 6, 7, 14, 21, 28,
62 35, 42, 49, 56, 57, 50, 43, 36, 53 35, 42, 49, 56, 57, 50, 43, 36,
63 29, 22, 15, 23, 30, 37, 44, 51, 54 29, 22, 15, 23, 30, 37, 44, 51,
64 58, 59, 52, 45, 38, 31, 39, 46, 55 58, 59, 52, 45, 38, 31, 39, 46,
65 53, 60, 61, 54, 47, 55, 62, 63 56 53, 60, 61, 54, 47, 55, 62, 63
66 }; 57 };
67 58
68 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ 59 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
69 UINT16 __align8 inv_zigzag_direct16[64]; 60 UINT16 __align8 inv_zigzag_direct16[64];
70 61
71 /* not permutated zigzag_direct for MMX quantizer */ 62 const UINT8 ff_alternate_horizontal_scan[64] = {
72 UINT8 zigzag_direct_noperm[64]; 63 0, 1, 2, 3, 8, 9, 16, 17,
73
74 UINT8 ff_alternate_horizontal_scan[64] = {
75 0, 1, 2, 3, 8, 9, 16, 17,
76 10, 11, 4, 5, 6, 7, 15, 14, 64 10, 11, 4, 5, 6, 7, 15, 14,
77 13, 12, 19, 18, 24, 25, 32, 33, 65 13, 12, 19, 18, 24, 25, 32, 33,
78 26, 27, 20, 21, 22, 23, 28, 29, 66 26, 27, 20, 21, 22, 23, 28, 29,
79 30, 31, 34, 35, 40, 41, 48, 49, 67 30, 31, 34, 35, 40, 41, 48, 49,
80 42, 43, 36, 37, 38, 39, 44, 45, 68 42, 43, 36, 37, 38, 39, 44, 45,
81 46, 47, 50, 51, 56, 57, 58, 59, 69 46, 47, 50, 51, 56, 57, 58, 59,
82 52, 53, 54, 55, 60, 61, 62, 63, 70 52, 53, 54, 55, 60, 61, 62, 63,
83 }; 71 };
84 72
85 UINT8 ff_alternate_vertical_scan[64] = { 73 const UINT8 ff_alternate_vertical_scan[64] = {
86 0, 8, 16, 24, 1, 9, 2, 10, 74 0, 8, 16, 24, 1, 9, 2, 10,
87 17, 25, 32, 40, 48, 56, 57, 49, 75 17, 25, 32, 40, 48, 56, 57, 49,
88 41, 33, 26, 18, 3, 11, 4, 12, 76 41, 33, 26, 18, 3, 11, 4, 12,
89 19, 27, 34, 42, 50, 58, 35, 43, 77 19, 27, 34, 42, 50, 58, 35, 43,
90 51, 59, 20, 28, 5, 13, 6, 14, 78 51, 59, 20, 28, 5, 13, 6, 14,
91 21, 29, 36, 44, 52, 60, 37, 45, 79 21, 29, 36, 44, 52, 60, 37, 45,
92 53, 61, 22, 30, 7, 15, 23, 31, 80 53, 61, 22, 30, 7, 15, 23, 31,
93 38, 46, 54, 62, 39, 47, 55, 63, 81 38, 46, 54, 62, 39, 47, 55, 63,
94 }; 82 };
95
96 #ifdef SIMPLE_IDCT
97
98 /* Input permutation for the simple_idct_mmx */
99 static UINT8 simple_mmx_permutation[64]={
100 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
101 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
102 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
103 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
104 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
105 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
106 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
107 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
108 };
109 #endif
110 83
111 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ 84 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
112 UINT32 inverse[256]={ 85 UINT32 inverse[256]={
113 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, 86 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
114 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, 87 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
141 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, 114 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
142 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, 115 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
143 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, 116 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
144 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, 117 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
145 }; 118 };
146
147 /* used to skip zeros at the end */
148 UINT8 zigzag_end[64];
149
150 UINT8 permutation[64];
151 //UINT8 invPermutation[64];
152
153 static void build_zigzag_end(void)
154 {
155 int lastIndex;
156 int lastIndexAfterPerm=0;
157 for(lastIndex=0; lastIndex<64; lastIndex++)
158 {
159 if(zigzag_direct[lastIndex] > lastIndexAfterPerm)
160 lastIndexAfterPerm= zigzag_direct[lastIndex];
161 zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
162 }
163 }
164 119
165 int pix_sum_c(UINT8 * pix, int line_size) 120 int pix_sum_c(UINT8 * pix, int line_size)
166 { 121 {
167 int s, i, j; 122 int s, i, j;
168 123
1538 return s; 1493 return s;
1539 } 1494 }
1540 1495
1541 /* permute block according so that it corresponds to the MMX idct 1496 /* permute block according so that it corresponds to the MMX idct
1542 order */ 1497 order */
1543 #ifdef SIMPLE_IDCT 1498 void block_permute(INT16 *block, UINT8 *permutation)
1544 /* general permutation, but perhaps slightly slower */
1545 void block_permute(INT16 *block)
1546 { 1499 {
1547 int i; 1500 int i;
1548 INT16 temp[64]; 1501 INT16 temp[64];
1549 1502
1550 for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i]; 1503 for(i=0; i<64; i++) temp[ permutation[i] ] = block[i];
1551 1504
1552 for(i=0; i<64; i++) block[i] = temp[i]; 1505 for(i=0; i<64; i++) block[i] = temp[i];
1553 } 1506 }
1554 #else
1555
1556 void block_permute(INT16 *block)
1557 {
1558 int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1559 int i;
1560
1561 for(i=0;i<8;i++) {
1562 tmp1 = block[1];
1563 tmp2 = block[2];
1564 tmp3 = block[3];
1565 tmp4 = block[4];
1566 tmp5 = block[5];
1567 tmp6 = block[6];
1568 block[1] = tmp2;
1569 block[2] = tmp4;
1570 block[3] = tmp6;
1571 block[4] = tmp1;
1572 block[5] = tmp3;
1573 block[6] = tmp5;
1574 block += 8;
1575 }
1576 }
1577 #endif
1578 1507
1579 void clear_blocks_c(DCTELEM *blocks) 1508 void clear_blocks_c(DCTELEM *blocks)
1580 { 1509 {
1581 memset(blocks, 0, sizeof(DCTELEM)*6*64); 1510 memset(blocks, 0, sizeof(DCTELEM)*6*64);
1582 } 1511 }
1583 1512
1584 /* XXX: those functions should be suppressed ASAP when all IDCTs are
1585 converted */
1586 void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
1587 {
1588 ff_idct (block);
1589 put_pixels_clamped(block, dest, line_size);
1590 }
1591
1592 void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
1593 {
1594 ff_idct (block);
1595 add_pixels_clamped(block, dest, line_size);
1596 }
1597
1598 void dsputil_init(void) 1513 void dsputil_init(void)
1599 { 1514 {
1600 int i, j; 1515 int i, j;
1601 int use_permuted_idct;
1602 1516
1603 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; 1517 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
1604 for(i=0;i<MAX_NEG_CROP;i++) { 1518 for(i=0;i<MAX_NEG_CROP;i++) {
1605 cropTbl[i] = 0; 1519 cropTbl[i] = 0;
1606 cropTbl[i + MAX_NEG_CROP + 256] = 255; 1520 cropTbl[i + MAX_NEG_CROP + 256] = 255;
1608 1522
1609 for(i=0;i<512;i++) { 1523 for(i=0;i<512;i++) {
1610 squareTbl[i] = (i - 256) * (i - 256); 1524 squareTbl[i] = (i - 256) * (i - 256);
1611 } 1525 }
1612 1526
1613 #ifdef SIMPLE_IDCT
1614 ff_idct = NULL;
1615 #else
1616 ff_idct = j_rev_dct;
1617 #endif
1618 get_pixels = get_pixels_c; 1527 get_pixels = get_pixels_c;
1619 diff_pixels = diff_pixels_c; 1528 diff_pixels = diff_pixels_c;
1620 put_pixels_clamped = put_pixels_clamped_c; 1529 put_pixels_clamped = put_pixels_clamped_c;
1621 add_pixels_clamped = add_pixels_clamped_c; 1530 add_pixels_clamped = add_pixels_clamped_c;
1622 gmc1= gmc1_c; 1531 gmc1= gmc1_c;
1631 pix_abs8x8 = pix_abs8x8_c; 1540 pix_abs8x8 = pix_abs8x8_c;
1632 pix_abs8x8_x2 = pix_abs8x8_x2_c; 1541 pix_abs8x8_x2 = pix_abs8x8_x2_c;
1633 pix_abs8x8_y2 = pix_abs8x8_y2_c; 1542 pix_abs8x8_y2 = pix_abs8x8_y2_c;
1634 pix_abs8x8_xy2 = pix_abs8x8_xy2_c; 1543 pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
1635 1544
1636 use_permuted_idct = 1;
1637
1638 #ifdef HAVE_MMX 1545 #ifdef HAVE_MMX
1639 dsputil_init_mmx(); 1546 dsputil_init_mmx();
1640 #endif 1547 #endif
1641 #ifdef ARCH_ARMV4L 1548 #ifdef ARCH_ARMV4L
1642 dsputil_init_armv4l(); 1549 dsputil_init_armv4l();
1643 #endif 1550 #endif
1644 #ifdef HAVE_MLIB 1551 #ifdef HAVE_MLIB
1645 dsputil_init_mlib(); 1552 dsputil_init_mlib();
1646 use_permuted_idct = 0;
1647 #endif 1553 #endif
1648 #ifdef ARCH_ALPHA 1554 #ifdef ARCH_ALPHA
1649 dsputil_init_alpha(); 1555 dsputil_init_alpha();
1650 use_permuted_idct = 0;
1651 #endif 1556 #endif
1652 #ifdef ARCH_POWERPC 1557 #ifdef ARCH_POWERPC
1653 dsputil_init_ppc(); 1558 dsputil_init_ppc();
1654 #endif 1559 #endif
1655 #ifdef HAVE_MMI 1560 #ifdef HAVE_MMI
1656 dsputil_init_mmi(); 1561 dsputil_init_mmi();
1657 use_permuted_idct = 0;
1658 #endif 1562 #endif
1659 1563
1660 #ifdef SIMPLE_IDCT 1564 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
1661 if (ff_idct == NULL) {
1662 ff_idct_put = simple_idct_put;
1663 ff_idct_add = simple_idct_add;
1664 use_permuted_idct=0;
1665 }
1666 #endif
1667 if(ff_idct != NULL) {
1668 ff_idct_put = gen_idct_put;
1669 ff_idct_add = gen_idct_add;
1670 }
1671
1672 if(use_permuted_idct)
1673 #ifdef SIMPLE_IDCT
1674 for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
1675 #else
1676 for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
1677 #endif
1678 else
1679 for(i=0; i<64; i++) permutation[i]=i;
1680
1681 for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
1682 for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
1683
1684 if (use_permuted_idct) {
1685 /* permute for IDCT */
1686 for(i=0;i<64;i++) {
1687 j = zigzag_direct[i];
1688 zigzag_direct[i] = block_permute_op(j);
1689 j = ff_alternate_horizontal_scan[i];
1690 ff_alternate_horizontal_scan[i] = block_permute_op(j);
1691 j = ff_alternate_vertical_scan[i];
1692 ff_alternate_vertical_scan[i] = block_permute_op(j);
1693 }
1694 block_permute(ff_mpeg1_default_intra_matrix);
1695 block_permute(ff_mpeg1_default_non_intra_matrix);
1696 block_permute(ff_mpeg4_default_intra_matrix);
1697 block_permute(ff_mpeg4_default_non_intra_matrix);
1698 }
1699
1700 build_zigzag_end();
1701 } 1565 }
1702 1566
1703 /* remove any non bit exact operation (testing purpose) */ 1567 /* remove any non bit exact operation (testing purpose) */
1704 void avcodec_set_bit_exact(void) 1568 void avcodec_set_bit_exact(void)
1705 { 1569 {