Mercurial > libavcodec.hg
comparison dsputil.c @ 706:e65798d228ea libavcodec
IDCT permutation cleanup; the IDCT can now be selected per context
fixing some thread-unsafe code
author | michaelni |
---|---|
date | Sun, 29 Sep 2002 22:44:22 +0000 |
parents | efcbfbd18864 |
children | cbe316f082bc |
comparison
equal
deleted
inserted
replaced
705:107a56aa74f5 | 706:e65798d228ea |
---|---|
18 * | 18 * |
19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
20 */ | 20 */ |
21 #include "avcodec.h" | 21 #include "avcodec.h" |
22 #include "dsputil.h" | 22 #include "dsputil.h" |
23 #include "simple_idct.h" | 23 |
24 | |
25 void (*ff_idct)(DCTELEM *block); | |
26 void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block); | |
27 void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block); | |
28 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); | 24 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); |
29 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); | 25 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); |
30 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | 26 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
31 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | 27 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
32 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); | 28 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); |
47 int ff_bit_exact=0; | 43 int ff_bit_exact=0; |
48 | 44 |
49 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; | 45 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
50 UINT32 squareTbl[512]; | 46 UINT32 squareTbl[512]; |
51 | 47 |
52 extern INT16 ff_mpeg1_default_intra_matrix[64]; | 48 const UINT8 ff_zigzag_direct[64] = { |
53 extern INT16 ff_mpeg1_default_non_intra_matrix[64]; | 49 0, 1, 8, 16, 9, 2, 3, 10, |
54 extern INT16 ff_mpeg4_default_intra_matrix[64]; | 50 17, 24, 32, 25, 18, 11, 4, 5, |
55 extern INT16 ff_mpeg4_default_non_intra_matrix[64]; | |
56 | |
57 UINT8 zigzag_direct[64] = { | |
58 0, 1, 8, 16, 9, 2, 3, 10, | |
59 17, 24, 32, 25, 18, 11, 4, 5, | |
60 12, 19, 26, 33, 40, 48, 41, 34, | 51 12, 19, 26, 33, 40, 48, 41, 34, |
61 27, 20, 13, 6, 7, 14, 21, 28, | 52 27, 20, 13, 6, 7, 14, 21, 28, |
62 35, 42, 49, 56, 57, 50, 43, 36, | 53 35, 42, 49, 56, 57, 50, 43, 36, |
63 29, 22, 15, 23, 30, 37, 44, 51, | 54 29, 22, 15, 23, 30, 37, 44, 51, |
64 58, 59, 52, 45, 38, 31, 39, 46, | 55 58, 59, 52, 45, 38, 31, 39, 46, |
65 53, 60, 61, 54, 47, 55, 62, 63 | 56 53, 60, 61, 54, 47, 55, 62, 63 |
66 }; | 57 }; |
67 | 58 |
68 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ | 59 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
69 UINT16 __align8 inv_zigzag_direct16[64]; | 60 UINT16 __align8 inv_zigzag_direct16[64]; |
70 | 61 |
71 /* not permutated zigzag_direct for MMX quantizer */ | 62 const UINT8 ff_alternate_horizontal_scan[64] = { |
72 UINT8 zigzag_direct_noperm[64]; | 63 0, 1, 2, 3, 8, 9, 16, 17, |
73 | |
74 UINT8 ff_alternate_horizontal_scan[64] = { | |
75 0, 1, 2, 3, 8, 9, 16, 17, | |
76 10, 11, 4, 5, 6, 7, 15, 14, | 64 10, 11, 4, 5, 6, 7, 15, 14, |
77 13, 12, 19, 18, 24, 25, 32, 33, | 65 13, 12, 19, 18, 24, 25, 32, 33, |
78 26, 27, 20, 21, 22, 23, 28, 29, | 66 26, 27, 20, 21, 22, 23, 28, 29, |
79 30, 31, 34, 35, 40, 41, 48, 49, | 67 30, 31, 34, 35, 40, 41, 48, 49, |
80 42, 43, 36, 37, 38, 39, 44, 45, | 68 42, 43, 36, 37, 38, 39, 44, 45, |
81 46, 47, 50, 51, 56, 57, 58, 59, | 69 46, 47, 50, 51, 56, 57, 58, 59, |
82 52, 53, 54, 55, 60, 61, 62, 63, | 70 52, 53, 54, 55, 60, 61, 62, 63, |
83 }; | 71 }; |
84 | 72 |
85 UINT8 ff_alternate_vertical_scan[64] = { | 73 const UINT8 ff_alternate_vertical_scan[64] = { |
86 0, 8, 16, 24, 1, 9, 2, 10, | 74 0, 8, 16, 24, 1, 9, 2, 10, |
87 17, 25, 32, 40, 48, 56, 57, 49, | 75 17, 25, 32, 40, 48, 56, 57, 49, |
88 41, 33, 26, 18, 3, 11, 4, 12, | 76 41, 33, 26, 18, 3, 11, 4, 12, |
89 19, 27, 34, 42, 50, 58, 35, 43, | 77 19, 27, 34, 42, 50, 58, 35, 43, |
90 51, 59, 20, 28, 5, 13, 6, 14, | 78 51, 59, 20, 28, 5, 13, 6, 14, |
91 21, 29, 36, 44, 52, 60, 37, 45, | 79 21, 29, 36, 44, 52, 60, 37, 45, |
92 53, 61, 22, 30, 7, 15, 23, 31, | 80 53, 61, 22, 30, 7, 15, 23, 31, |
93 38, 46, 54, 62, 39, 47, 55, 63, | 81 38, 46, 54, 62, 39, 47, 55, 63, |
94 }; | 82 }; |
95 | |
96 #ifdef SIMPLE_IDCT | |
97 | |
98 /* Input permutation for the simple_idct_mmx */ | |
99 static UINT8 simple_mmx_permutation[64]={ | |
100 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |
101 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
102 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
103 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
104 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
105 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
106 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
107 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
108 }; | |
109 #endif | |
110 | 83 |
111 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ | 84 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ |
112 UINT32 inverse[256]={ | 85 UINT32 inverse[256]={ |
113 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, | 86 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, |
114 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | 87 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, |
141 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, | 114 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, |
142 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, | 115 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, |
143 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, | 116 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, |
144 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, | 117 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, |
145 }; | 118 }; |
146 | |
147 /* used to skip zeros at the end */ | |
148 UINT8 zigzag_end[64]; | |
149 | |
150 UINT8 permutation[64]; | |
151 //UINT8 invPermutation[64]; | |
152 | |
153 static void build_zigzag_end(void) | |
154 { | |
155 int lastIndex; | |
156 int lastIndexAfterPerm=0; | |
157 for(lastIndex=0; lastIndex<64; lastIndex++) | |
158 { | |
159 if(zigzag_direct[lastIndex] > lastIndexAfterPerm) | |
160 lastIndexAfterPerm= zigzag_direct[lastIndex]; | |
161 zigzag_end[lastIndex]= lastIndexAfterPerm + 1; | |
162 } | |
163 } | |
164 | 119 |
165 int pix_sum_c(UINT8 * pix, int line_size) | 120 int pix_sum_c(UINT8 * pix, int line_size) |
166 { | 121 { |
167 int s, i, j; | 122 int s, i, j; |
168 | 123 |
1538 return s; | 1493 return s; |
1539 } | 1494 } |
1540 | 1495 |
1541 /* permute block so that it corresponds to the MMX idct | 1496 /* permute block so that it corresponds to the MMX idct |
1542 order */ | 1497 order */ |
1543 #ifdef SIMPLE_IDCT | 1498 void block_permute(INT16 *block, UINT8 *permutation) |
1544 /* general permutation, but perhaps slightly slower */ | |
1545 void block_permute(INT16 *block) | |
1546 { | 1499 { |
1547 int i; | 1500 int i; |
1548 INT16 temp[64]; | 1501 INT16 temp[64]; |
1549 | 1502 |
1550 for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i]; | 1503 for(i=0; i<64; i++) temp[ permutation[i] ] = block[i]; |
1551 | 1504 |
1552 for(i=0; i<64; i++) block[i] = temp[i]; | 1505 for(i=0; i<64; i++) block[i] = temp[i]; |
1553 } | 1506 } |
1554 #else | |
1555 | |
1556 void block_permute(INT16 *block) | |
1557 { | |
1558 int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; | |
1559 int i; | |
1560 | |
1561 for(i=0;i<8;i++) { | |
1562 tmp1 = block[1]; | |
1563 tmp2 = block[2]; | |
1564 tmp3 = block[3]; | |
1565 tmp4 = block[4]; | |
1566 tmp5 = block[5]; | |
1567 tmp6 = block[6]; | |
1568 block[1] = tmp2; | |
1569 block[2] = tmp4; | |
1570 block[3] = tmp6; | |
1571 block[4] = tmp1; | |
1572 block[5] = tmp3; | |
1573 block[6] = tmp5; | |
1574 block += 8; | |
1575 } | |
1576 } | |
1577 #endif | |
1578 | 1507 |
1579 void clear_blocks_c(DCTELEM *blocks) | 1508 void clear_blocks_c(DCTELEM *blocks) |
1580 { | 1509 { |
1581 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 1510 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
1582 } | 1511 } |
1583 | 1512 |
1584 /* XXX: those functions should be suppressed ASAP when all IDCTs are | |
1585 converted */ | |
1586 void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block) | |
1587 { | |
1588 ff_idct (block); | |
1589 put_pixels_clamped(block, dest, line_size); | |
1590 } | |
1591 | |
1592 void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block) | |
1593 { | |
1594 ff_idct (block); | |
1595 add_pixels_clamped(block, dest, line_size); | |
1596 } | |
1597 | |
1598 void dsputil_init(void) | 1513 void dsputil_init(void) |
1599 { | 1514 { |
1600 int i, j; | 1515 int i, j; |
1601 int use_permuted_idct; | |
1602 | 1516 |
1603 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; | 1517 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; |
1604 for(i=0;i<MAX_NEG_CROP;i++) { | 1518 for(i=0;i<MAX_NEG_CROP;i++) { |
1605 cropTbl[i] = 0; | 1519 cropTbl[i] = 0; |
1606 cropTbl[i + MAX_NEG_CROP + 256] = 255; | 1520 cropTbl[i + MAX_NEG_CROP + 256] = 255; |
1608 | 1522 |
1609 for(i=0;i<512;i++) { | 1523 for(i=0;i<512;i++) { |
1610 squareTbl[i] = (i - 256) * (i - 256); | 1524 squareTbl[i] = (i - 256) * (i - 256); |
1611 } | 1525 } |
1612 | 1526 |
1613 #ifdef SIMPLE_IDCT | |
1614 ff_idct = NULL; | |
1615 #else | |
1616 ff_idct = j_rev_dct; | |
1617 #endif | |
1618 get_pixels = get_pixels_c; | 1527 get_pixels = get_pixels_c; |
1619 diff_pixels = diff_pixels_c; | 1528 diff_pixels = diff_pixels_c; |
1620 put_pixels_clamped = put_pixels_clamped_c; | 1529 put_pixels_clamped = put_pixels_clamped_c; |
1621 add_pixels_clamped = add_pixels_clamped_c; | 1530 add_pixels_clamped = add_pixels_clamped_c; |
1622 gmc1= gmc1_c; | 1531 gmc1= gmc1_c; |
1631 pix_abs8x8 = pix_abs8x8_c; | 1540 pix_abs8x8 = pix_abs8x8_c; |
1632 pix_abs8x8_x2 = pix_abs8x8_x2_c; | 1541 pix_abs8x8_x2 = pix_abs8x8_x2_c; |
1633 pix_abs8x8_y2 = pix_abs8x8_y2_c; | 1542 pix_abs8x8_y2 = pix_abs8x8_y2_c; |
1634 pix_abs8x8_xy2 = pix_abs8x8_xy2_c; | 1543 pix_abs8x8_xy2 = pix_abs8x8_xy2_c; |
1635 | 1544 |
1636 use_permuted_idct = 1; | |
1637 | |
1638 #ifdef HAVE_MMX | 1545 #ifdef HAVE_MMX |
1639 dsputil_init_mmx(); | 1546 dsputil_init_mmx(); |
1640 #endif | 1547 #endif |
1641 #ifdef ARCH_ARMV4L | 1548 #ifdef ARCH_ARMV4L |
1642 dsputil_init_armv4l(); | 1549 dsputil_init_armv4l(); |
1643 #endif | 1550 #endif |
1644 #ifdef HAVE_MLIB | 1551 #ifdef HAVE_MLIB |
1645 dsputil_init_mlib(); | 1552 dsputil_init_mlib(); |
1646 use_permuted_idct = 0; | |
1647 #endif | 1553 #endif |
1648 #ifdef ARCH_ALPHA | 1554 #ifdef ARCH_ALPHA |
1649 dsputil_init_alpha(); | 1555 dsputil_init_alpha(); |
1650 use_permuted_idct = 0; | |
1651 #endif | 1556 #endif |
1652 #ifdef ARCH_POWERPC | 1557 #ifdef ARCH_POWERPC |
1653 dsputil_init_ppc(); | 1558 dsputil_init_ppc(); |
1654 #endif | 1559 #endif |
1655 #ifdef HAVE_MMI | 1560 #ifdef HAVE_MMI |
1656 dsputil_init_mmi(); | 1561 dsputil_init_mmi(); |
1657 use_permuted_idct = 0; | |
1658 #endif | 1562 #endif |
1659 | 1563 |
1660 #ifdef SIMPLE_IDCT | 1564 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; |
1661 if (ff_idct == NULL) { | |
1662 ff_idct_put = simple_idct_put; | |
1663 ff_idct_add = simple_idct_add; | |
1664 use_permuted_idct=0; | |
1665 } | |
1666 #endif | |
1667 if(ff_idct != NULL) { | |
1668 ff_idct_put = gen_idct_put; | |
1669 ff_idct_add = gen_idct_add; | |
1670 } | |
1671 | |
1672 if(use_permuted_idct) | |
1673 #ifdef SIMPLE_IDCT | |
1674 for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i]; | |
1675 #else | |
1676 for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |
1677 #endif | |
1678 else | |
1679 for(i=0; i<64; i++) permutation[i]=i; | |
1680 | |
1681 for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1; | |
1682 for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i]; | |
1683 | |
1684 if (use_permuted_idct) { | |
1685 /* permute for IDCT */ | |
1686 for(i=0;i<64;i++) { | |
1687 j = zigzag_direct[i]; | |
1688 zigzag_direct[i] = block_permute_op(j); | |
1689 j = ff_alternate_horizontal_scan[i]; | |
1690 ff_alternate_horizontal_scan[i] = block_permute_op(j); | |
1691 j = ff_alternate_vertical_scan[i]; | |
1692 ff_alternate_vertical_scan[i] = block_permute_op(j); | |
1693 } | |
1694 block_permute(ff_mpeg1_default_intra_matrix); | |
1695 block_permute(ff_mpeg1_default_non_intra_matrix); | |
1696 block_permute(ff_mpeg4_default_intra_matrix); | |
1697 block_permute(ff_mpeg4_default_non_intra_matrix); | |
1698 } | |
1699 | |
1700 build_zigzag_end(); | |
1701 } | 1565 } |
1702 | 1566 |
1703 /* remove any non bit exact operation (testing purpose) */ | 1567 /* remove any non bit exact operation (testing purpose) */ |
1704 void avcodec_set_bit_exact(void) | 1568 void avcodec_set_bit_exact(void) |
1705 { | 1569 { |