comparison dsputil.c @ 853:eacc2dd8fd9d libavcodec

* using DSPContext - so each codec could use its local (sub)set of CPU extensions
author kabi
date Mon, 11 Nov 2002 09:40:17 +0000
parents d4726182dfd2
children b510a7b6decd
comparison
equal deleted inserted replaced
852:c01c98206ee6 853:eacc2dd8fd9d
18 * 18 *
19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
20 */ 20 */
21 #include "avcodec.h" 21 #include "avcodec.h"
22 #include "dsputil.h" 22 #include "dsputil.h"
23 23 /*
24 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); 24 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
25 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); 25 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
26 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); 26 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
27 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); 27 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
28 void (*ff_gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); 28 void (*ff_gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
39 39
40 op_pixels_abs_func pix_abs8x8; 40 op_pixels_abs_func pix_abs8x8;
41 op_pixels_abs_func pix_abs8x8_x2; 41 op_pixels_abs_func pix_abs8x8_x2;
42 op_pixels_abs_func pix_abs8x8_y2; 42 op_pixels_abs_func pix_abs8x8_y2;
43 op_pixels_abs_func pix_abs8x8_xy2; 43 op_pixels_abs_func pix_abs8x8_xy2;
44 44 */
45 int ff_bit_exact=0; 45 int ff_bit_exact=0;
46 46
47 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; 47 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
48 UINT32 squareTbl[512]; 48 UINT32 squareTbl[512];
49 49
82 53, 61, 22, 30, 7, 15, 23, 31, 82 53, 61, 22, 30, 7, 15, 23, 31,
83 38, 46, 54, 62, 39, 47, 55, 63, 83 38, 46, 54, 62, 39, 47, 55, 63,
84 }; 84 };
85 85
86 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ 86 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
87 UINT32 inverse[256]={ 87 const UINT32 inverse[256]={
88 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, 88 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
89 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, 89 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
90 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, 90 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
91 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, 91 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
92 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, 92 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
117 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, 117 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
118 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, 118 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
119 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, 119 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
120 }; 120 };
121 121
122 int pix_sum_c(UINT8 * pix, int line_size) 122 static int pix_sum_c(UINT8 * pix, int line_size)
123 { 123 {
124 int s, i, j; 124 int s, i, j;
125 125
126 s = 0; 126 s = 0;
127 for (i = 0; i < 16; i++) { 127 for (i = 0; i < 16; i++) {
139 pix += line_size - 16; 139 pix += line_size - 16;
140 } 140 }
141 return s; 141 return s;
142 } 142 }
143 143
144 int pix_norm1_c(UINT8 * pix, int line_size) 144 static int pix_norm1_c(UINT8 * pix, int line_size)
145 { 145 {
146 int s, i, j; 146 int s, i, j;
147 UINT32 *sq = squareTbl + 256; 147 UINT32 *sq = squareTbl + 256;
148 148
149 s = 0; 149 s = 0;
163 } 163 }
164 return s; 164 return s;
165 } 165 }
166 166
167 167
168 void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) 168 static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
169 { 169 {
170 int i; 170 int i;
171 171
172 /* read the pixels */ 172 /* read the pixels */
173 for(i=0;i<8;i++) { 173 for(i=0;i<8;i++) {
182 pixels += line_size; 182 pixels += line_size;
183 block += 8; 183 block += 8;
184 } 184 }
185 } 185 }
186 186
187 void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2, 187 static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1,
188 int stride){ 188 const UINT8 *s2, int stride){
189 int i; 189 int i;
190 190
191 /* read the pixels */ 191 /* read the pixels */
192 for(i=0;i<8;i++) { 192 for(i=0;i<8;i++) {
193 block[0] = s1[0] - s2[0]; 193 block[0] = s1[0] - s2[0];
203 block += 8; 203 block += 8;
204 } 204 }
205 } 205 }
206 206
207 207
208 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, 208 static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
209 int line_size) 209 int line_size)
210 { 210 {
211 int i; 211 int i;
212 UINT8 *cm = cropTbl + MAX_NEG_CROP; 212 UINT8 *cm = cropTbl + MAX_NEG_CROP;
213 213
214 /* read the pixels */ 214 /* read the pixels */
225 pixels += line_size; 225 pixels += line_size;
226 block += 8; 226 block += 8;
227 } 227 }
228 } 228 }
229 229
230 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, 230 static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
231 int line_size) 231 int line_size)
232 { 232 {
233 int i; 233 int i;
234 UINT8 *cm = cropTbl + MAX_NEG_CROP; 234 UINT8 *cm = cropTbl + MAX_NEG_CROP;
235 235
1351 #undef op_avg 1351 #undef op_avg
1352 #undef op_avg_no_rnd 1352 #undef op_avg_no_rnd
1353 #undef op_put 1353 #undef op_put
1354 #undef op_put_no_rnd 1354 #undef op_put_no_rnd
1355 1355
1356 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) 1356 static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1357 { 1357 {
1358 int s, i; 1358 int s, i;
1359 1359
1360 s = 0; 1360 s = 0;
1361 for(i=0;i<16;i++) { 1361 for(i=0;i<16;i++) {
1379 pix2 += line_size; 1379 pix2 += line_size;
1380 } 1380 }
1381 return s; 1381 return s;
1382 } 1382 }
1383 1383
1384 int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) 1384 static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1385 { 1385 {
1386 int s, i; 1386 int s, i;
1387 1387
1388 s = 0; 1388 s = 0;
1389 for(i=0;i<16;i++) { 1389 for(i=0;i<16;i++) {
1407 pix2 += line_size; 1407 pix2 += line_size;
1408 } 1408 }
1409 return s; 1409 return s;
1410 } 1410 }
1411 1411
1412 int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) 1412 static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1413 { 1413 {
1414 int s, i; 1414 int s, i;
1415 UINT8 *pix3 = pix2 + line_size; 1415 UINT8 *pix3 = pix2 + line_size;
1416 1416
1417 s = 0; 1417 s = 0;
1437 pix3 += line_size; 1437 pix3 += line_size;
1438 } 1438 }
1439 return s; 1439 return s;
1440 } 1440 }
1441 1441
1442 int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) 1442 static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1443 { 1443 {
1444 int s, i; 1444 int s, i;
1445 UINT8 *pix3 = pix2 + line_size; 1445 UINT8 *pix3 = pix2 + line_size;
1446 1446
1447 s = 0; 1447 s = 0;
1467 pix3 += line_size; 1467 pix3 += line_size;
1468 } 1468 }
1469 return s; 1469 return s;
1470 } 1470 }
1471 1471
1472 int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) 1472 static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1473 { 1473 {
1474 int s, i; 1474 int s, i;
1475 1475
1476 s = 0; 1476 s = 0;
1477 for(i=0;i<8;i++) { 1477 for(i=0;i<8;i++) {
1487 pix2 += line_size; 1487 pix2 += line_size;
1488 } 1488 }
1489 return s; 1489 return s;
1490 } 1490 }
1491 1491
1492 int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) 1492 static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1493 { 1493 {
1494 int s, i; 1494 int s, i;
1495 1495
1496 s = 0; 1496 s = 0;
1497 for(i=0;i<8;i++) { 1497 for(i=0;i<8;i++) {
1507 pix2 += line_size; 1507 pix2 += line_size;
1508 } 1508 }
1509 return s; 1509 return s;
1510 } 1510 }
1511 1511
1512 int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) 1512 static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1513 { 1513 {
1514 int s, i; 1514 int s, i;
1515 UINT8 *pix3 = pix2 + line_size; 1515 UINT8 *pix3 = pix2 + line_size;
1516 1516
1517 s = 0; 1517 s = 0;
1529 pix3 += line_size; 1529 pix3 += line_size;
1530 } 1530 }
1531 return s; 1531 return s;
1532 } 1532 }
1533 1533
1534 int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) 1534 static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
1535 { 1535 {
1536 int s, i; 1536 int s, i;
1537 UINT8 *pix3 = pix2 + line_size; 1537 UINT8 *pix3 = pix2 + line_size;
1538 1538
1539 s = 0; 1539 s = 0;
1572 const int perm_j= permutation[j]; 1572 const int perm_j= permutation[j];
1573 block[perm_j]= temp[j]; 1573 block[perm_j]= temp[j];
1574 } 1574 }
1575 } 1575 }
1576 1576
1577 void clear_blocks_c(DCTELEM *blocks) 1577 static void clear_blocks_c(DCTELEM *blocks)
1578 { 1578 {
1579 memset(blocks, 0, sizeof(DCTELEM)*6*64); 1579 memset(blocks, 0, sizeof(DCTELEM)*6*64);
1580 } 1580 }
1581 1581
1582 void dsputil_init(void) 1582 void dsputil_init(DSPContext* c, unsigned mask)
1583 { 1583 {
1584 int i; 1584 int i;
1585 1585
1586 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; 1586 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
1587 for(i=0;i<MAX_NEG_CROP;i++) { 1587 for(i=0;i<MAX_NEG_CROP;i++) {
1591 1591
1592 for(i=0;i<512;i++) { 1592 for(i=0;i<512;i++) {
1593 squareTbl[i] = (i - 256) * (i - 256); 1593 squareTbl[i] = (i - 256) * (i - 256);
1594 } 1594 }
1595 1595
1596 get_pixels = get_pixels_c; 1596 c->get_pixels = get_pixels_c;
1597 diff_pixels = diff_pixels_c; 1597 c->diff_pixels = diff_pixels_c;
1598 put_pixels_clamped = put_pixels_clamped_c; 1598 c->put_pixels_clamped = put_pixels_clamped_c;
1599 add_pixels_clamped = add_pixels_clamped_c; 1599 c->add_pixels_clamped = add_pixels_clamped_c;
1600 ff_gmc1= gmc1_c; 1600 c->gmc1 = gmc1_c;
1601 ff_gmc= gmc_c; 1601 c->gmc = gmc_c;
1602 clear_blocks= clear_blocks_c; 1602 c->clear_blocks = clear_blocks_c;
1603 pix_sum= pix_sum_c; 1603 c->pix_sum = pix_sum_c;
1604 pix_norm1= pix_norm1_c; 1604 c->pix_norm1 = pix_norm1_c;
1605 1605
1606 pix_abs16x16 = pix_abs16x16_c; 1606 c->pix_abs16x16 = pix_abs16x16_c;
1607 pix_abs16x16_x2 = pix_abs16x16_x2_c; 1607 c->pix_abs16x16_x2 = pix_abs16x16_x2_c;
1608 pix_abs16x16_y2 = pix_abs16x16_y2_c; 1608 c->pix_abs16x16_y2 = pix_abs16x16_y2_c;
1609 pix_abs16x16_xy2 = pix_abs16x16_xy2_c; 1609 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
1610 pix_abs8x8 = pix_abs8x8_c; 1610 c->pix_abs8x8 = pix_abs8x8_c;
1611 pix_abs8x8_x2 = pix_abs8x8_x2_c; 1611 c->pix_abs8x8_x2 = pix_abs8x8_x2_c;
1612 pix_abs8x8_y2 = pix_abs8x8_y2_c; 1612 c->pix_abs8x8_y2 = pix_abs8x8_y2_c;
1613 pix_abs8x8_xy2 = pix_abs8x8_xy2_c; 1613 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
1614
1615 c->put_pixels_tab[0][0] = put_pixels16;
1616 c->put_pixels_tab[0][1] = put_pixels16_x2;
1617 c->put_pixels_tab[0][2] = put_pixels16_y2;
1618 c->put_pixels_tab[0][3] = put_pixels16_xy2;
1619
1620 c->put_no_rnd_pixels_tab[0][0] = put_pixels16;
1621 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2;
1622 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2;
1623 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2;
1624
1625 c->avg_pixels_tab[0][0] = avg_pixels16;
1626 c->avg_pixels_tab[0][1] = avg_pixels16_x2;
1627 c->avg_pixels_tab[0][2] = avg_pixels16_y2;
1628 c->avg_pixels_tab[0][3] = avg_pixels16_xy2;
1629
1630 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16;
1631 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2;
1632 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2;
1633 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2;
1634
1635 c->put_pixels_tab[1][0] = put_pixels8;
1636 c->put_pixels_tab[1][1] = put_pixels8_x2;
1637 c->put_pixels_tab[1][2] = put_pixels8_y2;
1638 c->put_pixels_tab[1][3] = put_pixels8_xy2;
1639
1640 c->put_no_rnd_pixels_tab[1][0] = put_pixels8;
1641 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2;
1642 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2;
1643 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2;
1644
1645 c->avg_pixels_tab[1][0] = avg_pixels8;
1646 c->avg_pixels_tab[1][1] = avg_pixels8_x2;
1647 c->avg_pixels_tab[1][2] = avg_pixels8_y2;
1648 c->avg_pixels_tab[1][3] = avg_pixels8_xy2;
1649
1650 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8;
1651 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2;
1652 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2;
1653 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2;
1614 1654
1615 #ifdef HAVE_MMX 1655 #ifdef HAVE_MMX
1616 dsputil_init_mmx(); 1656 dsputil_init_mmx(c, mask);
1617 #endif 1657 #endif
1618 #ifdef ARCH_ARMV4L 1658 #ifdef ARCH_ARMV4L
1619 dsputil_init_armv4l(); 1659 dsputil_init_armv4l(c, mask);
1620 #endif 1660 #endif
1621 #ifdef HAVE_MLIB 1661 #ifdef HAVE_MLIB
1622 dsputil_init_mlib(); 1662 dsputil_init_mlib(c, mask);
1623 #endif 1663 #endif
1624 #ifdef ARCH_ALPHA 1664 #ifdef ARCH_ALPHA
1625 dsputil_init_alpha(); 1665 dsputil_init_alpha(c, mask);
1626 #endif 1666 #endif
1627 #ifdef ARCH_POWERPC 1667 #ifdef ARCH_POWERPC
1628 dsputil_init_ppc(); 1668 dsputil_init_ppc(c, mask);
1629 #endif 1669 #endif
1630 #ifdef HAVE_MMI 1670 #ifdef HAVE_MMI
1631 dsputil_init_mmi(); 1671 dsputil_init_mmi(c, mask);
1632 #endif 1672 #endif
1633 1673
1634 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; 1674 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
1635 } 1675 }
1636 1676
1637 /* remove any non bit exact operation (testing purpose) */ 1677 /* remove any non bit exact operation (testing purpose) */
1638 void avcodec_set_bit_exact(void) 1678 void avcodec_set_bit_exact(void)
1639 { 1679 {
1640 ff_bit_exact=1; 1680 ff_bit_exact=1;
1641 #ifdef HAVE_MMX 1681 #ifdef HAVE_MMX
1642 dsputil_set_bit_exact_mmx(); 1682 #warning FIXME - set_bit_exact
1683 // dsputil_set_bit_exact_mmx();
1643 #endif 1684 #endif
1644 } 1685 }
1645 1686
1646 void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], 1687 void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
1647 int orig_linesize[3], int coded_linesize, 1688 int orig_linesize[3], int coded_linesize,