Mercurial > libavcodec.hg
comparison dsputil.c @ 853:eacc2dd8fd9d libavcodec
* using DSPContext - so each codec could use its local (sub)set of CPU extensions
author | kabi |
---|---|
date | Mon, 11 Nov 2002 09:40:17 +0000 |
parents | d4726182dfd2 |
children | b510a7b6decd |
comparison
equal
deleted
inserted
replaced
852:c01c98206ee6 | 853:eacc2dd8fd9d |
---|---|
18 * | 18 * |
19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
20 */ | 20 */ |
21 #include "avcodec.h" | 21 #include "avcodec.h" |
22 #include "dsputil.h" | 22 #include "dsputil.h" |
23 | 23 /* |
24 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); | 24 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); |
25 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); | 25 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); |
26 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | 26 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
27 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | 27 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
28 void (*ff_gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); | 28 void (*ff_gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); |
39 | 39 |
40 op_pixels_abs_func pix_abs8x8; | 40 op_pixels_abs_func pix_abs8x8; |
41 op_pixels_abs_func pix_abs8x8_x2; | 41 op_pixels_abs_func pix_abs8x8_x2; |
42 op_pixels_abs_func pix_abs8x8_y2; | 42 op_pixels_abs_func pix_abs8x8_y2; |
43 op_pixels_abs_func pix_abs8x8_xy2; | 43 op_pixels_abs_func pix_abs8x8_xy2; |
44 | 44 */ |
45 int ff_bit_exact=0; | 45 int ff_bit_exact=0; |
46 | 46 |
47 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; | 47 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
48 UINT32 squareTbl[512]; | 48 UINT32 squareTbl[512]; |
49 | 49 |
82 53, 61, 22, 30, 7, 15, 23, 31, | 82 53, 61, 22, 30, 7, 15, 23, 31, |
83 38, 46, 54, 62, 39, 47, 55, 63, | 83 38, 46, 54, 62, 39, 47, 55, 63, |
84 }; | 84 }; |
85 | 85 |
86 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ | 86 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ |
87 UINT32 inverse[256]={ | 87 const UINT32 inverse[256]={ |
88 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, | 88 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, |
89 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | 89 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, |
90 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, | 90 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, |
91 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, | 91 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, |
92 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, | 92 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, |
117 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, | 117 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, |
118 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, | 118 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, |
119 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, | 119 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, |
120 }; | 120 }; |
121 | 121 |
122 int pix_sum_c(UINT8 * pix, int line_size) | 122 static int pix_sum_c(UINT8 * pix, int line_size) |
123 { | 123 { |
124 int s, i, j; | 124 int s, i, j; |
125 | 125 |
126 s = 0; | 126 s = 0; |
127 for (i = 0; i < 16; i++) { | 127 for (i = 0; i < 16; i++) { |
139 pix += line_size - 16; | 139 pix += line_size - 16; |
140 } | 140 } |
141 return s; | 141 return s; |
142 } | 142 } |
143 | 143 |
144 int pix_norm1_c(UINT8 * pix, int line_size) | 144 static int pix_norm1_c(UINT8 * pix, int line_size) |
145 { | 145 { |
146 int s, i, j; | 146 int s, i, j; |
147 UINT32 *sq = squareTbl + 256; | 147 UINT32 *sq = squareTbl + 256; |
148 | 148 |
149 s = 0; | 149 s = 0; |
163 } | 163 } |
164 return s; | 164 return s; |
165 } | 165 } |
166 | 166 |
167 | 167 |
168 void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) | 168 static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) |
169 { | 169 { |
170 int i; | 170 int i; |
171 | 171 |
172 /* read the pixels */ | 172 /* read the pixels */ |
173 for(i=0;i<8;i++) { | 173 for(i=0;i<8;i++) { |
182 pixels += line_size; | 182 pixels += line_size; |
183 block += 8; | 183 block += 8; |
184 } | 184 } |
185 } | 185 } |
186 | 186 |
187 void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2, | 187 static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, |
188 int stride){ | 188 const UINT8 *s2, int stride){ |
189 int i; | 189 int i; |
190 | 190 |
191 /* read the pixels */ | 191 /* read the pixels */ |
192 for(i=0;i<8;i++) { | 192 for(i=0;i<8;i++) { |
193 block[0] = s1[0] - s2[0]; | 193 block[0] = s1[0] - s2[0]; |
203 block += 8; | 203 block += 8; |
204 } | 204 } |
205 } | 205 } |
206 | 206 |
207 | 207 |
208 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, | 208 static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, |
209 int line_size) | 209 int line_size) |
210 { | 210 { |
211 int i; | 211 int i; |
212 UINT8 *cm = cropTbl + MAX_NEG_CROP; | 212 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
213 | 213 |
214 /* read the pixels */ | 214 /* read the pixels */ |
225 pixels += line_size; | 225 pixels += line_size; |
226 block += 8; | 226 block += 8; |
227 } | 227 } |
228 } | 228 } |
229 | 229 |
230 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, | 230 static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, |
231 int line_size) | 231 int line_size) |
232 { | 232 { |
233 int i; | 233 int i; |
234 UINT8 *cm = cropTbl + MAX_NEG_CROP; | 234 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
235 | 235 |
1351 #undef op_avg | 1351 #undef op_avg |
1352 #undef op_avg_no_rnd | 1352 #undef op_avg_no_rnd |
1353 #undef op_put | 1353 #undef op_put |
1354 #undef op_put_no_rnd | 1354 #undef op_put_no_rnd |
1355 | 1355 |
1356 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) | 1356 static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1357 { | 1357 { |
1358 int s, i; | 1358 int s, i; |
1359 | 1359 |
1360 s = 0; | 1360 s = 0; |
1361 for(i=0;i<16;i++) { | 1361 for(i=0;i<16;i++) { |
1379 pix2 += line_size; | 1379 pix2 += line_size; |
1380 } | 1380 } |
1381 return s; | 1381 return s; |
1382 } | 1382 } |
1383 | 1383 |
1384 int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | 1384 static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1385 { | 1385 { |
1386 int s, i; | 1386 int s, i; |
1387 | 1387 |
1388 s = 0; | 1388 s = 0; |
1389 for(i=0;i<16;i++) { | 1389 for(i=0;i<16;i++) { |
1407 pix2 += line_size; | 1407 pix2 += line_size; |
1408 } | 1408 } |
1409 return s; | 1409 return s; |
1410 } | 1410 } |
1411 | 1411 |
1412 int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | 1412 static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1413 { | 1413 { |
1414 int s, i; | 1414 int s, i; |
1415 UINT8 *pix3 = pix2 + line_size; | 1415 UINT8 *pix3 = pix2 + line_size; |
1416 | 1416 |
1417 s = 0; | 1417 s = 0; |
1437 pix3 += line_size; | 1437 pix3 += line_size; |
1438 } | 1438 } |
1439 return s; | 1439 return s; |
1440 } | 1440 } |
1441 | 1441 |
1442 int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | 1442 static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1443 { | 1443 { |
1444 int s, i; | 1444 int s, i; |
1445 UINT8 *pix3 = pix2 + line_size; | 1445 UINT8 *pix3 = pix2 + line_size; |
1446 | 1446 |
1447 s = 0; | 1447 s = 0; |
1467 pix3 += line_size; | 1467 pix3 += line_size; |
1468 } | 1468 } |
1469 return s; | 1469 return s; |
1470 } | 1470 } |
1471 | 1471 |
1472 int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) | 1472 static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1473 { | 1473 { |
1474 int s, i; | 1474 int s, i; |
1475 | 1475 |
1476 s = 0; | 1476 s = 0; |
1477 for(i=0;i<8;i++) { | 1477 for(i=0;i<8;i++) { |
1487 pix2 += line_size; | 1487 pix2 += line_size; |
1488 } | 1488 } |
1489 return s; | 1489 return s; |
1490 } | 1490 } |
1491 | 1491 |
1492 int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | 1492 static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1493 { | 1493 { |
1494 int s, i; | 1494 int s, i; |
1495 | 1495 |
1496 s = 0; | 1496 s = 0; |
1497 for(i=0;i<8;i++) { | 1497 for(i=0;i<8;i++) { |
1507 pix2 += line_size; | 1507 pix2 += line_size; |
1508 } | 1508 } |
1509 return s; | 1509 return s; |
1510 } | 1510 } |
1511 | 1511 |
1512 int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | 1512 static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1513 { | 1513 { |
1514 int s, i; | 1514 int s, i; |
1515 UINT8 *pix3 = pix2 + line_size; | 1515 UINT8 *pix3 = pix2 + line_size; |
1516 | 1516 |
1517 s = 0; | 1517 s = 0; |
1529 pix3 += line_size; | 1529 pix3 += line_size; |
1530 } | 1530 } |
1531 return s; | 1531 return s; |
1532 } | 1532 } |
1533 | 1533 |
1534 int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | 1534 static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1535 { | 1535 { |
1536 int s, i; | 1536 int s, i; |
1537 UINT8 *pix3 = pix2 + line_size; | 1537 UINT8 *pix3 = pix2 + line_size; |
1538 | 1538 |
1539 s = 0; | 1539 s = 0; |
1572 const int perm_j= permutation[j]; | 1572 const int perm_j= permutation[j]; |
1573 block[perm_j]= temp[j]; | 1573 block[perm_j]= temp[j]; |
1574 } | 1574 } |
1575 } | 1575 } |
1576 | 1576 |
1577 void clear_blocks_c(DCTELEM *blocks) | 1577 static void clear_blocks_c(DCTELEM *blocks) |
1578 { | 1578 { |
1579 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 1579 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
1580 } | 1580 } |
1581 | 1581 |
1582 void dsputil_init(void) | 1582 void dsputil_init(DSPContext* c, unsigned mask) |
1583 { | 1583 { |
1584 int i; | 1584 int i; |
1585 | 1585 |
1586 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; | 1586 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; |
1587 for(i=0;i<MAX_NEG_CROP;i++) { | 1587 for(i=0;i<MAX_NEG_CROP;i++) { |
1591 | 1591 |
1592 for(i=0;i<512;i++) { | 1592 for(i=0;i<512;i++) { |
1593 squareTbl[i] = (i - 256) * (i - 256); | 1593 squareTbl[i] = (i - 256) * (i - 256); |
1594 } | 1594 } |
1595 | 1595 |
1596 get_pixels = get_pixels_c; | 1596 c->get_pixels = get_pixels_c; |
1597 diff_pixels = diff_pixels_c; | 1597 c->diff_pixels = diff_pixels_c; |
1598 put_pixels_clamped = put_pixels_clamped_c; | 1598 c->put_pixels_clamped = put_pixels_clamped_c; |
1599 add_pixels_clamped = add_pixels_clamped_c; | 1599 c->add_pixels_clamped = add_pixels_clamped_c; |
1600 ff_gmc1= gmc1_c; | 1600 c->gmc1 = gmc1_c; |
1601 ff_gmc= gmc_c; | 1601 c->gmc = gmc_c; |
1602 clear_blocks= clear_blocks_c; | 1602 c->clear_blocks = clear_blocks_c; |
1603 pix_sum= pix_sum_c; | 1603 c->pix_sum = pix_sum_c; |
1604 pix_norm1= pix_norm1_c; | 1604 c->pix_norm1 = pix_norm1_c; |
1605 | 1605 |
1606 pix_abs16x16 = pix_abs16x16_c; | 1606 c->pix_abs16x16 = pix_abs16x16_c; |
1607 pix_abs16x16_x2 = pix_abs16x16_x2_c; | 1607 c->pix_abs16x16_x2 = pix_abs16x16_x2_c; |
1608 pix_abs16x16_y2 = pix_abs16x16_y2_c; | 1608 c->pix_abs16x16_y2 = pix_abs16x16_y2_c; |
1609 pix_abs16x16_xy2 = pix_abs16x16_xy2_c; | 1609 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c; |
1610 pix_abs8x8 = pix_abs8x8_c; | 1610 c->pix_abs8x8 = pix_abs8x8_c; |
1611 pix_abs8x8_x2 = pix_abs8x8_x2_c; | 1611 c->pix_abs8x8_x2 = pix_abs8x8_x2_c; |
1612 pix_abs8x8_y2 = pix_abs8x8_y2_c; | 1612 c->pix_abs8x8_y2 = pix_abs8x8_y2_c; |
1613 pix_abs8x8_xy2 = pix_abs8x8_xy2_c; | 1613 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c; |
1614 | |
1615 c->put_pixels_tab[0][0] = put_pixels16; | |
1616 c->put_pixels_tab[0][1] = put_pixels16_x2; | |
1617 c->put_pixels_tab[0][2] = put_pixels16_y2; | |
1618 c->put_pixels_tab[0][3] = put_pixels16_xy2; | |
1619 | |
1620 c->put_no_rnd_pixels_tab[0][0] = put_pixels16; | |
1621 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2; | |
1622 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2; | |
1623 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2; | |
1624 | |
1625 c->avg_pixels_tab[0][0] = avg_pixels16; | |
1626 c->avg_pixels_tab[0][1] = avg_pixels16_x2; | |
1627 c->avg_pixels_tab[0][2] = avg_pixels16_y2; | |
1628 c->avg_pixels_tab[0][3] = avg_pixels16_xy2; | |
1629 | |
1630 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16; | |
1631 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2; | |
1632 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2; | |
1633 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2; | |
1634 | |
1635 c->put_pixels_tab[1][0] = put_pixels8; | |
1636 c->put_pixels_tab[1][1] = put_pixels8_x2; | |
1637 c->put_pixels_tab[1][2] = put_pixels8_y2; | |
1638 c->put_pixels_tab[1][3] = put_pixels8_xy2; | |
1639 | |
1640 c->put_no_rnd_pixels_tab[1][0] = put_pixels8; | |
1641 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2; | |
1642 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2; | |
1643 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2; | |
1644 | |
1645 c->avg_pixels_tab[1][0] = avg_pixels8; | |
1646 c->avg_pixels_tab[1][1] = avg_pixels8_x2; | |
1647 c->avg_pixels_tab[1][2] = avg_pixels8_y2; | |
1648 c->avg_pixels_tab[1][3] = avg_pixels8_xy2; | |
1649 | |
1650 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8; | |
1651 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2; | |
1652 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2; | |
1653 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2; | |
1614 | 1654 |
1615 #ifdef HAVE_MMX | 1655 #ifdef HAVE_MMX |
1616 dsputil_init_mmx(); | 1656 dsputil_init_mmx(c, mask); |
1617 #endif | 1657 #endif |
1618 #ifdef ARCH_ARMV4L | 1658 #ifdef ARCH_ARMV4L |
1619 dsputil_init_armv4l(); | 1659 dsputil_init_armv4l(c, mask); |
1620 #endif | 1660 #endif |
1621 #ifdef HAVE_MLIB | 1661 #ifdef HAVE_MLIB |
1622 dsputil_init_mlib(); | 1662 dsputil_init_mlib(c, mask); |
1623 #endif | 1663 #endif |
1624 #ifdef ARCH_ALPHA | 1664 #ifdef ARCH_ALPHA |
1625 dsputil_init_alpha(); | 1665 dsputil_init_alpha(c, mask); |
1626 #endif | 1666 #endif |
1627 #ifdef ARCH_POWERPC | 1667 #ifdef ARCH_POWERPC |
1628 dsputil_init_ppc(); | 1668 dsputil_init_ppc(c, mask); |
1629 #endif | 1669 #endif |
1630 #ifdef HAVE_MMI | 1670 #ifdef HAVE_MMI |
1631 dsputil_init_mmi(); | 1671 dsputil_init_mmi(c, mask); |
1632 #endif | 1672 #endif |
1633 | 1673 |
1634 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; | 1674 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; |
1635 } | 1675 } |
1636 | 1676 |
1637 /* remove any non bit exact operation (testing purpose) */ | 1677 /* remove any non bit exact operation (testing purpose) */ |
1638 void avcodec_set_bit_exact(void) | 1678 void avcodec_set_bit_exact(void) |
1639 { | 1679 { |
1640 ff_bit_exact=1; | 1680 ff_bit_exact=1; |
1641 #ifdef HAVE_MMX | 1681 #ifdef HAVE_MMX |
1642 dsputil_set_bit_exact_mmx(); | 1682 #warning FIXME - set_bit_exact |
1683 // dsputil_set_bit_exact_mmx(); | |
1643 #endif | 1684 #endif |
1644 } | 1685 } |
1645 | 1686 |
1646 void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], | 1687 void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], |
1647 int orig_linesize[3], int coded_linesize, | 1688 int orig_linesize[3], int coded_linesize, |