comparison ppc/dsputil_altivec.c @ 3554:ce5554dd79ce libavcodec

Cosmetics: 2->4 spaces and some braces
author lu_zero
date Sun, 06 Aug 2006 23:15:32 +0000
parents 4f4c13574ad5
children d5f97ae4f24f
comparison
equal deleted inserted replaced
3553:a542b0325239 3554:ce5554dd79ce
1308 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 1308 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1309 } 1309 }
1310 1310
1311 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ 1311 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1312 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); 1312 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
1313 int sum; 1313 int sum;
1314 register const_vector unsigned char vzero = (const_vector unsigned char)vec_splat_u8(0); 1314 register const_vector unsigned char vzero =
1315 register vector signed short temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 1315 (const_vector unsigned char)vec_splat_u8(0);
1316 register vector signed short temp0, temp1, temp2, temp3, temp4,
1317 temp5, temp6, temp7;
1316 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); 1318 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
1317 { 1319 {
1318 register const_vector signed short vprod1 = (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); 1320 register const_vector signed short vprod1 =(const_vector signed short)
1319 register const_vector signed short vprod2 = (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); 1321 AVV( 1,-1, 1,-1, 1,-1, 1,-1);
1320 register const_vector signed short vprod3 = (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); 1322 register const_vector signed short vprod2 =(const_vector signed short)
1323 AVV( 1, 1,-1,-1, 1, 1,-1,-1);
1324 register const_vector signed short vprod3 =(const_vector signed short)
1325 AVV( 1, 1, 1, 1,-1,-1,-1,-1);
1321 register const_vector unsigned char perm1 = (const_vector unsigned char) 1326 register const_vector unsigned char perm1 = (const_vector unsigned char)
1322 AVV(0x02, 0x03, 0x00, 0x01, 1327 AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
1323 0x06, 0x07, 0x04, 0x05, 1328 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
1324 0x0A, 0x0B, 0x08, 0x09,
1325 0x0E, 0x0F, 0x0C, 0x0D);
1326 register const_vector unsigned char perm2 = (const_vector unsigned char) 1329 register const_vector unsigned char perm2 = (const_vector unsigned char)
1327 AVV(0x04, 0x05, 0x06, 0x07, 1330 AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
1328 0x00, 0x01, 0x02, 0x03, 1331 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
1329 0x0C, 0x0D, 0x0E, 0x0F,
1330 0x08, 0x09, 0x0A, 0x0B);
1331 register const_vector unsigned char perm3 = (const_vector unsigned char) 1332 register const_vector unsigned char perm3 = (const_vector unsigned char)
1332 AVV(0x08, 0x09, 0x0A, 0x0B, 1333 AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
1333 0x0C, 0x0D, 0x0E, 0x0F, 1334 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
1334 0x00, 0x01, 0x02, 0x03,
1335 0x04, 0x05, 0x06, 0x07);
1336 1335
1337 #define ONEITERBUTTERFLY(i, res) \ 1336 #define ONEITERBUTTERFLY(i, res) \
1338 { \ 1337 { \
1339 register vector unsigned char src1, src2, srcO; \ 1338 register vector unsigned char src1, src2, srcO; \
1340 register vector unsigned char dst1, dst2, dstO; \ 1339 register vector unsigned char dst1, dst2, dstO; \
1441 vs. around 780), but xlc goes to around 660 on the 1440 vs. around 780), but xlc goes to around 660 on the
1442 regular C code... 1441 regular C code...
1443 */ 1442 */
1444 1443
1445 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { 1444 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) {
1446 int sum; 1445 int sum;
1447 register vector signed short 1446 register vector signed short
1448 temp0 REG_v(v0), 1447 temp0 REG_v(v0),
1449 temp1 REG_v(v1), 1448 temp1 REG_v(v1),
1450 temp2 REG_v(v2), 1449 temp2 REG_v(v2),
1451 temp3 REG_v(v3), 1450 temp3 REG_v(v3),
1452 temp4 REG_v(v4), 1451 temp4 REG_v(v4),
1453 temp5 REG_v(v5), 1452 temp5 REG_v(v5),
1454 temp6 REG_v(v6), 1453 temp6 REG_v(v6),
1455 temp7 REG_v(v7); 1454 temp7 REG_v(v7);
1456 register vector signed short 1455 register vector signed short
1457 temp0S REG_v(v8), 1456 temp0S REG_v(v8),
1458 temp1S REG_v(v9), 1457 temp1S REG_v(v9),
1459 temp2S REG_v(v10), 1458 temp2S REG_v(v10),
1460 temp3S REG_v(v11), 1459 temp3S REG_v(v11),
1461 temp4S REG_v(v12), 1460 temp4S REG_v(v12),
1462 temp5S REG_v(v13), 1461 temp5S REG_v(v13),
1463 temp6S REG_v(v14), 1462 temp6S REG_v(v14),
1464 temp7S REG_v(v15); 1463 temp7S REG_v(v15);
1465 register const_vector unsigned char vzero REG_v(v31)= (const_vector unsigned char)vec_splat_u8(0); 1464 register const_vector unsigned char vzero REG_v(v31)=
1465 (const_vector unsigned char)vec_splat_u8(0);
1466 { 1466 {
1467 register const_vector signed short vprod1 REG_v(v16)= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); 1467 register const_vector signed short vprod1 REG_v(v16)=
1468 register const_vector signed short vprod2 REG_v(v17)= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); 1468 (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
1469 register const_vector signed short vprod3 REG_v(v18)= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); 1469 register const_vector signed short vprod2 REG_v(v17)=
1470 register const_vector unsigned char perm1 REG_v(v19)= (const_vector unsigned char) 1470 (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
1471 AVV(0x02, 0x03, 0x00, 0x01, 1471 register const_vector signed short vprod3 REG_v(v18)=
1472 0x06, 0x07, 0x04, 0x05, 1472 (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
1473 0x0A, 0x0B, 0x08, 0x09, 1473 register const_vector unsigned char perm1 REG_v(v19)=
1474 0x0E, 0x0F, 0x0C, 0x0D); 1474 (const_vector unsigned char)
1475 register const_vector unsigned char perm2 REG_v(v20)= (const_vector unsigned char) 1475 AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
1476 AVV(0x04, 0x05, 0x06, 0x07, 1476 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
1477 0x00, 0x01, 0x02, 0x03, 1477 register const_vector unsigned char perm2 REG_v(v20)=
1478 0x0C, 0x0D, 0x0E, 0x0F, 1478 (const_vector unsigned char)
1479 0x08, 0x09, 0x0A, 0x0B); 1479 AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
1480 register const_vector unsigned char perm3 REG_v(v21)= (const_vector unsigned char) 1480 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
1481 AVV(0x08, 0x09, 0x0A, 0x0B, 1481 register const_vector unsigned char perm3 REG_v(v21)=
1482 0x0C, 0x0D, 0x0E, 0x0F, 1482 (const_vector unsigned char)
1483 0x00, 0x01, 0x02, 0x03, 1483 AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
1484 0x04, 0x05, 0x06, 0x07); 1484 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
1485 1485
1486 #define ONEITERBUTTERFLY(i, res1, res2) \ 1486 #define ONEITERBUTTERFLY(i, res1, res2) \
1487 { \ 1487 { \
1488 register vector unsigned char src1 REG_v(v22), \ 1488 register vector unsigned char src1 REG_v(v22), \
1489 src2 REG_v(v23), \ 1489 src2 REG_v(v23), \
1640 return sum; 1640 return sum;
1641 } 1641 }
1642 1642
1643 int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ 1643 int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1644 POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1); 1644 POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1);
1645 int score; 1645 int score;
1646 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1); 1646 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1);
1647 score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8); 1647 score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
1648 if (h==16) { 1648 if (h==16) {
1649 dst += 8*stride; 1649 dst += 8*stride;
1650 src += 8*stride; 1650 src += 8*stride;
1651 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); 1651 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
1652 } 1652 }
1653 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); 1653 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
1654 return score; 1654 return score;
1655 } 1655 }
1656 1656
1657 int has_altivec(void) 1657 int has_altivec(void)
1658 { 1658 {
1659 #ifdef __AMIGAOS4__ 1659 #ifdef __AMIGAOS4__
1660 ULONG result = 0; 1660 ULONG result = 0;
1661 extern struct ExecIFace *IExec; 1661 extern struct ExecIFace *IExec;
1662 1662
1663 IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE); 1663 IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
1664 if (result == VECTORTYPE_ALTIVEC) return 1; 1664 if (result == VECTORTYPE_ALTIVEC) return 1;
1665 return 0; 1665 return 0;
1666 #else /* __AMIGAOS4__ */ 1666 #else /* __AMIGAOS4__ */
1667 1667
1668 #ifdef CONFIG_DARWIN 1668 #ifdef CONFIG_DARWIN
1669 int sels[2] = {CTL_HW, HW_VECTORUNIT}; 1669 int sels[2] = {CTL_HW, HW_VECTORUNIT};
1670 int has_vu = 0; 1670 int has_vu = 0;
1755 } pixels += 4 - line_size * (h + 1); 1755 } pixels += 4 - line_size * (h + 1);
1756 block += 4 - line_size * h; 1756 block += 4 - line_size * h;
1757 } 1757 }
1758 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); 1758 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
1759 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 1759 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
1760 register int i; 1760 register int i;
1761 register vector unsigned char 1761 register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
1762 pixelsv1, pixelsv2, 1762 register vector unsigned char blockv, temp1, temp2, blocktemp;
1763 pixelsavg; 1763 register vector unsigned short pixelssum1, pixelssum2, temp3;
1764 register vector unsigned char 1764
1765 blockv, temp1, temp2, blocktemp; 1765 register const_vector unsigned char vczero = (const_vector unsigned char)
1766 register vector unsigned short 1766 vec_splat_u8(0);
1767 pixelssum1, pixelssum2, temp3; 1767 register const_vector unsigned short vctwo = (const_vector unsigned short)
1768 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); 1768 vec_splat_u16(2);
1769 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); 1769
1770 1770 temp1 = vec_ld(0, pixels);
1771 temp1 = vec_ld(0, pixels); 1771 temp2 = vec_ld(16, pixels);
1772 temp2 = vec_ld(16, pixels); 1772 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
1773 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); 1773 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
1774 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) 1774 pixelsv2 = temp2;
1775 { 1775 } else {
1776 pixelsv2 = temp2; 1776 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
1777 } 1777 }
1778 else 1778 pixelsv1 = vec_mergeh(vczero, pixelsv1);
1779 { 1779 pixelsv2 = vec_mergeh(vczero, pixelsv2);
1780 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); 1780 pixelssum1 = vec_add((vector unsigned short)pixelsv1,
1781 } 1781 (vector unsigned short)pixelsv2);
1782 pixelsv1 = vec_mergeh(vczero, pixelsv1); 1782 pixelssum1 = vec_add(pixelssum1, vctwo);
1783 pixelsv2 = vec_mergeh(vczero, pixelsv2);
1784 pixelssum1 = vec_add((vector unsigned short)pixelsv1,
1785 (vector unsigned short)pixelsv2);
1786 pixelssum1 = vec_add(pixelssum1, vctwo);
1787 1783
1788 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); 1784 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
1789 for (i = 0; i < h ; i++) { 1785 for (i = 0; i < h ; i++) {
1790 int rightside = ((unsigned long)block & 0x0000000F); 1786 int rightside = ((unsigned long)block & 0x0000000F);
1791 blockv = vec_ld(0, block); 1787 blockv = vec_ld(0, block);
1792 1788
1793 temp1 = vec_ld(line_size, pixels); 1789 temp1 = vec_ld(line_size, pixels);
1794 temp2 = vec_ld(line_size + 16, pixels); 1790 temp2 = vec_ld(line_size + 16, pixels);
1795 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); 1791 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
1796 if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) 1792 if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
1797 { 1793 {
1798 pixelsv2 = temp2; 1794 pixelsv2 = temp2;
1799 } 1795 } else {
1800 else 1796 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
1801 { 1797 }
1802 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); 1798
1803 } 1799 pixelsv1 = vec_mergeh(vczero, pixelsv1);
1804 1800 pixelsv2 = vec_mergeh(vczero, pixelsv2);
1805 pixelsv1 = vec_mergeh(vczero, pixelsv1); 1801 pixelssum2 = vec_add((vector unsigned short)pixelsv1,
1806 pixelsv2 = vec_mergeh(vczero, pixelsv2); 1802 (vector unsigned short)pixelsv2);
1807 pixelssum2 = vec_add((vector unsigned short)pixelsv1, 1803 temp3 = vec_add(pixelssum1, pixelssum2);
1808 (vector unsigned short)pixelsv2); 1804 temp3 = vec_sra(temp3, vctwo);
1809 temp3 = vec_add(pixelssum1, pixelssum2); 1805 pixelssum1 = vec_add(pixelssum2, vctwo);
1810 temp3 = vec_sra(temp3, vctwo); 1806 pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
1811 pixelssum1 = vec_add(pixelssum2, vctwo); 1807
1812 pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); 1808 if (rightside) {
1813 1809 blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
1814 if (rightside) 1810 } else {
1815 { 1811 blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
1816 blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); 1812 }
1817 } 1813
1818 else 1814 blockv = vec_avg(blocktemp, blockv);
1819 { 1815 vec_st(blockv, 0, block);
1820 blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); 1816
1821 } 1817 block += line_size;
1822 1818 pixels += line_size;
1823 blockv = vec_avg(blocktemp, blockv); 1819 }
1824 vec_st(blockv, 0, block);
1825
1826 block += line_size;
1827 pixels += line_size;
1828 }
1829 1820
1830 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); 1821 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
1831 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 1822 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1832 } 1823 }
1833 1824