Mercurial > libavcodec.hg
comparison ppc/dsputil_altivec.c @ 3554:ce5554dd79ce libavcodec
Cosmetics: reindent from 2 to 4 spaces and reformat some braces
author | lu_zero |
---|---|
date | Sun, 06 Aug 2006 23:15:32 +0000 |
parents | 4f4c13574ad5 |
children | d5f97ae4f24f |
comparison legend:
- equal
- deleted
- inserted
- replaced
3553:a542b0325239 | 3554:ce5554dd79ce |
---|---|
1308 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | 1308 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
1309 } | 1309 } |
1310 | 1310 |
1311 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ | 1311 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ |
1312 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); | 1312 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); |
1313 int sum; | 1313 int sum; |
1314 register const_vector unsigned char vzero = (const_vector unsigned char)vec_splat_u8(0); | 1314 register const_vector unsigned char vzero = |
1315 register vector signed short temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; | 1315 (const_vector unsigned char)vec_splat_u8(0); |
1316 register vector signed short temp0, temp1, temp2, temp3, temp4, | |
1317 temp5, temp6, temp7; | |
1316 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); | 1318 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); |
1317 { | 1319 { |
1318 register const_vector signed short vprod1 = (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); | 1320 register const_vector signed short vprod1 =(const_vector signed short) |
1319 register const_vector signed short vprod2 = (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); | 1321 AVV( 1,-1, 1,-1, 1,-1, 1,-1); |
1320 register const_vector signed short vprod3 = (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); | 1322 register const_vector signed short vprod2 =(const_vector signed short) |
1323 AVV( 1, 1,-1,-1, 1, 1,-1,-1); | |
1324 register const_vector signed short vprod3 =(const_vector signed short) | |
1325 AVV( 1, 1, 1, 1,-1,-1,-1,-1); | |
1321 register const_vector unsigned char perm1 = (const_vector unsigned char) | 1326 register const_vector unsigned char perm1 = (const_vector unsigned char) |
1322 AVV(0x02, 0x03, 0x00, 0x01, | 1327 AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, |
1323 0x06, 0x07, 0x04, 0x05, | 1328 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D); |
1324 0x0A, 0x0B, 0x08, 0x09, | |
1325 0x0E, 0x0F, 0x0C, 0x0D); | |
1326 register const_vector unsigned char perm2 = (const_vector unsigned char) | 1329 register const_vector unsigned char perm2 = (const_vector unsigned char) |
1327 AVV(0x04, 0x05, 0x06, 0x07, | 1330 AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, |
1328 0x00, 0x01, 0x02, 0x03, | 1331 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B); |
1329 0x0C, 0x0D, 0x0E, 0x0F, | |
1330 0x08, 0x09, 0x0A, 0x0B); | |
1331 register const_vector unsigned char perm3 = (const_vector unsigned char) | 1332 register const_vector unsigned char perm3 = (const_vector unsigned char) |
1332 AVV(0x08, 0x09, 0x0A, 0x0B, | 1333 AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, |
1333 0x0C, 0x0D, 0x0E, 0x0F, | 1334 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); |
1334 0x00, 0x01, 0x02, 0x03, | |
1335 0x04, 0x05, 0x06, 0x07); | |
1336 | 1335 |
1337 #define ONEITERBUTTERFLY(i, res) \ | 1336 #define ONEITERBUTTERFLY(i, res) \ |
1338 { \ | 1337 { \ |
1339 register vector unsigned char src1, src2, srcO; \ | 1338 register vector unsigned char src1, src2, srcO; \ |
1340 register vector unsigned char dst1, dst2, dstO; \ | 1339 register vector unsigned char dst1, dst2, dstO; \ |
1441 vs. around 780), but xlc goes to around 660 on the | 1440 vs. around 780), but xlc goes to around 660 on the |
1442 regular C code... | 1441 regular C code... |
1443 */ | 1442 */ |
1444 | 1443 |
1445 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { | 1444 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { |
1446 int sum; | 1445 int sum; |
1447 register vector signed short | 1446 register vector signed short |
1448 temp0 REG_v(v0), | 1447 temp0 REG_v(v0), |
1449 temp1 REG_v(v1), | 1448 temp1 REG_v(v1), |
1450 temp2 REG_v(v2), | 1449 temp2 REG_v(v2), |
1451 temp3 REG_v(v3), | 1450 temp3 REG_v(v3), |
1452 temp4 REG_v(v4), | 1451 temp4 REG_v(v4), |
1453 temp5 REG_v(v5), | 1452 temp5 REG_v(v5), |
1454 temp6 REG_v(v6), | 1453 temp6 REG_v(v6), |
1455 temp7 REG_v(v7); | 1454 temp7 REG_v(v7); |
1456 register vector signed short | 1455 register vector signed short |
1457 temp0S REG_v(v8), | 1456 temp0S REG_v(v8), |
1458 temp1S REG_v(v9), | 1457 temp1S REG_v(v9), |
1459 temp2S REG_v(v10), | 1458 temp2S REG_v(v10), |
1460 temp3S REG_v(v11), | 1459 temp3S REG_v(v11), |
1461 temp4S REG_v(v12), | 1460 temp4S REG_v(v12), |
1462 temp5S REG_v(v13), | 1461 temp5S REG_v(v13), |
1463 temp6S REG_v(v14), | 1462 temp6S REG_v(v14), |
1464 temp7S REG_v(v15); | 1463 temp7S REG_v(v15); |
1465 register const_vector unsigned char vzero REG_v(v31)= (const_vector unsigned char)vec_splat_u8(0); | 1464 register const_vector unsigned char vzero REG_v(v31)= |
1465 (const_vector unsigned char)vec_splat_u8(0); | |
1466 { | 1466 { |
1467 register const_vector signed short vprod1 REG_v(v16)= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); | 1467 register const_vector signed short vprod1 REG_v(v16)= |
1468 register const_vector signed short vprod2 REG_v(v17)= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); | 1468 (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); |
1469 register const_vector signed short vprod3 REG_v(v18)= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); | 1469 register const_vector signed short vprod2 REG_v(v17)= |
1470 register const_vector unsigned char perm1 REG_v(v19)= (const_vector unsigned char) | 1470 (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); |
1471 AVV(0x02, 0x03, 0x00, 0x01, | 1471 register const_vector signed short vprod3 REG_v(v18)= |
1472 0x06, 0x07, 0x04, 0x05, | 1472 (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); |
1473 0x0A, 0x0B, 0x08, 0x09, | 1473 register const_vector unsigned char perm1 REG_v(v19)= |
1474 0x0E, 0x0F, 0x0C, 0x0D); | 1474 (const_vector unsigned char) |
1475 register const_vector unsigned char perm2 REG_v(v20)= (const_vector unsigned char) | 1475 AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, |
1476 AVV(0x04, 0x05, 0x06, 0x07, | 1476 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D); |
1477 0x00, 0x01, 0x02, 0x03, | 1477 register const_vector unsigned char perm2 REG_v(v20)= |
1478 0x0C, 0x0D, 0x0E, 0x0F, | 1478 (const_vector unsigned char) |
1479 0x08, 0x09, 0x0A, 0x0B); | 1479 AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, |
1480 register const_vector unsigned char perm3 REG_v(v21)= (const_vector unsigned char) | 1480 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B); |
1481 AVV(0x08, 0x09, 0x0A, 0x0B, | 1481 register const_vector unsigned char perm3 REG_v(v21)= |
1482 0x0C, 0x0D, 0x0E, 0x0F, | 1482 (const_vector unsigned char) |
1483 0x00, 0x01, 0x02, 0x03, | 1483 AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, |
1484 0x04, 0x05, 0x06, 0x07); | 1484 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); |
1485 | 1485 |
1486 #define ONEITERBUTTERFLY(i, res1, res2) \ | 1486 #define ONEITERBUTTERFLY(i, res1, res2) \ |
1487 { \ | 1487 { \ |
1488 register vector unsigned char src1 REG_v(v22), \ | 1488 register vector unsigned char src1 REG_v(v22), \ |
1489 src2 REG_v(v23), \ | 1489 src2 REG_v(v23), \ |
1640 return sum; | 1640 return sum; |
1641 } | 1641 } |
1642 | 1642 |
1643 int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ | 1643 int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ |
1644 POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1); | 1644 POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1); |
1645 int score; | 1645 int score; |
1646 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1); | 1646 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1); |
1647 score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8); | 1647 score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8); |
1648 if (h==16) { | 1648 if (h==16) { |
1649 dst += 8*stride; | 1649 dst += 8*stride; |
1650 src += 8*stride; | 1650 src += 8*stride; |
1651 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); | 1651 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); |
1652 } | 1652 } |
1653 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); | 1653 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); |
1654 return score; | 1654 return score; |
1655 } | 1655 } |
1656 | 1656 |
1657 int has_altivec(void) | 1657 int has_altivec(void) |
1658 { | 1658 { |
1659 #ifdef __AMIGAOS4__ | 1659 #ifdef __AMIGAOS4__ |
1660 ULONG result = 0; | 1660 ULONG result = 0; |
1661 extern struct ExecIFace *IExec; | 1661 extern struct ExecIFace *IExec; |
1662 | 1662 |
1663 IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE); | 1663 IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE); |
1664 if (result == VECTORTYPE_ALTIVEC) return 1; | 1664 if (result == VECTORTYPE_ALTIVEC) return 1; |
1665 return 0; | 1665 return 0; |
1666 #else /* __AMIGAOS4__ */ | 1666 #else /* __AMIGAOS4__ */ |
1667 | 1667 |
1668 #ifdef CONFIG_DARWIN | 1668 #ifdef CONFIG_DARWIN |
1669 int sels[2] = {CTL_HW, HW_VECTORUNIT}; | 1669 int sels[2] = {CTL_HW, HW_VECTORUNIT}; |
1670 int has_vu = 0; | 1670 int has_vu = 0; |
1755 } pixels += 4 - line_size * (h + 1); | 1755 } pixels += 4 - line_size * (h + 1); |
1756 block += 4 - line_size * h; | 1756 block += 4 - line_size * h; |
1757 } | 1757 } |
1758 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); | 1758 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); |
1759 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | 1759 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ |
1760 register int i; | 1760 register int i; |
1761 register vector unsigned char | 1761 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; |
1762 pixelsv1, pixelsv2, | 1762 register vector unsigned char blockv, temp1, temp2, blocktemp; |
1763 pixelsavg; | 1763 register vector unsigned short pixelssum1, pixelssum2, temp3; |
1764 register vector unsigned char | 1764 |
1765 blockv, temp1, temp2, blocktemp; | 1765 register const_vector unsigned char vczero = (const_vector unsigned char) |
1766 register vector unsigned short | 1766 vec_splat_u8(0); |
1767 pixelssum1, pixelssum2, temp3; | 1767 register const_vector unsigned short vctwo = (const_vector unsigned short) |
1768 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); | 1768 vec_splat_u16(2); |
1769 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); | 1769 |
1770 | 1770 temp1 = vec_ld(0, pixels); |
1771 temp1 = vec_ld(0, pixels); | 1771 temp2 = vec_ld(16, pixels); |
1772 temp2 = vec_ld(16, pixels); | 1772 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); |
1773 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); | 1773 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) { |
1774 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) | 1774 pixelsv2 = temp2; |
1775 { | 1775 } else { |
1776 pixelsv2 = temp2; | 1776 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); |
1777 } | 1777 } |
1778 else | 1778 pixelsv1 = vec_mergeh(vczero, pixelsv1); |
1779 { | 1779 pixelsv2 = vec_mergeh(vczero, pixelsv2); |
1780 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); | 1780 pixelssum1 = vec_add((vector unsigned short)pixelsv1, |
1781 } | 1781 (vector unsigned short)pixelsv2); |
1782 pixelsv1 = vec_mergeh(vczero, pixelsv1); | 1782 pixelssum1 = vec_add(pixelssum1, vctwo); |
1783 pixelsv2 = vec_mergeh(vczero, pixelsv2); | |
1784 pixelssum1 = vec_add((vector unsigned short)pixelsv1, | |
1785 (vector unsigned short)pixelsv2); | |
1786 pixelssum1 = vec_add(pixelssum1, vctwo); | |
1787 | 1783 |
1788 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); | 1784 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); |
1789 for (i = 0; i < h ; i++) { | 1785 for (i = 0; i < h ; i++) { |
1790 int rightside = ((unsigned long)block & 0x0000000F); | 1786 int rightside = ((unsigned long)block & 0x0000000F); |
1791 blockv = vec_ld(0, block); | 1787 blockv = vec_ld(0, block); |
1792 | 1788 |
1793 temp1 = vec_ld(line_size, pixels); | 1789 temp1 = vec_ld(line_size, pixels); |
1794 temp2 = vec_ld(line_size + 16, pixels); | 1790 temp2 = vec_ld(line_size + 16, pixels); |
1795 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); | 1791 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); |
1796 if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) | 1792 if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) |
1797 { | 1793 { |
1798 pixelsv2 = temp2; | 1794 pixelsv2 = temp2; |
1799 } | 1795 } else { |
1800 else | 1796 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); |
1801 { | 1797 } |
1802 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); | 1798 |
1803 } | 1799 pixelsv1 = vec_mergeh(vczero, pixelsv1); |
1804 | 1800 pixelsv2 = vec_mergeh(vczero, pixelsv2); |
1805 pixelsv1 = vec_mergeh(vczero, pixelsv1); | 1801 pixelssum2 = vec_add((vector unsigned short)pixelsv1, |
1806 pixelsv2 = vec_mergeh(vczero, pixelsv2); | 1802 (vector unsigned short)pixelsv2); |
1807 pixelssum2 = vec_add((vector unsigned short)pixelsv1, | 1803 temp3 = vec_add(pixelssum1, pixelssum2); |
1808 (vector unsigned short)pixelsv2); | 1804 temp3 = vec_sra(temp3, vctwo); |
1809 temp3 = vec_add(pixelssum1, pixelssum2); | 1805 pixelssum1 = vec_add(pixelssum2, vctwo); |
1810 temp3 = vec_sra(temp3, vctwo); | 1806 pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); |
1811 pixelssum1 = vec_add(pixelssum2, vctwo); | 1807 |
1812 pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); | 1808 if (rightside) { |
1813 | 1809 blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); |
1814 if (rightside) | 1810 } else { |
1815 { | 1811 blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); |
1816 blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); | 1812 } |
1817 } | 1813 |
1818 else | 1814 blockv = vec_avg(blocktemp, blockv); |
1819 { | 1815 vec_st(blockv, 0, block); |
1820 blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); | 1816 |
1821 } | 1817 block += line_size; |
1822 | 1818 pixels += line_size; |
1823 blockv = vec_avg(blocktemp, blockv); | 1819 } |
1824 vec_st(blockv, 0, block); | |
1825 | |
1826 block += line_size; | |
1827 pixels += line_size; | |
1828 } | |
1829 | 1820 |
1830 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); | 1821 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); |
1831 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | 1822 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
1832 } | 1823 } |
1833 | 1824 |