Mercurial repository: libavcodec.hg
File: ppc/dsputil_altivec.c — comparison view at changeset 2057:4c663228e020
Commit message: avg_pixels8_xy2_altivec in AltiVec, enabling avg_pixels8_altivec, hadamard fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
Author: michael
Date: Sat, 05 Jun 2004 22:29:37 +0000
Parent changeset: 31bf68b1792b
Child changeset: 4a0ec9031804
Diff: 2056:31bf68b1792b vs. 2057:4c663228e020
1645 } | 1645 } |
1646 } | 1646 } |
1647 #endif /* CONFIG_DARWIN */ | 1647 #endif /* CONFIG_DARWIN */ |
1648 return 0; | 1648 return 0; |
1649 } | 1649 } |
1650 | |
/* next one assumes that ((line_size % 8) == 0) */
/*
 * Half-pel interpolation in both x and y ("xy2") over an 8-pixel-wide
 * block, averaged into the existing contents of `block` (the "avg_"
 * motion-compensation primitive).
 *
 * block:     destination, read-modify-written (averaged, not overwritten)
 * pixels:    source, read at (x, y), (x+1, y), (x, y+1), (x+1, y+1)
 * line_size: stride in bytes for both block and pixels; must be a
 *            multiple of 8 (see comment above)
 * h:         number of rows to process
 */
void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
    /* Scalar reference path: processes the 8-pixel row as two 4-byte
     * columns (outer j loop), packing four pixels per uint32_t and doing
     * SWAR arithmetic on the low 2 bits (l0/l1) and high 6 bits (h0/h1)
     * separately so per-byte sums cannot overflow into neighbors. */
    int j;
POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
    for (j = 0; j < 2; j++) {
        int i;
        /* Unaligned 4-byte loads of row y at x and x+1. */
        const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
        const uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
        /* Low-bits partial sum; 0x02020202 is the +2 rounding bias for
         * the later >>2 (average of four samples, round to nearest). */
        uint32_t l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
        /* High-bits partial sum, pre-shifted so the >>2 is already done. */
        uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
        uint32_t l1, h1;
        pixels += line_size;
        /* Two rows per iteration: each output row reuses the partial sums
         * of the row above it (l0/h0 vs l1/h1 alternate roles).
         * NOTE(review): assumes h is even — TODO confirm against callers. */
        for (i = 0; i < h; i += 2) {
            uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
            uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
            l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
            h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
            /* 4-sample average of this row and the previous one, then
             * averaged into the existing destination via rnd_avg32. */
            *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
            pixels += line_size;
            block += line_size;
            a = (((const struct unaligned_32 *) (pixels))->l);
            b = (((const struct unaligned_32 *) (pixels + 1))->l);
            l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
            h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
            *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
            pixels += line_size;
            block += line_size;
        }
        /* Rewind to the top of the block and step 4 bytes right for the
         * second 4-pixel column. */
        pixels += 4 - line_size * (h + 1);
        block += 4 - line_size * h;
    }
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
    /* AltiVec path: the 8 source pixels of a row (at x and x+1) are
     * zero-extended to 16-bit lanes so two-row sums plus the rounding
     * bias fit without overflow; one destination row is produced per
     * loop iteration. */
    register int i;
    register vector unsigned char
        pixelsv1, pixelsv2,
        pixelsavg;
    register vector unsigned char
        blockv, temp1, temp2, blocktemp;
    register vector unsigned short
        pixelssum1, pixelssum2, temp3;
    register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
    /* vctwo serves double duty: the +2 rounding bias and the shift count
     * for the >>2 in vec_sra. */
    register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);

    /* Prologue: load row 0 once. Two 16-byte aligned loads plus vec_perm
     * with vec_lvsl realign the unaligned `pixels` and `pixels + 1`
     * starting points. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
    {
        /* pixels + 1 is exactly 16-byte aligned, so its data is simply
         * the second aligned load. */
        pixelsv2 = temp2;
    }
    else
    {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Zero-extend bytes to shorts (high half only: 8 pixels). */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    /* pixelssum1 = row(x) + row(x+1) + 2 : carried into the loop. */
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);

POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
    for (i = 0; i < h ; i++) {
        /* Which 8-byte half of the aligned 16-byte destination vector
         * this row occupies (line_size % 8 == 0 guarantees only these
         * two cases). */
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* Load the next source row (y+1) the same way as the prologue. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
        {
            pixelsv2 = temp2;
        }
        else
        {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (row y sum + 2) + row y+1 sum, then >>2: rounded 4-sample
         * average for this output row. */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        /* Carry this row's sum (re-biased with +2) into the next
         * iteration, so each source row is loaded only once. */
        pixelssum1 = vec_add(pixelssum2, vctwo);
        /* Saturating pack back to 8 unsigned bytes. */
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* Merge the 8 new bytes into the untouched half of the
         * destination vector before averaging, so the neighbor block is
         * preserved. */
        if (rightside)
        {
            blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        }
        else
        {
            blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        /* The "avg_" part: rounded average with the existing destination. */
        blockv = vec_avg(blocktemp, blockv);
        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }

POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}