comparison ppc/dsputil_altivec.c @ 2057:4c663228e020 libavcodec

avg_pixels8_xy2_altivec in AltiVec, enabling avg_pixels8_altivec, hadamard fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
author michael
date Sat, 05 Jun 2004 22:29:37 +0000
parents 31bf68b1792b
children 4a0ec9031804
comparison
equal deleted inserted replaced
2056:31bf68b1792b 2057:4c663228e020
1645 } 1645 }
1646 } 1646 }
1647 #endif /* CONFIG_DARWIN */ 1647 #endif /* CONFIG_DARWIN */
1648 return 0; 1648 return 0;
1649 } 1649 }
1650
/*
 * avg_pixels8_xy2_altivec
 *
 * For each of h rows, takes 8 horizontally adjacent source bytes and computes,
 * per output byte x, the 2x2 neighbourhood value
 *     (p[x] + p[x + 1] + p[x + line_size] + p[x + line_size + 1] + 2) >> 2
 * and then averages that value with the byte already stored in block
 * (rnd_avg32 in the reference path, vec_avg in the AltiVec path).
 *
 * block     - destination; read, modified and written back row by row.
 * pixels    - source; rows 0..h and columns 0..8 are read (one extra row and
 *             one extra column beyond the 8 x h output).
 * line_size - byte stride used for both block and pixels.
 * h         - number of output rows.
 *
 * Two implementations are selected at compile time by
 * ALTIVEC_USE_REFERENCE_C_CODE: a portable 32-bit scalar version and the
 * AltiVec vector version. The POWERPC_PERF_* macros are defined elsewhere;
 * presumably they drive optional performance counters (verify in the header).
 */
1651 /* next one assumes that ((line_size % 8) == 0) */
1652 void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
1653 {
1654 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
1655 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
1656
/* Reference path: the 8 output bytes are handled as two 32-bit words
 * (j = 0 and j = 1), i.e. four bytes at a time. */
1657 int j;
1658 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
1659 for (j = 0; j < 2; j++) {
1660 int i;
/* a = four bytes at x, b = four bytes at x + 1 of the top row.
 * struct unaligned_32 is declared elsewhere; presumably it permits a 32-bit
 * load from an arbitrarily aligned address (confirm against its definition). */
1661 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1662 const uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
/* Each byte is split into its low 2 bits (l*) and its high 6 bits shifted
 * down by 2 (h*), so that several bytes can be summed inside one 32-bit word
 * without a carry crossing into the neighbouring byte lane.
 * 0x02020202 is the per-byte "+2" rounding term. */
1663 uint32_t l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
1664 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1665 uint32_t l1, h1;
1666 pixels += line_size;
/* Two output rows per iteration. The first half combines l0/h0 (row above)
 * with freshly computed l1/h1 (current row); the second half swaps the roles,
 * so every source row is loaded only once. Because i advances by 2, the loop
 * always emits an even number of rows: h is expected to be even. */
1667 for (i = 0; i < h; i += 2) {
/* These a/b intentionally shadow the outer const a/b. */
1668 uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1669 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
1670 l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
1671 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
/* h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0F) reassembles, per byte lane,
 * (sum of the four neighbours + 2) >> 2; rnd_avg32 (defined elsewhere) then
 * merges it with the four bytes currently stored in block. */
1672 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1673 pixels += line_size;
1674 block += line_size;
1675 a = (((const struct unaligned_32 *) (pixels))->l);
1676 b = (((const struct unaligned_32 *) (pixels + 1))->l);
/* The rounding term travels with l0, so exactly one of l0/l1 carries it in
 * either half of the iteration. */
1677 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
1678 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1679 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1680 pixels += line_size;
1681 block += line_size;
/* Rewind to the top row and step 4 bytes to the right for the second word:
 * pixels advanced h + 1 rows in total (one before the loop), block advanced
 * h rows. */
1682 } pixels += 4 - line_size * (h + 1);
1683 block += 4 - line_size * h;
1684 }
1685 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
1686 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
/* AltiVec path: one output row per iteration, all 8 pixels at once. */
1687 register int i;
1688 register vector unsigned char
1689 pixelsv1, pixelsv2,
1690 pixelsavg;
1691 register vector unsigned char
1692 blockv, temp1, temp2, blocktemp;
1693 register vector unsigned short
1694 pixelssum1, pixelssum2, temp3;
1695 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
/* vctwo serves both as the "+2" rounding term and as the shift count below. */
1696 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
1697
/* Prologue: build the horizontal pair sums for the first source row.
 * vec_ld only reads from 16-byte aligned addresses, so the two aligned
 * vectors straddling pixels are loaded and realigned with vec_perm using the
 * shift pattern produced by vec_lvsl. */
1698 temp1 = vec_ld(0, pixels);
1699 temp2 = vec_ld(16, pixels);
1700 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
/* When pixels sits at offset 15 inside its vector, pixels + 1 is itself
 * 16-byte aligned: vec_lvsl(1, pixels) would then describe a zero shift and
 * the permute would return temp1. The data wanted is exactly temp2. */
1701 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
1702 {
1703 pixelsv2 = temp2;
1704 }
1705 else
1706 {
1707 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
1708 }
/* Interleaving with zero bytes widens the first 8 bytes of each vector to
 * eight 16-bit values (the results are used as vector unsigned short below). */
1709 pixelsv1 = vec_mergeh(vczero, pixelsv1);
1710 pixelsv2 = vec_mergeh(vczero, pixelsv2);
/* pixelssum1 = p[x] + p[x + 1] + 2 for the upper row of the 2x2 window. */
1711 pixelssum1 = vec_add((vector unsigned short)pixelsv1,
1712 (vector unsigned short)pixelsv2);
1713 pixelssum1 = vec_add(pixelssum1, vctwo);
1714
1715 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
1716 for (i = 0; i < h ; i++) {
/* Non-zero when block is not 16-byte aligned. The code then treats the
 * 8 destination bytes as the second half of the aligned vector, which
 * assumes block is at least 8-byte aligned (offset 0 or 8). */
1717 int rightside = ((unsigned long)block & 0x0000000F);
/* Load the whole aligned 16-byte vector that contains the destination row. */
1718 blockv = vec_ld(0, block);
1719
/* Fetch the row below (pixels + line_size) with the same unaligned-load
 * technique and the same offset-15 special case as in the prologue. */
1720 temp1 = vec_ld(line_size, pixels);
1721 temp2 = vec_ld(line_size + 16, pixels);
1722 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
1723 if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
1724 {
1725 pixelsv2 = temp2;
1726 }
1727 else
1728 {
1729 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
1730 }
1731
1732 pixelsv1 = vec_mergeh(vczero, pixelsv1);
1733 pixelsv2 = vec_mergeh(vczero, pixelsv2);
/* pixelssum2 = p[x] + p[x + 1] of the lower row (no rounding term yet). */
1734 pixelssum2 = vec_add((vector unsigned short)pixelsv1,
1735 (vector unsigned short)pixelsv2);
/* temp3 = (upper pair + lower pair + 2) >> 2. */
1736 temp3 = vec_add(pixelssum1, pixelssum2);
1737 temp3 = vec_sra(temp3, vctwo);
/* The lower row becomes the upper row of the next iteration; add the
 * rounding term now so it is ready to use. */
1738 pixelssum1 = vec_add(pixelssum2, vctwo);
/* Pack back to bytes: the 8 results land in the first half of pixelsavg,
 * the second half comes from the zero vector. */
1739 pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
1740
/* Build a 16-byte vector holding the 8 new values in the half that belongs
 * to this row and the untouched destination bytes in the other half.
 * vcprm is a project macro defined elsewhere; presumably plain numbers pick
 * 32-bit words of the first operand and sN picks word N of the second. */
1741 if (rightside)
1742 {
1743 blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
1744 }
1745 else
1746 {
1747 blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
1748 }
1749
/* Average with the old destination contents. In the half that was copied
 * from blockv both operands are equal, so those bytes are written back
 * unchanged by the full 16-byte store. */
1750 blockv = vec_avg(blocktemp, blockv);
1751 vec_st(blockv, 0, block);
1752
1753 block += line_size;
1754 pixels += line_size;
1755 }
1756
1757 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
1758 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1759 }