# HG changeset patch
# User michael
# Date 1086474577 0
# Node ID 4c663228e02082ed973d5bed1d2ec2b5b21ee09c
# Parent 31bf68b1792be244b4e2c3d464ce206792fe93f4
avg_pixels8_xy2_altivec in AltiVec, enabling avg_pixels8_altivec, hadamard fix by (Romain Dolbeau )

diff -r 31bf68b1792b -r 4c663228e020 ppc/dsputil_altivec.c
--- a/ppc/dsputil_altivec.c	Sat Jun 05 22:26:03 2004 +0000
+++ b/ppc/dsputil_altivec.c	Sat Jun 05 22:29:37 2004 +0000
@@ -1647,3 +1647,142 @@
 #endif /* CONFIG_DARWIN */
     return 0;
 }
+
+/**
+ * Average an 8-pixel-wide block with the xy2 (2x2 neighbourhood)
+ * interpolation of pixels.
+ *
+ * Each output sample is (a + b + c + d + 2) >> 2 over a source sample, its
+ * right neighbour and the two samples directly below them; that value is
+ * then averaged with the byte already stored in block.
+ *
+ * Assumes that ((line_size % 8) == 0).
+ * NOTE(review): the AltiVec path rewrites the whole 16-byte vector holding
+ * block and picks the half to update from (block & 0xF), so block presumably
+ * has to be 8-byte aligned as well - confirm against the callers.
+ *
+ * @param block     destination rows, read and written in place
+ * @param pixels    source rows; the AltiVec path reads rows 0 .. h
+ * @param line_size byte distance between consecutive rows of both buffers
+ * @param h         number of rows to produce
+ */
+void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+
+    int j;
+POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
+    /* Two passes, each covering a 4-byte-wide column of the block. */
+    for (j = 0; j < 2; j++) {
+        int i;
+        const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+        const uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
+        uint32_t l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
+        uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+        uint32_t l1, h1;
+        pixels += line_size;
+        for (i = 0; i < h; i += 2) {
+            uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+            uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
+            l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
+            h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+            *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
+            pixels += line_size;
+            block += line_size;
+            a = (((const struct unaligned_32 *) (pixels))->l);
+            b = (((const struct unaligned_32 *) (pixels + 1))->l);
+            l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
+            h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+            *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
+            pixels += line_size;
+            block += line_size;
+        } pixels += 4 - line_size * (h + 1);
+        block += 4 - line_size * h;
+    }
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+    register int i;
+    register vector unsigned char
+        pixelsv1, pixelsv2,
+        pixelsavg;
+    register vector unsigned char
+        blockv, temp1, temp2, blocktemp;
+    register vector unsigned short
+        pixelssum1, pixelssum2, temp3;
+    register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
+    register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
+
+    /* Row 0: unaligned loads at pixels (pixelsv1) and pixels + 1 (pixelsv2). */
+    temp1 = vec_ld(0, pixels);
+    temp2 = vec_ld(16, pixels);
+    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
+    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
+    {
+        /* pixels + 1 is 16-byte aligned: it is exactly the second load. */
+        pixelsv2 = temp2;
+    }
+    else
+    {
+        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
+    }
+    /* Widen the first 8 bytes of each vector to unsigned 16-bit lanes. */
+    pixelsv1 = vec_mergeh(vczero, pixelsv1);
+    pixelsv2 = vec_mergeh(vczero, pixelsv2);
+    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+                         (vector unsigned short)pixelsv2);
+    /* Fold in the rounding term of the final >> 2. */
+    pixelssum1 = vec_add(pixelssum1, vctwo);
+
+POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
+    for (i = 0; i < h ; i++) {
+        /* Non-zero when block is not 16-byte aligned. */
+        int rightside = ((unsigned long)block & 0x0000000F);
+        blockv = vec_ld(0, block);
+
+        /* Next source row, again at pixels and pixels + 1. */
+        temp1 = vec_ld(line_size, pixels);
+        temp2 = vec_ld(line_size + 16, pixels);
+        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
+        {
+            pixelsv2 = temp2;
+        }
+        else
+        {
+            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+        }
+
+        pixelsv1 = vec_mergeh(vczero, pixelsv1);
+        pixelsv2 = vec_mergeh(vczero, pixelsv2);
+        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+                             (vector unsigned short)pixelsv2);
+        /* (previous row + 2 + this row) >> 2 */
+        temp3 = vec_add(pixelssum1, pixelssum2);
+        temp3 = vec_sra(temp3, vctwo);
+        /* This row becomes the previous row of the next iteration. */
+        pixelssum1 = vec_add(pixelssum2, vctwo);
+        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
+
+        /* Put the 8 results into the half of the vector that block occupies
+         * and keep blockv in the other half (presumably what the vcprm
+         * selectors encode - confirm), so vec_avg leaves that half as is. */
+        if (rightside)
+        {
+            blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
+        }
+        else
+        {
+            blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
+        }
+
+        blockv = vec_avg(blocktemp, blockv);
+        vec_st(blockv, 0, block);
+
+        block += line_size;
+        pixels += line_size;
+    }
+
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
diff -r 31bf68b1792b -r 4c663228e020 ppc/dsputil_altivec.h
--- a/ppc/dsputil_altivec.h	Sat Jun 05 22:26:03 2004 +0000
+++ b/ppc/dsputil_altivec.h	Sat Jun 05 22:29:37 2004 +0000
@@ -48,6 +48,7 @@
 extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
 extern int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
 extern int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
+extern void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
 
 extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder);
 
diff -r 31bf68b1792b -r 4c663228e020 ppc/dsputil_ppc.c
--- a/ppc/dsputil_ppc.c	Sat Jun 05 22:26:03 2004 +0000
+++ b/ppc/dsputil_ppc.c	Sat Jun 05 22:29:37 2004 +0000
@@ -62,6 +62,7 @@
   "put_no_rnd_pixels16_xy2_altivec",
   "hadamard8_diff8x8_altivec",
   "hadamard8_diff16_altivec",
+  "avg_pixels8_xy2_altivec",
   "clear_blocks_dcbz32_ppc",
   "clear_blocks_dcbz128_ppc"
 };
@@ -268,10 +269,8 @@
         /* the two functions do the same thing, so use the same code */
         c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
         c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
-// next one disabled as it's untested.
-#if 0
         c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
-#endif /* 0 */
+        c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
         c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
         c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
         c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
diff -r 31bf68b1792b -r 4c663228e020 ppc/dsputil_ppc.h
--- a/ppc/dsputil_ppc.h	Sat Jun 05 22:26:03 2004 +0000
+++ b/ppc/dsputil_ppc.h	Sat Jun 05 22:29:37 2004 +0000
@@ -52,6 +52,7 @@
   altivec_put_no_rnd_pixels16_xy2_num,
   altivec_hadamard8_diff8x8_num,
   altivec_hadamard8_diff16_num,
+  altivec_avg_pixels8_xy2_num,
   powerpc_clear_blocks_dcbz32,
   powerpc_clear_blocks_dcbz128,
   powerpc_perf_total