Mercurial > libavcodec.hg
diff ppc/dsputil_altivec.c @ 1015:35cf2f4a0f8c libavcodec
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
author | michaelni |
---|---|
date | Sun, 19 Jan 2003 19:00:45 +0000 |
parents | 3b7cc8e4b83f |
children | 9cc1031e1864 |
line wrap: on
line diff
--- a/ppc/dsputil_altivec.c Sun Jan 19 18:30:29 2003 +0000 +++ b/ppc/dsputil_altivec.c Sun Jan 19 19:00:45 2003 +0000 @@ -24,22 +24,6 @@ #include <sys/sysctl.h> #endif -#ifdef ALTIVEC_TBL_PERFORMANCE_REPORT -unsigned long long perfdata[altivec_perf_total][altivec_data_total]; -/* list below must match enum in dsputil_altivec.h */ -static unsigned char* perfname[] = { - "fft_calc", - "gmc1", - "dct_unquantize_h263", - "idct_add", - "idct_put", - "put_pixels_clamped", - "put_pixels16", - "avg_pixels16" -}; -#include <stdio.h> -#endif - int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) { int i; @@ -648,12 +632,12 @@ void put_pixels_clamped_altivec(const DCTELEM *block, UINT8 *restrict pixels, int line_size) { -ALTIVEC_TBL_DECLARE(altivec_put_pixels_clamped_num, 1); +POWERPC_TBL_DECLARE(altivec_put_pixels_clamped_num, 1); #ifdef ALTIVEC_USE_REFERENCE_C_CODE int i; UINT8 *cm = cropTbl + MAX_NEG_CROP; -ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); +POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); /* read the pixels */ for(i=0;i<8;i++) { @@ -670,7 +654,7 @@ block += 8; } -ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ register const vector short vczero = (const vector short)(0); @@ -681,7 +665,7 @@ pixelsv0, pixelsv1, pixelsv2, pixelsv3, pixelsv4, pixelsv0old, pixelsv4old; -ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); +POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); blockv0 = vec_ld(0, block); blockv1 = vec_ld(16, block); @@ -720,17 +704,17 @@ vec_st(pixelsv3, 48, pixels); } -ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { -ALTIVEC_TBL_DECLARE(altivec_put_pixels16_num, 1); +POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1); #ifdef ALTIVEC_USE_REFERENCE_C_CODE int i; -ALTIVEC_TBL_START_COUNT(altivec_put_pixels16_num, 1); +POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); for(i=0; i<h; i++) { *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l); @@ -741,25 +725,24 @@ block +=line_size; } -ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ - - register vector unsigned char perm = vec_lvsl(0, pixels); register vector unsigned char pixelsv1, pixelsv2; int i; -ALTIVEC_TBL_START_COUNT(altivec_put_pixels16_num, 1); +POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); for(i=0; i<h; i++) { pixelsv1 = vec_ld(0, (unsigned char*)pixels); pixelsv2 = vec_ld(16, (unsigned char*)pixels); - vec_st(vec_perm(pixelsv1, pixelsv2, perm), 0, (unsigned char*)block); + vec_st(vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)), + 0, (unsigned char*)block); pixels+=line_size; block +=line_size; } -ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } @@ -767,11 +750,11 @@ #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { -ALTIVEC_TBL_DECLARE(altivec_avg_pixels16_num, 1); +POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1); #ifdef ALTIVEC_USE_REFERENCE_C_CODE int i; -ALTIVEC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); +POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); for(i=0; i<h; i++) { op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l)); @@ -782,29 +765,207 @@ block +=line_size; } -ALTIVEC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ - - register vector unsigned char perm = vec_lvsl(0, pixels); register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; int i; -ALTIVEC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); +POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); for(i=0; i<h; i++) { pixelsv1 = vec_ld(0, (unsigned char*)pixels); pixelsv2 = vec_ld(16, (unsigned char*)pixels); blockv = vec_ld(0, block); - pixelsv = vec_perm(pixelsv1, pixelsv2, perm); + pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)); blockv = vec_avg(blockv,pixelsv); vec_st(blockv, 0, (unsigned char*)block); pixels+=line_size; block +=line_size; } -ALTIVEC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); + +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, + int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int i; +POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); + for (i = 0; i < h; i++) { + *((uint32_t *) (block)) = + (((*((uint32_t *) (block))) | + ((((const struct unaligned_32 *) (pixels))->l))) - + ((((*((uint32_t *) (block))) ^ + ((((const struct unaligned_32 *) (pixels))-> + l))) & 0xFEFEFEFEUL) >> 1)); + *((uint32_t *) (block + 4)) = + (((*((uint32_t *) (block + 4))) | + ((((const struct unaligned_32 *) (pixels + 4))->l))) - + ((((*((uint32_t *) (block + 4))) ^ + ((((const struct unaligned_32 *) (pixels + + 4))-> + l))) & 0xFEFEFEFEUL) >> 1)); + pixels += line_size; + block += line_size; + } +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; + int i; + +POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); + + for (i = 0; i < h; i++) { + /* + block is 8 bytes-aligned, so we're either in the + left block (16 bytes-aligned) or in the right block (not) + */ + int rightside = ((unsigned long)block & 0x0000000F); + + blockv = vec_ld(0, block); + pixelsv1 = vec_ld(0, (unsigned char*)pixels); + pixelsv2 = vec_ld(16, (unsigned char*)pixels); + pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)); + + if (rightside) + { + pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1)); + } + else + { + pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3)); + } + + blockv = vec_avg(blockv, pixelsv); + + vec_st(blockv, 0, block); + + pixels += line_size; + block += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); + +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} +void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int j; +POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); + for (j = 0; j < 2; j++) { + int i; + const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + const uint32_t b = + (((const struct unaligned_32 *) (pixels + 1))->l); + uint32_t l0 = + (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; + uint32_t h0 = + ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + uint32_t l1, h1; + pixels += line_size; + for (i = 0; i < h; i += 2) { + uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); + l1 = (a & 0x03030303UL) + (b & 0x03030303UL); + h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + a = (((const struct unaligned_32 *) (pixels))->l); + b = (((const struct unaligned_32 *) (pixels + 1))->l); + l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; + h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + } pixels += 4 - line_size * (h + 1); + block += 4 - line_size * h; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register int i; + register vector unsigned char + pixelsv1, pixelsv2, + pixelsavg; + register vector unsigned char + blockv, temp1, temp2; + register vector unsigned short + pixelssum1, pixelssum2, temp3; + register const vector unsigned char vczero = (const vector unsigned char)(0); + register const vector unsigned short vctwo = (const vector unsigned short)(2); + + temp1 = vec_ld(0, pixels); + temp2 = vec_ld(16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); + if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); + } + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum1 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + pixelssum1 = vec_add(pixelssum1, vctwo); + +POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); + for (i = 0; i < h ; i++) { + int rightside = ((unsigned long)block & 0x0000000F); + blockv = vec_ld(0, block); + + temp1 = vec_ld(line_size, pixels); + temp2 = vec_ld(line_size + 16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); + if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); + } + + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum2 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + temp3 = vec_add(pixelssum1, pixelssum2); + temp3 = vec_sra(temp3, vctwo); + pixelssum1 = vec_add(pixelssum2, vctwo); + pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); + + if (rightside) + { + blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); + } + else + { + blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); + } + + vec_st(blockv, 0, block); + + block += line_size; + pixels += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } @@ -822,22 +983,3 @@ #endif return 0; } - -#ifdef ALTIVEC_TBL_PERFORMANCE_REPORT -void altivec_display_perf_report(void) -{ - int i; - fprintf(stderr, "AltiVec performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n"); - for(i = 0 ; i < altivec_perf_total ; i++) - { - if (perfdata[i][altivec_data_num] != (unsigned long long)0) - fprintf(stderr, " Function \"%s\":\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", - perfname[i], - perfdata[i][altivec_data_min], - perfdata[i][altivec_data_max], - (double)perfdata[i][altivec_data_sum] / - (double)perfdata[i][altivec_data_num], - perfdata[i][altivec_data_num]); - } -} -#endif /* ALTIVEC_TBL_PERFORMANCE_REPORT */