Mercurial > libavcodec.hg
changeset 8531:961e40a13102 libavcodec
add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
Patch by David Conrad %lessen42 A gmail P com%
author | gpoirier |
---|---|
date | Mon, 05 Jan 2009 23:15:14 +0000 |
parents | 992e2f8bfba7 |
children | 598a77fbc614 |
files | ppc/h264_altivec.c |
diffstat | 1 files changed, 82 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/ppc/h264_altivec.c Mon Jan 05 22:13:42 2009 +0000 +++ b/ppc/h264_altivec.c Mon Jan 05 23:15:14 2009 +0000 @@ -935,6 +935,82 @@ write16x4(pix-2, stride, line1, line2, line3, line4); } + +static av_always_inline +void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, + int weightd, int weights, int offset, int w, int h) +{ + int y, dst_aligned, src_aligned; + vec_u8 vsrc, vdst; + vec_u16 vtemp, vlog2_denom, vweights, vweightd, voffset, v0, v1, v2, v3; + DECLARE_ALIGNED_16(int32_t, temp[4]); + LOAD_ZERO; + + offset = ((offset + 1) | 1) << log2_denom; + temp[0] = log2_denom+1; + temp[1] = weights; + temp[2] = weightd; + temp[3] = offset; + + vtemp = vec_ld(0, temp); + vlog2_denom = vec_splat(vtemp, 1); + vweights = vec_splat(vtemp, 3); + vweightd = vec_splat(vtemp, 5); + voffset = vec_splat(vtemp, 7); + dst_aligned = !((unsigned long)dst & 0xf); + src_aligned = !((unsigned long)src & 0xf); + + for (y=0; y<h; y++) { + vdst = vec_ld(0, dst); + vsrc = vec_ld(0, src); + + v0 = vec_mergeh(zero_u8v, vdst); + v1 = vec_mergel(zero_u8v, vdst); + v2 = vec_mergeh(zero_u8v, vsrc); + v3 = vec_mergel(zero_u8v, vsrc); + + if (w == 8) { + if (src_aligned) + v3 = v2; + else + v2 = v3; + } + + if (w == 16 || dst_aligned) { + v0 = vec_mladd(v0, vweightd, zero_u16v); + v2 = vec_mladd(v2, vweights, zero_u16v); + + v0 = vec_adds(v0, voffset); + v0 = vec_adds(v0, v2); + v0 = vec_sra(v0, vlog2_denom); + } + if (w == 16 || !dst_aligned) { + v1 = vec_mladd(v1, vweightd, zero_u16v); + v3 = vec_mladd(v3, vweights, zero_u16v); + + v1 = vec_adds(v1, voffset); + v1 = vec_adds(v1, v3); + v1 = vec_sra(v1, vlog2_denom); + } + vdst = vec_packsu(v0, v1); + vec_st(vdst, 0, dst); + + dst += stride; + src += stride; + } +} + +#define H264_WEIGHT(W,H) \ +static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \ + biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \ +} + +H264_WEIGHT(16,16) +H264_WEIGHT(16, 8) +H264_WEIGHT( 8,16) +H264_WEIGHT( 8, 8) +H264_WEIGHT( 8, 4) + void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { if (has_altivec()) { @@ -973,5 +1049,11 @@ dspfunc(put_h264_qpel, 0, 16); dspfunc(avg_h264_qpel, 0, 16); #undef dspfunc + + c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec; + c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec; + c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec; + c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec; + c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec; } }