Mercurial > libavcodec.hg
comparison ppc/h264_altivec.c @ 8530:992e2f8bfba7 libavcodec
AltiVec versions of h264_idct_dc_add and h264_idct8_dc_add
Patch by David Conrad %lessen42 A gmail P com%
author | gpoirier |
---|---|
date | Mon, 05 Jan 2009 22:13:42 +0000 |
parents | 1615d6b75ada |
children | 961e40a13102 |
comparison
equal
deleted
inserted
replaced
8529:1cb8c8e14a0a | 8530:992e2f8bfba7 |
---|---|
594 ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel); | 594 ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel); |
595 ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel); | 595 ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel); |
596 ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel); | 596 ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel); |
597 } | 597 } |
598 | 598 |
599 // TODO: implement this in AltiVec | 599 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size) |
600 static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride) { | 600 { |
601 int i, j; | 601 vec_s16 dc16; |
602 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | 602 vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner; |
603 int dc = (block[0] + 32) >> 6; | 603 LOAD_ZERO; |
604 for( j = 0; j < 8; j++ ) | 604 DECLARE_ALIGNED_16(int, dc); |
605 { | 605 int i; |
606 for( i = 0; i < 8; i++ ) | 606 |
607 dst[i] = cm[ dst[i] + dc ]; | 607 dc = (block[0] + 32) >> 6; |
608 dst += stride; | 608 dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1); |
609 | |
610 if (size == 4) | |
611 dc16 = vec_sld(dc16, zero_s16v, 8); | |
612 dcplus = vec_packsu(dc16, zero_s16v); | |
613 dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v); | |
614 | |
615 aligner = vec_lvsr(0, dst); | |
616 dcplus = vec_perm(dcplus, dcplus, aligner); | |
617 dcminus = vec_perm(dcminus, dcminus, aligner); | |
618 | |
619 for (i = 0; i < size; i += 4) { | |
620 v0 = vec_ld(0, dst+0*stride); | |
621 v1 = vec_ld(0, dst+1*stride); | |
622 v2 = vec_ld(0, dst+2*stride); | |
623 v3 = vec_ld(0, dst+3*stride); | |
624 | |
625 v0 = vec_adds(v0, dcplus); | |
626 v1 = vec_adds(v1, dcplus); | |
627 v2 = vec_adds(v2, dcplus); | |
628 v3 = vec_adds(v3, dcplus); | |
629 | |
630 v0 = vec_subs(v0, dcminus); | |
631 v1 = vec_subs(v1, dcminus); | |
632 v2 = vec_subs(v2, dcminus); | |
633 v3 = vec_subs(v3, dcminus); | |
634 | |
635 vec_st(v0, 0, dst+0*stride); | |
636 vec_st(v1, 0, dst+1*stride); | |
637 vec_st(v2, 0, dst+2*stride); | |
638 vec_st(v3, 0, dst+3*stride); | |
639 | |
640 dst += 4*stride; | |
609 } | 641 } |
642 } | |
643 | |
644 static void h264_idct_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride) | |
645 { | |
646 h264_idct_dc_add_internal(dst, block, stride, 4); | |
647 } | |
648 | |
649 static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride) | |
650 { | |
651 h264_idct_dc_add_internal(dst, block, stride, 8); | |
610 } | 652 } |
611 | 653 |
612 static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ | 654 static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ |
613 int i; | 655 int i; |
614 for(i=0; i<16; i+=4){ | 656 for(i=0; i<16; i+=4){ |
901 c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec; | 943 c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec; |
902 /* ff_h264_idct_add_altivec may be re-enabled once AltiVec versions of | 944 /* ff_h264_idct_add_altivec may be re-enabled once AltiVec versions of |
903 h264_idct_add16, h264_idct_add16intra, h264_idct_add8 are implemented | 945 h264_idct_add16, h264_idct_add16intra, h264_idct_add8 are implemented |
904 c->h264_idct_add = ff_h264_idct_add_altivec; | 946 c->h264_idct_add = ff_h264_idct_add_altivec; |
905 */ | 947 */ |
948 c->h264_idct_dc_add= h264_idct_dc_add_altivec; | |
949 c->h264_idct8_dc_add = ff_h264_idct8_dc_add_altivec; | |
906 c->h264_idct8_add = ff_h264_idct8_add_altivec; | 950 c->h264_idct8_add = ff_h264_idct8_add_altivec; |
907 c->h264_idct8_add4 = ff_h264_idct8_add4_altivec; | 951 c->h264_idct8_add4 = ff_h264_idct8_add4_altivec; |
908 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec; | 952 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec; |
909 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec; | 953 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec; |
910 | 954 |