comparison: x86/h264dsp_mmx.c @ 10961:34a65026fa06 (libavcodec)

Move array specifiers outside DECLARE_ALIGNED() invocations
author:   mru
date:     Fri, 22 Jan 2010 03:25:11 +0000
parents:  fdddf3d4238f
children: aa10bb3c244c
comparing 10960:10759fd39860 with 10961:34a65026fa06
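
At the preprocessor level the rewrite is mechanical: every array declared through DECLARE_ALIGNED()/DECLARE_ALIGNED_8() now passes only the identifier to the macro and appends the array specifier after the closing parenthesis. A minimal sketch of what that means, assuming a GCC-style definition of the macros (the real definitions live elsewhere in the tree; the exact spelling below is an assumption, not taken from this changeset):

/* Assumed GCC-style definitions, for illustration only: */
#define DECLARE_ALIGNED(n, t, v)  t __attribute__((aligned(n))) v
#define DECLARE_ALIGNED_8(t, v)   DECLARE_ALIGNED(8, t, v)

/* Old call site: the array specifier travels inside the macro argument. */
DECLARE_ALIGNED_8(int16_t, b2[64]);
/* expands to: int16_t __attribute__((aligned(8))) b2[64]; */

/* New call site: only the name goes through the macro, [64] is appended outside. */
DECLARE_ALIGNED_8(int16_t, b2)[64];
/* expands to: int16_t __attribute__((aligned(8))) b2 [64];  -- the same declaration. */

One plausible reason for the move (an inference, not stated in the commit message) is that a plain identifier argument gives alternative, compiler-specific definitions of DECLARE_ALIGNED more freedom in how they wrap the declaration, since they no longer have to cope with an array specifier buried inside the argument.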
@@ -155,11 +155,11 @@
 }
 
 static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
 {
     int i;
-    DECLARE_ALIGNED_8(int16_t, b2[64]);
+    DECLARE_ALIGNED_8(int16_t, b2)[64];
 
     block[0] += 32;
 
     for(i=0; i<2; i++){
         DECLARE_ALIGNED_8(uint64_t, tmp);
626 "pminub "#tc0", "#q2" \n\t"\ 626 "pminub "#tc0", "#q2" \n\t"\
627 "movq "#q2", "q1addr" \n\t" 627 "movq "#q2", "q1addr" \n\t"
628 628
629 static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) 629 static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
630 { 630 {
631 DECLARE_ALIGNED_8(uint64_t, tmp0[2]); 631 DECLARE_ALIGNED_8(uint64_t, tmp0)[2];
632 632
633 __asm__ volatile( 633 __asm__ volatile(
634 "movq (%2,%4), %%mm0 \n\t" //p1 634 "movq (%2,%4), %%mm0 \n\t" //p1
635 "movq (%2,%4,2), %%mm1 \n\t" //p0 635 "movq (%2,%4,2), %%mm1 \n\t" //p0
636 "movq (%3), %%mm2 \n\t" //q0 636 "movq (%3), %%mm2 \n\t" //q0
@@ -688,11 +688,11 @@
 }
 static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
     //FIXME: could cut some load/stores by merging transpose with filter
     // also, it only needs to transpose 6x8
-    DECLARE_ALIGNED_8(uint8_t, trans[8*8]);
+    DECLARE_ALIGNED_8(uint8_t, trans)[8*8];
     int i;
     for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
         if((tc0[0] & tc0[1]) < 0)
             continue;
         transpose4x4(trans, pix-4, 8, stride);
@@ -732,11 +732,11 @@
 }
 
 static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
     //FIXME: could cut some load/stores by merging transpose with filter
-    DECLARE_ALIGNED_8(uint8_t, trans[8*4]);
+    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
     transpose4x4(trans, pix-2, 8, stride);
     transpose4x4(trans+4, pix-2+4*stride, 8, stride);
     h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
     transpose4x4(pix-2, trans, stride, 8);
     transpose4x4(pix-2+4*stride, trans+4, stride, 8);
@@ -782,11 +782,11 @@
 }
 
 static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
 {
     //FIXME: could cut some load/stores by merging transpose with filter
-    DECLARE_ALIGNED_8(uint8_t, trans[8*4]);
+    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
     transpose4x4(trans, pix-2, 8, stride);
     transpose4x4(trans+4, pix-2+4*stride, 8, stride);
     h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
     transpose4x4(pix-2, trans, stride, 8);
     transpose4x4(pix-2+4*stride, trans+4, stride, 8);
@@ -1972,84 +1972,84 @@
     OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
 }\
 
 #define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
     OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
 }\
 
 #define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
     OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
     OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
     OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
     OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED(ALIGN, uint16_t, temp[SIZE*(SIZE<8?12:24)]);\
+    DECLARE_ALIGNED(ALIGN, uint16_t, temp)[SIZE*(SIZE<8?12:24)];\
     OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
+    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
     assert(((int)temp & 7) == 0);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
+    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
     assert(((int)temp & 7) == 0);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
+    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
     assert(((int)temp & 7) == 0);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
+    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
     assert(((int)temp & 7) == 0);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
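
For concreteness, here is roughly what one of the rewritten macro bodies becomes after token pasting. The parameter values OPNAME=put_, SIZE=8, MMX=mmx2, ALIGN=8 are chosen purely for illustration; the H264_MC_* instantiations that actually supply them fall outside the hunks shown here.

/* Illustrative expansion of the H264_MC_V mc01 case (parameter values assumed): */
static void put_h264_qpel8_mc01_mmx2(uint8_t *dst, uint8_t *src, int stride){
    /* the array specifier now sits outside the macro call: */
    DECLARE_ALIGNED(8, uint8_t, temp)[8*8];
    put_h264_qpel8_v_lowpass_mmx2(temp, src, 8, stride);
    put_pixels8_l2_mmx2(dst, src, temp, stride, stride, 8);
}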
@@ -2108,11 +2108,11 @@
 H264_MC_816(H264_MC_H, ssse3)
 H264_MC_816(H264_MC_HV, ssse3)
 #endif
 
 /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
-DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg[4]) = {
+DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg)[4] = {
     0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
 };
 
 #define H264_CHROMA_OP(S,D)
 #define H264_CHROMA_OP4(S,D,T)
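
The h264_rnd_reg table above is the one place in these hunks where the macro wraps a definition with an initializer; under the same assumed GCC-style macro sketched earlier, the appended [4] and the = { ... } still land in the right place:

/* New form, with the assumed GCC-style DECLARE_ALIGNED_8 (illustration only): */
DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg)[4] = { /* ... */ };
/* expands to:
 * static const uint64_t __attribute__((aligned(8))) h264_rnd_reg [4] = { ... };
 */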