comparison x86/h264dsp_mmx.c @ 10961:34a65026fa06 libavcodec
Move array specifiers outside DECLARE_ALIGNED() invocations
author:   mru
date:     Fri, 22 Jan 2010 03:25:11 +0000
parents:  fdddf3d4238f
children: aa10bb3c244c
10960:10759fd39860 | 10961:34a65026fa06 |
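The change is mechanical at every call site: DECLARE_ALIGNED*(type, name[size]) becomes DECLARE_ALIGNED*(type, name)[size], so the macro's second argument is always a bare identifier. A minimal sketch of what that buys, assuming a GCC-style expansion (hypothetical; the real macro definitions live in the shared headers, not in this file):

    #include <stdint.h>

    /* Hypothetical GCC-style definition, for illustration only. */
    #define DECLARE_ALIGNED_8(t, v) t __attribute__((aligned(8))) v

    /* Old style: the array specifier rides inside the macro argument. */
    DECLARE_ALIGNED_8(static int16_t, old_b2[64]);

    /* New style: the specifier follows the invocation, so the macro
     * only ever sees a type and a plain name. */
    DECLARE_ALIGNED_8(static int16_t, new_b2)[64];

Both forms expand identically under this particular definition; keeping the argument a bare name presumably lets ports whose alignment keyword must sit in a different position within the declaration share the same call sites.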
155 } | 155 } |
156 | 156 |
157 static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | 157 static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) |
158 { | 158 { |
159 int i; | 159 int i; |
160 DECLARE_ALIGNED_8(int16_t, b2[64]); | 160 DECLARE_ALIGNED_8(int16_t, b2)[64]; |
161 | 161 |
162 block[0] += 32; | 162 block[0] += 32; |
163 | 163 |
164 for(i=0; i<2; i++){ | 164 for(i=0; i<2; i++){ |
165 DECLARE_ALIGNED_8(uint64_t, tmp); | 165 DECLARE_ALIGNED_8(uint64_t, tmp); |
626 "pminub "#tc0", "#q2" \n\t"\ | 626 "pminub "#tc0", "#q2" \n\t"\ |
627 "movq "#q2", "q1addr" \n\t" | 627 "movq "#q2", "q1addr" \n\t" |
628 | 628 |
629 static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) | 629 static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) |
630 { | 630 { |
631 DECLARE_ALIGNED_8(uint64_t, tmp0[2]); | 631 DECLARE_ALIGNED_8(uint64_t, tmp0)[2]; |
632 | 632 |
633 __asm__ volatile( | 633 __asm__ volatile( |
634 "movq (%2,%4), %%mm0 \n\t" //p1 | 634 "movq (%2,%4), %%mm0 \n\t" //p1 |
635 "movq (%2,%4,2), %%mm1 \n\t" //p0 | 635 "movq (%2,%4,2), %%mm1 \n\t" //p0 |
636 "movq (%3), %%mm2 \n\t" //q0 | 636 "movq (%3), %%mm2 \n\t" //q0 |
688 } | 688 } |
689 static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) | 689 static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) |
690 { | 690 { |
691 //FIXME: could cut some load/stores by merging transpose with filter | 691 //FIXME: could cut some load/stores by merging transpose with filter |
692 // also, it only needs to transpose 6x8 | 692 // also, it only needs to transpose 6x8 |
693 DECLARE_ALIGNED_8(uint8_t, trans[8*8]); | 693 DECLARE_ALIGNED_8(uint8_t, trans)[8*8]; |
694 int i; | 694 int i; |
695 for(i=0; i<2; i++, pix+=8*stride, tc0+=2) { | 695 for(i=0; i<2; i++, pix+=8*stride, tc0+=2) { |
696 if((tc0[0] & tc0[1]) < 0) | 696 if((tc0[0] & tc0[1]) < 0) |
697 continue; | 697 continue; |
698 transpose4x4(trans, pix-4, 8, stride); | 698 transpose4x4(trans, pix-4, 8, stride); |
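The wrappers in this hunk filter a vertical edge by transposing pixels into trans, running the horizontal-edge filter on the rows, and transposing back (the FIXME notes the remaining slack: merging the transpose with the filter, and transposing only 6x8, would save work). For reference, a plain-C model of the 4x4 byte transpose these calls rely on, with the argument order (dst, src, dst_stride, src_stride) assumed from the call sites above; the real transpose4x4 is SIMD code elsewhere in this file:

    #include <stdint.h>

    /* Plain-C model of transpose4x4: dst[j][i] = src[i][j]. */
    static void transpose4x4_c(uint8_t *dst, const uint8_t *src,
                               int dst_stride, int src_stride)
    {
        for (int i = 0; i < 4; i++)
            for (int j = 0; j < 4; j++)
                dst[j * dst_stride + i] = src[i * src_stride + j];
    }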
732 } | 732 } |
733 | 733 |
734 static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) | 734 static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) |
735 { | 735 { |
736 //FIXME: could cut some load/stores by merging transpose with filter | 736 //FIXME: could cut some load/stores by merging transpose with filter |
737 DECLARE_ALIGNED_8(uint8_t, trans[8*4]); | 737 DECLARE_ALIGNED_8(uint8_t, trans)[8*4]; |
738 transpose4x4(trans, pix-2, 8, stride); | 738 transpose4x4(trans, pix-2, 8, stride); |
739 transpose4x4(trans+4, pix-2+4*stride, 8, stride); | 739 transpose4x4(trans+4, pix-2+4*stride, 8, stride); |
740 h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0); | 740 h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0); |
741 transpose4x4(pix-2, trans, stride, 8); | 741 transpose4x4(pix-2, trans, stride, 8); |
742 transpose4x4(pix-2+4*stride, trans+4, stride, 8); | 742 transpose4x4(pix-2+4*stride, trans+4, stride, 8); |
782 } | 782 } |
783 | 783 |
784 static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta) | 784 static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta) |
785 { | 785 { |
786 //FIXME: could cut some load/stores by merging transpose with filter | 786 //FIXME: could cut some load/stores by merging transpose with filter |
787 DECLARE_ALIGNED_8(uint8_t, trans[8*4]); | 787 DECLARE_ALIGNED_8(uint8_t, trans)[8*4]; |
788 transpose4x4(trans, pix-2, 8, stride); | 788 transpose4x4(trans, pix-2, 8, stride); |
789 transpose4x4(trans+4, pix-2+4*stride, 8, stride); | 789 transpose4x4(trans+4, pix-2+4*stride, 8, stride); |
790 h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1); | 790 h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1); |
791 transpose4x4(pix-2, trans, stride, 8); | 791 transpose4x4(pix-2, trans, stride, 8); |
792 transpose4x4(pix-2+4*stride, trans+4, stride, 8); | 792 transpose4x4(pix-2+4*stride, trans+4, stride, 8); |
1972 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\ | 1972 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\ |
1973 }\ | 1973 }\ |
1974 | 1974 |
1975 #define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \ | 1975 #define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \ |
1976 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 1976 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1977 DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | 1977 DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ |
1978 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | 1978 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ |
1979 OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\ | 1979 OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\ |
1980 }\ | 1980 }\ |
1981 \ | 1981 \ |
1982 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 1982 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1983 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\ | 1983 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\ |
1984 }\ | 1984 }\ |
1985 \ | 1985 \ |
1986 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 1986 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1987 DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | 1987 DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ |
1988 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | 1988 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ |
1989 OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\ | 1989 OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\ |
1990 }\ | 1990 }\ |
1991 | 1991 |
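In the H264_MC_V cases above, mc02 writes the half-pel vertical lowpass straight to dst, while mc01 and mc03 average that lowpass plane (held in temp) with the nearest integer-pel rows, src and src+stride respectively, to produce the quarter-pel positions. A rough C model of the _l2 combine; a sketch only: strides follow the call sites, temp is assumed densely packed at width SIZE, and the real routines are MMX/SSE asm:

    #include <stdint.h>

    /* Rounded average of two planes: dst = (src1 + src2 + 1) >> 1. */
    static void put_pixels_l2_c(uint8_t *dst, const uint8_t *src1,
                                const uint8_t *src2, int dst_stride,
                                int src1_stride, int src2_stride,
                                int w, int h)
    {
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++)
                dst[x] = (src1[x] + src2[x] + 1) >> 1;
            dst  += dst_stride;
            src1 += src1_stride;
            src2 += src2_stride;
        }
    }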
1992 #define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \ | 1992 #define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \ |
1993 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 1993 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1994 DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | 1994 DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ |
1995 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | 1995 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ |
1996 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ | 1996 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ |
1997 }\ | 1997 }\ |
1998 \ | 1998 \ |
1999 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 1999 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2000 DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | 2000 DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ |
2001 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ | 2001 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ |
2002 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ | 2002 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ |
2003 }\ | 2003 }\ |
2004 \ | 2004 \ |
2005 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2005 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2006 DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | 2006 DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ |
2007 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | 2007 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ |
2008 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ | 2008 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ |
2009 }\ | 2009 }\ |
2010 \ | 2010 \ |
2011 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2011 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2012 DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | 2012 DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ |
2013 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ | 2013 put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ |
2014 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ | 2014 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ |
2015 }\ | 2015 }\ |
2016 \ | 2016 \ |
2017 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2017 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2018 DECLARE_ALIGNED(ALIGN, uint16_t, temp[SIZE*(SIZE<8?12:24)]);\ | 2018 DECLARE_ALIGNED(ALIGN, uint16_t, temp)[SIZE*(SIZE<8?12:24)];\ |
2019 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\ | 2019 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\ |
2020 }\ | 2020 }\ |
2021 \ | 2021 \ |
2022 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2022 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2023 DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ | 2023 DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ |
2024 uint8_t * const halfHV= temp;\ | 2024 uint8_t * const halfHV= temp;\ |
2025 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | 2025 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ |
2026 assert(((int)temp & 7) == 0);\ | 2026 assert(((int)temp & 7) == 0);\ |
2027 put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ | 2027 put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ |
2028 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\ | 2028 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\ |
2029 }\ | 2029 }\ |
2030 \ | 2030 \ |
2031 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2031 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2032 DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ | 2032 DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ |
2033 uint8_t * const halfHV= temp;\ | 2033 uint8_t * const halfHV= temp;\ |
2034 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | 2034 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ |
2035 assert(((int)temp & 7) == 0);\ | 2035 assert(((int)temp & 7) == 0);\ |
2036 put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ | 2036 put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ |
2037 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\ | 2037 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\ |
2038 }\ | 2038 }\ |
2039 \ | 2039 \ |
2040 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2040 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2041 DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ | 2041 DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ |
2042 uint8_t * const halfHV= temp;\ | 2042 uint8_t * const halfHV= temp;\ |
2043 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | 2043 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ |
2044 assert(((int)temp & 7) == 0);\ | 2044 assert(((int)temp & 7) == 0);\ |
2045 put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ | 2045 put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ |
2046 OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\ | 2046 OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\ |
2047 }\ | 2047 }\ |
2048 \ | 2048 \ |
2049 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2049 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2050 DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ | 2050 DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ |
2051 uint8_t * const halfHV= temp;\ | 2051 uint8_t * const halfHV= temp;\ |
2052 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | 2052 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ |
2053 assert(((int)temp & 7) == 0);\ | 2053 assert(((int)temp & 7) == 0);\ |
2054 put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ | 2054 put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ |
2055 OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\ | 2055 OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\ |
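All the H264_MC_HV variants share one scratch buffer: halfHV occupies the first SIZE*SIZE bytes, and halfV, the raw 16-bit vertical 6-tap intermediates, follows it (hence the *2 in the byte count). mc21/mc23 average the finished halfHV plane with a horizontal lowpass of src, while mc12/mc32 instead normalize a column of the 16-bit intermediates (halfV+2 or halfV+3) and average it with halfHV via _l2_shift5. A rough C model of that combine; the (x+16)>>5 rounding and the explicit width parameter are assumptions, not read from the asm:

    #include <stdint.h>

    static int clip_uint8(int v) { return v < 0 ? 0 : v > 255 ? 255 : v; }

    /* src16 points into halfV (offset to the wanted column), src8 is
     * the packed halfHV plane of width w == SIZE. */
    static void put_pixels_l2_shift5_c(uint8_t *dst, const int16_t *src16,
                                       const uint8_t *src8, int dst_stride,
                                       int src16_stride, int w, int h)
    {
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++)
                dst[x] = (clip_uint8((src16[x] + 16) >> 5) + src8[x] + 1) >> 1;
            dst   += dst_stride;
            src16 += src16_stride;
            src8  += w;
        }
    }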
2108 H264_MC_816(H264_MC_H, ssse3) | 2108 H264_MC_816(H264_MC_H, ssse3) |
2109 H264_MC_816(H264_MC_HV, ssse3) | 2109 H264_MC_816(H264_MC_HV, ssse3) |
2110 #endif | 2110 #endif |
2111 | 2111 |
2112 /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */ | 2112 /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */ |
2113 DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg[4]) = { | 2113 DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg)[4] = { |
2114 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL | 2114 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL |
2115 }; | 2115 }; |
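Concretely, the interleaving means a routine holding a pointer p to one of the even entries finds its companion divide-by-8 bias at p[1]:

    /* Sketch of the intended indexing (values from the initializer):
     *   h264_rnd_reg[0] = 0x0020...20 -> four packed 16-bit 32s (rnd)
     *   h264_rnd_reg[1] = 0x0004...04 -> four packed 16-bit  4s (rnd>>3)
     *   h264_rnd_reg[2] = 0x001C...1C -> four packed 16-bit 28s (rnd)
     *   h264_rnd_reg[3] = 0x0003...03 -> four packed 16-bit  3s (rnd>>3) */
    const uint64_t *p = &h264_rnd_reg[0];   /* or &h264_rnd_reg[2] */
    /* p[0] is the rounding bias, p[1] the same bias divided by 8. */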
2116 | 2116 |
2117 #define H264_CHROMA_OP(S,D) | 2117 #define H264_CHROMA_OP(S,D) |
2118 #define H264_CHROMA_OP4(S,D,T) | 2118 #define H264_CHROMA_OP4(S,D,T) |