comparison: x86/h264dsp_mmx.c @ 11369:98970e51365a (libavcodec)
Remove DECLARE_ALIGNED_{8,16} macros
These macros are redundant. All uses are replaced with the generic
DECLARE_ALIGNED macro instead.
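
The substitution is purely mechanical: the alignment that the removed macro names encoded in their suffix becomes the first argument of the generic macro. A minimal sketch of the relationship, assuming the GCC-style definition of DECLARE_ALIGNED in libavutil/mem.h (branches for other compilers are omitted here):

```c
#include <stdint.h>

/* Sketch only: the GCC branch of the generic macro in libavutil/mem.h;
 * other-compiler branches are not shown. */
#define DECLARE_ALIGNED(n, t, v)    t __attribute__ ((aligned (n))) v

/* The removed helpers were thin wrappers with the alignment baked in:
 *   #define DECLARE_ALIGNED_8(t, v)   DECLARE_ALIGNED(8,  t, v)
 *   #define DECLARE_ALIGNED_16(t, v)  DECLARE_ALIGNED(16, t, v)
 * so every call site in this diff is rewritten 1:1, e.g. */
DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1) = 0x0103010301030103ULL;

/* which expands to roughly:
 *   static const uint64_t __attribute__ ((aligned (8))) ff_pb_3_1 = 0x0103010301030103ULL; */
```

Passing the alignment explicitly keeps the 64-bit constants and temporaries naturally aligned for the MMX code without needing one wrapper macro per alignment value.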
| author | mru |
| --- | --- |
| date | Sat, 06 Mar 2010 14:24:59 +0000 |
| parents | aa10bb3c244c |
| children | 731050abce41 |
--- a/x86/h264dsp_mmx.c (11368:3d4f64b8fb10)
+++ b/x86/h264dsp_mmx.c (11369:98970e51365a)
@@ -18,12 +18,12 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "dsputil_mmx.h"
 
-DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
-DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
+DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
+DECLARE_ALIGNED(8, static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
 
 /***********************************/
 /* IDCT */
 
 #define SUMSUB_BADC( a, b, c, d ) \
@@ -155,16 +155,16 @@
 }
 
 static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
 {
     int i;
-    DECLARE_ALIGNED_8(int16_t, b2)[64];
+    DECLARE_ALIGNED(8, int16_t, b2)[64];
 
     block[0] += 32;
 
     for(i=0; i<2; i++){
-        DECLARE_ALIGNED_8(uint64_t, tmp);
+        DECLARE_ALIGNED(8, uint64_t, tmp);
 
         h264_idct8_1d(block+4*i);
 
         __asm__ volatile(
             "movq %%mm7, %0 \n\t"
626 "pminub "#tc0", "#q2" \n\t"\ | 626 "pminub "#tc0", "#q2" \n\t"\ |
627 "movq "#q2", "q1addr" \n\t" | 627 "movq "#q2", "q1addr" \n\t" |
628 | 628 |
629 static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) | 629 static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) |
630 { | 630 { |
631 DECLARE_ALIGNED_8(uint64_t, tmp0)[2]; | 631 DECLARE_ALIGNED(8, uint64_t, tmp0)[2]; |
632 | 632 |
633 __asm__ volatile( | 633 __asm__ volatile( |
634 "movq (%2,%4), %%mm0 \n\t" //p1 | 634 "movq (%2,%4), %%mm0 \n\t" //p1 |
635 "movq (%2,%4,2), %%mm1 \n\t" //p0 | 635 "movq (%2,%4,2), %%mm1 \n\t" //p0 |
636 "movq (%3), %%mm2 \n\t" //q0 | 636 "movq (%3), %%mm2 \n\t" //q0 |
@@ -688,11 +688,11 @@
 }
 static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
     //FIXME: could cut some load/stores by merging transpose with filter
     // also, it only needs to transpose 6x8
-    DECLARE_ALIGNED_8(uint8_t, trans)[8*8];
+    DECLARE_ALIGNED(8, uint8_t, trans)[8*8];
     int i;
     for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
         if((tc0[0] & tc0[1]) < 0)
             continue;
         transpose4x4(trans, pix-4, 8, stride);
@@ -732,11 +732,11 @@
 }
 
 static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
     //FIXME: could cut some load/stores by merging transpose with filter
-    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
+    DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
     transpose4x4(trans, pix-2, 8, stride);
     transpose4x4(trans+4, pix-2+4*stride, 8, stride);
     h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
     transpose4x4(pix-2, trans, stride, 8);
     transpose4x4(pix-2+4*stride, trans+4, stride, 8);
@@ -782,11 +782,11 @@
 }
 
 static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
 {
     //FIXME: could cut some load/stores by merging transpose with filter
-    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
+    DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
     transpose4x4(trans, pix-2, 8, stride);
     transpose4x4(trans+4, pix-2+4*stride, 8, stride);
     h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
     transpose4x4(pix-2, trans, stride, 8);
     transpose4x4(pix-2+4*stride, trans+4, stride, 8);
@@ -813,11 +813,11 @@
     // could do a special case for dir==0 && edges==1, but it only reduces the
     // average filter time by 1.2%
     for( dir=1; dir>=0; dir-- ) {
         const x86_reg d_idx = dir ? -8 : -1;
         const int mask_mv = dir ? mask_mv1 : mask_mv0;
-        DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
+        DECLARE_ALIGNED(8, const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
         int b_idx, edge;
         for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
             __asm__ volatile(
                 "pand %0, %%mm0 \n\t"
                 ::"m"(mask_dir)
@@ -2104,11 +2104,11 @@
 H264_MC_816(H264_MC_H, ssse3)
 H264_MC_816(H264_MC_HV, ssse3)
 #endif
 
 /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
-DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg)[4] = {
+DECLARE_ALIGNED(8, static const uint64_t, h264_rnd_reg)[4] = {
     0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
 };
 
 #define H264_CHROMA_OP(S,D)
 #define H264_CHROMA_OP4(S,D,T)
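
A side note on the last hunk: the comment on h264_rnd_reg describes an interleaved layout in which each even slot holds a rounding constant and the following odd slot holds that constant divided by 8 (0x0020/0x0004 and 0x001C/0x0003). A hypothetical illustration of how a caller can hand one pair to the asm, with the helper name invented for this sketch (the real consumers are presumably the chroma MC templates included further down in the file):

```c
#include <stdint.h>

/* Hypothetical helper, not part of the patch: selects a (rnd, rnd/8) pair
 * from the interleaved table. pair 0 -> {0x0020.., 0x0004..},
 * pair 1 -> {0x001C.., 0x0003..}. The asm receives a pointer to the first
 * element and reads the "div 8" value at p+1, i.e. 8 bytes further on. */
static inline const uint64_t *h264_rnd_pair(const uint64_t rnd_reg[4], int pair)
{
    return rnd_reg + 2 * pair;
}
```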