comparison x86/dsputil_mmx.h @ 12450:3941687b4fa9 libavcodec

Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) into h264_qpel_mmx.c, which is still #included in dsputil_mmx.c and is part of DSPContext, and h264dsp_mmx.c, which represents H264DSPContext and is now compiled on its own.
author rbultje
date Wed, 01 Sep 2010 20:48:59 +0000
parents fe78a4548d12
children f4355cd85faa
comparison
equal deleted inserted replaced
12449:3bca212d6f51 12450:3941687b4fa9
/* TRANSPOSE4(a,b,c,d,t): 4x4 transpose composed of four SBUTTERFLY steps
 * (SBUTTERFLY is defined outside this excerpt). a,b,c,d are the four input
 * rows and t is a scratch operand. Per the per-step state comments below,
 * the transposed rows end up in a, d, t, c (in that order); b, c, d and t
 * are all overwritten along the way.
 * NOTE(review): element width and register class depend on SBUTTERFLY's
 * definition -- confirm there before reusing with other operand types. */
91 #define TRANSPOSE4(a,b,c,d,t)\ 91 #define TRANSPOSE4(a,b,c,d,t)\
92 SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\ 92 SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\
93 SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\ 93 SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\
94 SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\ 94 SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
95 SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */ 95 SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
96
/**
 * Transpose a 4x4 block of bytes using MMX.
 *
 * Reads 4 bytes from each of the four source rows at src + k*src_stride
 * (k = 0..3) and writes 4 bytes to each of the four destination rows at
 * dst + k*dst_stride, so that destination row k holds byte k of source
 * rows 0..3. In the comments below the source rows are called a, b, c, d
 * and aN is byte N of row a.
 *
 * @param dst        destination block; 4 bytes are written per row
 * @param src        source block; 4 bytes are read per row
 * @param dst_stride byte offset between consecutive destination rows
 * @param src_stride byte offset between consecutive source rows
 *
 * All four source rows are loaded before the first store is issued.
 * The asm overwrites mm0-mm3 without listing them as clobbers, and no
 * emms is executed here.
 * NOTE(review): presumably the caller is responsible for emms -- confirm.
 */
97 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
98 __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
99 "movd %4, %%mm0 \n\t" /* mm0 = a0 a1 a2 a3 (source row 0) */
100 "movd %5, %%mm1 \n\t" /* mm1 = b0 b1 b2 b3 (source row 1) */
101 "movd %6, %%mm2 \n\t" /* mm2 = c0 c1 c2 c3 (source row 2) */
102 "movd %7, %%mm3 \n\t" /* mm3 = d0 d1 d2 d3 (source row 3) */
103 "punpcklbw %%mm1, %%mm0 \n\t" /* mm0 = a0 b0 a1 b1 a2 b2 a3 b3 */
104 "punpcklbw %%mm3, %%mm2 \n\t" /* mm2 = c0 d0 c1 d1 c2 d2 c3 d3 */
105 "movq %%mm0, %%mm1 \n\t" /* keep a copy of the a/b interleave for the high half */
106 "punpcklwd %%mm2, %%mm0 \n\t" /* mm0 = a0 b0 c0 d0 a1 b1 c1 d1 */
107 "punpckhwd %%mm2, %%mm1 \n\t" /* mm1 = a2 b2 c2 d2 a3 b3 c3 d3 */
108 "movd %%mm0, %0 \n\t" /* dst row 0 = a0 b0 c0 d0 */
109 "punpckhdq %%mm0, %%mm0 \n\t" /* bring the high dword of mm0 down to the low dword */
110 "movd %%mm0, %1 \n\t" /* dst row 1 = a1 b1 c1 d1 */
111 "movd %%mm1, %2 \n\t" /* dst row 2 = a2 b2 c2 d2 */
112 "punpckhdq %%mm1, %%mm1 \n\t" /* bring the high dword of mm1 down to the low dword */
113 "movd %%mm1, %3 \n\t" /* dst row 3 = a3 b3 c3 d3 */
114
115 : "=m" (*(uint32_t*)(dst + 0*dst_stride)), /* %0..%3: the four 32-bit destination rows */
116 "=m" (*(uint32_t*)(dst + 1*dst_stride)),
117 "=m" (*(uint32_t*)(dst + 2*dst_stride)),
118 "=m" (*(uint32_t*)(dst + 3*dst_stride))
119 : "m" (*(uint32_t*)(src + 0*src_stride)), /* %4..%7: the four 32-bit source rows */
120 "m" (*(uint32_t*)(src + 1*src_stride)),
121 "m" (*(uint32_t*)(src + 2*src_stride)),
122 "m" (*(uint32_t*)(src + 3*src_stride))
123 );
124 }
96 125
97 // e,f,g,h can be memory 126 // e,f,g,h can be memory
98 // out: a,d,t,c 127 // out: a,d,t,c
99 #define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\ 128 #define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\
100 "punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\ 129 "punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\