Mercurial > libavcodec.hg
comparison x86/dsputil_mmx.h @ 12450:3941687b4fa9 libavcodec
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) into h264_qpel_mmx.c,
which is still #included in dsputil_mmx.c and is part of DSPContext, and h264dsp_mmx.c,
which represents H264DSPContext and is now compiled on its own.
author | rbultje |
---|---|
date | Wed, 01 Sep 2010 20:48:59 +0000 |
parents | fe78a4548d12 |
children | f4355cd85faa |
comparison
equal
deleted
inserted
replaced
12449:3bca212d6f51 | 12450:3941687b4fa9 |
---|---|
91 #define TRANSPOSE4(a,b,c,d,t)\ | 91 #define TRANSPOSE4(a,b,c,d,t)\ |
92 SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\ | 92 SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\ |
93 SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\ | 93 SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\ |
94 SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\ | 94 SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\ |
95 SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */ | 95 SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */ |
96 | |
97 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ | |
98 __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... | |
99 "movd %4, %%mm0 \n\t" | |
100 "movd %5, %%mm1 \n\t" | |
101 "movd %6, %%mm2 \n\t" | |
102 "movd %7, %%mm3 \n\t" | |
103 "punpcklbw %%mm1, %%mm0 \n\t" | |
104 "punpcklbw %%mm3, %%mm2 \n\t" | |
105 "movq %%mm0, %%mm1 \n\t" | |
106 "punpcklwd %%mm2, %%mm0 \n\t" | |
107 "punpckhwd %%mm2, %%mm1 \n\t" | |
108 "movd %%mm0, %0 \n\t" | |
109 "punpckhdq %%mm0, %%mm0 \n\t" | |
110 "movd %%mm0, %1 \n\t" | |
111 "movd %%mm1, %2 \n\t" | |
112 "punpckhdq %%mm1, %%mm1 \n\t" | |
113 "movd %%mm1, %3 \n\t" | |
114 | |
115 : "=m" (*(uint32_t*)(dst + 0*dst_stride)), | |
116 "=m" (*(uint32_t*)(dst + 1*dst_stride)), | |
117 "=m" (*(uint32_t*)(dst + 2*dst_stride)), | |
118 "=m" (*(uint32_t*)(dst + 3*dst_stride)) | |
119 : "m" (*(uint32_t*)(src + 0*src_stride)), | |
120 "m" (*(uint32_t*)(src + 1*src_stride)), | |
121 "m" (*(uint32_t*)(src + 2*src_stride)), | |
122 "m" (*(uint32_t*)(src + 3*src_stride)) | |
123 ); | |
124 } | |
96 | 125 |
97 // e,f,g,h can be memory | 126 // e,f,g,h can be memory |
98 // out: a,d,t,c | 127 // out: a,d,t,c |
99 #define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\ | 128 #define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\ |
100 "punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\ | 129 "punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\ |