Mercurial > mplayer.hg
comparison libswscale/x86/yuv2rgb_template2.c @ 31124:b11e3ae960ac
13% faster yuv420 to rgb15 MMX conversion.
It is now faster than the old GPL version on Conroe.
author | lorenm |
---|---|
date | Wed, 19 May 2010 13:31:11 +0000 |
parents | b3c85aa7adbf |
children |
comparison
equal
deleted
inserted
replaced
31123:b3c85aa7adbf | 31124:b11e3ae960ac |
---|---|
160 | 160 |
161 #define YUV2RGB_ENDFUNC \ | 161 #define YUV2RGB_ENDFUNC \ |
162 __asm__ volatile (SFENCE"\n\t"EMMS); \ | 162 __asm__ volatile (SFENCE"\n\t"EMMS); \ |
163 return srcSliceH; \ | 163 return srcSliceH; \ |
164 | 164 |
165 | 165 #define IF0(x) |
166 #define RGB_PACK16(gmask, gshift, rshift) \ | 166 #define IF1(x) x |
167 | |
168 #define RGB_PACK16(gmask, is15) \ | |
167 "pand "MANGLE(mmx_redmask)", %%mm0\n\t" \ | 169 "pand "MANGLE(mmx_redmask)", %%mm0\n\t" \ |
168 "pand "MANGLE(mmx_redmask)", %%mm1\n\t" \ | 170 "pand "MANGLE(mmx_redmask)", %%mm1\n\t" \ |
171 "movq %%mm2, %%mm3\n\t" \ | |
172 "psllw $"AV_STRINGIFY(3-is15)", %%mm2\n\t" \ | |
173 "psrlw $"AV_STRINGIFY(5+is15)", %%mm3\n\t" \ | |
169 "psrlw $3, %%mm0\n\t" \ | 174 "psrlw $3, %%mm0\n\t" \ |
170 "pand "MANGLE(gmask)", %%mm2\n\t" \ | 175 IF##is15("psrlw $1, %%mm1\n\t") \ |
171 "movq %%mm0, %%mm5\n\t" \ | 176 "pand "MANGLE(pb_e0)", %%mm2\n\t" \ |
172 "movq %%mm1, %%mm6\n\t" \ | 177 "pand "MANGLE(gmask)", %%mm3\n\t" \ |
173 "movq %%mm2, %%mm7\n\t" \ | |
174 "punpcklbw %%mm4, %%mm0\n\t" \ | |
175 "punpcklbw %%mm4, %%mm1\n\t" \ | |
176 "punpcklbw %%mm4, %%mm2\n\t" \ | |
177 "punpckhbw %%mm4, %%mm5\n\t" \ | |
178 "punpckhbw %%mm4, %%mm6\n\t" \ | |
179 "punpckhbw %%mm4, %%mm7\n\t" \ | |
180 "psllw $"rshift", %%mm1\n\t" \ | |
181 "psllw $"rshift", %%mm6\n\t" \ | |
182 "psllw $"gshift", %%mm2\n\t" \ | |
183 "psllw $"gshift", %%mm7\n\t" \ | |
184 "por %%mm1, %%mm0\n\t" \ | |
185 "por %%mm6, %%mm5\n\t" \ | |
186 "por %%mm2, %%mm0\n\t" \ | 178 "por %%mm2, %%mm0\n\t" \ |
187 "por %%mm7, %%mm5\n\t" \ | 179 "por %%mm3, %%mm1\n\t" \ |
180 "movq %%mm0, %%mm2\n\t" \ | |
181 "punpcklbw %%mm1, %%mm0\n\t" \ | |
182 "punpckhbw %%mm1, %%mm2\n\t" \ | |
188 MOVNTQ " %%mm0, (%1)\n\t" \ | 183 MOVNTQ " %%mm0, (%1)\n\t" \ |
189 MOVNTQ " %%mm5, 8(%1)\n\t" \ | 184 MOVNTQ " %%mm2, 8(%1)\n\t" \ |
190 | 185 |
191 #define DITHER_RGB \ | 186 #define DITHER_RGB \ |
192 "paddusb "BLUE_DITHER"(%4), %%mm0\n\t" \ | 187 "paddusb "BLUE_DITHER"(%4), %%mm0\n\t" \ |
193 "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \ | 188 "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \ |
194 "paddusb "RED_DITHER"(%4), %%mm1\n\t" \ | 189 "paddusb "RED_DITHER"(%4), %%mm1\n\t" \ |
212 YUV2RGB | 207 YUV2RGB |
213 RGB_PACK_INTERLEAVE | 208 RGB_PACK_INTERLEAVE |
214 #ifdef DITHER1XBPP | 209 #ifdef DITHER1XBPP |
215 DITHER_RGB | 210 DITHER_RGB |
216 #endif | 211 #endif |
217 RGB_PACK16(mmx_redmask, "2", "7") | 212 RGB_PACK16(pb_03, 1) |
218 | 213 |
219 YUV2RGB_ENDLOOP(2) | 214 YUV2RGB_ENDLOOP(2) |
220 YUV2RGB_OPERANDS | 215 YUV2RGB_OPERANDS |
221 YUV2RGB_ENDFUNC | 216 YUV2RGB_ENDFUNC |
222 } | 217 } |
240 YUV2RGB | 235 YUV2RGB |
241 RGB_PACK_INTERLEAVE | 236 RGB_PACK_INTERLEAVE |
242 #ifdef DITHER1XBPP | 237 #ifdef DITHER1XBPP |
243 DITHER_RGB | 238 DITHER_RGB |
244 #endif | 239 #endif |
245 RGB_PACK16(mmx_grnmask, "3", "8") | 240 RGB_PACK16(pb_07, 0) |
246 | 241 |
247 YUV2RGB_ENDLOOP(2) | 242 YUV2RGB_ENDLOOP(2) |
248 YUV2RGB_OPERANDS | 243 YUV2RGB_OPERANDS |
249 YUV2RGB_ENDFUNC | 244 YUV2RGB_ENDFUNC |
250 } | 245 } |