comparison libswscale/x86/yuv2rgb_template2.c @ 31124:b11e3ae960ac

13% faster yuv420 to rgb15 MMX conversion. It is now faster than the old GPL version on Conroe.
author lorenm
date Wed, 19 May 2010 13:31:11 +0000
parents b3c85aa7adbf
children
comparison
equal deleted inserted replaced
31123:b3c85aa7adbf 31124:b11e3ae960ac
160 160
161 #define YUV2RGB_ENDFUNC \ 161 #define YUV2RGB_ENDFUNC \
162 __asm__ volatile (SFENCE"\n\t"EMMS); \ 162 __asm__ volatile (SFENCE"\n\t"EMMS); \
163 return srcSliceH; \ 163 return srcSliceH; \
164 164
165 165 #define IF0(x)
166 #define RGB_PACK16(gmask, gshift, rshift) \ 166 #define IF1(x) x
167
168 #define RGB_PACK16(gmask, is15) \
167 "pand "MANGLE(mmx_redmask)", %%mm0\n\t" \ 169 "pand "MANGLE(mmx_redmask)", %%mm0\n\t" \
168 "pand "MANGLE(mmx_redmask)", %%mm1\n\t" \ 170 "pand "MANGLE(mmx_redmask)", %%mm1\n\t" \
171 "movq %%mm2, %%mm3\n\t" \
172 "psllw $"AV_STRINGIFY(3-is15)", %%mm2\n\t" \
173 "psrlw $"AV_STRINGIFY(5+is15)", %%mm3\n\t" \
169 "psrlw $3, %%mm0\n\t" \ 174 "psrlw $3, %%mm0\n\t" \
170 "pand "MANGLE(gmask)", %%mm2\n\t" \ 175 IF##is15("psrlw $1, %%mm1\n\t") \
171 "movq %%mm0, %%mm5\n\t" \ 176 "pand "MANGLE(pb_e0)", %%mm2\n\t" \
172 "movq %%mm1, %%mm6\n\t" \ 177 "pand "MANGLE(gmask)", %%mm3\n\t" \
173 "movq %%mm2, %%mm7\n\t" \
174 "punpcklbw %%mm4, %%mm0\n\t" \
175 "punpcklbw %%mm4, %%mm1\n\t" \
176 "punpcklbw %%mm4, %%mm2\n\t" \
177 "punpckhbw %%mm4, %%mm5\n\t" \
178 "punpckhbw %%mm4, %%mm6\n\t" \
179 "punpckhbw %%mm4, %%mm7\n\t" \
180 "psllw $"rshift", %%mm1\n\t" \
181 "psllw $"rshift", %%mm6\n\t" \
182 "psllw $"gshift", %%mm2\n\t" \
183 "psllw $"gshift", %%mm7\n\t" \
184 "por %%mm1, %%mm0\n\t" \
185 "por %%mm6, %%mm5\n\t" \
186 "por %%mm2, %%mm0\n\t" \ 178 "por %%mm2, %%mm0\n\t" \
187 "por %%mm7, %%mm5\n\t" \ 179 "por %%mm3, %%mm1\n\t" \
180 "movq %%mm0, %%mm2\n\t" \
181 "punpcklbw %%mm1, %%mm0\n\t" \
182 "punpckhbw %%mm1, %%mm2\n\t" \
188 MOVNTQ " %%mm0, (%1)\n\t" \ 183 MOVNTQ " %%mm0, (%1)\n\t" \
189 MOVNTQ " %%mm5, 8(%1)\n\t" \ 184 MOVNTQ " %%mm2, 8(%1)\n\t" \
190 185
191 #define DITHER_RGB \ 186 #define DITHER_RGB \
192 "paddusb "BLUE_DITHER"(%4), %%mm0\n\t" \ 187 "paddusb "BLUE_DITHER"(%4), %%mm0\n\t" \
193 "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \ 188 "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \
194 "paddusb "RED_DITHER"(%4), %%mm1\n\t" \ 189 "paddusb "RED_DITHER"(%4), %%mm1\n\t" \
212 YUV2RGB 207 YUV2RGB
213 RGB_PACK_INTERLEAVE 208 RGB_PACK_INTERLEAVE
214 #ifdef DITHER1XBPP 209 #ifdef DITHER1XBPP
215 DITHER_RGB 210 DITHER_RGB
216 #endif 211 #endif
217 RGB_PACK16(mmx_redmask, "2", "7") 212 RGB_PACK16(pb_03, 1)
218 213
219 YUV2RGB_ENDLOOP(2) 214 YUV2RGB_ENDLOOP(2)
220 YUV2RGB_OPERANDS 215 YUV2RGB_OPERANDS
221 YUV2RGB_ENDFUNC 216 YUV2RGB_ENDFUNC
222 } 217 }
240 YUV2RGB 235 YUV2RGB
241 RGB_PACK_INTERLEAVE 236 RGB_PACK_INTERLEAVE
242 #ifdef DITHER1XBPP 237 #ifdef DITHER1XBPP
243 DITHER_RGB 238 DITHER_RGB
244 #endif 239 #endif
245 RGB_PACK16(mmx_grnmask, "3", "8") 240 RGB_PACK16(pb_07, 0)
246 241
247 YUV2RGB_ENDLOOP(2) 242 YUV2RGB_ENDLOOP(2)
248 YUV2RGB_OPERANDS 243 YUV2RGB_OPERANDS
249 YUV2RGB_ENDFUNC 244 YUV2RGB_ENDFUNC
250 } 245 }