Mercurial > mplayer.hg
comparison postproc/swscale_template.c @ 7723:11492d5b0896
mmx yuy2 output
author | michael |
---|---|
date | Sun, 13 Oct 2002 17:23:02 +0000 |
parents | c6aa14b47d03 |
children | 772d6d27fd66 |
comparison
equal
deleted
inserted
replaced
7722:a181875e0aa8 | 7723:11492d5b0896 |
---|---|
105 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | 105 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), |
106 "r" (dest), "m" (dstW), | 106 "r" (dest), "m" (dstW), |
107 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | 107 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) |
108 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | 108 : "%eax", "%ebx", "%ecx", "%edx", "%esi" |
109 */ | 109 */ |
110 #define YSCALEYUV2RGBX \ | 110 #define YSCALEYUV2PACKEDX \ |
111 "xorl %%eax, %%eax \n\t"\ | 111 "xorl %%eax, %%eax \n\t"\ |
112 ".balign 16 \n\t"\ | 112 ".balign 16 \n\t"\ |
113 "1: \n\t"\ | 113 "1: \n\t"\ |
114 "movl %1, %%edx \n\t" /* -chrFilterSize */\ | 114 "movl %1, %%edx \n\t" /* -chrFilterSize */\ |
115 "movl %3, %%ebx \n\t" /* chrMmxFilter+chrFilterSize */\ | 115 "movl %3, %%ebx \n\t" /* chrMmxFilter+chrFilterSize */\ |
142 "pmulhw %%mm0, %%mm5 \n\t"\ | 142 "pmulhw %%mm0, %%mm5 \n\t"\ |
143 "paddw %%mm2, %%mm1 \n\t"\ | 143 "paddw %%mm2, %%mm1 \n\t"\ |
144 "paddw %%mm5, %%mm7 \n\t"\ | 144 "paddw %%mm5, %%mm7 \n\t"\ |
145 "addl $1, %%edx \n\t"\ | 145 "addl $1, %%edx \n\t"\ |
146 " jnz 2b \n\t"\ | 146 " jnz 2b \n\t"\ |
147 \ | 147 |
148 | |
149 #define YSCALEYUV2RGBX \ | |
150 YSCALEYUV2PACKEDX\ | |
148 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ | 151 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ |
149 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | 152 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ |
150 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ | 153 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
151 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | 154 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
152 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ | 155 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ |
232 "paddw %%mm4, %%mm2 \n\t"\ | 235 "paddw %%mm4, %%mm2 \n\t"\ |
233 "paddw %%mm2, %%mm1 \n\t" /* G*/\ | 236 "paddw %%mm2, %%mm1 \n\t" /* G*/\ |
234 \ | 237 \ |
235 "packuswb %%mm1, %%mm1 \n\t" | 238 "packuswb %%mm1, %%mm1 \n\t" |
236 | 239 |
240 #define YSCALEYUV2PACKED \ | |
241 "movd %6, %%mm6 \n\t" /*yalpha1*/\ | |
242 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
243 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
244 "psraw $3, %%mm6 \n\t"\ | |
245 "movq %%mm6, 3968(%2) \n\t"\ | |
246 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ | |
247 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
248 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
249 "psraw $3, %%mm5 \n\t"\ | |
250 "movq %%mm5, 3976(%2) \n\t"\ | |
251 "xorl %%eax, %%eax \n\t"\ | |
252 ".balign 16 \n\t"\ | |
253 "1: \n\t"\ | |
254 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
255 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
256 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
257 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
258 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ | |
259 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ | |
260 "movq 3976(%2), %%mm0 \n\t"\ | |
261 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ | |
262 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ | |
263 "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | |
264 "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | |
265 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ | |
266 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ | |
267 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ | |
268 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ | |
269 "movq 8(%0, %%eax, 2), %%mm6 \n\t" /*buf0[eax]*/\ | |
270 "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\ | |
271 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
272 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
273 "pmulhw 3968(%2), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
274 "pmulhw 3968(%2), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
275 "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
276 "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
277 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
278 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
279 | |
237 #define YSCALEYUV2RGB \ | 280 #define YSCALEYUV2RGB \ |
238 "movd %6, %%mm6 \n\t" /*yalpha1*/\ | 281 "movd %6, %%mm6 \n\t" /*yalpha1*/\ |
239 "punpcklwd %%mm6, %%mm6 \n\t"\ | 282 "punpcklwd %%mm6, %%mm6 \n\t"\ |
240 "punpcklwd %%mm6, %%mm6 \n\t"\ | 283 "punpcklwd %%mm6, %%mm6 \n\t"\ |
241 "movq %%mm6, 3968(%2) \n\t"\ | 284 "movq %%mm6, 3968(%2) \n\t"\ |
304 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | 347 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
305 "packuswb %%mm0, %%mm2 \n\t"\ | 348 "packuswb %%mm0, %%mm2 \n\t"\ |
306 "packuswb %%mm6, %%mm5 \n\t"\ | 349 "packuswb %%mm6, %%mm5 \n\t"\ |
307 "packuswb %%mm3, %%mm4 \n\t"\ | 350 "packuswb %%mm3, %%mm4 \n\t"\ |
308 "pxor %%mm7, %%mm7 \n\t" | 351 "pxor %%mm7, %%mm7 \n\t" |
309 | 352 |
353 #define YSCALEYUV2PACKED1 \ | |
354 "xorl %%eax, %%eax \n\t"\ | |
355 ".balign 16 \n\t"\ | |
356 "1: \n\t"\ | |
357 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ | |
358 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |
359 "psraw $7, %%mm3 \n\t" \ | |
360 "psraw $7, %%mm4 \n\t" \ | |
361 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ | |
362 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
363 "psraw $7, %%mm1 \n\t" \ | |
364 "psraw $7, %%mm7 \n\t" \ | |
365 | |
310 #define YSCALEYUV2RGB1 \ | 366 #define YSCALEYUV2RGB1 \ |
311 "xorl %%eax, %%eax \n\t"\ | 367 "xorl %%eax, %%eax \n\t"\ |
312 ".balign 16 \n\t"\ | 368 ".balign 16 \n\t"\ |
313 "1: \n\t"\ | 369 "1: \n\t"\ |
314 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ | 370 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
353 "packuswb %%mm0, %%mm2 \n\t"\ | 409 "packuswb %%mm0, %%mm2 \n\t"\ |
354 "packuswb %%mm6, %%mm5 \n\t"\ | 410 "packuswb %%mm6, %%mm5 \n\t"\ |
355 "packuswb %%mm3, %%mm4 \n\t"\ | 411 "packuswb %%mm3, %%mm4 \n\t"\ |
356 "pxor %%mm7, %%mm7 \n\t" | 412 "pxor %%mm7, %%mm7 \n\t" |
357 | 413 |
414 #define YSCALEYUV2PACKED1b \ | |
415 "xorl %%eax, %%eax \n\t"\ | |
416 ".balign 16 \n\t"\ | |
417 "1: \n\t"\ | |
418 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
419 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
420 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
421 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
422 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ | |
423 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ | |
424 "psrlw $8, %%mm3 \n\t" \ | |
425 "psrlw $8, %%mm4 \n\t" \ | |
426 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ | |
427 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
428 "psraw $7, %%mm1 \n\t" \ | |
429 "psraw $7, %%mm7 \n\t" | |
430 | |
358 // do vertical chrominance interpolation | 431 // do vertical chrominance interpolation |
359 #define YSCALEYUV2RGB1b \ | 432 #define YSCALEYUV2RGB1b \ |
360 "xorl %%eax, %%eax \n\t"\ | 433 "xorl %%eax, %%eax \n\t"\ |
361 ".balign 16 \n\t"\ | 434 ".balign 16 \n\t"\ |
362 "1: \n\t"\ | 435 "1: \n\t"\ |
650 #else | 723 #else |
651 #undef WRITEBGR24 | 724 #undef WRITEBGR24 |
652 #define WRITEBGR24 WRITEBGR24MMX | 725 #define WRITEBGR24 WRITEBGR24MMX |
653 #endif | 726 #endif |
654 | 727 |
728 #define WRITEYUY2 \ | |
729 "packuswb %%mm3, %%mm3 \n\t"\ | |
730 "packuswb %%mm4, %%mm4 \n\t"\ | |
731 "packuswb %%mm7, %%mm1 \n\t"\ | |
732 "punpcklbw %%mm4, %%mm3 \n\t"\ | |
733 "movq %%mm1, %%mm7 \n\t"\ | |
734 "punpcklbw %%mm3, %%mm1 \n\t"\ | |
735 "punpckhbw %%mm3, %%mm7 \n\t"\ | |
736 \ | |
737 MOVNTQ(%%mm1, (%4, %%eax, 2))\ | |
738 MOVNTQ(%%mm7, 8(%4, %%eax, 2))\ | |
739 \ | |
740 "addl $8, %%eax \n\t"\ | |
741 "cmpl %5, %%eax \n\t"\ | |
742 " jb 1b \n\t" | |
743 | |
744 | |
655 static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | 745 static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
656 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | 746 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
657 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW, | 747 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW, |
658 int16_t * lumMmxFilter, int16_t * chrMmxFilter) | 748 int16_t * lumMmxFilter, int16_t * chrMmxFilter) |
659 { | 749 { |
750 | 840 |
751 | 841 |
752 /** | 842 /** |
753 * vertical scale YV12 to RGB | 843 * vertical scale YV12 to RGB |
754 */ | 844 */ |
755 static inline void RENAME(yuv2rgbX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | 845 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
756 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | 846 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
757 uint8_t *dest, int dstW, int16_t * lumMmxFilter, int16_t * chrMmxFilter, int dstY) | 847 uint8_t *dest, int dstW, int16_t * lumMmxFilter, int16_t * chrMmxFilter, int dstY) |
758 { | 848 { |
759 switch(c->dstFormat) | 849 switch(c->dstFormat) |
760 { | 850 { |
829 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | 919 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) |
830 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | 920 : "%eax", "%ebx", "%ecx", "%edx", "%esi" |
831 ); | 921 ); |
832 } | 922 } |
833 break; | 923 break; |
924 case IMGFMT_YUY2: | |
925 { | |
926 asm volatile( | |
927 YSCALEYUV2PACKEDX | |
928 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
929 | |
930 "psraw $3, %%mm3 \n\t" | |
931 "psraw $3, %%mm4 \n\t" | |
932 "psraw $3, %%mm1 \n\t" | |
933 "psraw $3, %%mm7 \n\t" | |
934 WRITEYUY2 | |
935 | |
936 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
937 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
938 "r" (dest), "m" (dstW), | |
939 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
940 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
941 ); | |
942 } | |
943 break; | |
834 #endif | 944 #endif |
835 default: | 945 default: |
836 yuv2rgbXinC(c, lumFilter, lumSrc, lumFilterSize, | 946 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, |
837 chrFilter, chrSrc, chrFilterSize, | 947 chrFilter, chrSrc, chrFilterSize, |
838 dest, dstW, dstY); | 948 dest, dstW, dstY); |
839 break; | 949 break; |
840 } | 950 } |
841 } | 951 } |
842 | 952 |
843 /** | 953 /** |
844 * vertical bilinear scale YV12 to RGB | 954 * vertical bilinear scale YV12 to RGB |
845 */ | 955 */ |
846 static inline void RENAME(yuv2rgb2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, | 956 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, |
847 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) | 957 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) |
848 { | 958 { |
849 int yalpha1=yalpha^4095; | 959 int yalpha1=yalpha^4095; |
850 int uvalpha1=uvalpha^4095; | 960 int uvalpha1=uvalpha^4095; |
851 int i; | 961 int i; |
1122 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | 1232 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1123 "m" (yalpha1), "m" (uvalpha1) | 1233 "m" (yalpha1), "m" (uvalpha1) |
1124 : "%eax" | 1234 : "%eax" |
1125 ); | 1235 ); |
1126 return; | 1236 return; |
1237 case IMGFMT_YUY2: | |
1238 asm volatile( | |
1239 YSCALEYUV2PACKED | |
1240 WRITEYUY2 | |
1241 | |
1242 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | |
1243 "m" (yalpha1), "m" (uvalpha1) | |
1244 : "%eax" | |
1245 ); | |
1246 return; | |
1127 default: break; | 1247 default: break; |
1128 } | 1248 } |
1129 #endif //HAVE_MMX | 1249 #endif //HAVE_MMX |
1130 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_2_C) | 1250 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C) |
1131 } | 1251 } |
1132 | 1252 |
1133 /** | 1253 /** |
1134 * YV12 to RGB without scaling or interpolating | 1254 * YV12 to RGB without scaling or interpolating |
1135 */ | 1255 */ |
1136 static inline void RENAME(yuv2rgb1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, | 1256 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, |
1137 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y) | 1257 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y) |
1138 { | 1258 { |
1139 int uvalpha1=uvalpha^4095; | 1259 int uvalpha1=uvalpha^4095; |
1140 const int yalpha1=0; | 1260 const int yalpha1=0; |
1141 int i; | 1261 int i; |
1143 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 | 1263 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 |
1144 const int yalpha= 4096; //FIXME ... | 1264 const int yalpha= 4096; //FIXME ... |
1145 | 1265 |
1146 if(flags&SWS_FULL_CHR_H_INT) | 1266 if(flags&SWS_FULL_CHR_H_INT) |
1147 { | 1267 { |
1148 RENAME(yuv2rgb2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y); | 1268 RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y); |
1149 return; | 1269 return; |
1150 } | 1270 } |
1151 | 1271 |
1152 #ifdef HAVE_MMX | 1272 #ifdef HAVE_MMX |
1153 if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but it's a bit faster | 1273 if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but it's a bit faster |
1202 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | 1322 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1203 "m" (yalpha1), "m" (uvalpha1) | 1323 "m" (yalpha1), "m" (uvalpha1) |
1204 : "%eax" | 1324 : "%eax" |
1205 ); | 1325 ); |
1206 return; | 1326 return; |
1327 case IMGFMT_YUY2: | |
1328 asm volatile( | |
1329 YSCALEYUV2PACKED1 | |
1330 WRITEYUY2 | |
1331 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | |
1332 "m" (yalpha1), "m" (uvalpha1) | |
1333 : "%eax" | |
1334 ); | |
1335 return; | |
1207 } | 1336 } |
1208 } | 1337 } |
1209 else | 1338 else |
1210 { | 1339 { |
1211 switch(dstFormat) | 1340 switch(dstFormat) |
1258 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | 1387 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1259 "m" (yalpha1), "m" (uvalpha1) | 1388 "m" (yalpha1), "m" (uvalpha1) |
1260 : "%eax" | 1389 : "%eax" |
1261 ); | 1390 ); |
1262 return; | 1391 return; |
1392 case IMGFMT_YUY2: | |
1393 asm volatile( | |
1394 YSCALEYUV2PACKED1b | |
1395 WRITEYUY2 | |
1396 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | |
1397 "m" (yalpha1), "m" (uvalpha1) | |
1398 : "%eax" | |
1399 ); | |
1400 return; | |
1263 } | 1401 } |
1264 } | 1402 } |
1265 #endif | 1403 #endif |
1266 if( uvalpha < 2048 ) | 1404 if( uvalpha < 2048 ) |
1267 { | 1405 { |
1268 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_1_C) | 1406 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C) |
1269 }else{ | 1407 }else{ |
1270 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_1B_C) | 1408 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C) |
1271 } | 1409 } |
1272 } | 1410 } |
1273 | 1411 |
1274 //FIXME yuy2* can read up to 7 samples too much | 1412 //FIXME yuy2* can read up to 7 samples too much |
1275 | 1413 |
2531 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | 2669 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); |
2532 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB | 2670 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB |
2533 { | 2671 { |
2534 int chrAlpha= vChrFilter[2*dstY+1]; | 2672 int chrAlpha= vChrFilter[2*dstY+1]; |
2535 | 2673 |
2536 RENAME(yuv2rgb1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), | 2674 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), |
2537 dest, dstW, chrAlpha, dstFormat, flags, dstY); | 2675 dest, dstW, chrAlpha, dstFormat, flags, dstY); |
2538 } | 2676 } |
2539 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB | 2677 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB |
2540 { | 2678 { |
2541 int lumAlpha= vLumFilter[2*dstY+1]; | 2679 int lumAlpha= vLumFilter[2*dstY+1]; |
2542 int chrAlpha= vChrFilter[2*dstY+1]; | 2680 int chrAlpha= vChrFilter[2*dstY+1]; |
2543 | 2681 |
2544 RENAME(yuv2rgb2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), | 2682 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), |
2545 dest, dstW, lumAlpha, chrAlpha, dstY); | 2683 dest, dstW, lumAlpha, chrAlpha, dstY); |
2546 } | 2684 } |
2547 else //General RGB | 2685 else //General RGB |
2548 { | 2686 { |
2549 RENAME(yuv2rgbX)(c, | 2687 RENAME(yuv2packedX)(c, |
2550 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | 2688 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
2551 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | 2689 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
2552 dest, dstW, | 2690 dest, dstW, |
2553 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4, dstY); | 2691 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4, dstY); |
2554 } | 2692 } |
2569 } | 2707 } |
2570 else | 2708 else |
2571 { | 2709 { |
2572 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | 2710 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); |
2573 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | 2711 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); |
2574 yuv2rgbXinC(c, | 2712 yuv2packedXinC(c, |
2575 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | 2713 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
2576 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | 2714 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
2577 dest, dstW, dstY); | 2715 dest, dstW, dstY); |
2578 } | 2716 } |
2579 } | 2717 } |