comparison postproc/swscale_template.c @ 7723:11492d5b0896

mmx yuy2 output
author michael
date Sun, 13 Oct 2002 17:23:02 +0000
parents c6aa14b47d03
children 772d6d27fd66
comparison
equal deleted inserted replaced
7722:a181875e0aa8 7723:11492d5b0896
105 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), 105 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
106 "r" (dest), "m" (dstW), 106 "r" (dest), "m" (dstW),
107 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) 107 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
108 : "%eax", "%ebx", "%ecx", "%edx", "%esi" 108 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
109 */ 109 */
110 #define YSCALEYUV2RGBX \ 110 #define YSCALEYUV2PACKEDX \
111 "xorl %%eax, %%eax \n\t"\ 111 "xorl %%eax, %%eax \n\t"\
112 ".balign 16 \n\t"\ 112 ".balign 16 \n\t"\
113 "1: \n\t"\ 113 "1: \n\t"\
114 "movl %1, %%edx \n\t" /* -chrFilterSize */\ 114 "movl %1, %%edx \n\t" /* -chrFilterSize */\
115 "movl %3, %%ebx \n\t" /* chrMmxFilter+chrFilterSize */\ 115 "movl %3, %%ebx \n\t" /* chrMmxFilter+chrFilterSize */\
142 "pmulhw %%mm0, %%mm5 \n\t"\ 142 "pmulhw %%mm0, %%mm5 \n\t"\
143 "paddw %%mm2, %%mm1 \n\t"\ 143 "paddw %%mm2, %%mm1 \n\t"\
144 "paddw %%mm5, %%mm7 \n\t"\ 144 "paddw %%mm5, %%mm7 \n\t"\
145 "addl $1, %%edx \n\t"\ 145 "addl $1, %%edx \n\t"\
146 " jnz 2b \n\t"\ 146 " jnz 2b \n\t"\
147 \ 147
148
149 #define YSCALEYUV2RGBX \
150 YSCALEYUV2PACKEDX\
148 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ 151 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
149 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ 152 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
150 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ 153 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
151 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ 154 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
152 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ 155 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
232 "paddw %%mm4, %%mm2 \n\t"\ 235 "paddw %%mm4, %%mm2 \n\t"\
233 "paddw %%mm2, %%mm1 \n\t" /* G*/\ 236 "paddw %%mm2, %%mm1 \n\t" /* G*/\
234 \ 237 \
235 "packuswb %%mm1, %%mm1 \n\t" 238 "packuswb %%mm1, %%mm1 \n\t"
236 239
240 #define YSCALEYUV2PACKED \
241 "movd %6, %%mm6 \n\t" /*yalpha1*/\
242 "punpcklwd %%mm6, %%mm6 \n\t"\
243 "punpcklwd %%mm6, %%mm6 \n\t"\
244 "psraw $3, %%mm6 \n\t"\
245 "movq %%mm6, 3968(%2) \n\t"\
246 "movd %7, %%mm5 \n\t" /*uvalpha1*/\
247 "punpcklwd %%mm5, %%mm5 \n\t"\
248 "punpcklwd %%mm5, %%mm5 \n\t"\
249 "psraw $3, %%mm5 \n\t"\
250 "movq %%mm5, 3976(%2) \n\t"\
251 "xorl %%eax, %%eax \n\t"\
252 ".balign 16 \n\t"\
253 "1: \n\t"\
254 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\
255 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\
256 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
257 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
258 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
259 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
260 "movq 3976(%2), %%mm0 \n\t"\
261 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
262 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
263 "psraw $7, %%mm3 \n\t" /* uvbuf1[eax] >>7*/\
264 "psraw $7, %%mm4 \n\t" /* uvbuf1[eax+2048] >>7*/\
265 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
266 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
267 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\
268 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\
269 "movq 8(%0, %%eax, 2), %%mm6 \n\t" /*buf0[eax]*/\
270 "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\
271 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
272 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
273 "pmulhw 3968(%2), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
274 "pmulhw 3968(%2), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
275 "psraw $7, %%mm1 \n\t" /* buf1[eax] >>7*/\
276 "psraw $7, %%mm7 \n\t" /* buf1[eax+4] >>7*/\
277 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
278 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
279
237 #define YSCALEYUV2RGB \ 280 #define YSCALEYUV2RGB \
238 "movd %6, %%mm6 \n\t" /*yalpha1*/\ 281 "movd %6, %%mm6 \n\t" /*yalpha1*/\
239 "punpcklwd %%mm6, %%mm6 \n\t"\ 282 "punpcklwd %%mm6, %%mm6 \n\t"\
240 "punpcklwd %%mm6, %%mm6 \n\t"\ 283 "punpcklwd %%mm6, %%mm6 \n\t"\
241 "movq %%mm6, 3968(%2) \n\t"\ 284 "movq %%mm6, 3968(%2) \n\t"\
304 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ 347 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
305 "packuswb %%mm0, %%mm2 \n\t"\ 348 "packuswb %%mm0, %%mm2 \n\t"\
306 "packuswb %%mm6, %%mm5 \n\t"\ 349 "packuswb %%mm6, %%mm5 \n\t"\
307 "packuswb %%mm3, %%mm4 \n\t"\ 350 "packuswb %%mm3, %%mm4 \n\t"\
308 "pxor %%mm7, %%mm7 \n\t" 351 "pxor %%mm7, %%mm7 \n\t"
309 352
353 #define YSCALEYUV2PACKED1 \
354 "xorl %%eax, %%eax \n\t"\
355 ".balign 16 \n\t"\
356 "1: \n\t"\
357 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\
358 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
359 "psraw $7, %%mm3 \n\t" \
360 "psraw $7, %%mm4 \n\t" \
361 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\
362 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\
363 "psraw $7, %%mm1 \n\t" \
364 "psraw $7, %%mm7 \n\t" \
365
310 #define YSCALEYUV2RGB1 \ 366 #define YSCALEYUV2RGB1 \
311 "xorl %%eax, %%eax \n\t"\ 367 "xorl %%eax, %%eax \n\t"\
312 ".balign 16 \n\t"\ 368 ".balign 16 \n\t"\
313 "1: \n\t"\ 369 "1: \n\t"\
314 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ 370 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\
353 "packuswb %%mm0, %%mm2 \n\t"\ 409 "packuswb %%mm0, %%mm2 \n\t"\
354 "packuswb %%mm6, %%mm5 \n\t"\ 410 "packuswb %%mm6, %%mm5 \n\t"\
355 "packuswb %%mm3, %%mm4 \n\t"\ 411 "packuswb %%mm3, %%mm4 \n\t"\
356 "pxor %%mm7, %%mm7 \n\t" 412 "pxor %%mm7, %%mm7 \n\t"
357 413
414 #define YSCALEYUV2PACKED1b \
415 "xorl %%eax, %%eax \n\t"\
416 ".balign 16 \n\t"\
417 "1: \n\t"\
418 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\
419 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\
420 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
421 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
422 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
423 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
424 "psrlw $8, %%mm3 \n\t" \
425 "psrlw $8, %%mm4 \n\t" \
426 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\
427 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\
428 "psraw $7, %%mm1 \n\t" \
429 "psraw $7, %%mm7 \n\t"
430
358 // do vertical chrominance interpolation 431 // do vertical chrominance interpolation
359 #define YSCALEYUV2RGB1b \ 432 #define YSCALEYUV2RGB1b \
360 "xorl %%eax, %%eax \n\t"\ 433 "xorl %%eax, %%eax \n\t"\
361 ".balign 16 \n\t"\ 434 ".balign 16 \n\t"\
362 "1: \n\t"\ 435 "1: \n\t"\
650 #else 723 #else
651 #undef WRITEBGR24 724 #undef WRITEBGR24
652 #define WRITEBGR24 WRITEBGR24MMX 725 #define WRITEBGR24 WRITEBGR24MMX
653 #endif 726 #endif
654 727
728 #define WRITEYUY2 \
729 "packuswb %%mm3, %%mm3 \n\t"\
730 "packuswb %%mm4, %%mm4 \n\t"\
731 "packuswb %%mm7, %%mm1 \n\t"\
732 "punpcklbw %%mm4, %%mm3 \n\t"\
733 "movq %%mm1, %%mm7 \n\t"\
734 "punpcklbw %%mm3, %%mm1 \n\t"\
735 "punpckhbw %%mm3, %%mm7 \n\t"\
736 \
737 MOVNTQ(%%mm1, (%4, %%eax, 2))\
738 MOVNTQ(%%mm7, 8(%4, %%eax, 2))\
739 \
740 "addl $8, %%eax \n\t"\
741 "cmpl %5, %%eax \n\t"\
742 " jb 1b \n\t"
743
744
655 static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, 745 static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
656 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, 746 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
657 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW, 747 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW,
658 int16_t * lumMmxFilter, int16_t * chrMmxFilter) 748 int16_t * lumMmxFilter, int16_t * chrMmxFilter)
659 { 749 {
750 840
751 841
752 /** 842 /**
753 * vertical scale YV12 to RGB 843 * vertical scale YV12 to RGB
754 */ 844 */
755 static inline void RENAME(yuv2rgbX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, 845 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
756 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, 846 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
757 uint8_t *dest, int dstW, int16_t * lumMmxFilter, int16_t * chrMmxFilter, int dstY) 847 uint8_t *dest, int dstW, int16_t * lumMmxFilter, int16_t * chrMmxFilter, int dstY)
758 { 848 {
759 switch(c->dstFormat) 849 switch(c->dstFormat)
760 { 850 {
829 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) 919 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
830 : "%eax", "%ebx", "%ecx", "%edx", "%esi" 920 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
831 ); 921 );
832 } 922 }
833 break; 923 break;
924 case IMGFMT_YUY2:
925 {
926 asm volatile(
927 YSCALEYUV2PACKEDX
928 /* mm1, mm7 = luma; mm3 = U; mm4 = V */
929
930 "psraw $3, %%mm3 \n\t"
931 "psraw $3, %%mm4 \n\t"
932 "psraw $3, %%mm1 \n\t"
933 "psraw $3, %%mm7 \n\t"
934 WRITEYUY2
935
936 :: "m" (-lumFilterSize), "m" (-chrFilterSize),
937 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
938 "r" (dest), "m" (dstW),
939 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
940 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
941 );
942 }
943 break;
834 #endif 944 #endif
835 default: 945 default:
836 yuv2rgbXinC(c, lumFilter, lumSrc, lumFilterSize, 946 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
837 chrFilter, chrSrc, chrFilterSize, 947 chrFilter, chrSrc, chrFilterSize,
838 dest, dstW, dstY); 948 dest, dstW, dstY);
839 break; 949 break;
840 } 950 }
841 } 951 }
842 952
843 /** 953 /**
844 * vertical bilinear scale YV12 to RGB 954 * vertical bilinear scale YV12 to RGB
845 */ 955 */
846 static inline void RENAME(yuv2rgb2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, 956 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
847 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) 957 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
848 { 958 {
849 int yalpha1=yalpha^4095; 959 int yalpha1=yalpha^4095;
850 int uvalpha1=uvalpha^4095; 960 int uvalpha1=uvalpha^4095;
851 int i; 961 int i;
1122 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 1232 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1123 "m" (yalpha1), "m" (uvalpha1) 1233 "m" (yalpha1), "m" (uvalpha1)
1124 : "%eax" 1234 : "%eax"
1125 ); 1235 );
1126 return; 1236 return;
1237 case IMGFMT_YUY2:
1238 asm volatile(
1239 YSCALEYUV2PACKED
1240 WRITEYUY2
1241
1242 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1243 "m" (yalpha1), "m" (uvalpha1)
1244 : "%eax"
1245 );
1246 return;
1127 default: break; 1247 default: break;
1128 } 1248 }
1129 #endif //HAVE_MMX 1249 #endif //HAVE_MMX
1130 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_2_C) 1250 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C)
1131 } 1251 }
1132 1252
1133 /** 1253 /**
1134 * YV12 to RGB without scaling or interpolating 1254 * YV12 to RGB without scaling or interpolating
1135 */ 1255 */
1136 static inline void RENAME(yuv2rgb1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, 1256 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
1137 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y) 1257 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
1138 { 1258 {
1139 int uvalpha1=uvalpha^4095; 1259 int uvalpha1=uvalpha^4095;
1140 const int yalpha1=0; 1260 const int yalpha1=0;
1141 int i; 1261 int i;
1143 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 1263 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
1144 const int yalpha= 4096; //FIXME ... 1264 const int yalpha= 4096; //FIXME ...
1145 1265
1146 if(flags&SWS_FULL_CHR_H_INT) 1266 if(flags&SWS_FULL_CHR_H_INT)
1147 { 1267 {
1148 RENAME(yuv2rgb2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y); 1268 RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
1149 return; 1269 return;
1150 } 1270 }
1151 1271
1152 #ifdef HAVE_MMX 1272 #ifdef HAVE_MMX
1153 if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but it's a bit faster 1273 if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but it's a bit faster
1202 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 1322 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1203 "m" (yalpha1), "m" (uvalpha1) 1323 "m" (yalpha1), "m" (uvalpha1)
1204 : "%eax" 1324 : "%eax"
1205 ); 1325 );
1206 return; 1326 return;
1327 case IMGFMT_YUY2:
1328 asm volatile(
1329 YSCALEYUV2PACKED1
1330 WRITEYUY2
1331 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1332 "m" (yalpha1), "m" (uvalpha1)
1333 : "%eax"
1334 );
1335 return;
1207 } 1336 }
1208 } 1337 }
1209 else 1338 else
1210 { 1339 {
1211 switch(dstFormat) 1340 switch(dstFormat)
1258 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 1387 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1259 "m" (yalpha1), "m" (uvalpha1) 1388 "m" (yalpha1), "m" (uvalpha1)
1260 : "%eax" 1389 : "%eax"
1261 ); 1390 );
1262 return; 1391 return;
1392 case IMGFMT_YUY2:
1393 asm volatile(
1394 YSCALEYUV2PACKED1b
1395 WRITEYUY2
1396 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1397 "m" (yalpha1), "m" (uvalpha1)
1398 : "%eax"
1399 );
1400 return;
1263 } 1401 }
1264 } 1402 }
1265 #endif 1403 #endif
1266 if( uvalpha < 2048 ) 1404 if( uvalpha < 2048 )
1267 { 1405 {
1268 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_1_C) 1406 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
1269 }else{ 1407 }else{
1270 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_1B_C) 1408 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
1271 } 1409 }
1272 } 1410 }
1273 1411
1274 //FIXME yuy2* can read up to 7 samples too many 1412 //FIXME yuy2* can read up to 7 samples too many
1275 1413
2531 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); 2669 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
2532 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB 2670 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB
2533 { 2671 {
2534 int chrAlpha= vChrFilter[2*dstY+1]; 2672 int chrAlpha= vChrFilter[2*dstY+1];
2535 2673
2536 RENAME(yuv2rgb1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), 2674 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
2537 dest, dstW, chrAlpha, dstFormat, flags, dstY); 2675 dest, dstW, chrAlpha, dstFormat, flags, dstY);
2538 } 2676 }
2539 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB 2677 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB
2540 { 2678 {
2541 int lumAlpha= vLumFilter[2*dstY+1]; 2679 int lumAlpha= vLumFilter[2*dstY+1];
2542 int chrAlpha= vChrFilter[2*dstY+1]; 2680 int chrAlpha= vChrFilter[2*dstY+1];
2543 2681
2544 RENAME(yuv2rgb2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), 2682 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
2545 dest, dstW, lumAlpha, chrAlpha, dstY); 2683 dest, dstW, lumAlpha, chrAlpha, dstY);
2546 } 2684 }
2547 else //General RGB 2685 else //General RGB
2548 { 2686 {
2549 RENAME(yuv2rgbX)(c, 2687 RENAME(yuv2packedX)(c,
2550 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, 2688 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2551 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, 2689 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2552 dest, dstW, 2690 dest, dstW,
2553 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4, dstY); 2691 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4, dstY);
2554 } 2692 }
2569 } 2707 }
2570 else 2708 else
2571 { 2709 {
2572 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); 2710 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2573 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); 2711 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
2574 yuv2rgbXinC(c, 2712 yuv2packedXinC(c,
2575 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, 2713 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2576 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, 2714 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2577 dest, dstW, dstY); 2715 dest, dstW, dstY);
2578 } 2716 }
2579 } 2717 }