Mercurial > mplayer.hg
comparison libswscale/yuv2rgb_template.c @ 27158:65b8334df960
spelling/grammar/wording overhaul
author | diego |
---|---|
date | Fri, 04 Jul 2008 13:49:45 +0000 |
parents | 383b83bd14c6 |
children | 29d9ec9b1f9d |
comparison
equal
deleted
inserted
replaced
27157:e2797c291ba9 | 27158:65b8334df960 |
---|---|
1 /* | 1 /* |
2 * yuv2rgb_mmx.c, Software YUV to RGB converter with Intel MMX "technology" | 2 * yuv2rgb_mmx.c, software YUV to RGB converter with Intel MMX "technology" |
3 * | 3 * |
4 * Copyright (C) 2000, Silicon Integrated System Corp. | 4 * Copyright (C) 2000, Silicon Integrated System Corp. |
5 * | 5 * |
6 * Author: Olie Lho <ollie@sis.com.tw> | 6 * Author: Olie Lho <ollie@sis.com.tw> |
7 * | 7 * |
29 #undef MOVNTQ | 29 #undef MOVNTQ |
30 #undef EMMS | 30 #undef EMMS |
31 #undef SFENCE | 31 #undef SFENCE |
32 | 32 |
33 #ifdef HAVE_3DNOW | 33 #ifdef HAVE_3DNOW |
34 /* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ | 34 /* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */ |
35 #define EMMS "femms" | 35 #define EMMS "femms" |
36 #else | 36 #else |
37 #define EMMS "emms" | 37 #define EMMS "emms" |
38 #endif | 38 #endif |
39 | 39 |
145 | 145 |
146 b5Dither= ff_dither8[y&1]; | 146 b5Dither= ff_dither8[y&1]; |
147 g6Dither= ff_dither4[y&1]; | 147 g6Dither= ff_dither4[y&1]; |
148 g5Dither= ff_dither8[y&1]; | 148 g5Dither= ff_dither8[y&1]; |
149 r5Dither= ff_dither8[(y+1)&1]; | 149 r5Dither= ff_dither8[(y+1)&1]; |
150 /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8 | 150 /* This MMX assembly code deals with a SINGLE scan line at a time, |
151 pixels in each iteration */ | 151 * it converts 8 pixels in each iteration. */ |
152 asm volatile ( | 152 asm volatile ( |
153 /* load data for start of next scan line */ | 153 /* load data for start of next scan line */ |
154 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ | 154 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ |
155 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ | 155 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ |
156 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ | 156 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ |
157 //".balign 16 \n\t" | 157 //".balign 16 \n\t" |
158 "1: \n\t" | 158 "1: \n\t" |
159 /* no speed diference on my p3@500 with prefetch, | 159 /* No speed difference on my p3@500 with prefetch, |
160 * if it is faster for anyone with -benchmark then tell me | 160 * if it is faster for anyone with -benchmark then tell me. |
161 PREFETCH" 64(%0) \n\t" | 161 PREFETCH" 64(%0) \n\t" |
162 PREFETCH" 64(%1) \n\t" | 162 PREFETCH" 64(%1) \n\t" |
163 PREFETCH" 64(%2) \n\t" | 163 PREFETCH" 64(%2) \n\t" |
164 */ | 164 */ |
165 YUV2RGB | 165 YUV2RGB |
178 "pxor %%mm4, %%mm4;" /* zero mm4 */ | 178 "pxor %%mm4, %%mm4;" /* zero mm4 */ |
179 | 179 |
180 "movq %%mm0, %%mm5;" /* Copy B7-B0 */ | 180 "movq %%mm0, %%mm5;" /* Copy B7-B0 */ |
181 "movq %%mm2, %%mm7;" /* Copy G7-G0 */ | 181 "movq %%mm2, %%mm7;" /* Copy G7-G0 */ |
182 | 182 |
183 /* convert rgb24 plane to rgb16 pack for pixel 0-3 */ | 183 /* convert RGB24 plane to RGB16 pack for pixel 0-3 */ |
184 "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */ | 184 "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */ |
185 "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ | 185 "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ |
186 | 186 |
187 "psllw $3, %%mm2;" /* 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 */ | 187 "psllw $3, %%mm2;" /* 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 */ |
188 "por %%mm2, %%mm0;" /* r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 */ | 188 "por %%mm2, %%mm0;" /* r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 */ |
189 | 189 |
190 "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ | 190 "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ |
191 MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */ | 191 MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */ |
192 | 192 |
193 /* convert rgb24 plane to rgb16 pack for pixel 4-7 */ | 193 /* convert RGB24 plane to RGB16 pack for pixel 4-7 */ |
194 "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */ | 194 "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */ |
195 "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ | 195 "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ |
196 | 196 |
197 "psllw $3, %%mm7;" /* 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 */ | 197 "psllw $3, %%mm7;" /* 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 */ |
198 "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ | 198 "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ |
240 | 240 |
241 b5Dither= ff_dither8[y&1]; | 241 b5Dither= ff_dither8[y&1]; |
242 g6Dither= ff_dither4[y&1]; | 242 g6Dither= ff_dither4[y&1]; |
243 g5Dither= ff_dither8[y&1]; | 243 g5Dither= ff_dither8[y&1]; |
244 r5Dither= ff_dither8[(y+1)&1]; | 244 r5Dither= ff_dither8[(y+1)&1]; |
245 /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8 | 245 /* This MMX assembly code deals with a SINGLE scan line at a time, |
246 pixels in each iteration */ | 246 * it converts 8 pixels in each iteration. */ |
247 asm volatile ( | 247 asm volatile ( |
248 /* load data for start of next scan line */ | 248 /* load data for start of next scan line */ |
249 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ | 249 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ |
250 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ | 250 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ |
251 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ | 251 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ |
269 "pxor %%mm4, %%mm4;" /* zero mm4 */ | 269 "pxor %%mm4, %%mm4;" /* zero mm4 */ |
270 | 270 |
271 "movq %%mm0, %%mm5;" /* Copy B7-B0 */ | 271 "movq %%mm0, %%mm5;" /* Copy B7-B0 */ |
272 "movq %%mm2, %%mm7;" /* Copy G7-G0 */ | 272 "movq %%mm2, %%mm7;" /* Copy G7-G0 */ |
273 | 273 |
274 /* convert rgb24 plane to rgb15 pack for pixel 0-3 */ | 274 /* convert RGB24 plane to RGB15 pack for pixel 0-3 */ |
275 "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */ | 275 "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */ |
276 "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ | 276 "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ |
277 | 277 |
278 "psllw $2, %%mm2;" /* 0_0_0_0 0_0_g7g6 g5g4g3_0 0_0_0_0 */ | 278 "psllw $2, %%mm2;" /* 0_0_0_0 0_0_g7g6 g5g4g3_0 0_0_0_0 */ |
279 "por %%mm2, %%mm0;" /* 0_r7r6r5 r4r3g7g6 g5g4g3b7 b6b5b4b3 */ | 279 "por %%mm2, %%mm0;" /* 0_r7r6r5 r4r3g7g6 g5g4g3b7 b6b5b4b3 */ |
280 | 280 |
281 "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ | 281 "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ |
282 MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */ | 282 MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */ |
283 | 283 |
284 /* convert rgb24 plane to rgb15 pack for pixel 4-7 */ | 284 /* convert RGB24 plane to RGB15 pack for pixel 4-7 */ |
285 "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */ | 285 "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */ |
286 "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ | 286 "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ |
287 | 287 |
288 "psllw $2, %%mm7;" /* 0_0_0_0 0_0_g7g6 g5g4g3_0 0_0_0_0 */ | 288 "psllw $2, %%mm7;" /* 0_0_0_0 0_0_g7g6 g5g4g3_0 0_0_0_0 */ |
289 "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ | 289 "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ |
324 uint8_t *py = src[0] + y*srcStride[0]; | 324 uint8_t *py = src[0] + y*srcStride[0]; |
325 uint8_t *pu = src[1] + (y>>1)*srcStride[1]; | 325 uint8_t *pu = src[1] + (y>>1)*srcStride[1]; |
326 uint8_t *pv = src[2] + (y>>1)*srcStride[2]; | 326 uint8_t *pv = src[2] + (y>>1)*srcStride[2]; |
327 long index= -h_size/2; | 327 long index= -h_size/2; |
328 | 328 |
329 /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8 | 329 /* This MMX assembly code deals with a SINGLE scan line at a time, |
330 pixels in each iteration */ | 330 * it converts 8 pixels in each iteration. */ |
331 asm volatile ( | 331 asm volatile ( |
332 /* load data for start of next scan line */ | 332 /* load data for start of next scan line */ |
333 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ | 333 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ |
334 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ | 334 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ |
335 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ | 335 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ |
470 uint8_t *py = src[0] + y*srcStride[0]; | 470 uint8_t *py = src[0] + y*srcStride[0]; |
471 uint8_t *pu = src[1] + (y>>1)*srcStride[1]; | 471 uint8_t *pu = src[1] + (y>>1)*srcStride[1]; |
472 uint8_t *pv = src[2] + (y>>1)*srcStride[2]; | 472 uint8_t *pv = src[2] + (y>>1)*srcStride[2]; |
473 long index= -h_size/2; | 473 long index= -h_size/2; |
474 | 474 |
475 /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8 | 475 /* This MMX assembly code deals with a SINGLE scan line at a time, |
476 pixels in each iteration */ | 476 * it converts 8 pixels in each iteration. */ |
477 asm volatile ( | 477 asm volatile ( |
478 /* load data for start of next scan line */ | 478 /* load data for start of next scan line */ |
479 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ | 479 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ |
480 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ | 480 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ |
481 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ | 481 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ |