comparison postproc/rgb2rgb_template.c @ 2702:440312d953a8

yv12toyuy2 in MMX cleanup
author michael
date Mon, 05 Nov 2001 01:21:05 +0000
parents 9b47bc409083
children b4c6699d3893
comparison
equal deleted inserted replaced
2701:9b47bc409083 2702:440312d953a8
1 /* 1 /*
2 * 2 *
3 * rgb2rgb.c, Software RGB to RGB convertor 3 * rgb2rgb.c, Software RGB to RGB convertor
4 * Written by Nick Kurshev. 4 * Written by Nick Kurshev.
5 * palette stuff & yuv stuff by Michael
5 */ 6 */
6 #include <inttypes.h> 7 #include <inttypes.h>
7 #include "../config.h" 8 #include "../config.h"
8 #include "rgb2rgb.h" 9 #include "rgb2rgb.h"
9 #include "../mmx_defs.h" 10 #include "../mmx_defs.h"
183 } 184 }
184 185
185 /** 186 /**
186 * Pallete is assumed to contain bgr32 187 * Pallete is assumed to contain bgr32
187 */ 188 */
188 void palette8torgb32(uint8_t *src, uint8_t *dst, int src_size, uint8_t *palette) 189 void palette8torgb32(uint8_t *src, uint8_t *dst, int num_pixels, uint8_t *palette)
189 { 190 {
190 int i; 191 int i;
191 for(i=0; i<src_size; i++) 192 for(i=0; i<num_pixels; i++)
192 ((uint32_t *)dst)[i] = ((uint32_t *)palette)[ src[i] ]; 193 ((uint32_t *)dst)[i] = ((uint32_t *)palette)[ src[i] ];
193 } 194 }
194 195
195 /** 196 /**
196 * Pallete is assumed to contain bgr32 197 * Pallete is assumed to contain bgr32
197 */ 198 */
198 void palette8torgb24(uint8_t *src, uint8_t *dst, int src_size, uint8_t *palette) 199 void palette8torgb24(uint8_t *src, uint8_t *dst, int num_pixels, uint8_t *palette)
199 { 200 {
200 int i; 201 int i;
201 /* 202 /*
202 writes 1 byte o much and might cause alignment issues on some architectures? 203 writes 1 byte o much and might cause alignment issues on some architectures?
203 for(i=0; i<src_size; i++) 204 for(i=0; i<num_pixels; i++)
204 ((uint32_t *)(&dst[i*3])) = ((uint32_t *)palette)[ src[i] ]; 205 ((uint32_t *)(&dst[i*3])) = ((uint32_t *)palette)[ src[i] ];
205 */ 206 */
206 for(i=0; i<src_size; i++) 207 for(i=0; i<num_pixels; i++)
207 { 208 {
208 //FIXME slow? 209 //FIXME slow?
209 dst[0]= palette[ src[i]*4+0 ]; 210 dst[0]= palette[ src[i]*4+0 ];
210 dst[1]= palette[ src[i]*4+1 ]; 211 dst[1]= palette[ src[i]*4+1 ];
211 dst[2]= palette[ src[i]*4+2 ]; 212 dst[2]= palette[ src[i]*4+2 ];
212 dst+= 3; 213 dst+= 3;
213 } 214 }
214 } 215 }
215 216
216 void rgb32to16(uint8_t *src, uint8_t *dst, int src_size) 217 void rgb32to16(uint8_t *src, uint8_t *dst, int num_pixels)
217 { 218 {
218 int i; 219 int i;
219 for(i=0; i<src_size; i+=4) 220 for(i=0; i<num_pixels; i+=4)
220 { 221 {
221 const int b= src[i+0]; 222 const int b= src[i+0];
222 const int g= src[i+1]; 223 const int g= src[i+1];
223 const int r= src[i+2]; 224 const int r= src[i+2];
224 225
225 ((uint16_t *)dst)[i]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); 226 ((uint16_t *)dst)[i]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
226 } 227 }
227 } 228 }
228 229
229 void rgb32to15(uint8_t *src, uint8_t *dst, int src_size) 230 void rgb32to15(uint8_t *src, uint8_t *dst, int num_pixels)
230 { 231 {
231 int i; 232 int i;
232 for(i=0; i<src_size; i+=4) 233 for(i=0; i<num_pixels; i+=4)
233 { 234 {
234 const int b= src[i+0]; 235 const int b= src[i+0];
235 const int g= src[i+1]; 236 const int g= src[i+1];
236 const int r= src[i+2]; 237 const int r= src[i+2];
237 238
241 242
242 243
243 /** 244 /**
244 * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette 245 * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette
245 */ 246 */
246 void palette8torgb16(uint8_t *src, uint8_t *dst, int src_size, uint8_t *palette) 247 void palette8torgb16(uint8_t *src, uint8_t *dst, int num_pixels, uint8_t *palette)
247 { 248 {
248 int i; 249 int i;
249 for(i=0; i<src_size; i++) 250 for(i=0; i<num_pixels; i++)
250 ((uint16_t *)dst)[i] = ((uint16_t *)palette)[ src[i] ]; 251 ((uint16_t *)dst)[i] = ((uint16_t *)palette)[ src[i] ];
251 } 252 }
252 253
253 /** 254 /**
254 * Pallete is assumed to contain bgr15, see rgb32to15 to convert the palette 255 * Pallete is assumed to contain bgr15, see rgb32to15 to convert the palette
255 */ 256 */
256 void palette8torgb15(uint8_t *src, uint8_t *dst, int src_size, uint8_t *palette) 257 void palette8torgb15(uint8_t *src, uint8_t *dst, int num_pixels, uint8_t *palette)
257 { 258 {
258 int i; 259 int i;
259 for(i=0; i<src_size; i++) 260 for(i=0; i<num_pixels; i++)
260 ((uint16_t *)dst)[i] = ((uint16_t *)palette)[ src[i] ]; 261 ((uint16_t *)dst)[i] = ((uint16_t *)palette)[ src[i] ];
261 } 262 }
262 263 /**
263 void yv12toyuy2(uint8_t *ysrc, uint8_t *usrc, uint8_t *vsrc, uint8_t *dst, int src_size) 264 *
264 { 265 * num_pixels must be a multiple of 16 for the MMX version
265 int i; 266 */
266 src_size>>=1; 267 void yv12toyuy2(uint8_t *ysrc, uint8_t *usrc, uint8_t *vsrc, uint8_t *dst, int num_pixels)
267 for(i=0; i<src_size; i++) 268 {
269 #ifdef HAVE_MMX
270 asm volatile(
271 "xorl %%eax, %%eax \n\t"
272 "1: \n\t"
273 PREFETCH" 32(%1, %%eax, 2) \n\t"
274 PREFETCH" 32(%2, %%eax) \n\t"
275 PREFETCH" 32(%3, %%eax) \n\t"
276 "movq (%2, %%eax), %%mm0 \n\t" // U(0)
277 "movq %%mm0, %%mm2 \n\t" // U(0)
278 "movq (%3, %%eax), %%mm1 \n\t" // V(0)
279 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
280 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
281
282 "movq (%1, %%eax,2), %%mm3 \n\t" // Y(0)
283 "movq 8(%1, %%eax,2), %%mm5 \n\t" // Y(8)
284 "movq %%mm3, %%mm4 \n\t" // Y(0)
285 "movq %%mm5, %%mm6 \n\t" // Y(8)
286 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
287 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4)
288 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
289 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)
290
291 MOVNTQ" %%mm3, (%0, %%eax, 4) \n\t"
292 MOVNTQ" %%mm4, 8(%0, %%eax, 4) \n\t"
293 MOVNTQ" %%mm5, 16(%0, %%eax, 4) \n\t"
294 MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t"
295
296 "addl $8, %%eax \n\t"
297 "cmpl %4, %%eax \n\t"
298 " jb 1b \n\t"
299 EMMS" \n\t"
300 SFENCE
301 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (num_pixels>>1)
302 : "memory", "%eax"
303 );
304
305 #else
306 int i;
307 num_pixels>>=1;
308 for(i=0; i<num_pixels; i++)
268 { 309 {
269 dst[4*i+0] = ysrc[2*i+0]; 310 dst[4*i+0] = ysrc[2*i+0];
270 dst[4*i+1] = usrc[i]; 311 dst[4*i+1] = usrc[i];
271 dst[4*i+2] = ysrc[2*i+1]; 312 dst[4*i+2] = ysrc[2*i+1];
272 dst[4*i+3] = vsrc[i]; 313 dst[4*i+3] = vsrc[i];
273 } 314 }
274 315 #endif
275 } 316 }
276 317
277 void yuy2toyv12(uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int src_size) 318 void yuy2toyv12(uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int num_pixels)
278 { 319 {
279 int i; 320 int i;
280 src_size>>=1; 321 num_pixels>>=1;
281 for(i=0; i<src_size; i++) 322 for(i=0; i<num_pixels; i++)
282 { 323 {
283 ydst[2*i+0] = src[4*i+0]; 324 ydst[2*i+0] = src[4*i+0];
284 udst[i] = src[4*i+1]; 325 udst[i] = src[4*i+1];
285 ydst[2*i+1] = src[4*i+2]; 326 ydst[2*i+1] = src[4*i+2];
286 vdst[i] = src[4*i+3]; 327 vdst[i] = src[4*i+3];