Mercurial > mplayer.hg
comparison postproc/rgb2rgb_template.c @ 2702:440312d953a8
yv12toyuy2 in MMX
cleanup
author | michael |
---|---|
date | Mon, 05 Nov 2001 01:21:05 +0000 |
parents | 9b47bc409083 |
children | b4c6699d3893 |
comparison
equal
deleted
inserted
replaced
2701:9b47bc409083 | 2702:440312d953a8 |
---|---|
1 /* | 1 /* |
2 * | 2 * |
3 * rgb2rgb.c, Software RGB to RGB convertor | 3 * rgb2rgb.c, Software RGB to RGB convertor |
4 * Written by Nick Kurshev. | 4 * Written by Nick Kurshev. |
5 * palette stuff & yuv stuff by Michael | |
5 */ | 6 */ |
6 #include <inttypes.h> | 7 #include <inttypes.h> |
7 #include "../config.h" | 8 #include "../config.h" |
8 #include "rgb2rgb.h" | 9 #include "rgb2rgb.h" |
9 #include "../mmx_defs.h" | 10 #include "../mmx_defs.h" |
183 } | 184 } |
184 | 185 |
185 /** | 186 /** |
186 * Palette is assumed to contain bgr32 | 187 * Palette is assumed to contain bgr32 |
187 */ | 188 */ |
188 void palette8torgb32(uint8_t *src, uint8_t *dst, int src_size, uint8_t *palette) | 189 void palette8torgb32(uint8_t *src, uint8_t *dst, int num_pixels, uint8_t *palette) |
189 { | 190 { |
190 int i; | 191 int i; |
191 for(i=0; i<src_size; i++) | 192 for(i=0; i<num_pixels; i++) |
192 ((uint32_t *)dst)[i] = ((uint32_t *)palette)[ src[i] ]; | 193 ((uint32_t *)dst)[i] = ((uint32_t *)palette)[ src[i] ]; |
193 } | 194 } |
194 | 195 |
195 /** | 196 /** |
196 * Palette is assumed to contain bgr32 | 197 * Palette is assumed to contain bgr32 |
197 */ | 198 */ |
198 void palette8torgb24(uint8_t *src, uint8_t *dst, int src_size, uint8_t *palette) | 199 void palette8torgb24(uint8_t *src, uint8_t *dst, int num_pixels, uint8_t *palette) |
199 { | 200 { |
200 int i; | 201 int i; |
201 /* | 202 /* |
202 writes 1 byte too much and might cause alignment issues on some architectures? | 203 writes 1 byte too much and might cause alignment issues on some architectures? |
203 for(i=0; i<src_size; i++) | 204 for(i=0; i<num_pixels; i++) |
204 ((uint32_t *)(&dst[i*3])) = ((uint32_t *)palette)[ src[i] ]; | 205 ((uint32_t *)(&dst[i*3])) = ((uint32_t *)palette)[ src[i] ]; |
205 */ | 206 */ |
206 for(i=0; i<src_size; i++) | 207 for(i=0; i<num_pixels; i++) |
207 { | 208 { |
208 //FIXME slow? | 209 //FIXME slow? |
209 dst[0]= palette[ src[i]*4+0 ]; | 210 dst[0]= palette[ src[i]*4+0 ]; |
210 dst[1]= palette[ src[i]*4+1 ]; | 211 dst[1]= palette[ src[i]*4+1 ]; |
211 dst[2]= palette[ src[i]*4+2 ]; | 212 dst[2]= palette[ src[i]*4+2 ]; |
212 dst+= 3; | 213 dst+= 3; |
213 } | 214 } |
214 } | 215 } |
215 | 216 |
216 void rgb32to16(uint8_t *src, uint8_t *dst, int src_size) | 217 void rgb32to16(uint8_t *src, uint8_t *dst, int num_pixels) |
217 { | 218 { |
218 int i; | 219 int i; |
219 for(i=0; i<src_size; i+=4) | 220 for(i=0; i<num_pixels; i+=4) |
220 { | 221 { |
221 const int b= src[i+0]; | 222 const int b= src[i+0]; |
222 const int g= src[i+1]; | 223 const int g= src[i+1]; |
223 const int r= src[i+2]; | 224 const int r= src[i+2]; |
224 | 225 |
225 ((uint16_t *)dst)[i]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | 226 ((uint16_t *)dst)[i]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
226 } | 227 } |
227 } | 228 } |
228 | 229 |
229 void rgb32to15(uint8_t *src, uint8_t *dst, int src_size) | 230 void rgb32to15(uint8_t *src, uint8_t *dst, int num_pixels) |
230 { | 231 { |
231 int i; | 232 int i; |
232 for(i=0; i<src_size; i+=4) | 233 for(i=0; i<num_pixels; i+=4) |
233 { | 234 { |
234 const int b= src[i+0]; | 235 const int b= src[i+0]; |
235 const int g= src[i+1]; | 236 const int g= src[i+1]; |
236 const int r= src[i+2]; | 237 const int r= src[i+2]; |
237 | 238 |
241 | 242 |
242 | 243 |
243 /** | 244 /** |
244 * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette | 245 * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette |
245 */ | 246 */ |
246 void palette8torgb16(uint8_t *src, uint8_t *dst, int src_size, uint8_t *palette) | 247 void palette8torgb16(uint8_t *src, uint8_t *dst, int num_pixels, uint8_t *palette) |
247 { | 248 { |
248 int i; | 249 int i; |
249 for(i=0; i<src_size; i++) | 250 for(i=0; i<num_pixels; i++) |
250 ((uint16_t *)dst)[i] = ((uint16_t *)palette)[ src[i] ]; | 251 ((uint16_t *)dst)[i] = ((uint16_t *)palette)[ src[i] ]; |
251 } | 252 } |
252 | 253 |
253 /** | 254 /** |
254 * Palette is assumed to contain bgr15, see rgb32to15 to convert the palette | 255 * Palette is assumed to contain bgr15, see rgb32to15 to convert the palette |
255 */ | 256 */ |
256 void palette8torgb15(uint8_t *src, uint8_t *dst, int src_size, uint8_t *palette) | 257 void palette8torgb15(uint8_t *src, uint8_t *dst, int num_pixels, uint8_t *palette) |
257 { | 258 { |
258 int i; | 259 int i; |
259 for(i=0; i<src_size; i++) | 260 for(i=0; i<num_pixels; i++) |
260 ((uint16_t *)dst)[i] = ((uint16_t *)palette)[ src[i] ]; | 261 ((uint16_t *)dst)[i] = ((uint16_t *)palette)[ src[i] ]; |
261 } | 262 } |
262 | 263 /** |
263 void yv12toyuy2(uint8_t *ysrc, uint8_t *usrc, uint8_t *vsrc, uint8_t *dst, int src_size) | 264 * |
264 { | 265 * num_pixels must be a multiple of 16 for the MMX version |
265 int i; | 266 */ |
266 src_size>>=1; | 267 void yv12toyuy2(uint8_t *ysrc, uint8_t *usrc, uint8_t *vsrc, uint8_t *dst, int num_pixels) |
267 for(i=0; i<src_size; i++) | 268 { |
269 #ifdef HAVE_MMX | |
270 asm volatile( | |
271 "xorl %%eax, %%eax \n\t" | |
272 "1: \n\t" | |
273 PREFETCH" 32(%1, %%eax, 2) \n\t" | |
274 PREFETCH" 32(%2, %%eax) \n\t" | |
275 PREFETCH" 32(%3, %%eax) \n\t" | |
276 "movq (%2, %%eax), %%mm0 \n\t" // U(0) | |
277 "movq %%mm0, %%mm2 \n\t" // U(0) | |
278 "movq (%3, %%eax), %%mm1 \n\t" // V(0) | |
279 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
280 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) | |
281 | |
282 "movq (%1, %%eax,2), %%mm3 \n\t" // Y(0) | |
283 "movq 8(%1, %%eax,2), %%mm5 \n\t" // Y(8) | |
284 "movq %%mm3, %%mm4 \n\t" // Y(0) | |
285 "movq %%mm5, %%mm6 \n\t" // Y(8) | |
286 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) | |
287 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) | |
288 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) | |
289 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) | |
290 | |
291 MOVNTQ" %%mm3, (%0, %%eax, 4) \n\t" | |
292 MOVNTQ" %%mm4, 8(%0, %%eax, 4) \n\t" | |
293 MOVNTQ" %%mm5, 16(%0, %%eax, 4) \n\t" | |
294 MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t" | |
295 | |
296 "addl $8, %%eax \n\t" | |
297 "cmpl %4, %%eax \n\t" | |
298 " jb 1b \n\t" | |
299 EMMS" \n\t" | |
300 SFENCE | |
301 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (num_pixels>>1) | |
302 : "memory", "%eax" | |
303 ); | |
304 | |
305 #else | |
306 int i; | |
307 num_pixels>>=1; | |
308 for(i=0; i<num_pixels; i++) | |
268 { | 309 { |
269 dst[4*i+0] = ysrc[2*i+0]; | 310 dst[4*i+0] = ysrc[2*i+0]; |
270 dst[4*i+1] = usrc[i]; | 311 dst[4*i+1] = usrc[i]; |
271 dst[4*i+2] = ysrc[2*i+1]; | 312 dst[4*i+2] = ysrc[2*i+1]; |
272 dst[4*i+3] = vsrc[i]; | 313 dst[4*i+3] = vsrc[i]; |
273 } | 314 } |
274 | 315 #endif |
275 } | 316 } |
276 | 317 |
277 void yuy2toyv12(uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int src_size) | 318 void yuy2toyv12(uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int num_pixels) |
278 { | 319 { |
279 int i; | 320 int i; |
280 src_size>>=1; | 321 num_pixels>>=1; |
281 for(i=0; i<src_size; i++) | 322 for(i=0; i<num_pixels; i++) |
282 { | 323 { |
283 ydst[2*i+0] = src[4*i+0]; | 324 ydst[2*i+0] = src[4*i+0]; |
284 udst[i] = src[4*i+1]; | 325 udst[i] = src[4*i+1]; |
285 ydst[2*i+1] = src[4*i+2]; | 326 ydst[2*i+1] = src[4*i+2]; |
286 vdst[i] = src[4*i+3]; | 327 vdst[i] = src[4*i+3]; |