Mercurial > mplayer.hg
comparison postproc/yuv2rgb_template.c @ 3143:86910f54c391
runtime cpu detection
author | michael |
---|---|
date | Mon, 26 Nov 2001 21:17:23 +0000 |
parents | 2cbecedb2616 |
children | 20806e535b96 |
comparison
equal
deleted
inserted
replaced
3142:0f6cce3a8059 | 3143:86910f54c391 |
---|---|
22 * You should have received a copy of the GNU General Public License | 22 * You should have received a copy of the GNU General Public License |
23 * along with GNU Make; see the file COPYING. If not, write to | 23 * along with GNU Make; see the file COPYING. If not, write to |
24 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | 24 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
25 * | 25 * |
26 * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at) | 26 * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at) |
27 * MMX/MMX2 Template stuff from Michael Niedermayer (needed for fast movntq support) | |
27 */ | 28 */ |
28 | 29 |
29 #include <stdio.h> | 30 #undef MOVNTQ |
30 #include <stdlib.h> | 31 #undef EMMS |
31 | 32 #undef SFENCE |
32 #include "../config.h" | 33 |
33 | 34 #ifdef HAVE_3DNOW |
34 //#include "libmpeg2/mpeg2.h" | 35 /* On K6 femms is faster than emms. On K7 femms is mapped directly to emms. */ |
35 //#include "libmpeg2/mpeg2_internal.h" | 36 #define EMMS "femms" |
36 #include <inttypes.h> | 37 #else |
37 | 38 #define EMMS "emms" |
38 #include "rgb2rgb.h" | 39 #endif |
39 #include "../mmx_defs.h" | 40 |
40 | 41 #ifdef HAVE_MMX2 |
41 #define DITHER1XBPP | 42 #define MOVNTQ "movntq" |
42 | 43 #define SFENCE "sfence" |
43 /* hope these constant values are cache line aligned */ | 44 #else |
44 uint64_t __attribute__((aligned(8))) mmx_80w = 0x0080008000800080; | 45 #define MOVNTQ "movq" |
45 uint64_t __attribute__((aligned(8))) mmx_10w = 0x1010101010101010; | 46 #define SFENCE "/nop" |
46 uint64_t __attribute__((aligned(8))) mmx_00ffw = 0x00ff00ff00ff00ff; | 47 #endif |
47 uint64_t __attribute__((aligned(8))) mmx_Y_coeff = 0x253f253f253f253f; | |
48 | |
49 /* hope these constant values are cache line aligned */ | |
50 uint64_t __attribute__((aligned(8))) mmx_U_green = 0xf37df37df37df37d; | |
51 uint64_t __attribute__((aligned(8))) mmx_U_blue = 0x4093409340934093; | |
52 uint64_t __attribute__((aligned(8))) mmx_V_red = 0x3312331233123312; | |
53 uint64_t __attribute__((aligned(8))) mmx_V_green = 0xe5fce5fce5fce5fc; | |
54 | |
55 /* hope these constant values are cache line aligned */ | |
56 uint64_t __attribute__((aligned(8))) mmx_redmask = 0xf8f8f8f8f8f8f8f8; | |
57 uint64_t __attribute__((aligned(8))) mmx_grnmask = 0xfcfcfcfcfcfcfcfc; | |
58 | |
59 uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL; | |
60 uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL; | |
61 uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; | |
62 | |
63 // the volatile is required because gcc otherwise optimizes some writes away not knowing that these | |
64 // are read in the asm block | |
65 volatile uint64_t __attribute__((aligned(8))) b5Dither; | |
66 volatile uint64_t __attribute__((aligned(8))) g5Dither; | |
67 volatile uint64_t __attribute__((aligned(8))) g6Dither; | |
68 volatile uint64_t __attribute__((aligned(8))) r5Dither; | |
69 | |
70 uint64_t __attribute__((aligned(8))) dither4[2]={ | |
71 0x0103010301030103LL, | |
72 0x0200020002000200LL,}; | |
73 | |
74 uint64_t __attribute__((aligned(8))) dither8[2]={ | |
75 0x0602060206020602LL, | |
76 0x0004000400040004LL,}; | |
77 | |
78 | |
79 | 48 |
80 #define YUV2RGB \ | 49 #define YUV2RGB \ |
81 /* Do the multiply part of the conversion for even and odd pixels, | 50 /* Do the multiply part of the conversion for even and odd pixels, |
82 register usage: | 51 register usage: |
83 mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels, | 52 mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels, |
150 "punpcklbw %%mm3, %%mm0;" /* B7 B6 B5 B4 B3 B2 B1 B0 */\ | 119 "punpcklbw %%mm3, %%mm0;" /* B7 B6 B5 B4 B3 B2 B1 B0 */\ |
151 "punpcklbw %%mm4, %%mm1;" /* R7 R6 R5 R4 R3 R2 R1 R0 */\ | 120 "punpcklbw %%mm4, %%mm1;" /* R7 R6 R5 R4 R3 R2 R1 R0 */\ |
152 "punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\ | 121 "punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\ |
153 | 122 |
154 | 123 |
155 static void yuv420_rgb16_mmx (uint8_t * image, uint8_t * py, | 124 static inline void RENAME(yuv420_rgb16) (uint8_t * image, uint8_t * py, |
156 uint8_t * pu, uint8_t * pv, | 125 uint8_t * pu, uint8_t * pv, |
157 int h_size, int v_size, | 126 int h_size, int v_size, |
158 int rgb_stride, int y_stride, int uv_stride) | 127 int rgb_stride, int y_stride, int uv_stride) |
159 { | 128 { |
160 int even = 1; | 129 int even = 1; |
251 } | 220 } |
252 | 221 |
253 __asm__ __volatile__ (EMMS); | 222 __asm__ __volatile__ (EMMS); |
254 } | 223 } |
255 | 224 |
256 static void yuv420_rgb15_mmx (uint8_t * image, uint8_t * py, | 225 static inline void RENAME(yuv420_rgb15) (uint8_t * image, uint8_t * py, |
257 uint8_t * pu, uint8_t * pv, | 226 uint8_t * pu, uint8_t * pv, |
258 int h_size, int v_size, | 227 int h_size, int v_size, |
259 int rgb_stride, int y_stride, int uv_stride) | 228 int rgb_stride, int y_stride, int uv_stride) |
260 { | 229 { |
261 int even = 1; | 230 int even = 1; |
348 } | 317 } |
349 | 318 |
350 __asm__ __volatile__ (EMMS); | 319 __asm__ __volatile__ (EMMS); |
351 } | 320 } |
352 | 321 |
353 static void yuv420_rgb24_mmx (uint8_t * image, uint8_t * py, | 322 static inline void RENAME(yuv420_rgb24) (uint8_t * image, uint8_t * py, |
354 uint8_t * pu, uint8_t * pv, | 323 uint8_t * pu, uint8_t * pv, |
355 int h_size, int v_size, | 324 int h_size, int v_size, |
356 int rgb_stride, int y_stride, int uv_stride) | 325 int rgb_stride, int y_stride, int uv_stride) |
357 { | 326 { |
358 int even = 1; | 327 int even = 1; |
503 | 472 |
504 __asm__ __volatile__ (EMMS); | 473 __asm__ __volatile__ (EMMS); |
505 } | 474 } |
506 | 475 |
507 | 476 |
508 static void yuv420_argb32_mmx (uint8_t * image, uint8_t * py, | 477 static inline void RENAME(yuv420_argb32) (uint8_t * image, uint8_t * py, |
509 uint8_t * pu, uint8_t * pv, | 478 uint8_t * pu, uint8_t * pv, |
510 int h_size, int v_size, | 479 int h_size, int v_size, |
511 int rgb_stride, int y_stride, int uv_stride) | 480 int rgb_stride, int y_stride, int uv_stride) |
512 { | 481 { |
513 int even = 1; | 482 int even = 1; |
597 } | 566 } |
598 | 567 |
599 __asm__ __volatile__ (EMMS); | 568 __asm__ __volatile__ (EMMS); |
600 } | 569 } |
601 | 570 |
602 yuv2rgb_fun yuv2rgb_init_mmx (int bpp, int mode) | 571 yuv2rgb_fun RENAME(yuv2rgb_init) (int bpp, int mode) |
603 { | 572 { |
604 if (bpp == 15 && mode == MODE_RGB) return yuv420_rgb15_mmx; | 573 if (bpp == 15 && mode == MODE_RGB) return RENAME(yuv420_rgb15); |
605 if (bpp == 16 && mode == MODE_RGB) return yuv420_rgb16_mmx; | 574 if (bpp == 16 && mode == MODE_RGB) return RENAME(yuv420_rgb16); |
606 if (bpp == 24 && mode == MODE_RGB) return yuv420_rgb24_mmx; | 575 if (bpp == 24 && mode == MODE_RGB) return RENAME(yuv420_rgb24); |
607 if (bpp == 32 && mode == MODE_RGB) return yuv420_argb32_mmx; | 576 if (bpp == 32 && mode == MODE_RGB) return RENAME(yuv420_argb32); |
608 return NULL; // Fallback to C. | 577 return NULL; // Fallback to C. |
609 } | 578 } |
610 | 579 |