comparison postproc/yuv2rgb_template.c @ 3143:86910f54c391

runtime cpu detection
author michael
date Mon, 26 Nov 2001 21:17:23 +0000
parents 2cbecedb2616
children 20806e535b96
comparison
equal deleted inserted replaced
3142:0f6cce3a8059 3143:86910f54c391
22 * You should have received a copy of the GNU General Public License 22 * You should have received a copy of the GNU General Public License
23 * along with GNU Make; see the file COPYING. If not, write to 23 * along with GNU Make; see the file COPYING. If not, write to
24 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 24 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
25 * 25 *
26 * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at) 26 * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at)
27 * MMX/MMX2 Template stuff from Michael Niedermayer (needed for fast movntq support)
27 */ 28 */
28 29
29 #include <stdio.h> 30 #undef MOVNTQ
30 #include <stdlib.h> 31 #undef EMMS
31 32 #undef SFENCE
32 #include "../config.h" 33
33 34 #ifdef HAVE_3DNOW
34 //#include "libmpeg2/mpeg2.h" 35 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
35 //#include "libmpeg2/mpeg2_internal.h" 36 #define EMMS "femms"
36 #include <inttypes.h> 37 #else
37 38 #define EMMS "emms"
38 #include "rgb2rgb.h" 39 #endif
39 #include "../mmx_defs.h" 40
40 41 #ifdef HAVE_MMX2
41 #define DITHER1XBPP 42 #define MOVNTQ "movntq"
42 43 #define SFENCE "sfence"
43 /* hope these constant values are cache line aligned */ 44 #else
44 uint64_t __attribute__((aligned(8))) mmx_80w = 0x0080008000800080; 45 #define MOVNTQ "movq"
45 uint64_t __attribute__((aligned(8))) mmx_10w = 0x1010101010101010; 46 #define SFENCE "/nop"
46 uint64_t __attribute__((aligned(8))) mmx_00ffw = 0x00ff00ff00ff00ff; 47 #endif
47 uint64_t __attribute__((aligned(8))) mmx_Y_coeff = 0x253f253f253f253f;
48
49 /* hope these constant values are cache line aligned */
50 uint64_t __attribute__((aligned(8))) mmx_U_green = 0xf37df37df37df37d;
51 uint64_t __attribute__((aligned(8))) mmx_U_blue = 0x4093409340934093;
52 uint64_t __attribute__((aligned(8))) mmx_V_red = 0x3312331233123312;
53 uint64_t __attribute__((aligned(8))) mmx_V_green = 0xe5fce5fce5fce5fc;
54
55 /* hope these constant values are cache line aligned */
56 uint64_t __attribute__((aligned(8))) mmx_redmask = 0xf8f8f8f8f8f8f8f8;
57 uint64_t __attribute__((aligned(8))) mmx_grnmask = 0xfcfcfcfcfcfcfcfc;
58
59 uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
60 uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
61 uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
62
63 // the volatile is required because gcc otherwise optimizes some writes away not knowing that these
64 // are read in the asm block
65 volatile uint64_t __attribute__((aligned(8))) b5Dither;
66 volatile uint64_t __attribute__((aligned(8))) g5Dither;
67 volatile uint64_t __attribute__((aligned(8))) g6Dither;
68 volatile uint64_t __attribute__((aligned(8))) r5Dither;
69
70 uint64_t __attribute__((aligned(8))) dither4[2]={
71 0x0103010301030103LL,
72 0x0200020002000200LL,};
73
74 uint64_t __attribute__((aligned(8))) dither8[2]={
75 0x0602060206020602LL,
76 0x0004000400040004LL,};
77
78
79 48
80 #define YUV2RGB \ 49 #define YUV2RGB \
81 /* Do the multiply part of the conversion for even and odd pixels, 50 /* Do the multiply part of the conversion for even and odd pixels,
82 register usage: 51 register usage:
83 mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels, 52 mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
150 "punpcklbw %%mm3, %%mm0;" /* B7 B6 B5 B4 B3 B2 B1 B0 */\ 119 "punpcklbw %%mm3, %%mm0;" /* B7 B6 B5 B4 B3 B2 B1 B0 */\
151 "punpcklbw %%mm4, %%mm1;" /* R7 R6 R5 R4 R3 R2 R1 R0 */\ 120 "punpcklbw %%mm4, %%mm1;" /* R7 R6 R5 R4 R3 R2 R1 R0 */\
152 "punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\ 121 "punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\
153 122
154 123
155 static void yuv420_rgb16_mmx (uint8_t * image, uint8_t * py, 124 static inline void RENAME(yuv420_rgb16) (uint8_t * image, uint8_t * py,
156 uint8_t * pu, uint8_t * pv, 125 uint8_t * pu, uint8_t * pv,
157 int h_size, int v_size, 126 int h_size, int v_size,
158 int rgb_stride, int y_stride, int uv_stride) 127 int rgb_stride, int y_stride, int uv_stride)
159 { 128 {
160 int even = 1; 129 int even = 1;
251 } 220 }
252 221
253 __asm__ __volatile__ (EMMS); 222 __asm__ __volatile__ (EMMS);
254 } 223 }
255 224
256 static void yuv420_rgb15_mmx (uint8_t * image, uint8_t * py, 225 static inline void RENAME(yuv420_rgb15) (uint8_t * image, uint8_t * py,
257 uint8_t * pu, uint8_t * pv, 226 uint8_t * pu, uint8_t * pv,
258 int h_size, int v_size, 227 int h_size, int v_size,
259 int rgb_stride, int y_stride, int uv_stride) 228 int rgb_stride, int y_stride, int uv_stride)
260 { 229 {
261 int even = 1; 230 int even = 1;
348 } 317 }
349 318
350 __asm__ __volatile__ (EMMS); 319 __asm__ __volatile__ (EMMS);
351 } 320 }
352 321
353 static void yuv420_rgb24_mmx (uint8_t * image, uint8_t * py, 322 static inline void RENAME(yuv420_rgb24) (uint8_t * image, uint8_t * py,
354 uint8_t * pu, uint8_t * pv, 323 uint8_t * pu, uint8_t * pv,
355 int h_size, int v_size, 324 int h_size, int v_size,
356 int rgb_stride, int y_stride, int uv_stride) 325 int rgb_stride, int y_stride, int uv_stride)
357 { 326 {
358 int even = 1; 327 int even = 1;
503 472
504 __asm__ __volatile__ (EMMS); 473 __asm__ __volatile__ (EMMS);
505 } 474 }
506 475
507 476
508 static void yuv420_argb32_mmx (uint8_t * image, uint8_t * py, 477 static inline void RENAME(yuv420_argb32) (uint8_t * image, uint8_t * py,
509 uint8_t * pu, uint8_t * pv, 478 uint8_t * pu, uint8_t * pv,
510 int h_size, int v_size, 479 int h_size, int v_size,
511 int rgb_stride, int y_stride, int uv_stride) 480 int rgb_stride, int y_stride, int uv_stride)
512 { 481 {
513 int even = 1; 482 int even = 1;
597 } 566 }
598 567
599 __asm__ __volatile__ (EMMS); 568 __asm__ __volatile__ (EMMS);
600 } 569 }
601 570
602 yuv2rgb_fun yuv2rgb_init_mmx (int bpp, int mode) 571 yuv2rgb_fun RENAME(yuv2rgb_init) (int bpp, int mode)
603 { 572 {
604 if (bpp == 15 && mode == MODE_RGB) return yuv420_rgb15_mmx; 573 if (bpp == 15 && mode == MODE_RGB) return RENAME(yuv420_rgb15);
605 if (bpp == 16 && mode == MODE_RGB) return yuv420_rgb16_mmx; 574 if (bpp == 16 && mode == MODE_RGB) return RENAME(yuv420_rgb16);
606 if (bpp == 24 && mode == MODE_RGB) return yuv420_rgb24_mmx; 575 if (bpp == 24 && mode == MODE_RGB) return RENAME(yuv420_rgb24);
607 if (bpp == 32 && mode == MODE_RGB) return yuv420_argb32_mmx; 576 if (bpp == 32 && mode == MODE_RGB) return RENAME(yuv420_argb32);
608 return NULL; // Fallback to C. 577 return NULL; // Fallback to C.
609 } 578 }
610 579