comparison postproc/rgb2rgb.c @ 13720:821f464b4d90

adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
author aurel
date Thu, 21 Oct 2004 11:55:20 +0000
parents b80f95e24c96
children fed5d4ab5e5f
comparison
equal deleted inserted replaced
13719:43ecd6a73ec0 13720:821f464b4d90
9 */ 9 */
10 #include <inttypes.h> 10 #include <inttypes.h>
11 #include "../config.h" 11 #include "../config.h"
12 #include "rgb2rgb.h" 12 #include "rgb2rgb.h"
13 #include "swscale.h" 13 #include "swscale.h"
14 #include "../cpudetect.h"
14 #include "../mangle.h" 15 #include "../mangle.h"
15 #include "../bswap.h" 16 #include "../bswap.h"
16 #include "../libvo/fastmemcpy.h" 17 #include "../libvo/fastmemcpy.h"
17 18
18 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit 19 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
66 uint8_t *dst, 67 uint8_t *dst,
67 unsigned width, unsigned height, 68 unsigned width, unsigned height,
68 int srcStride1, int srcStride2, 69 int srcStride1, int srcStride2,
69 int srcStride3, int dstStride); 70 int srcStride3, int dstStride);
70 71
71 #ifdef ARCH_X86 72 #if defined(ARCH_X86) || defined(ARCH_X86_64)
72 static const uint64_t mmx_null __attribute__((aligned(8))) = 0x0000000000000000ULL; 73 static const uint64_t mmx_null __attribute__((aligned(8))) = 0x0000000000000000ULL;
73 static const uint64_t mmx_one __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; 74 static const uint64_t mmx_one __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
74 static const uint64_t mask32b attribute_used __attribute__((aligned(8))) = 0x000000FF000000FFULL; 75 static const uint64_t mask32b attribute_used __attribute__((aligned(8))) = 0x000000FF000000FFULL;
75 static const uint64_t mask32g attribute_used __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; 76 static const uint64_t mask32g attribute_used __attribute__((aligned(8))) = 0x0000FF000000FF00ULL;
76 static const uint64_t mask32r attribute_used __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; 77 static const uint64_t mask32r attribute_used __attribute__((aligned(8))) = 0x00FF000000FF0000ULL;
150 #undef HAVE_3DNOW 151 #undef HAVE_3DNOW
151 #undef HAVE_SSE2 152 #undef HAVE_SSE2
152 #define RENAME(a) a ## _C 153 #define RENAME(a) a ## _C
153 #include "rgb2rgb_template.c" 154 #include "rgb2rgb_template.c"
154 155
155 #ifdef ARCH_X86 156 #if defined(ARCH_X86) || defined(ARCH_X86_64)
156 157
157 //MMX versions 158 //MMX versions
158 #undef RENAME 159 #undef RENAME
159 #define HAVE_MMX 160 #define HAVE_MMX
160 #undef HAVE_MMX2 161 #undef HAVE_MMX2
179 #define HAVE_3DNOW 180 #define HAVE_3DNOW
180 #undef HAVE_SSE2 181 #undef HAVE_SSE2
181 #define RENAME(a) a ## _3DNOW 182 #define RENAME(a) a ## _3DNOW
182 #include "rgb2rgb_template.c" 183 #include "rgb2rgb_template.c"
183 184
184 #endif //ARCH_X86 185 #endif //ARCH_X86 || ARCH_X86_64
185 186
186 /* 187 /*
187 rgb15->rgb16 Original by Strepto/Astral 188 rgb15->rgb16 Original by Strepto/Astral
188 ported to gcc & bugfixed : A'rpi 189 ported to gcc & bugfixed : A'rpi
189 MMX2, 3DNOW optimization by Nick Kurshev 190 MMX2, 3DNOW optimization by Nick Kurshev
190 32bit c version, and and&add trick by Michael Niedermayer 191 32bit c version, and and&add trick by Michael Niedermayer
191 */ 192 */
192 193
193 void sws_rgb2rgb_init(int flags){ 194 void sws_rgb2rgb_init(int flags){
194 #ifdef ARCH_X86 195 #if defined(ARCH_X86) || defined(ARCH_X86_64)
195 if(flags & SWS_CPU_CAPS_MMX2){ 196 if(flags & SWS_CPU_CAPS_MMX2){
196 rgb15to16= rgb15to16_MMX2; 197 rgb15to16= rgb15to16_MMX2;
197 rgb15to24= rgb15to24_MMX2; 198 rgb15to24= rgb15to24_MMX2;
198 rgb15to32= rgb15to32_MMX2; 199 rgb15to32= rgb15to32_MMX2;
199 rgb16to24= rgb16to24_MMX2; 200 rgb16to24= rgb16to24_MMX2;