comparison postproc/rgb2rgb.c @ 2511:6db23dd30242

mmx, mmx2, 3dnow optimized 24to32
author nick
date Sun, 28 Oct 2001 12:02:16 +0000
parents 42e1ae2c8f5f
children 8899131fae4d
comparison
equal deleted inserted replaced
2510:42e1ae2c8f5f 2511:6db23dd30242
1 #include <inttypes.h> 1 #include <inttypes.h>
2 #include "../config.h" 2 #include "../config.h"
3 #include "rgb2rgb.h" 3 #include "rgb2rgb.h"
4 #include "mmx.h" 4 #include "mmx.h"
5
6 #ifdef HAVE_3DNOW
7 #define PREFETCH "prefetch"
8 #define PREFETCHW "prefetchw"
9 #elif HAVE_MMX2
10 #define PREFETCH "prefetchnta"
11 #define PREFETCHW "prefetcht0"
12 #endif
13
14 #ifdef HAVE_3DNOW
15 #define EMMS "femms"
16 #else
17 #define EMMS "emms"
18 #endif
19
20 #ifdef HAVE_MMX2
21 #define MOVNTQ "movntq"
22 #else
23 #define MOVNTQ "movq"
24 #endif
5 25
6 void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size) 26 void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size)
7 { 27 {
8 uint8_t *dest = dst; 28 uint8_t *dest = dst;
9 uint8_t *s = src; 29 uint8_t *s = src;
12 const uint64_t mask32 = 0x00FFFFFF00FFFFFFULL; 32 const uint64_t mask32 = 0x00FFFFFF00FFFFFFULL;
13 uint8_t *mm_end; 33 uint8_t *mm_end;
14 #endif 34 #endif
15 end = s + src_size; 35 end = s + src_size;
16 #ifdef HAVE_MMX 36 #ifdef HAVE_MMX
37 #ifdef PREFETCH
38 __asm __volatile(
39 PREFETCH" %0\n\t"
40 PREFETCH" 64%0\n\t"
41 PREFETCHW" %1\n\t"
42 PREFETCHW" 64%1\n\t"::"m"(*s),"m"(*dest):"memory");
43 #endif
17 mm_end = (uint8_t*)((((unsigned long)end)/16)*16); 44 mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
18 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); 45 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
19 while(s < mm_end) 46 while(s < mm_end)
20 { 47 {
48 #ifdef PREFETCH
49 __asm __volatile(
50 PREFETCH" 128%0\n\t"
51 PREFETCHW" 128%1"
52 ::"m"(*s),"m"(*dest):"memory");
53 #endif
21 __asm __volatile( 54 __asm __volatile(
22 "movd %1, %%mm0\n\t" 55 "movd %1, %%mm0\n\t"
23 "movd 3%1, %%mm1\n\t" 56 "movd 3%1, %%mm1\n\t"
24 "movd 6%1, %%mm2\n\t" 57 "movd 6%1, %%mm2\n\t"
25 "movd 9%1, %%mm3\n\t" 58 "movd 9%1, %%mm3\n\t"
26 "punpckldq %%mm1, %%mm0\n\t" 59 "punpckldq %%mm1, %%mm0\n\t"
27 "punpckldq %%mm3, %%mm2\n\t" 60 "punpckldq %%mm3, %%mm2\n\t"
28 "pand %%mm7, %%mm0\n\t" 61 "pand %%mm7, %%mm0\n\t"
29 "pand %%mm7, %%mm2\n\t" 62 "pand %%mm7, %%mm2\n\t"
30 "movq %%mm0, %0\n\t" 63 MOVNTQ" %%mm0, %0\n\t"
31 "movq %%mm2, 8%0" 64 MOVNTQ" %%mm2, 8%0"
32 :"=m"(*dest) 65 :"=m"(*dest)
33 :"m"(*s) 66 :"m"(*s)
34 :"memory"); 67 :"memory");
35 dest += 16; 68 dest += 16;
36 s += 12; 69 s += 12;
37 } 70 }
71 __asm __volatile(EMMS:::"memory");
38 #endif 72 #endif
39 while(s < end) 73 while(s < end)
40 { 74 {
41 *dest++ = *s++; 75 *dest++ = *s++;
42 *dest++ = *s++; 76 *dest++ = *s++;