Mercurial > mplayer.hg
comparison postproc/rgb2rgb_template.c @ 2511:6db23dd30242
MMX, MMX2, and 3DNow! optimized rgb24to32
author | nick |
---|---|
date | Sun, 28 Oct 2001 12:02:16 +0000 |
parents | 42e1ae2c8f5f |
children | 8899131fae4d |
comparison
equal
deleted
inserted
replaced
2510:42e1ae2c8f5f | 2511:6db23dd30242 |
---|---|
1 #include <inttypes.h> | 1 #include <inttypes.h> |
2 #include "../config.h" | 2 #include "../config.h" |
3 #include "rgb2rgb.h" | 3 #include "rgb2rgb.h" |
4 #include "mmx.h" | 4 #include "mmx.h" |
5 | |
6 #ifdef HAVE_3DNOW | |
7 #define PREFETCH "prefetch" | |
8 #define PREFETCHW "prefetchw" | |
9 #elif HAVE_MMX2 | |
10 #define PREFETCH "prefetchnta" | |
11 #define PREFETCHW "prefetcht0" | |
12 #endif | |
13 | |
14 #ifdef HAVE_3DNOW | |
15 #define EMMS "femms" | |
16 #else | |
17 #define EMMS "emms" | |
18 #endif | |
19 | |
20 #ifdef HAVE_MMX2 | |
21 #define MOVNTQ "movntq" | |
22 #else | |
23 #define MOVNTQ "movq" | |
24 #endif | |
5 | 25 |
6 void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size) | 26 void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size) |
7 { | 27 { |
8 uint8_t *dest = dst; | 28 uint8_t *dest = dst; |
9 uint8_t *s = src; | 29 uint8_t *s = src; |
12 const uint64_t mask32 = 0x00FFFFFF00FFFFFFULL; | 32 const uint64_t mask32 = 0x00FFFFFF00FFFFFFULL; |
13 uint8_t *mm_end; | 33 uint8_t *mm_end; |
14 #endif | 34 #endif |
15 end = s + src_size; | 35 end = s + src_size; |
16 #ifdef HAVE_MMX | 36 #ifdef HAVE_MMX |
37 #ifdef PREFETCH | |
38 __asm __volatile( | |
39 PREFETCH" %0\n\t" | |
40 PREFETCH" 64%0\n\t" | |
41 PREFETCHW" %1\n\t" | |
42 PREFETCHW" 64%1\n\t"::"m"(*s),"m"(*dest):"memory"); | |
43 #endif | |
17 mm_end = (uint8_t*)((((unsigned long)end)/16)*16); | 44 mm_end = (uint8_t*)((((unsigned long)end)/16)*16); |
18 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); | 45 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); |
19 while(s < mm_end) | 46 while(s < mm_end) |
20 { | 47 { |
48 #ifdef PREFETCH | |
49 __asm __volatile( | |
50 PREFETCH" 128%0\n\t" | |
51 PREFETCHW" 128%1" | |
52 ::"m"(*s),"m"(*dest):"memory"); | |
53 #endif | |
21 __asm __volatile( | 54 __asm __volatile( |
22 "movd %1, %%mm0\n\t" | 55 "movd %1, %%mm0\n\t" |
23 "movd 3%1, %%mm1\n\t" | 56 "movd 3%1, %%mm1\n\t" |
24 "movd 6%1, %%mm2\n\t" | 57 "movd 6%1, %%mm2\n\t" |
25 "movd 9%1, %%mm3\n\t" | 58 "movd 9%1, %%mm3\n\t" |
26 "punpckldq %%mm1, %%mm0\n\t" | 59 "punpckldq %%mm1, %%mm0\n\t" |
27 "punpckldq %%mm3, %%mm2\n\t" | 60 "punpckldq %%mm3, %%mm2\n\t" |
28 "pand %%mm7, %%mm0\n\t" | 61 "pand %%mm7, %%mm0\n\t" |
29 "pand %%mm7, %%mm2\n\t" | 62 "pand %%mm7, %%mm2\n\t" |
30 "movq %%mm0, %0\n\t" | 63 MOVNTQ" %%mm0, %0\n\t" |
31 "movq %%mm2, 8%0" | 64 MOVNTQ" %%mm2, 8%0" |
32 :"=m"(*dest) | 65 :"=m"(*dest) |
33 :"m"(*s) | 66 :"m"(*s) |
34 :"memory"); | 67 :"memory"); |
35 dest += 16; | 68 dest += 16; |
36 s += 12; | 69 s += 12; |
37 } | 70 } |
71 __asm __volatile(EMMS:::"memory"); | |
38 #endif | 72 #endif |
39 while(s < end) | 73 while(s < end) |
40 { | 74 { |
41 *dest++ = *s++; | 75 *dest++ = *s++; |
42 *dest++ = *s++; | 76 *dest++ = *s++; |