Mercurial > mplayer.hg
annotate libvo/fastmemcpy.h @ 23331:a35d0ad65e72
Add SSSE3 check
author | zuxy |
---|---|
date | Sun, 20 May 2007 05:04:14 +0000 |
parents | 6fd6bf6269f3 |
children | 499d246cd549 |
rev | line source |
---|---|
477 | 1 #ifndef __MPLAYER_MEMCPY |
644
88eb1a3f7bfb
Changed code, should be faster on Athlon/K6 but slower on PIII with SSE, more portable.
atmosfear
parents:
581
diff
changeset
|
2 #define __MPLAYER_MEMCPY 1 |
88eb1a3f7bfb
Changed code, should be faster on Athlon/K6 but slower on PIII with SSE, more portable.
atmosfear
parents:
581
diff
changeset
|
3 |
13787
e047e70a9767
Handle "xxx.h" vs "../xxx.h" include paths in a consistent way.
diego
parents:
12663
diff
changeset
|
4 #include "config.h" |
22003
6fd6bf6269f3
fastmemcpy.h always has to include inttypes.h on systems that need it.
diego
parents:
22000
diff
changeset
|
5 #include <inttypes.h> |
800 | 6 |
/*
 * Copy-routine selection.
 *
 * - USE_FASTMEMCPY defined and at least one of HAVE_MMX, HAVE_MMX2 or
 *   HAVE_3DNOW defined: fast_memcpy() and mem2agpcpy() are declared as
 *   external functions, and every later use of memcpy() is redirected to
 *   fast_memcpy() by macro.
 * - Otherwise (either condition fails): mem2agpcpy() is a plain macro alias
 *   for memcpy(), and memcpy() itself is left untouched.
 *
 * The HAVE_SSE / HAVE_SSE2 part of the condition is commented out, so those
 * two symbols alone do not enable the fast path.
 */
1131 | 7 #ifdef USE_FASTMEMCPY |
8 #if defined(HAVE_MMX) || defined(HAVE_MMX2) || defined(HAVE_3DNOW) \ | |
9 /* || defined(HAVE_SSE) || defined(HAVE_SSE2) */ | |
644
88eb1a3f7bfb
Changed code, should be faster on Athlon/K6 but slower on PIII with SSE, more portable.
atmosfear
parents:
581
diff
changeset
|
10 #include <stddef.h> |
477 | 11 |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
685
diff
changeset
|
12 extern void * fast_memcpy(void * to, const void * from, size_t len); |
4681 | 13 extern void * mem2agpcpy(void * to, const void * from, size_t len); |
376 | 14 #define memcpy(a,b,c) fast_memcpy(a,b,c) |
513 | 15 |
4681 | 16 #else /* HAVE_MMX/MMX2/3DNOW/SSE/SSE2 */ |
17 #define mem2agpcpy(a,b,c) memcpy(a,b,c) | |
478 | 18 #endif |
4681 | 19 |
20 #else /* USE_FASTMEMCPY */ | |
21 #define mem2agpcpy(a,b,c) memcpy(a,b,c) | |
22 #endif | |
4708 | 23 |
/**
 * Copy a picture plane with mem2agpcpy(), honouring separate row strides.
 *
 * @param dst          address of the first destination row
 * @param src          address of the first source row
 * @param bytesPerLine number of bytes copied for each row
 * @param height       number of rows
 * @param dstStride    byte offset from one destination row to the next
 *                     (may be negative)
 * @param srcStride    byte offset from one source row to the next
 *                     (may be negative)
 * @return the dst pointer exactly as it was passed in
 *
 * When both strides are equal the plane is copied with a single call that
 * also carries the padding between rows; otherwise each row is copied
 * separately. Nothing is copied when height or bytesPerLine is not positive.
 */
static inline void * mem2agpcpy_pic(void * dst, const void * src, int bytesPerLine, int height, int dstStride, int srcStride)
{
    int i;
    void *retval=dst;

    /* Empty picture: bail out before any pointer arithmetic so that the
     * size computed below can never become negative. */
    if(height <= 0 || bytesPerLine <= 0)
        return retval;

    if(dstStride == srcStride)
    {
        if (srcStride < 0) {
            /* Rows are stored bottom-up: restart from the row with the
             * lowest address and treat the plane as a top-down one. */
            src = (const uint8_t*)src + (height-1)*srcStride;
            dst = (uint8_t*)dst + (height-1)*dstStride;
            srcStride = -srcStride;
        }

        /* The last row contributes only its payload, not a full stride:
         * copying srcStride*height would touch padding that need not exist
         * behind the final row of either buffer. */
        mem2agpcpy(dst, src, srcStride*(height-1) + bytesPerLine);
    }
    else
    {
        for(i=0; i<height; i++)
        {
            mem2agpcpy(dst, src, bytesPerLine);
            src = (const uint8_t*)src + srcStride;
            dst = (uint8_t*)dst + dstStride;
        }
    }

    return retval;
}
51 | |
/**
 * Copy a picture plane with memcpy(), honouring separate row strides.
 *
 * @param dst          address of the first destination row
 * @param src          address of the first source row
 * @param bytesPerLine number of bytes copied for each row
 * @param height       number of rows
 * @param dstStride    byte offset from one destination row to the next
 *                     (may be negative)
 * @param srcStride    byte offset from one source row to the next
 *                     (may be negative)
 * @return the dst pointer exactly as it was passed in
 *
 * When both strides are equal the plane is copied with a single call that
 * also carries the padding between rows; otherwise each row is copied
 * separately. Nothing is copied when height or bytesPerLine is not positive.
 */
static inline void * memcpy_pic(void * dst, const void * src, int bytesPerLine, int height, int dstStride, int srcStride)
{
    int i;
    void *retval=dst;

    /* Empty picture: bail out before any pointer arithmetic so that the
     * size computed below can never become negative. */
    if(height <= 0 || bytesPerLine <= 0)
        return retval;

    if(dstStride == srcStride)
    {
        if (srcStride < 0) {
            /* Rows are stored bottom-up: restart from the row with the
             * lowest address and treat the plane as a top-down one. */
            src = (const uint8_t*)src + (height-1)*srcStride;
            dst = (uint8_t*)dst + (height-1)*dstStride;
            srcStride = -srcStride;
        }

        /* The last row contributes only its payload, not a full stride:
         * copying srcStride*height would touch padding that need not exist
         * behind the final row of either buffer. */
        memcpy(dst, src, srcStride*(height-1) + bytesPerLine);
    }
    else
    {
        for(i=0; i<height; i++)
        {
            memcpy(dst, src, bytesPerLine);
            src = (const uint8_t*)src + srcStride;
            dst = (uint8_t*)dst + dstStride;
        }
    }

    return retval;
}
23ba417cf64b
memcpy_pic() added (copy image plane with src/dst stride)
arpi
parents:
4708
diff
changeset
|
79 |
4681 | 80 #endif |