Mercurial > mplayer.hg
annotate libvo/fastmemcpy.h @ 27062:3673e0ea8e39
sync w/r27057
author | gpoirier |
---|---|
date | Tue, 17 Jun 2008 08:58:26 +0000 |
parents | b03f87f35c3c |
children | e7c989f7a7c9 |
rev | line source |
---|---|
23385 | 1 /* |
2 * This file is part of MPlayer. | |
3 * | |
4 * MPlayer is free software; you can redistribute it and/or | |
5 * modify it under the terms of the GNU Lesser General Public | |
6 * License as published by the Free Software Foundation; either | |
7 * version 2.1 of the License, or (at your option) any later version. | |
8 * | |
9 * MPlayer is distributed in the hope that it will be useful, | |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 * Lesser General Public License for more details. | |
13 * | |
14 * You should have received a copy of the GNU Lesser General Public | |
15 * License along with MPlayer; if not, write to the Free Software | |
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
17 */ | |
18 | |
26029 | 19 #ifndef MPLAYER_FASTMEMCPY_H |
20 #define MPLAYER_FASTMEMCPY_H | |
644
88eb1a3f7bfb
Changed code, should be faster on Athlon/K6 but slower on PIII with SSE, more portable.
atmosfear
parents:
581
diff
changeset
|
21 |
13787
e047e70a9767
Handle "xxx.h" vs "../xxx.h" include paths in a consistent way.
diego
parents:
12663
diff
changeset
|
22 #include "config.h" |
22003
6fd6bf6269f3
fastmemcpy.h always has to include inttypes.h on systems that need it.
diego
parents:
22000
diff
changeset
|
23 #include <inttypes.h> |
27044
b03f87f35c3c
Add missing string.h #include for memcpy prototype;
diego
parents:
26029
diff
changeset
|
24 #include <string.h> |
800 | 25 |
/*
 * Select the implementation behind fast_memcpy() and mem2agpcpy().
 *
 * The external optimized routines are declared only when USE_FASTMEMCPY is
 * defined together with at least one of HAVE_MMX, HAVE_MMX2 or HAVE_3DNOW.
 * HAVE_SSE and HAVE_SSE2 are deliberately not part of the condition.
 * In every other configuration both names are plain aliases for memcpy().
 */
#if defined(USE_FASTMEMCPY) && \
    (defined(HAVE_MMX) || defined(HAVE_MMX2) || defined(HAVE_3DNOW))

#include <stddef.h>

extern void * fast_memcpy(void * to, const void * from, size_t len);
extern void * mem2agpcpy(void * to, const void * from, size_t len);

#else /* !USE_FASTMEMCPY, or none of HAVE_MMX/MMX2/3DNOW */

#define fast_memcpy(to, from, len) memcpy(to, from, len)
#define mem2agpcpy(to, from, len) memcpy(to, from, len)

#endif
4708 | 43 |
/**
 * Copy a picture plane from src to dst using mem2agpcpy().
 *
 * When both strides are equal the plane is transferred with a single
 * mem2agpcpy() call; otherwise it is copied one row at a time.
 *
 * \param dst          address of the first row of the destination plane
 * \param src          address of the first row of the source plane
 * \param bytesPerLine number of bytes of picture data in each row
 * \param height       number of rows; nothing is copied when it is <= 0
 * \param dstStride    byte distance between consecutive destination rows,
 *                     may be negative
 * \param srcStride    byte distance between consecutive source rows,
 *                     may be negative
 * \return the dst pointer that was passed in
 */
static inline void * mem2agpcpy_pic(void * dst, const void * src, int bytesPerLine, int height, int dstStride, int srcStride)
{
    int i;
    void *retval=dst;

    /* A non-positive height would otherwise yield a negative (i.e. huge
     * once converted to size_t) byte count in the single-copy path. */
    if (height <= 0)
        return retval;

    if(dstStride == srcStride)
    {
        if (srcStride < 0) {
            /* With negative strides the last row has the lowest address:
             * start the block copy there and use the stride's magnitude. */
            src = (const uint8_t*)src + (height-1)*srcStride;
            dst = (uint8_t*)dst + (height-1)*dstStride;
            srcStride = -srcStride;
        }

        /* Full stride for every row but the highest-addressed one, which
         * contributes only its picture bytes, so the copy never runs past
         * the end of a plane that has no padding after its final row. */
        mem2agpcpy(dst, src, (size_t)srcStride*(height-1) + bytesPerLine);
    }
    else
    {
        for(i=0; i<height; i++)
        {
            mem2agpcpy(dst, src, bytesPerLine);
            src = (const uint8_t*)src + srcStride;
            dst = (uint8_t*)dst + dstStride;
        }
    }

    return retval;
}
71 | |
23666
5c3c7efd9b75
Get rid of my_memcpy_pic code duplication in many filters.
reimar
parents:
23457
diff
changeset
|
#define memcpy_pic(d, s, b, h, ds, ss) memcpy_pic2(d, s, b, h, ds, ss, 0)
#define my_memcpy_pic(d, s, b, h, ds, ss) memcpy_pic2(d, s, b, h, ds, ss, 1)

/**
 * Copy a picture plane from src to dst using fast_memcpy().
 *
 * When limit2width is 0 and both strides are equal the plane is transferred
 * with a single fast_memcpy() call, which also copies the bytes lying
 * between the end of one row and the start of the next. In all other cases
 * exactly bytesPerLine bytes are copied for each row.
 *
 * \param dst          address of the first row of the destination plane
 * \param src          address of the first row of the source plane
 * \param bytesPerLine number of bytes of picture data in each row
 * \param height       number of rows; nothing is copied when it is <= 0
 * \param dstStride    byte distance between consecutive destination rows,
 *                     may be negative
 * \param srcStride    byte distance between consecutive source rows,
 *                     may be negative
 * \param limit2width always skip data between end of line and start of next
 *                    instead of copying the full block when strides are the same
 * \return the dst pointer that was passed in
 */
static inline void * memcpy_pic2(void * dst, const void * src,
                                 int bytesPerLine, int height,
                                 int dstStride, int srcStride, int limit2width)
{
    int i;
    void *retval=dst;

    /* A non-positive height would otherwise yield a negative (i.e. huge
     * once converted to size_t) byte count in the single-copy path. */
    if (height <= 0)
        return retval;

    if(!limit2width && dstStride == srcStride)
    {
        if (srcStride < 0) {
            /* With negative strides the last row has the lowest address:
             * start the block copy there and use the stride's magnitude. */
            src = (const uint8_t*)src + (height-1)*srcStride;
            dst = (uint8_t*)dst + (height-1)*dstStride;
            srcStride = -srcStride;
        }

        /* Full stride for every row but the highest-addressed one, which
         * contributes only its picture bytes, so the copy never runs past
         * the end of a plane that has no padding after its final row. */
        fast_memcpy(dst, src, (size_t)srcStride*(height-1) + bytesPerLine);
    }
    else
    {
        for(i=0; i<height; i++)
        {
            fast_memcpy(dst, src, bytesPerLine);
            src = (const uint8_t*)src + srcStride;
            dst = (uint8_t*)dst + dstStride;
        }
    }

    return retval;
}
23ba417cf64b
memcpy_pic() added (copy image plane with src/dst stride)
arpi
parents:
4708
diff
changeset
|
108 |
26029 | 109 #endif /* MPLAYER_FASTMEMCPY_H */ |