Mercurial > mplayer.hg
annotate libvo/aclib.c @ 11619:179138947307
This patch contains bugfixes for the esd audio output driver that I
uncovered while trying to send sound to a remote esd server over a
wireless (11 Mbit/s, just enough to handle the sound) link.
First, the sound was full of "ticking" sounds. I found a bug that
prevented the "send the remainder of this block" code from ever being
called - so large chunks of audio were simply being ignored. Fixing
this bug removed the "ticking" from audio streams.
Fixing this bug, however, uncovered another problem - when the socket
buffer was full, doing a blocking write to finish the buffer would take
far too long and would turn video into a chunky mess. I'd imagine this
blocking write would be fine for an audio-only stream, but it turns out
to hold up the video far too much.
The solution in this patch is to write as much data as possible to the
socket, and then return as soon as possible, reporting the number of
bytes actually written accurately back to mplayer. I've tested it on
both local and remote esd servers, and it works well.
Patch by Benjamin Osheroff <ben@gimbo.net>
author | attila |
---|---|
date | Wed, 10 Dec 2003 12:19:13 +0000 |
parents | e7153e62a7f4 |
children | 4b8417674f1c |
rev | line source |
---|---|
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
1 #include "../config.h" |
3393 | 2 #ifdef USE_FASTMEMCPY |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
3 |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
4 /* |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
5 aclib - advanced C library ;) |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
6 This file contains functions which improve and expand standard C-library |
3393 | 7 see aclib_template.c ... this file only contains runtime cpu detection and config options stuff |
8 runtime cpu detection by michael niedermayer (michaelni@gmx.at) is under GPL | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
9 */ |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
10 #include <stddef.h> |
3393 | 11 #include "../cpudetect.h" |
8123
9fc45fe0d444
*HUGE* set of compiler warning fixes, unused variables removal
arpi
parents:
7072
diff
changeset
|
12 #include "fastmemcpy.h" |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
13 |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
14 #define BLOCK_SIZE 4096 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
15 #define CONFUSION_FACTOR 0 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
16 //Feel free to fine-tune the above 2, it might be possible to get some speedup with them :) |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
17 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
18 //#define STATISTICS |
3393 | 19 #ifdef ARCH_X86 |
20 #define CAN_COMPILE_X86_ASM | |
21 #endif | |
22 | |
23 //Note: we have MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one | |
24 //Plain C versions | |
25 //#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) | |
26 //#define COMPILE_C | |
27 //#endif | |
28 | |
29 #ifdef CAN_COMPILE_X86_ASM | |
30 | |
31 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
32 #define COMPILE_MMX | |
33 #endif | |
34 | |
5208 | 35 #if (defined (HAVE_MMX2) && !defined (HAVE_SSE2)) || defined (RUNTIME_CPUDETECT) |
3393 | 36 #define COMPILE_MMX2 |
37 #endif | |
38 | |
39 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
40 #define COMPILE_3DNOW | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
41 #endif |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
42 |
5208 | 43 #if defined (HAVE_SSE2) || defined (RUNTIME_CPUDETECT) |
44 #define COMPILE_SSE | |
45 #endif | |
46 | |
3393 | 47 #undef HAVE_MMX |
48 #undef HAVE_MMX2 | |
49 #undef HAVE_3DNOW | |
5208 | 50 #undef HAVE_SSE |
51 #undef HAVE_SSE2 | |
3393 | 52 #undef ARCH_X86 |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
53 /* |
3393 | 54 #ifdef COMPILE_C |
55 #undef HAVE_MMX | |
56 #undef HAVE_MMX2 | |
57 #undef HAVE_3DNOW | |
58 #undef ARCH_X86 | |
59 #define RENAME(a) a ## _C | |
60 #include "aclib_template.c" | |
61 #endif | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
62 */ |
3393 | 63 //MMX versions |
64 #ifdef COMPILE_MMX | |
65 #undef RENAME | |
66 #define HAVE_MMX | |
67 #undef HAVE_MMX2 | |
68 #undef HAVE_3DNOW | |
5208 | 69 #undef HAVE_SSE |
70 #undef HAVE_SSE2 | |
3393 | 71 #define ARCH_X86 |
72 #define RENAME(a) a ## _MMX | |
73 #include "aclib_template.c" | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
74 #endif |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
75 |
3393 | 76 //MMX2 versions |
77 #ifdef COMPILE_MMX2 | |
78 #undef RENAME | |
79 #define HAVE_MMX | |
80 #define HAVE_MMX2 | |
81 #undef HAVE_3DNOW | |
5208 | 82 #undef HAVE_SSE |
83 #undef HAVE_SSE2 | |
3393 | 84 #define ARCH_X86 |
85 #define RENAME(a) a ## _MMX2 | |
86 #include "aclib_template.c" | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
87 #endif |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
88 |
3393 | 89 //3DNOW versions |
90 #ifdef COMPILE_3DNOW | |
91 #undef RENAME | |
92 #define HAVE_MMX | |
93 #undef HAVE_MMX2 | |
94 #define HAVE_3DNOW | |
5208 | 95 #undef HAVE_SSE |
96 #undef HAVE_SSE2 | |
3393 | 97 #define ARCH_X86 |
98 #define RENAME(a) a ## _3DNow | |
99 #include "aclib_template.c" | |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
100 #endif |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
101 |
5208 | 102 //SSE versions (only used on SSE2 cpus) |
103 #ifdef COMPILE_SSE | |
104 #undef RENAME | |
105 #define HAVE_MMX | |
106 #define HAVE_MMX2 | |
107 #undef HAVE_3DNOW | |
108 #define HAVE_SSE | |
109 #define HAVE_SSE2 | |
110 #define ARCH_X86 | |
111 #define RENAME(a) a ## _SSE | |
112 #include "aclib_template.c" | |
113 #endif | |
114 | |
3393 | 115 #endif // CAN_COMPILE_X86_ASM |
116 | |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
117 |
/*
 * fast_memcpy: copy len bytes from `from` to `to` and return `to`.
 *
 * With RUNTIME_CPUDETECT (and CAN_COMPILE_X86_ASM) the variant is chosen at
 * run time from gCpuCaps, tested in the order SSE2, MMX2, 3DNow, MMX.
 * Without RUNTIME_CPUDETECT the variant is fixed at compile time by the
 * HAVE_SSE2 / HAVE_MMX2 / HAVE_3DNOW / HAVE_MMX macros, in the same order.
 * When no variant applies, the plain memcpy is called instead.
 * The return values of the variant calls are ignored; `to` is always returned.
 */
7072 | 118 void * fast_memcpy(void * to, const void * from, size_t len) |
3393 | 119 { |
120 #ifdef RUNTIME_CPUDETECT | |
121 #ifdef CAN_COMPILE_X86_ASM | |
122 // ordered by speed, fastest first | |
5208 | 123 if(gCpuCaps.hasSSE2) |
124 fast_memcpy_SSE(to, from, len); | |
125 else if(gCpuCaps.hasMMX2) | |
3393 | 126 fast_memcpy_MMX2(to, from, len); |
127 else if(gCpuCaps.has3DNow) | |
128 fast_memcpy_3DNow(to, from, len); | |
129 else if(gCpuCaps.hasMMX) | |
130 fast_memcpy_MMX(to, from, len); | |
131 else | |
132 #endif //CAN_COMPILE_X86_ASM | |
133 memcpy(to, from, len); // prior to MMX we use the standard memcpy | |
134 #else | |
5208 | 135 #ifdef HAVE_SSE2 |
136 fast_memcpy_SSE(to, from, len); | |
137 #elif defined (HAVE_MMX2) | |
3393 | 138 fast_memcpy_MMX2(to, from, len); |
139 #elif defined (HAVE_3DNOW) | |
140 fast_memcpy_3DNow(to, from, len); | |
141 #elif defined (HAVE_MMX) | |
142 fast_memcpy_MMX(to, from, len); | |
143 #else | |
144 memcpy(to, from, len); // prior to MMX we use the standard memcpy | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
145 #endif |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
146 |
3393 | 147 #endif //!RUNTIME_CPUDETECT |
5543
c75f75806af1
memcpy must return destination ptr patch by Adam <adam@cfar.umd.edu>
michael
parents:
5208
diff
changeset
|
148 return to; |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
149 } |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
150 |
8127
e7153e62a7f4
On non-x86 platforms, memcpy was re-implemented in mplayer and was called
jkeil
parents:
8123
diff
changeset
|
151 #undef mem2agpcpy |
/*
 * mem2agpcpy: copy len bytes from `from` to `to` and return `to`.
 *
 * With RUNTIME_CPUDETECT (and CAN_COMPILE_X86_ASM) the variant is chosen at
 * run time from gCpuCaps, tested in the order SSE2, MMX2, 3DNow, MMX.
 * Without RUNTIME_CPUDETECT the variant is fixed at compile time by the
 * HAVE_SSE2 / HAVE_MMX2 / HAVE_3DNOW / HAVE_MMX macros, in the same order.
 * When no variant applies, the plain memcpy is called instead.
 * NOTE(review): the name suggests copies from system memory into AGP/video
 * memory; the variants are not visible in this file - confirm there.
 */
7072 | 152 void * mem2agpcpy(void * to, const void * from, size_t len) |
4681 | 153 { |
154 #ifdef RUNTIME_CPUDETECT | |
155 #ifdef CAN_COMPILE_X86_ASM | |
156 // ordered by speed, fastest first | |
5208 | 157 if(gCpuCaps.hasSSE2) |
158 mem2agpcpy_SSE(to, from, len); | |
159 else if(gCpuCaps.hasMMX2) | |
4681 | 160 mem2agpcpy_MMX2(to, from, len); |
161 else if(gCpuCaps.has3DNow) | |
162 mem2agpcpy_3DNow(to, from, len); | |
163 else if(gCpuCaps.hasMMX) | |
164 mem2agpcpy_MMX(to, from, len); | |
165 else | |
166 #endif //CAN_COMPILE_X86_ASM | |
167 memcpy(to, from, len); // prior to MMX we use the standard memcpy | |
168 #else | |
5208 | 169 #ifdef HAVE_SSE2 |
170 mem2agpcpy_SSE(to, from, len); | |
171 #elif defined (HAVE_MMX2) | |
4681 | 172 mem2agpcpy_MMX2(to, from, len); |
173 #elif defined (HAVE_3DNOW) | |
174 mem2agpcpy_3DNow(to, from, len); | |
175 #elif defined (HAVE_MMX) | |
176 mem2agpcpy_MMX(to, from, len); | |
177 #else | |
178 memcpy(to, from, len); // prior to MMX we use the standard memcpy | |
179 #endif | |
180 | |
181 #endif //!RUNTIME_CPUDETECT | |
8123
9fc45fe0d444
*HUGE* set of compiler warning fixes, unused variables removal
arpi
parents:
7072
diff
changeset
|
182 return to; |
4681 | 183 } |
184 | |
185 #endif /* use fastmemcpy */ | |
186 |