Mercurial > libavutil.hg
annotate intreadwrite.h @ 728:1fa3820b1a84 libavutil
ARM asm for AV_RN*()
ARMv6 and later support unaligned loads and stores for single
word/halfword but not double/multiple. GCC is ignorant of this and
will always use bytewise accesses for unaligned data. Casting to an
int32_t pointer is dangerous since a load/store double or multiple
instruction might be used (this happens with some code in FFmpeg).
Implementing the AV_[RW]* macros with inline asm using only supported
instructions gives fast and safe unaligned accesses. ARM RVCT does
the right thing with generic code.
This gives an overall speedup of up to 10%.
author | mru |
---|---|
date | Sat, 18 Apr 2009 00:00:28 +0000 |
parents | 98b64f65be0d |
children | 753953ed8ff0 |
rev | line source |
---|---|
263 | 1 /* |
2 * This file is part of FFmpeg. | |
3 * | |
4 * FFmpeg is free software; you can redistribute it and/or | |
5 * modify it under the terms of the GNU Lesser General Public | |
6 * License as published by the Free Software Foundation; either | |
7 * version 2.1 of the License, or (at your option) any later version. | |
8 * | |
9 * FFmpeg is distributed in the hope that it will be useful, | |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 * Lesser General Public License for more details. | |
13 * | |
14 * You should have received a copy of the GNU Lesser General Public | |
15 * License along with FFmpeg; if not, write to the Free Software | |
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
17 */ | |
18 | |
567 | 19 #ifndef AVUTIL_INTREADWRITE_H |
20 #define AVUTIL_INTREADWRITE_H | |
152
5b211d03227b
Move BE_*/LE_*/ST*/LD* macros to a common place. Some further
reimar
parents:
diff
changeset
|
21 |
343 | 22 #include <stdint.h> |
469 | 23 #include "config.h" |
350
c2034e89e9a2
intreadwrite.h needs bswap.h if HAVE_FAST_UNALIGNED is set, so include it.
reimar
parents:
343
diff
changeset
|
24 #include "bswap.h" |
343 | 25 |
727 | 26 /* |
27 * Arch-specific headers can provide any combination of | |
28 * AV_[RW][BLN](16|32|64) macros. Preprocessor symbols must be | |
29 * defined, even if these are implemented as inline functions. | |
30 */ | |
31 | |
728 | 32 #if ARCH_ARM |
33 # include "arm/intreadwrite.h" | |
34 #endif | |
727 | 35 |
36 /* | |
37 * Define AV_[RW]N helper macros to simplify definitions not provided | |
38 * by per-arch headers. | |
39 */ | |
40 | |
41 #if defined(__GNUC__) | |
152
5b211d03227b
Move BE_*/LE_*/ST*/LD* macros to a common place. Some further
reimar
parents:
diff
changeset
|
42 |
5b211d03227b
Move BE_*/LE_*/ST*/LD* macros to a common place. Some further
reimar
parents:
diff
changeset
|
43 struct unaligned_64 { uint64_t l; } __attribute__((packed)); |
5b211d03227b
Move BE_*/LE_*/ST*/LD* macros to a common place. Some further
reimar
parents:
diff
changeset
|
44 struct unaligned_32 { uint32_t l; } __attribute__((packed)); |
5b211d03227b
Move BE_*/LE_*/ST*/LD* macros to a common place. Some further
reimar
parents:
diff
changeset
|
45 struct unaligned_16 { uint16_t l; } __attribute__((packed)); |
5b211d03227b
Move BE_*/LE_*/ST*/LD* macros to a common place. Some further
reimar
parents:
diff
changeset
|
46 |
727 | 47 # define AV_RN(s, p) (((const struct unaligned_##s *) (p))->l) |
48 # define AV_WN(s, p, v) (((struct unaligned_##s *) (p))->l) = (v) | |
152
5b211d03227b
Move BE_*/LE_*/ST*/LD* macros to a common place. Some further
reimar
parents:
diff
changeset
|
49 |
525
0d4beab5e3c9
intreadwrite: support DEC compiler __unaligned type qualifier
mru
parents:
524
diff
changeset
|
50 #elif defined(__DECC) |
0d4beab5e3c9
intreadwrite: support DEC compiler __unaligned type qualifier
mru
parents:
524
diff
changeset
|
51 |
727 | 52 # define AV_RN(s, p) (*((const __unaligned uint##s##_t*)(p))) |
53 # define AV_WN(s, p, v) *((__unaligned uint##s##_t*)(p)) = (v) | |
525
0d4beab5e3c9
intreadwrite: support DEC compiler __unaligned type qualifier
mru
parents:
524
diff
changeset
|
54 |
727 | 55 #elif HAVE_FAST_UNALIGNED |
56 | |
57 # define AV_RN(s, p) (*((const uint##s##_t*)(p))) | |
58 # define AV_WN(s, p, v) *((uint##s##_t*)(p)) = (v) | |
525
0d4beab5e3c9
intreadwrite: support DEC compiler __unaligned type qualifier
mru
parents:
524
diff
changeset
|
59 |
0d4beab5e3c9
intreadwrite: support DEC compiler __unaligned type qualifier
mru
parents:
524
diff
changeset
|
60 #else |
152
5b211d03227b
Move BE_*/LE_*/ST*/LD* macros to a common place. Some further
reimar
parents:
diff
changeset
|
61 |
727 | 62 #ifndef AV_RB16 |
63 #define AV_RB16(x) ((((const uint8_t*)(x))[0] << 8) | \ | |
64 ((const uint8_t*)(x))[1]) | |
65 #endif | |
66 #ifndef AV_WB16 | |
/*
 * Store the low 16 bits of d at p in big-endian byte order, one byte at
 * a time, so p needs no particular alignment.
 *
 * d is copied into a local first and is therefore evaluated exactly
 * once; an argument with side effects (e.g. AV_WB16(p, v++)) is safe.
 * p is still expanded once per stored byte.
 */
#define AV_WB16(p, d) do {                          \
        uint16_t av_wb16_val_ = (d);                \
        ((uint8_t*)(p))[1] = av_wb16_val_;          \
        ((uint8_t*)(p))[0] = av_wb16_val_ >> 8;     \
    } while(0)
727 | 70 #endif |
232
9845a508ffbd
add AV_WB/WL for lswriting, similar to AV_RB/RL (also increment version)
alex
parents:
231
diff
changeset
|
71 |
727 | 72 #ifndef AV_RL16 |
444 | 73 #define AV_RL16(x) ((((const uint8_t*)(x))[1] << 8) | \ |
74 ((const uint8_t*)(x))[0]) | |
727 | 75 #endif |
76 #ifndef AV_WL16 | |
336 | 77 #define AV_WL16(p, d) do { \ |
326
46b4da5bf9ed
cosmetics: Reorder endianness macros by bit depth, alignment prettyprinting.
diego
parents:
282
diff
changeset
|
78 ((uint8_t*)(p))[0] = (d); \ |
336 | 79 ((uint8_t*)(p))[1] = (d)>>8; } while(0) |
727 | 80 #endif |
326
46b4da5bf9ed
cosmetics: Reorder endianness macros by bit depth, alignment prettyprinting.
diego
parents:
282
diff
changeset
|
81 |
727 | 82 #ifndef AV_RB32 |
/*
 * Read a 32-bit big-endian value from x one byte at a time, so x needs
 * no particular alignment.
 *
 * Each byte is widened to uint32_t before it is shifted. A bare uint8_t
 * promotes to signed int, and shifting a byte >= 0x80 left by 24
 * overflows int: that is undefined behaviour, and the negative result
 * sign-extends when the value is widened to 64 bits. The result is now
 * a uint32_t, the same type the AV_RN()-based definitions yield.
 *
 * x is expanded once per byte read.
 */
#define AV_RB32(x)  (((uint32_t)((const uint8_t*)(x))[0] << 24) | \
                     ((uint32_t)((const uint8_t*)(x))[1] << 16) | \
                     ((uint32_t)((const uint8_t*)(x))[2] <<  8) | \
                      (uint32_t)((const uint8_t*)(x))[3])
727 | 87 #endif |
88 #ifndef AV_WB32 | |
/*
 * Store the low 32 bits of d at p in big-endian byte order, one byte at
 * a time, so p needs no particular alignment.
 *
 * d is copied into a local first and is therefore evaluated exactly
 * once; an argument with side effects (e.g. AV_WB32(p, v++)) is safe.
 * p is still expanded once per stored byte.
 */
#define AV_WB32(p, d) do {                          \
        uint32_t av_wb32_val_ = (d);                \
        ((uint8_t*)(p))[3] = av_wb32_val_;          \
        ((uint8_t*)(p))[2] = av_wb32_val_ >> 8;     \
        ((uint8_t*)(p))[1] = av_wb32_val_ >> 16;    \
        ((uint8_t*)(p))[0] = av_wb32_val_ >> 24;    \
    } while(0)
727 | 94 #endif |
232
9845a508ffbd
add AV_WB/WL for lswriting, similar to AV_RB/RL (also increment version)
alex
parents:
231
diff
changeset
|
95 |
727 | 96 #ifndef AV_RL32 |
/*
 * Read a 32-bit little-endian value from x one byte at a time, so x
 * needs no particular alignment.
 *
 * Each byte is widened to uint32_t before it is shifted. A bare uint8_t
 * promotes to signed int, and shifting a byte >= 0x80 left by 24
 * overflows int: that is undefined behaviour, and the negative result
 * sign-extends when the value is widened to 64 bits. The result is now
 * a uint32_t, the same type the AV_RN()-based definitions yield.
 *
 * x is expanded once per byte read.
 */
#define AV_RL32(x)  (((uint32_t)((const uint8_t*)(x))[3] << 24) | \
                     ((uint32_t)((const uint8_t*)(x))[2] << 16) | \
                     ((uint32_t)((const uint8_t*)(x))[1] <<  8) | \
                      (uint32_t)((const uint8_t*)(x))[0])
727 | 101 #endif |
102 #ifndef AV_WL32 | |
/*
 * Store the low 32 bits of d at p in little-endian byte order, one byte
 * at a time, so p needs no particular alignment.
 *
 * d is copied into a local first and is therefore evaluated exactly
 * once; an argument with side effects (e.g. AV_WL32(p, v++)) is safe.
 * p is still expanded once per stored byte.
 */
#define AV_WL32(p, d) do {                          \
        uint32_t av_wl32_val_ = (d);                \
        ((uint8_t*)(p))[0] = av_wl32_val_;          \
        ((uint8_t*)(p))[1] = av_wl32_val_ >> 8;     \
        ((uint8_t*)(p))[2] = av_wl32_val_ >> 16;    \
        ((uint8_t*)(p))[3] = av_wl32_val_ >> 24;    \
    } while(0)
727 | 108 #endif |
152
5b211d03227b
Move BE_*/LE_*/ST*/LD* macros to a common place. Some further
reimar
parents:
diff
changeset
|
109 |
727 | 110 #ifndef AV_RB64 |
444 | 111 #define AV_RB64(x) (((uint64_t)((const uint8_t*)(x))[0] << 56) | \ |
112 ((uint64_t)((const uint8_t*)(x))[1] << 48) | \ | |
113 ((uint64_t)((const uint8_t*)(x))[2] << 40) | \ | |
114 ((uint64_t)((const uint8_t*)(x))[3] << 32) | \ | |
115 ((uint64_t)((const uint8_t*)(x))[4] << 24) | \ | |
116 ((uint64_t)((const uint8_t*)(x))[5] << 16) | \ | |
117 ((uint64_t)((const uint8_t*)(x))[6] << 8) | \ | |
118 (uint64_t)((const uint8_t*)(x))[7]) | |
727 | 119 #endif |
120 #ifndef AV_WB64 | |
/*
 * Store a 64-bit value d at p in big-endian byte order, one byte at a
 * time, so p needs no particular alignment.
 *
 * d is first converted to a uint64_t local. This means:
 *  - d is evaluated exactly once, so an argument with side effects
 *    (e.g. AV_WB64(p, v++)) is safe;
 *  - the shifts by 32..56 are always done on a 64-bit operand. Shifting
 *    the raw argument is undefined when its type is narrower than 64
 *    bits, e.g. AV_WB64(p, 1) with an int literal.
 * p is still expanded once per stored byte.
 */
#define AV_WB64(p, d) do {                          \
        uint64_t av_wb64_val_ = (d);                \
        ((uint8_t*)(p))[7] = av_wb64_val_;          \
        ((uint8_t*)(p))[6] = av_wb64_val_ >> 8;     \
        ((uint8_t*)(p))[5] = av_wb64_val_ >> 16;    \
        ((uint8_t*)(p))[4] = av_wb64_val_ >> 24;    \
        ((uint8_t*)(p))[3] = av_wb64_val_ >> 32;    \
        ((uint8_t*)(p))[2] = av_wb64_val_ >> 40;    \
        ((uint8_t*)(p))[1] = av_wb64_val_ >> 48;    \
        ((uint8_t*)(p))[0] = av_wb64_val_ >> 56;    \
    } while(0)
727 | 130 #endif |
335 | 131 |
727 | 132 #ifndef AV_RL64 |
444 | 133 #define AV_RL64(x) (((uint64_t)((const uint8_t*)(x))[7] << 56) | \ |
134 ((uint64_t)((const uint8_t*)(x))[6] << 48) | \ | |
135 ((uint64_t)((const uint8_t*)(x))[5] << 40) | \ | |
136 ((uint64_t)((const uint8_t*)(x))[4] << 32) | \ | |
137 ((uint64_t)((const uint8_t*)(x))[3] << 24) | \ | |
138 ((uint64_t)((const uint8_t*)(x))[2] << 16) | \ | |
139 ((uint64_t)((const uint8_t*)(x))[1] << 8) | \ | |
140 (uint64_t)((const uint8_t*)(x))[0]) | |
727 | 141 #endif |
142 #ifndef AV_WL64 | |
/*
 * Store a 64-bit value d at p in little-endian byte order, one byte at
 * a time, so p needs no particular alignment.
 *
 * d is first converted to a uint64_t local. This means:
 *  - d is evaluated exactly once, so an argument with side effects
 *    (e.g. AV_WL64(p, v++)) is safe;
 *  - the shifts by 32..56 are always done on a 64-bit operand. Shifting
 *    the raw argument is undefined when its type is narrower than 64
 *    bits, e.g. AV_WL64(p, 1) with an int literal.
 * p is still expanded once per stored byte.
 */
#define AV_WL64(p, d) do {                          \
        uint64_t av_wl64_val_ = (d);                \
        ((uint8_t*)(p))[0] = av_wl64_val_;          \
        ((uint8_t*)(p))[1] = av_wl64_val_ >> 8;     \
        ((uint8_t*)(p))[2] = av_wl64_val_ >> 16;    \
        ((uint8_t*)(p))[3] = av_wl64_val_ >> 24;    \
        ((uint8_t*)(p))[4] = av_wl64_val_ >> 32;    \
        ((uint8_t*)(p))[5] = av_wl64_val_ >> 40;    \
        ((uint8_t*)(p))[6] = av_wl64_val_ >> 48;    \
        ((uint8_t*)(p))[7] = av_wl64_val_ >> 56;    \
    } while(0)
727 | 152 #endif |
153 | |
154 #ifdef WORDS_BIGENDIAN | |
155 # define AV_RN(s, p) AV_RB##s(p) | |
156 # define AV_WN(s, p, v) AV_WB##s(p, v) | |
157 #else | |
158 # define AV_RN(s, p) AV_RL##s(p) | |
159 # define AV_WN(s, p, v) AV_WL##s(p, v) | |
160 #endif | |
161 | |
162 #endif /* HAVE_FAST_UNALIGNED */ | |
163 | |
164 #ifndef AV_RN16 | |
165 # define AV_RN16(p) AV_RN(16, p) | |
166 #endif | |
167 | |
168 #ifndef AV_RN32 | |
169 # define AV_RN32(p) AV_RN(32, p) | |
170 #endif | |
171 | |
172 #ifndef AV_RN64 | |
173 # define AV_RN64(p) AV_RN(64, p) | |
174 #endif | |
175 | |
176 #ifndef AV_WN16 | |
177 # define AV_WN16(p, v) AV_WN(16, p, v) | |
178 #endif | |
179 | |
180 #ifndef AV_WN32 | |
181 # define AV_WN32(p, v) AV_WN(32, p, v) | |
182 #endif | |
183 | |
184 #ifndef AV_WN64 | |
185 # define AV_WN64(p, v) AV_WN(64, p, v) | |
186 #endif | |
187 | |
188 #ifdef WORDS_BIGENDIAN | |
189 # define AV_RB(s, p) AV_RN(s, p) | |
190 # define AV_WB(s, p, v) AV_WN(s, p, v) | |
191 # define AV_RL(s, p) bswap_##s(AV_RN(s, p)) | |
192 # define AV_WL(s, p, v) AV_WN(s, p, bswap_##s(v)) | |
193 #else | |
194 # define AV_RB(s, p) bswap_##s(AV_RN(s, p)) | |
195 # define AV_WB(s, p, v) AV_WN(s, p, bswap_##s(v)) | |
196 # define AV_RL(s, p) AV_RN(s, p) | |
197 # define AV_WL(s, p, v) AV_WN(s, p, v) | |
198 #endif | |
199 | |
200 #define AV_RB8(x) (((const uint8_t*)(x))[0]) | |
201 #define AV_WB8(p, d) do { ((uint8_t*)(p))[0] = (d); } while(0) | |
202 | |
203 #define AV_RL8(x) AV_RB8(x) | |
204 #define AV_WL8(p, d) AV_WB8(p, d) | |
205 | |
206 #ifndef AV_RB16 | |
207 # define AV_RB16(p) AV_RB(16, p) | |
208 #endif | |
209 #ifndef AV_WB16 | |
210 # define AV_WB16(p, v) AV_WB(16, p, v) | |
211 #endif | |
212 | |
213 #ifndef AV_RL16 | |
214 # define AV_RL16(p) AV_RL(16, p) | |
215 #endif | |
216 #ifndef AV_WL16 | |
217 # define AV_WL16(p, v) AV_WL(16, p, v) | |
218 #endif | |
219 | |
220 #ifndef AV_RB32 | |
221 # define AV_RB32(p) AV_RB(32, p) | |
222 #endif | |
223 #ifndef AV_WB32 | |
224 # define AV_WB32(p, v) AV_WB(32, p, v) | |
225 #endif | |
226 | |
227 #ifndef AV_RL32 | |
228 # define AV_RL32(p) AV_RL(32, p) | |
229 #endif | |
230 #ifndef AV_WL32 | |
231 # define AV_WL32(p, v) AV_WL(32, p, v) | |
232 #endif | |
233 | |
234 #ifndef AV_RB64 | |
235 # define AV_RB64(p) AV_RB(64, p) | |
236 #endif | |
237 #ifndef AV_WB64 | |
238 # define AV_WB64(p, v) AV_WB(64, p, v) | |
239 #endif | |
240 | |
241 #ifndef AV_RL64 | |
242 # define AV_RL64(p) AV_RL(64, p) | |
243 #endif | |
244 #ifndef AV_WL64 | |
245 # define AV_WL64(p, v) AV_WL(64, p, v) | |
246 #endif | |
524 | 247 |
248 #define AV_RB24(x) ((((const uint8_t*)(x))[0] << 16) | \ | |
249 (((const uint8_t*)(x))[1] << 8) | \ | |
250 ((const uint8_t*)(x))[2]) | |
/*
 * Store the low 24 bits of d at p in big-endian byte order, one byte at
 * a time; exactly three bytes are written.
 *
 * d is copied into a local first and is therefore evaluated exactly
 * once; an argument with side effects (e.g. AV_WB24(p, v++)) is safe.
 * p is still expanded once per stored byte.
 */
#define AV_WB24(p, d) do {                          \
        uint32_t av_wb24_val_ = (d);                \
        ((uint8_t*)(p))[2] = av_wb24_val_;          \
        ((uint8_t*)(p))[1] = av_wb24_val_ >> 8;     \
        ((uint8_t*)(p))[0] = av_wb24_val_ >> 16;    \
    } while(0)
255 | |
256 #define AV_RL24(x) ((((const uint8_t*)(x))[2] << 16) | \ | |
257 (((const uint8_t*)(x))[1] << 8) | \ | |
258 ((const uint8_t*)(x))[0]) | |
/*
 * Store the low 24 bits of d at p in little-endian byte order, one byte
 * at a time; exactly three bytes are written.
 *
 * d is copied into a local first and is therefore evaluated exactly
 * once; an argument with side effects (e.g. AV_WL24(p, v++)) is safe.
 * p is still expanded once per stored byte.
 */
#define AV_WL24(p, d) do {                          \
        uint32_t av_wl24_val_ = (d);                \
        ((uint8_t*)(p))[0] = av_wl24_val_;          \
        ((uint8_t*)(p))[1] = av_wl24_val_ >> 8;     \
        ((uint8_t*)(p))[2] = av_wl24_val_ >> 16;    \
    } while(0)
335 | 263 |
567 | 264 #endif /* AVUTIL_INTREADWRITE_H */ |