annotate arm/intreadwrite.h @ 728:1fa3820b1a84 libavutil

ARM asm for AV_RN*() ARMv6 and later support unaligned loads and stores for single word/halfword but not double/multiple. GCC is ignorant of this and will always use bytewise accesses for unaligned data. Casting to an int32_t pointer is dangerous since a load/store double or multiple instruction might be used (this happens with some code in FFmpeg). Implementing the AV_[RW]* macros with inline asm using only supported instructions gives fast and safe unaligned accesses. ARM RVCT does the right thing with generic code. This gives an overall speedup of up to 10%.
author mru
date Sat, 18 Apr 2009 00:00:28 +0000
parents
children 2d52bcf3e4e6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
728
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
1 /*
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
2 * This file is part of FFmpeg.
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
3 *
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
4 * FFmpeg is free software; you can redistribute it and/or
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
5 * modify it under the terms of the GNU Lesser General Public
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
6 * License as published by the Free Software Foundation; either
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
7 * version 2.1 of the License, or (at your option) any later version.
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
8 *
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
9 * FFmpeg is distributed in the hope that it will be useful,
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
12 * Lesser General Public License for more details.
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
13 *
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
14 * You should have received a copy of the GNU Lesser General Public
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
15 * License along with FFmpeg; if not, write to the Free Software
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
17 */
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
18
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
19 #ifndef AVUTIL_ARM_INTREADWRITE_H
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
20 #define AVUTIL_ARM_INTREADWRITE_H
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
21
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
22 #include <stdint.h>
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
23 #include "config.h"
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
24
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
25 #if HAVE_FAST_UNALIGNED && HAVE_INLINE_ASM
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
26
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
27 #define AV_RN16 AV_RN16
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
28 static inline uint16_t AV_RN16(const void *p)
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
29 {
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
30 uint16_t v;
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
31 __asm__ ("ldrh %0, %1" : "=r"(v) : "m"(*(const uint16_t *)p));
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
32 return v;
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
33 }
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
34
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
35 #define AV_WN16 AV_WN16
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
36 static inline void AV_WN16(void *p, uint16_t v)
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
37 {
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
38 __asm__ ("strh %1, %0" : "=m"(*(uint16_t *)p) : "r"(v));
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
39 }
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
40
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
41 #define AV_RN32 AV_RN32
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
42 static inline uint32_t AV_RN32(const void *p)
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
43 {
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
44 uint32_t v;
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
45 __asm__ ("ldr %0, %1" : "=r"(v) : "m"(*(const uint32_t *)p));
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
46 return v;
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
47 }
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
48
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
49 #define AV_WN32 AV_WN32
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
50 static inline void AV_WN32(void *p, uint32_t v)
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
51 {
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
52 __asm__ ("str %1, %0" : "=m"(*(uint32_t *)p) : "r"(v));
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
53 }
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
54
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
55 #define AV_RN64 AV_RN64
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
56 static inline uint64_t AV_RN64(const void *p)
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
57 {
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
58 union { uint64_t v; uint32_t hl[2]; } v;
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
59 __asm__ ("ldr %0, %2 \n\t"
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
60 "ldr %1, %3 \n\t"
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
61 : "=r"(v.hl[0]), "=r"(v.hl[1])
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
62 : "m"(*(const uint32_t*)p), "m"(*((const uint32_t*)p+1)));
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
63 return v.v;
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
64 }
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
65
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
66 #define AV_WN64 AV_WN64
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
67 static inline void AV_WN64(void *p, uint64_t v)
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
68 {
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
69 union { uint64_t v; uint32_t hl[2]; } vv = { v };
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
70 __asm__ ("str %2, %0 \n\t"
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
71 "str %3, %1 \n\t"
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
72 : "=m"(*(uint32_t*)p), "=m"(*((uint32_t*)p+1))
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
73 : "r"(vv.hl[0]), "r"(vv.hl[1]));
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
74 }
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
75
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
76 #endif /* HAVE_FAST_UNALIGNED && HAVE_INLINE_ASM */
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
77
1fa3820b1a84 ARM asm for AV_RN*()
mru
parents:
diff changeset
78 #endif /* AVUTIL_ARM_INTREADWRITE_H */