Mercurial > libavutil.hg
comparison ppc/intreadwrite.h @ 729:753953ed8ff0 libavutil
PPC asm for AV_RL*()
PPC is normally big endian but has special little endian load/store
instructions. Using these avoids a separate byteswap. This makes the
vorbis decoder about 5% faster. Not much else uses little-endian
read/write extensively.
GCC generates horrible PPC code for the default AV_[RW]B64 (which uses
a packed struct), so we override it with a plain pointer cast.
author | mru |
---|---|
date | Sat, 18 Apr 2009 00:00:31 +0000 |
parents | |
children | eff5131d6a33 |
comparison
equal
deleted
inserted
replaced
728:1fa3820b1a84 | 729:753953ed8ff0 |
---|---|
1 /* | |
2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |
3 * | |
4 * This file is part of FFmpeg. | |
5 * | |
6 * FFmpeg is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2.1 of the License, or (at your option) any later version. | |
10 * | |
11 * FFmpeg is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with FFmpeg; if not, write to the Free Software | |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 */ | |
20 | |
21 #ifndef AVUTIL_PPC_INTREADWRITE_H | |
22 #define AVUTIL_PPC_INTREADWRITE_H | |
23 | |
24 #include <stdint.h> | |
25 #include "config.h" | |
26 | |
/*
 * Read a 16-bit little-endian value from p.
 *
 * Uses the PPC byte-reversed halfword load (lhbrx), so the value arrives
 * already swapped and no separate byteswap is needed.
 * The pointed-to object is handed to the asm as a "Z" memory input and
 * printed with %y, so the compiler knows exactly which memory is read.
 * p is not checked for NULL or alignment.
 *
 * The self-referential #define presumably lets the generic intreadwrite
 * code detect that an arch-specific AV_RL16 exists -- confirm against the
 * including header.
 */
27 #define AV_RL16 AV_RL16 | |
28 static inline uint16_t AV_RL16(const void *p) | |
29 { | |
30 uint16_t v; | |
31 __asm__ ("lhbrx %0, %y1" : "=r"(v) : "Z"(*(const uint16_t*)p)); | |
32 return v; | |
33 } | |
34 | |
/*
 * Store the 16-bit value v at p in little-endian byte order.
 *
 * Uses the PPC byte-reversed halfword store (sthbrx), so no separate
 * byteswap is needed.
 * The destination is declared as a "=Z" memory output, which tells the
 * compiler that the 16-bit object at p is written by the asm.
 * p is not checked for NULL or alignment.
 *
 * The self-referential #define presumably marks AV_WL16 as overridden for
 * the generic intreadwrite code -- confirm against the including header.
 */
35 #define AV_WL16 AV_WL16 | |
36 static inline void AV_WL16(void *p, uint16_t v) | |
37 { | |
38 __asm__ ("sthbrx %1, %y0" : "=Z"(*(uint16_t*)p) : "r"(v)); | |
39 } | |
40 | |
/*
 * Read a 32-bit little-endian value from p.
 *
 * Uses the PPC byte-reversed word load (lwbrx), so no separate byteswap is
 * needed.
 * The pointed-to object is handed to the asm as a "Z" memory input and
 * printed with %y, so the compiler knows exactly which memory is read.
 * p is not checked for NULL or alignment.
 *
 * The self-referential #define presumably marks AV_RL32 as overridden for
 * the generic intreadwrite code -- confirm against the including header.
 */
41 #define AV_RL32 AV_RL32 | |
42 static inline uint32_t AV_RL32(const void *p) | |
43 { | |
44 uint32_t v; | |
45 __asm__ ("lwbrx %0, %y1" : "=r"(v) : "Z"(*(const uint32_t*)p)); | |
46 return v; | |
47 } | |
48 | |
/*
 * Store the 32-bit value v at p in little-endian byte order.
 *
 * Uses the PPC byte-reversed word store (stwbrx), so no separate byteswap
 * is needed.
 * The destination is declared as a "=Z" memory output, which tells the
 * compiler that the 32-bit object at p is written by the asm.
 * p is not checked for NULL or alignment.
 *
 * The self-referential #define presumably marks AV_WL32 as overridden for
 * the generic intreadwrite code -- confirm against the including header.
 */
49 #define AV_WL32 AV_WL32 | |
50 static inline void AV_WL32(void *p, uint32_t v) | |
51 { | |
52 __asm__ ("stwbrx %1, %y0" : "=Z"(*(uint32_t*)p) : "r"(v)); | |
53 } | |
54 | |
55 #if HAVE_LDBRX | |
56 | |
/*
 * Read a 64-bit little-endian value from p with a single byte-reversed
 * doubleword load (ldbrx).
 *
 * This variant is compiled only when HAVE_LDBRX is non-zero (presumably
 * set by config.h when the target CPU provides ldbrx/stdbrx -- confirm).
 * The pointed-to object is handed to the asm as a "Z" memory input and
 * printed with %y, so the compiler knows exactly which memory is read.
 * p is not checked for NULL or alignment.
 */
57 #define AV_RL64 AV_RL64 | |
58 static inline uint64_t AV_RL64(const void *p) | |
59 { | |
60 uint64_t v; | |
61 __asm__ ("ldbrx %0, %y1" : "=r"(v) : "Z"(*(const uint64_t*)p)); | |
62 return v; | |
63 } | |
64 | |
/*
 * Store the 64-bit value v at p in little-endian byte order with a single
 * byte-reversed doubleword store (stdbrx).
 *
 * This variant is compiled only when HAVE_LDBRX is non-zero (presumably
 * set by config.h when the target CPU provides ldbrx/stdbrx -- confirm).
 * The destination is declared as a "=Z" memory output, which tells the
 * compiler that the 64-bit object at p is written by the asm.
 * p is not checked for NULL or alignment.
 */
65 #define AV_WL64 AV_WL64 | |
66 static inline void AV_WL64(void *p, uint64_t v) | |
67 { | |
68 __asm__ ("stdbrx %1, %y0" : "=Z"(*(uint64_t*)p) : "r"(v)); | |
69 } | |
70 | |
71 #else | |
72 | |
73 #define AV_RL64 AV_RL64 | |
74 static inline uint64_t AV_RL64(const void *p) | |
75 { | |
76 union { uint64_t v; uint32_t hl[2]; } v; | |
77 __asm__ ("lwbrx %0, %y2 \n\t" | |
78 "lwbrx %1, %y3 \n\t" | |
79 : "=r"(v.hl[1]), "=r"(v.hl[0]) | |
80 : "Z"(*(const uint32_t*)p), "Z"(*((const uint32_t*)p+1))); | |
81 return v.v; | |
82 } | |
83 | |
/*
 * Store the 64-bit value v at p in little-endian byte order on CPUs
 * without stdbrx.
 *
 * v is split into two 32-bit halves through a union and written with two
 * byte-reversed word stores (stwbrx): hl[1] goes to p and hl[0] to the
 * following 32-bit word.
 * This assumes a big-endian target, where hl[1] is the least significant
 * half, so the low word is stored first as little-endian layout requires.
 * Both destination words are declared as "=Z" memory outputs, which tells
 * the compiler that they are written by the asm.
 * p is not checked for NULL or alignment.
 */
84 #define AV_WL64 AV_WL64 | |
85 static inline void AV_WL64(void *p, uint64_t v) | |
86 { | |
87 union { uint64_t v; uint32_t hl[2]; } vv = { v }; | |
88 __asm__ ("stwbrx %2, %y0 \n\t" | |
89 "stwbrx %3, %y1 \n\t" | |
90 : "=Z"(*(uint32_t*)p), "=Z"(*((uint32_t*)p+1)) | |
91 : "r"(vv.hl[1]), "r"(vv.hl[0])); | |
92 } | |
93 | |
94 #endif /* HAVE_LDBRX */ | |
95 | |
96 /* | |
97 * GCC fails miserably on the packed struct version which is used by | |
98 * default, so we override it here. | |
99 */ | |
100 | |
/*
 * AV_RB64(p): read the 64-bit value at p through a plain pointer cast.
 * AV_WB64(p, v): store v at p through a plain pointer cast; the expression
 * yields the assigned value.
 *
 * Both access memory in the CPU's native byte order, so they carry the
 * big-endian meaning their names imply only on a big-endian target.
 * Each macro argument is evaluated exactly once.
 * NOTE(review): there is no alignment handling here; presumably the target
 * tolerates unaligned 64-bit accesses -- confirm.
 */
101 #define AV_RB64(p) (*(const uint64_t *)(p)) | |
102 #define AV_WB64(p, v) (*(uint64_t *)(p) = (v)) | |
103 | |
104 #endif /* AVUTIL_PPC_INTREADWRITE_H */ |