comparison ppc/intreadwrite.h @ 729:753953ed8ff0 libavutil

PPC asm for AV_RL*(). PPC is normally big endian but has special little endian load/store instructions. Using these avoids a separate byteswap. This makes the vorbis decoder about 5% faster. Not much else uses little-endian read/write extensively. GCC generates horrible PPC code for the default AV_[RW]B64 (which uses a packed struct), so we override it with a plain pointer cast.
author mru
date Sat, 18 Apr 2009 00:00:31 +0000
parents
children eff5131d6a33
comparison
equal deleted inserted replaced
728:1fa3820b1a84 729:753953ed8ff0
1 /*
2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #ifndef AVUTIL_PPC_INTREADWRITE_H
22 #define AVUTIL_PPC_INTREADWRITE_H
23
24 #include <stdint.h>
25 #include "config.h"
26
27 #define AV_RL16 AV_RL16
28 static inline uint16_t AV_RL16(const void *p)
29 {
30 uint16_t v;
31 __asm__ ("lhbrx %0, %y1" : "=r"(v) : "Z"(*(const uint16_t*)p));
32 return v;
33 }
34
35 #define AV_WL16 AV_WL16
36 static inline void AV_WL16(void *p, uint16_t v)
37 {
38 __asm__ ("sthbrx %1, %y0" : "=Z"(*(uint16_t*)p) : "r"(v));
39 }
40
41 #define AV_RL32 AV_RL32
42 static inline uint32_t AV_RL32(const void *p)
43 {
44 uint32_t v;
45 __asm__ ("lwbrx %0, %y1" : "=r"(v) : "Z"(*(const uint32_t*)p));
46 return v;
47 }
48
49 #define AV_WL32 AV_WL32
50 static inline void AV_WL32(void *p, uint32_t v)
51 {
52 __asm__ ("stwbrx %1, %y0" : "=Z"(*(uint32_t*)p) : "r"(v));
53 }
54
55 #if HAVE_LDBRX
56
57 #define AV_RL64 AV_RL64
58 static inline uint64_t AV_RL64(const void *p)
59 {
60 uint64_t v;
61 __asm__ ("ldbrx %0, %y1" : "=r"(v) : "Z"(*(const uint64_t*)p));
62 return v;
63 }
64
65 #define AV_WL64 AV_WL64
66 static inline void AV_WL64(void *p, uint64_t v)
67 {
68 __asm__ ("stdbrx %1, %y0" : "=Z"(*(uint64_t*)p) : "r"(v));
69 }
70
71 #else
72
73 #define AV_RL64 AV_RL64
74 static inline uint64_t AV_RL64(const void *p)
75 {
76 union { uint64_t v; uint32_t hl[2]; } v;
77 __asm__ ("lwbrx %0, %y2 \n\t"
78 "lwbrx %1, %y3 \n\t"
79 : "=r"(v.hl[1]), "=r"(v.hl[0])
80 : "Z"(*(const uint32_t*)p), "Z"(*((const uint32_t*)p+1)));
81 return v.v;
82 }
83
84 #define AV_WL64 AV_WL64
85 static inline void AV_WL64(void *p, uint64_t v)
86 {
87 union { uint64_t v; uint32_t hl[2]; } vv = { v };
88 __asm__ ("stwbrx %2, %y0 \n\t"
89 "stwbrx %3, %y1 \n\t"
90 : "=Z"(*(uint32_t*)p), "=Z"(*((uint32_t*)p+1))
91 : "r"(vv.hl[1]), "r"(vv.hl[0]));
92 }
93
94 #endif /* HAVE_LDBRX */
95
/*
 * Big-endian 64-bit read/write overrides.
 *
 * The default implementation goes through a packed struct, for which GCC
 * produces very poor code here, so both accessors are replaced by a plain
 * pointer cast and a direct 64-bit access in native byte order.
 *
 * NOTE(review): the cast presumably relies on the target tolerating
 * unaligned 64-bit accesses -- confirm against the callers.
 */

#define AV_RB64(p)    (*(const uint64_t *)(p))
#define AV_WB64(p, v) (*(uint64_t *)(p) = (v))
103
104 #endif /* AVUTIL_PPC_INTREADWRITE_H */