Mercurial > libavcodec.hg
annotate bswap.h @ 2609:0f74a379a890 libavcodec
store the number of runs to avoid storing the last run value
about 10% lower bitrate for -qscale 32 (foreman & some music video)
worst case bitrate increase <0.1% (lossless or low qscale)
and now the bad news: even though this just adds a single subtraction and an if() to the medium-sized unpack_coeffs() loop, and the if() will only be false once per unpack_coeffs() call, gcc produces 50% slower code. I didn't look at the generated asm yet, not sure if I want to ...
author | michael |
---|---|
date | Fri, 15 Apr 2005 13:24:30 +0000 |
parents | 15cfba1b97b5 |
children | 4023235edd2e |
rev | line source |
---|---|
1106 | 1 /** |
2 * @file bswap.h | |
3 * byte swap. | |
4 */ | |
5 | |
424 | 6 #ifndef __BSWAP_H__ |
7 #define __BSWAP_H__ | |
8 | |
9 #ifdef HAVE_BYTESWAP_H | |
10 #include <byteswap.h> | |
11 #else | |
12 | |
2293
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
1378
diff
changeset
|
/*
 * Output-operand constraint used by the xchgb-based swaps in this header,
 * which address the operand through both its %b (low byte) and %h (second
 * byte) forms.  In GCC's x86 constraint set "Q" is limited to a/b/c/d,
 * whereas "q" on x86-64 also admits registers that have no %h form.
 */
#ifndef ARCH_X86_64
#  define LEGACY_REGS "=q"
#else
#  define LEGACY_REGS "=Q"
#endif
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
1378
diff
changeset
|
18 |
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
1378
diff
changeset
|
19 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
1378
diff
changeset
|
20 static inline uint16_t ByteSwap16(uint16_t x) |
424 | 21 { |
22 __asm("xchgb %b0,%h0" : | |
2293
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
1378
diff
changeset
|
23 LEGACY_REGS (x) : |
424 | 24 "0" (x)); |
25 return x; | |
26 } | |
27 #define bswap_16(x) ByteSwap16(x) | |
28 | |
2293
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
1378
diff
changeset
|
29 static inline uint32_t ByteSwap32(uint32_t x) |
424 | 30 { |
31 #if __CPU__ > 386 | |
32 __asm("bswap %0": | |
33 "=r" (x) : | |
34 #else | |
35 __asm("xchgb %b0,%h0\n" | |
36 " rorl $16,%0\n" | |
37 " xchgb %b0,%h0": | |
2293
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
1378
diff
changeset
|
38 LEGACY_REGS (x) : |
424 | 39 #endif |
40 "0" (x)); | |
41 return x; | |
42 } | |
43 #define bswap_32(x) ByteSwap32(x) | |
44 | |
2293
15cfba1b97b5
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents:
1378
diff
changeset
|
/**
 * Reverse the byte order of a 64-bit value (x86 / x86-64 version).
 *
 * @param x value to swap
 * @return x with its eight bytes in reverse order
 */
static inline uint64_t ByteSwap64(uint64_t x)
{
#ifdef ARCH_X86_64
    /* One 64-bit bswap reverses the whole register. */
    __asm__("bswap %0"
            : "=r" (x)
            : "0" (x));
    return x;
#else
    /* 32-bit target: byte-swap each half and exchange the halves.  This is
     * plain C, so it does not rely on the non-ISO `asm` keyword and leaves
     * register assignment to the compiler instead of forcing an xchgl. */
    const uint32_t lo = (uint32_t)x;
    const uint32_t hi = (uint32_t)(x >> 32);

    return ((uint64_t)bswap_32(lo) << 32) | bswap_32(hi);
#endif
}
#define bswap_64(x) ByteSwap64(x)
62 | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1106
diff
changeset
|
63 #elif defined(ARCH_SH4) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1106
diff
changeset
|
64 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1106
diff
changeset
|
/**
 * 16-bit byte swap for SH4, implemented with a single swap.b instruction.
 *
 * @param x value to swap
 * @return the register contents after swap.b has been applied to x
 */
static inline uint16_t ByteSwap16(uint16_t x)
{
    /* Source and destination are the same register: "0" ties the input
     * to output operand 0. */
    __asm__("swap.b %0,%0"
            : "=r" (x)
            : "0" (x));
    return x;
}
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1106
diff
changeset
|
69 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1106
diff
changeset
|
/**
 * 32-bit byte swap for SH4, built from the sequence
 * swap.b, swap.w, swap.b applied in place to one register.
 *
 * @param x value to swap
 * @return the register contents after the three-instruction sequence
 */
static inline uint32_t ByteSwap32(uint32_t x)
{
    /* All three instructions read and write operand 0; the "0" input
     * constraint places x in that register on entry. */
    __asm__("swap.b %0,%0\n"
            "swap.w %0,%0\n"
            "swap.b %0,%0\n"
            : "=r" (x)
            : "0" (x));
    return x;
}
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1106
diff
changeset
|
78 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1106
diff
changeset
|
#define bswap_16(x) ByteSwap16(x)
#define bswap_32(x) ByteSwap32(x)

/**
 * Reverse the byte order of a 64-bit value (SH4 version).
 *
 * The two byte-swapped halves are recombined with shifts rather than through
 * a union of { low, high } words, so the result does not depend on which
 * 32-bit word the target stores first in memory.
 *
 * @param x value to swap
 * @return x with its eight bytes in reverse order
 */
static inline uint64_t ByteSwap64(uint64_t x)
{
    const uint32_t lo = (uint32_t)x;
    const uint32_t hi = (uint32_t)(x >> 32);

    /* The swapped low half becomes the new high half and vice versa. */
    return ((uint64_t)bswap_32(lo) << 32) | bswap_32(hi);
}
#define bswap_64(x) ByteSwap64(x)
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1106
diff
changeset
|
95 |
424 | 96 #else |
97 | |
/*
 * Portable fallbacks used when no architecture-specific version applies.
 * bswap_16() and bswap_32() are macros and evaluate their argument more
 * than once, so do not pass expressions with side effects.
 */
#define bswap_16(x) ((((x) & 0x00ff) << 8) | (((x) & 0xff00) >> 8))


// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
/* The masks are unsigned so that an int-typed argument is converted to
 * unsigned before shifting; moving a set bit into the sign bit of a signed
 * int (the "<< 24" term) would be undefined behaviour. */
#define bswap_32(x) \
    ((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >>  8) | \
     (((x) & 0x0000ff00u) <<  8) | (((x) & 0x000000ffu) << 24))

/**
 * Reverse the byte order of a 64-bit value.
 *
 * @param x value to swap
 * @return x with its eight bytes in reverse order
 */
static inline uint64_t ByteSwap64(uint64_t x)
{
    const uint32_t lo = (uint32_t)x;
    const uint32_t hi = (uint32_t)(x >> 32);

    /* The swapped low half becomes the new high half and vice versa;
     * using shifts keeps this independent of the host byte order. */
    return ((uint64_t)bswap_32(lo) << 32) | bswap_32(hi);
}
#define bswap_64(x) ByteSwap64(x)
133d2867d4b2
don't use inttypes.h - use inline function for bswap_64()
bellard
parents:
430
diff
changeset
|
118 |
424 | 119 #endif /* !ARCH_X86 */ |
120 | |
121 #endif /* !HAVE_BYTESWAP_H */ | |
122 | |
/*
 * Host byte-order helpers:
 *   be2me_NN(x)  BigEndian    to MachineEndian, NN-bit value
 *   le2me_NN(x)  LittleEndian to MachineEndian, NN-bit value
 * The conversion that matches the host's own order expands to (x) unchanged;
 * the other one expands to the corresponding bswap_NN(x).
 */
#ifndef WORDS_BIGENDIAN
/* little-endian host */
#  define le2me_16(x) (x)
#  define le2me_32(x) (x)
#  define le2me_64(x) (x)
#  define be2me_16(x) bswap_16(x)
#  define be2me_32(x) bswap_32(x)
#  define be2me_64(x) bswap_64(x)
#else
/* big-endian host */
#  define be2me_16(x) (x)
#  define be2me_32(x) (x)
#  define be2me_64(x) (x)
#  define le2me_16(x) bswap_16(x)
#  define le2me_32(x) bswap_32(x)
#  define le2me_64(x) bswap_64(x)
#endif
141 | |
142 #endif /* __BSWAP_H__ */ |