annotate bswap.h @ 2609:0f74a379a890 libavcodec

Store the number of runs to avoid storing the last run value. About 10% lower bitrate for -qscale 32 (foreman & some music video); worst-case bitrate increase <0.1% (lossless or low qscale). And now the bad news: even though this just adds a single subtraction and an if() into the medium-sized unpack_coeffs() loop, and the if() will only be false once per unpack_coeffs() call, gcc produces 50% slower code. I didn't look at the generated asm yet; not sure if I want to ...
author michael
date Fri, 15 Apr 2005 13:24:30 +0000
parents 15cfba1b97b5
children 4023235edd2e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1106
1e39f273ecd6 per file doxy
michaelni
parents: 433
diff changeset
1 /**
1e39f273ecd6 per file doxy
michaelni
parents: 433
diff changeset
2 * @file bswap.h
1e39f273ecd6 per file doxy
michaelni
parents: 433
diff changeset
3 * Byte-swapping helpers (bswap_16/32/64) and endianness conversion macros (be2me_*, le2me_*).
1e39f273ecd6 per file doxy
michaelni
parents: 433
diff changeset
4 */
1e39f273ecd6 per file doxy
michaelni
parents: 433
diff changeset
5
424
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
6 #ifndef __BSWAP_H__
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
7 #define __BSWAP_H__
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
8
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
9 #ifdef HAVE_BYTESWAP_H
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
10 #include <byteswap.h>
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
11 #else
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
12
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
13 #ifdef ARCH_X86_64
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
14 # define LEGACY_REGS "=Q"
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
15 #else
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
16 # define LEGACY_REGS "=q"
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
17 #endif
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
18
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
19 #if defined(ARCH_X86) || defined(ARCH_X86_64)
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
20 static inline uint16_t ByteSwap16(uint16_t x)
424
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
21 {
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
22 __asm("xchgb %b0,%h0" :
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
23 LEGACY_REGS (x) :
424
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
24 "0" (x));
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
25 return x;
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
26 }
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
27 #define bswap_16(x) ByteSwap16(x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
28
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
29 static inline uint32_t ByteSwap32(uint32_t x)
424
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
30 {
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
31 #if __CPU__ > 386
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
32 __asm("bswap %0":
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
33 "=r" (x) :
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
34 #else
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
35 __asm("xchgb %b0,%h0\n"
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
36 " rorl $16,%0\n"
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
37 " xchgb %b0,%h0":
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
38 LEGACY_REGS (x) :
424
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
39 #endif
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
40 "0" (x));
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
41 return x;
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
42 }
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
43 #define bswap_32(x) ByteSwap32(x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
44
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
45 static inline uint64_t ByteSwap64(uint64_t x)
424
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
46 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
47 #ifdef ARCH_X86_64
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
48 __asm("bswap %0":
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
49 "=r" (x) :
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
50 "0" (x));
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
51 return x;
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
52 #else
433
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
53 register union { __extension__ uint64_t __ll;
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
54 uint32_t __l[2]; } __x;
424
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
55 asm("xchgl %0,%1":
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
56 "=r"(__x.__l[0]),"=r"(__x.__l[1]):
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
57 "0"(bswap_32((uint32_t)x)),"1"(bswap_32((uint32_t)(x>>32))));
424
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
58 return __x.__ll;
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1378
diff changeset
59 #endif
424
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
60 }
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
61 #define bswap_64(x) ByteSwap64(x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
62
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
63 #elif defined(ARCH_SH4)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
64
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
65 static inline uint16_t ByteSwap16(uint16_t x) {
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
66 __asm__("swap.b %0,%0":"=r"(x):"0"(x));
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
67 return x;
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
68 }
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
69
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
70 static inline uint32_t ByteSwap32(uint32_t x) {
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
71 __asm__(
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
72 "swap.b %0,%0\n"
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
73 "swap.w %0,%0\n"
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
74 "swap.b %0,%0\n"
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
75 :"=r"(x):"0"(x));
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
76 return x;
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
77 }
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
78
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
79 #define bswap_16(x) ByteSwap16(x)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
80 #define bswap_32(x) ByteSwap32(x)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
81
1378
1831d86117a3 warning fixes
al3x
parents: 1259
diff changeset
82 static inline uint64_t ByteSwap64(uint64_t x)
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
83 {
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
84 union {
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
85 uint64_t ll;
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
86 struct {
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
87 uint32_t l,h;
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
88 } l;
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
89 } r;
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
90 r.l.l = bswap_32 (x);
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
91 r.l.h = bswap_32 (x>>32);
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
92 return r.ll;
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
93 }
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
94 #define bswap_64(x) ByteSwap64(x)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents: 1106
diff changeset
95
424
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
96 #else
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
97
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
98 #define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
99
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
100
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
101 // code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
102 #define bswap_32(x) \
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
103 ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
104 (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
105
1378
1831d86117a3 warning fixes
al3x
parents: 1259
diff changeset
106 static inline uint64_t ByteSwap64(uint64_t x)
433
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
107 {
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
108 union {
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
109 uint64_t ll;
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
110 uint32_t l[2];
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
111 } w, r;
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
112 w.ll = x;
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
113 r.l[0] = bswap_32 (w.l[1]);
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
114 r.l[1] = bswap_32 (w.l[0]);
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
115 return r.ll;
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
116 }
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
117 #define bswap_64(x) ByteSwap64(x)
133d2867d4b2 don't use inttypes.h - use inline function for bswap_64()
bellard
parents: 430
diff changeset
118
424
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
119 #endif /* ARCH_X86 || ARCH_X86_64, ARCH_SH4, generic fallback */
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
120
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
121 #endif /* !HAVE_BYTESWAP_H */
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
122
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
123 // be2me_N(x): convert an N-bit value from big-endian to machine (host) byte order
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
124 // le2me_N(x): convert an N-bit value from little-endian to machine (host) byte order
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
125
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
126 #ifdef WORDS_BIGENDIAN
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
127 #define be2me_16(x) (x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
128 #define be2me_32(x) (x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
129 #define be2me_64(x) (x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
130 #define le2me_16(x) bswap_16(x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
131 #define le2me_32(x) bswap_32(x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
132 #define le2me_64(x) bswap_64(x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
133 #else
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
134 #define be2me_16(x) bswap_16(x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
135 #define be2me_32(x) bswap_32(x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
136 #define be2me_64(x) bswap_64(x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
137 #define le2me_16(x) (x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
138 #define le2me_32(x) (x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
139 #define le2me_64(x) (x)
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
140 #endif
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
141
92b8e1affb66 moved in libavcodec
glantau
parents:
diff changeset
142 #endif /* __BSWAP_H__ */