Mercurial > libavcodec.hg
annotate s3tc.c @ 12033:5de2b84a1fc3 libavcodec
Eliminate another redundant instruction in vp56/8 arithcoder
Necessary because of this GCC bug:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44474
To do this, convert some, but not all (!) of the variables in VP56RangeCoder
into local variables.
If we convert c->high into a local variable, gcc gets the stupids and refuses
to use a conditional move for the unpredictable main branch.
TODO: dispense with this bullshit and write an asm version.
author | darkshikari |
---|---|
date | Wed, 30 Jun 2010 23:59:27 +0000 |
parents | 2acf0ae7b041 |
children |
rev | line source |
---|---|
4933 | 1 /* |
2 * S3 Texture Compression (S3TC) decoding functions | |
3 * Copyright (c) 2007 by Ivo van Poorten | |
4 * | |
5214 | 5 * see also: http://wiki.multimedia.cx/index.php?title=S3TC |
6 * | |
4933 | 7 * This file is part of FFmpeg. |
8 * | |
9 * FFmpeg is free software; you can redistribute it and/or | |
10 * modify it under the terms of the GNU Lesser General Public | |
11 * License as published by the Free Software Foundation; either | |
12 * version 2.1 of the License, or (at your option) any later version. | |
13 * | |
14 * FFmpeg is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
20 * License along with FFmpeg; if not, write to the Free Software | |
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 */ | |
23 | |
8573
2acf0ae7b041
Fix build: Add intreadwrite.h and bswap.h #includes where necessary.
diego
parents:
6306
diff
changeset
|
24 #include "libavutil/intreadwrite.h" |
4933 | 25 #include "avcodec.h" |
26 #include "s3tc.h" | |
27 | |
/**
 * Decode one 4x4 colour block and write it as 32-bit pixels.
 *
 * @param s       8 input bytes: two little-endian 16-bit colours followed by
 *                a little-endian 32-bit word holding sixteen 2-bit palette
 *                indices (lowest bits belong to the first pixel)
 * @param d       top-left output pixel of the block; four pixels are written
 *                on each of four consecutive rows
 * @param qstride distance between two output rows, in uint32_t units
 * @param flag    zero: the palette entries carry 0xff in their top byte and
 *                the three-colour mode (c0 <= c1, fourth entry is 0) is
 *                available; non-zero: the palette top byte is 0 and the
 *                four-colour mode is always used
 * @param alpha   sixteen 4-bit values, lowest nibble first; each one is
 *                expanded to 8 bits and added into the top byte of the
 *                corresponding output pixel
 */
static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d,
                                      unsigned int qstride, unsigned int flag,
                                      uint64_t alpha) {
    /* 255u keeps the shift unsigned: shifting the signed int 255 left by 24
     * would move a bit into the sign position, which is undefined. */
    unsigned int x, y, c0, c1, a = (!flag * 255u) << 24;
    unsigned int rb0, rb1, rb2, rb3, g0, g1, g2, g3;
    uint32_t colors[4], pixels;

    c0 = AV_RL16(s);
    c1 = AV_RL16(s+2);

    /* Spread the two 5-bit fields (bits 0-4 and 11-15) into bytes 0 and 2,
     * then copy their top 3 bits into the freed low bits to fill 8 bits. */
    rb0  = (c0<<3 | c0<<8) & 0xf800f8;
    rb1  = (c1<<3 | c1<<8) & 0xf800f8;
    rb0 +=        (rb0>>5) & 0x070007;
    rb1 +=        (rb1>>5) & 0x070007;
    /* Same for the 6-bit field (bits 5-10), which lands in byte 1. */
    g0   =        (c0 <<5) & 0x00fc00;
    g1   =        (c1 <<5) & 0x00fc00;
    g0  +=        (g0 >>6) & 0x000300;
    g1  +=        (g1 >>6) & 0x000300;

    colors[0] = rb0 + g0 + a;
    colors[1] = rb1 + g1 + a;

    if (c0 > c1 || flag) {
        /* Four-colour mode: two blends weighted 2:1 and 1:2.
         * Multiplying by 21 and shifting right by 6 approximates /3. */
        rb2 = (((2*rb0+rb1) * 21) >> 6) & 0xff00ff;
        rb3 = (((2*rb1+rb0) * 21) >> 6) & 0xff00ff;
        g2  = (((2*g0 +g1 ) * 21) >> 6) & 0x00ff00;
        g3  = (((2*g1 +g0 ) * 21) >> 6) & 0x00ff00;
        colors[3] = rb3 + g3 + a;
    } else {
        /* Three-colour mode: one midpoint, fourth entry is all zero. */
        rb2 = ((rb0+rb1) >> 1) & 0xff00ff;
        g2  = ((g0 +g1 ) >> 1) & 0x00ff00;
        colors[3] = 0;
    }

    colors[2] = rb2 + g2 + a;

    pixels = AV_RL32(s+4);
    for (y=0; y<4; y++) {
        for (x=0; x<4; x++) {
            /* Put the 4-bit alpha in bits 28-31 and replicate it into
             * bits 24-27 so the top byte becomes 0xNN. */
            a        = (alpha & 0x0f) << 28;
            a       += a >> 4;
            d[x]     = a + colors[pixels&3];
            pixels >>= 2;
            alpha  >>= 4;
        }
        d += qstride;
    }
}
76 | |
/**
 * Decode a sequence of 8-byte blocks into a 32-bit-per-pixel image.
 *
 * h/4 rows of w/4 blocks are consumed from s; each block is decoded by
 * dxt1_decode_pixels() with flag 0 and an all-zero alpha word.
 *
 * @param s      input block data, 8 bytes per block
 * @param dst    output buffer, accessed as uint32_t pixels
 * @param w      image width in pixels
 * @param h      image height in pixels
 * @param stride value used to derive the row pitch: stride/4 uint32_t units
 *               separate two output rows
 */
void ff_decode_dxt1(const uint8_t *s, uint8_t *dst,
                    const unsigned int w, const unsigned int h,
                    const unsigned int stride) {
    const unsigned int blocks_per_row = w / 4;
    const unsigned int block_rows     = h / 4;
    const unsigned int qstride        = stride / 4;
    uint32_t *out = (uint32_t *) dst;
    unsigned int row, col;

    for (row = 0; row < block_rows; row++) {
        for (col = 0; col < blocks_per_row; col++) {
            dxt1_decode_pixels(s, out, qstride, 0, 0LL);
            s   += 8;   /* next input block          */
            out += 4;   /* next block, 4 pixels over */
        }
        /* Skip forward to the start of the next row of blocks. */
        out += stride - w;
    }
}
87 | |
/**
 * Decode a sequence of 16-byte blocks into a 32-bit-per-pixel image.
 *
 * h/4 rows of w/4 blocks are consumed from s. In each block the first
 * 8 bytes are read as one little-endian 64-bit alpha word and the
 * following 8 bytes are decoded by dxt1_decode_pixels() with flag 1.
 *
 * @param s      input block data, 16 bytes per block
 * @param dst    output buffer, accessed as uint32_t pixels
 * @param w      image width in pixels
 * @param h      image height in pixels
 * @param stride value used to derive the row pitch: stride/4 uint32_t units
 *               separate two output rows
 */
void ff_decode_dxt3(const uint8_t *s, uint8_t *dst,
                    const unsigned int w, const unsigned int h,
                    const unsigned int stride) {
    const unsigned int blocks_per_row = w / 4;
    const unsigned int block_rows     = h / 4;
    const unsigned int qstride        = stride / 4;
    uint32_t *out = (uint32_t *) dst;
    unsigned int row, col;

    for (row = 0; row < block_rows; row++) {
        for (col = 0; col < blocks_per_row; col++) {
            /* Alpha word comes first, colour data sits 8 bytes later. */
            dxt1_decode_pixels(s + 8, out, qstride, 1, AV_RL64(s));
            s   += 16;  /* next input block          */
            out += 4;   /* next block, 4 pixels over */
        }
        /* Skip forward to the start of the next row of blocks. */
        out += stride - w;
    }
}