Mercurial > libavcodec.hg
comparison simple_idct.c @ 175:bd77d3cbb233 libavcodec
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
author | arpi_esp |
---|---|
date | Sun, 09 Dec 2001 12:39:54 +0000 |
parents | |
children | ccf36af385f3 |
comparison
equal
deleted
inserted
replaced
174:ac5075a55488 | 175:bd77d3cbb233 |
---|---|
1 /* | |
2 Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at) | |
3 | |
4 This program is free software; you can redistribute it and/or modify | |
5 it under the terms of the GNU General Public License as published by | |
6 the Free Software Foundation; either version 2 of the License, or | |
7 (at your option) any later version. | |
8 | |
9 This program is distributed in the hope that it will be useful, | |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
15 along with this program; if not, write to the Free Software | |
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 */ | |
18 | |
19 /* | |
20 based upon some commented-out C code from mpeg2dec (idct_mmx.c written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) | |
21 */ | |
22 | |
23 #include <inttypes.h> | |
24 | |
25 #include "simple_idct.h" | |
26 | |
/*
 * Fixed-point IDCT coefficients: Wk = sqrt(2) * cos(k*pi/16) scaled by a
 * power of two.  Two precisions are kept side by side; the preprocessor
 * switch below picks one.  ROW_SHIFT and COL_SHIFT are the right shifts
 * applied to the results of the row pass and the column pass.
 */
#if 0
/* 11-bit set (scale 2048) -- currently disabled. */
#define W1        2841   /* 2048 * sqrt(2) * cos(1*pi/16) */
#define W2        2676   /* 2048 * sqrt(2) * cos(2*pi/16) */
#define W3        2408   /* 2048 * sqrt(2) * cos(3*pi/16) */
#define W4        2048   /* 2048 * sqrt(2) * cos(4*pi/16) */
#define W5        1609   /* 2048 * sqrt(2) * cos(5*pi/16) */
#define W6        1108   /* 2048 * sqrt(2) * cos(6*pi/16) */
#define W7        565    /* 2048 * sqrt(2) * cos(7*pi/16) */
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
/* 14-bit set (scale 1<<14) -- currently active. */
#define W1        22725  /* cos(1*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W2        21407  /* cos(2*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W3        19266  /* cos(3*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W4        16384  /* cos(4*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W5        12873  /* cos(5*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W6        8867   /* cos(6*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W7        4520   /* cos(7*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define ROW_SHIFT 11
#define COL_SHIFT 20     /* carried over from the original: a bare "6" note whose meaning is not evident here */
#endif
48 #if 1 | |
49 static void inline idctRow (int16_t * row) | |
50 { | |
51 int a0, a1, a2, a3, b0, b1, b2, b3; | |
52 const int C1 =W1; | |
53 const int C2 =W2; | |
54 const int C3 =W3; | |
55 const int C4 =W4; | |
56 const int C5 =W5; | |
57 const int C6 =W6; | |
58 const int C7 =W7; | |
59 | |
60 if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7])) { | |
61 row[0] = row[1] = row[2] = row[3] = row[4] = | |
62 row[5] = row[6] = row[7] = row[0]<<3; | |
63 return; | |
64 } | |
65 | |
66 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1)); | |
67 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1)); | |
68 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1)); | |
69 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1)); | |
70 | |
71 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7]; | |
72 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7]; | |
73 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7]; | |
74 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7]; | |
75 | |
76 row[0] = (a0 + b0) >> ROW_SHIFT; | |
77 row[1] = (a1 + b1) >> ROW_SHIFT; | |
78 row[2] = (a2 + b2) >> ROW_SHIFT; | |
79 row[3] = (a3 + b3) >> ROW_SHIFT; | |
80 row[4] = (a3 - b3) >> ROW_SHIFT; | |
81 row[5] = (a2 - b2) >> ROW_SHIFT; | |
82 row[6] = (a1 - b1) >> ROW_SHIFT; | |
83 row[7] = (a0 - b0) >> ROW_SHIFT; | |
84 } | |
85 | |
86 static void inline idctCol (int16_t * col) | |
87 { | |
88 int a0, a1, a2, a3, b0, b1, b2, b3; | |
89 const int C1 =W1; | |
90 const int C2 =W2; | |
91 const int C3 =W3; | |
92 const int C4 =W4; | |
93 const int C5 =W5; | |
94 const int C6 =W6; | |
95 const int C7 =W7; | |
96 /* | |
97 if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) { | |
98 col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] = | |
99 col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3; | |
100 return; | |
101 }*/ | |
102 col[0] += (1<<(COL_SHIFT-1))/W4; | |
103 a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6]; | |
104 a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6]; | |
105 a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6]; | |
106 a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6]; | |
107 | |
108 b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7]; | |
109 b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7]; | |
110 b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7]; | |
111 b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7]; | |
112 | |
113 col[8*0] = (a0 + b0) >> COL_SHIFT; | |
114 col[8*1] = (a1 + b1) >> COL_SHIFT; | |
115 col[8*2] = (a2 + b2) >> COL_SHIFT; | |
116 col[8*3] = (a3 + b3) >> COL_SHIFT; | |
117 col[8*4] = (a3 - b3) >> COL_SHIFT; | |
118 col[8*5] = (a2 - b2) >> COL_SHIFT; | |
119 col[8*6] = (a1 - b1) >> COL_SHIFT; | |
120 col[8*7] = (a0 - b0) >> COL_SHIFT; | |
121 } | |
122 | |
/*
 * In-place inverse DCT of a block of 64 coefficients laid out as 8 rows of
 * 8 values: first every row is transformed, then every column.
 *
 * block: pointer to the 64 values; overwritten with the result.
 */
void simple_idct (short *block)
{
    short *p;

    /* Row pass: rows start every 8 elements. */
    for (p = block; p != block + 64; p += 8)
        idctRow(p);

    /* Column pass: columns start at the first 8 elements. */
    for (p = block; p != block + 8; p++)
        idctCol(p);
}
133 | |
134 #else | |
135 | |
/*
 * Coefficients for the alternative implementation below, which keeps the
 * row-pass results unshifted in 32 bits and applies a single shift of
 * COL_SHIFT after the column pass.
 *
 * W1..W7 and COL_SHIFT are already defined earlier in this file, and
 * COL_SHIFT gets a different value here.  Each name is therefore #undef'd
 * first so that enabling this branch does not produce an incompatible macro
 * redefinition.
 */
#undef W1
#undef W2
#undef W3
#undef W4
#undef W5
#undef W6
#undef W7
#undef COL_SHIFT

#define W1 22725 /* cos(1*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W2 21407 /* cos(2*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W3 19266 /* cos(3*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W4 16384 /* cos(4*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W5 12873 /* cos(5*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W6 8867  /* cos(6*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define W7 4520  /* cos(7*M_PI/16) * sqrt(2) * (1<<14) + 0.5 */
#define COL_SHIFT 31
144 | |
145 static void inline idctRow (int32_t *out, int16_t * row) | |
146 { | |
147 int a0, a1, a2, a3, b0, b1, b2, b3; | |
148 const int C1 =W1; | |
149 const int C2 =W2; | |
150 const int C3 =W3; | |
151 const int C4 =W4; | |
152 const int C5 =W5; | |
153 const int C6 =W6; | |
154 const int C7 =W7; | |
155 /* | |
156 if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7])) { | |
157 row[0] = row[1] = row[2] = row[3] = row[4] = | |
158 row[5] = row[6] = row[7] = row[0]<<14; | |
159 return; | |
160 } | |
161 */ | |
162 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6]; | |
163 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6]; | |
164 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6]; | |
165 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6]; | |
166 | |
167 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7]; | |
168 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7]; | |
169 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7]; | |
170 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7]; | |
171 | |
172 out[0] = (a0 + b0); | |
173 out[1] = (a1 + b1); | |
174 out[2] = (a2 + b2); | |
175 out[3] = (a3 + b3); | |
176 out[4] = (a3 - b3); | |
177 out[5] = (a2 - b2); | |
178 out[6] = (a1 - b1); | |
179 out[7] = (a0 - b0); | |
180 } | |
181 | |
182 static void inline idctCol (int32_t *in, int16_t * col) | |
183 { | |
184 int64_t a0, a1, a2, a3, b0, b1, b2, b3; | |
185 const int64_t C1 =W1; | |
186 const int64_t C2 =W2; | |
187 const int64_t C3 =W3; | |
188 const int64_t C4 =W4; | |
189 const int64_t C5 =W5; | |
190 const int64_t C6 =W6; | |
191 const int64_t C7 =W7; | |
192 /* | |
193 if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) { | |
194 col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] = | |
195 col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3; | |
196 return; | |
197 }*/ | |
198 in[0] += (1<<(COL_SHIFT-1))/W4; | |
199 a0 = C4*in[8*0] + C2*in[8*2] + C4*in[8*4] + C6*in[8*6]; | |
200 a1 = C4*in[8*0] + C6*in[8*2] - C4*in[8*4] - C2*in[8*6]; | |
201 a2 = C4*in[8*0] - C6*in[8*2] - C4*in[8*4] + C2*in[8*6]; | |
202 a3 = C4*in[8*0] - C2*in[8*2] + C4*in[8*4] - C6*in[8*6]; | |
203 | |
204 b0 = C1*in[8*1] + C3*in[8*3] + C5*in[8*5] + C7*in[8*7]; | |
205 b1 = C3*in[8*1] - C7*in[8*3] - C1*in[8*5] - C5*in[8*7]; | |
206 b2 = C5*in[8*1] - C1*in[8*3] + C7*in[8*5] + C3*in[8*7]; | |
207 b3 = C7*in[8*1] - C5*in[8*3] + C3*in[8*5] - C1*in[8*7]; | |
208 | |
209 col[8*0] = (a0 + b0) >> COL_SHIFT; | |
210 col[8*1] = (a1 + b1) >> COL_SHIFT; | |
211 col[8*2] = (a2 + b2) >> COL_SHIFT; | |
212 col[8*3] = (a3 + b3) >> COL_SHIFT; | |
213 col[8*4] = (a3 - b3) >> COL_SHIFT; | |
214 col[8*5] = (a2 - b2) >> COL_SHIFT; | |
215 col[8*6] = (a1 - b1) >> COL_SHIFT; | |
216 col[8*7] = (a0 - b0) >> COL_SHIFT; | |
217 } | |
218 | |
/*
 * Inverse DCT of a block of 64 coefficients laid out as 8 rows of 8 values,
 * full-precision variant: the row pass writes 32-bit intermediates into a
 * local buffer, the column pass reads them and stores the final values back
 * into the block.
 *
 * block: pointer to the 64 values; overwritten with the result.
 */
void simple_idct (short *block)
{
    int32_t temp[64];
    int n;

    /* Row pass: block row -> matching row of temp. */
    for (n = 0; n < 64; n += 8)
        idctRow(temp + n, block + n);

    /* Column pass: temp column -> matching column of block. */
    for (n = 0; n < 8; n++)
        idctCol(temp + n, block + n);
}
230 | |
231 #endif |