Mercurial > libavcodec.hg
annotate simple_idct.c @ 3010:533c6386eca9 libavcodec
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
if this gives better quality than SATD then someone should port the x86 code too or maybe we could even just call it from libx264
the 4x4 one could be tried too ...
author | michael |
---|---|
date | Wed, 04 Jan 2006 16:31:23 +0000 |
parents | bfabfdf9ce55 |
children | 0b546eab515d |
rev | line source |
---|---|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
1 /* |
429 | 2 * Simple IDCT |
3 * | |
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
5 * | |
6 * This library is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2 of the License, or (at your option) any later version. | |
10 * | |
11 * This library is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with this library; if not, write to the Free Software | |
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
19 */ | |
2967 | 20 |
1106 | 21 /** |
22 * @file simple_idct.c | |
23 * simpleidct in C. | |
24 */ | |
2967 | 25 |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
26 /* |
429 | 27 based upon some commented-out C code from mpeg2dec (idct_mmx.c |
2967 | 28 written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) |
429 | 29 */ |
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
352
diff
changeset
|
30 #include "avcodec.h" |
479 | 31 #include "dsputil.h" |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
32 #include "simple_idct.h" |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
33 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
34 #if 0 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
35 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
36 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
37 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
38 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
39 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
40 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
41 #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
42 #define ROW_SHIFT 8 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
43 #define COL_SHIFT 17 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
44 #else |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
45 #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
46 #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
47 #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
352
5a8eb5cf9f92
C4=16383 for the c version too and even for some outcommented code
michaelni
parents:
215
diff
changeset
|
48 #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
49 #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
50 #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
51 #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
52 #define ROW_SHIFT 11 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
53 #define COL_SHIFT 20 // 6 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
54 #endif |
205 | 55 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
56 #if defined(ARCH_POWERPC_405) |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
57 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
58 /* signed 16x16 -> 32 multiply add accumulate */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
59 #define MAC16(rt, ra, rb) \ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
60 asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
61 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
62 /* signed 16x16 -> 32 multiply */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
63 #define MUL16(rt, ra, rb) \ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
64 asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb)); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
65 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
66 #else |
205 | 67 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
68 /* signed 16x16 -> 32 multiply add accumulate */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
69 #define MAC16(rt, ra, rb) rt += (ra) * (rb) |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
70 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
71 /* signed 16x16 -> 32 multiply */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
72 #define MUL16(rt, ra, rb) rt = (ra) * (rb) |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
73 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
74 #endif |
205 | 75 |
1008 | 76 static inline void idctRowCondDC (DCTELEM * row) |
205 | 77 { |
2979 | 78 int a0, a1, a2, a3, b0, b1, b2, b3; |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
79 #ifdef FAST_64BIT |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
80 uint64_t temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
81 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
82 uint32_t temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
83 #endif |
205 | 84 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
85 #ifdef FAST_64BIT |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
86 #ifdef WORDS_BIGENDIAN |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
87 #define ROW0_MASK 0xffff000000000000LL |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
88 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
89 #define ROW0_MASK 0xffffLL |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
90 #endif |
1008 | 91 if(sizeof(DCTELEM)==2){ |
2967 | 92 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | |
1008 | 93 ((uint64_t *)row)[1]) == 0) { |
94 temp = (row[0] << 3) & 0xffff; | |
95 temp += temp << 16; | |
96 temp += temp << 32; | |
97 ((uint64_t *)row)[0] = temp; | |
98 ((uint64_t *)row)[1] = temp; | |
99 return; | |
2979 | 100 } |
1008 | 101 }else{ |
102 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { | |
103 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; | |
104 return; | |
105 } | |
106 } | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
107 #else |
1008 | 108 if(sizeof(DCTELEM)==2){ |
109 if (!(((uint32_t*)row)[1] | | |
110 ((uint32_t*)row)[2] | | |
2967 | 111 ((uint32_t*)row)[3] | |
1008 | 112 row[1])) { |
113 temp = (row[0] << 3) & 0xffff; | |
114 temp += temp << 16; | |
115 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = | |
116 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; | |
117 return; | |
118 } | |
119 }else{ | |
120 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { | |
121 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; | |
122 return; | |
123 } | |
124 } | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
125 #endif |
205 | 126 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
127 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
2979 | 128 a1 = a0; |
129 a2 = a0; | |
130 a3 = a0; | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
131 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
132 /* no need to optimize : gcc does it */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
133 a0 += W2 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
134 a1 += W6 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
135 a2 -= W6 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
136 a3 -= W2 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
137 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
138 MUL16(b0, W1, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
139 MAC16(b0, W3, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
140 MUL16(b1, W3, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
141 MAC16(b1, -W7, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
142 MUL16(b2, W5, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
143 MAC16(b2, -W1, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
144 MUL16(b3, W7, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
145 MAC16(b3, -W5, row[3]); |
205 | 146 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
147 #ifdef FAST_64BIT |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
148 temp = ((uint64_t*)row)[1]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
149 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
150 temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
151 #endif |
2979 | 152 if (temp != 0) { |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
153 a0 += W4*row[4] + W6*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
154 a1 += - W4*row[4] - W2*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
155 a2 += - W4*row[4] + W2*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
156 a3 += W4*row[4] - W6*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
157 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
158 MAC16(b0, W5, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
159 MAC16(b0, W7, row[7]); |
2967 | 160 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
161 MAC16(b1, -W1, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
162 MAC16(b1, -W5, row[7]); |
2967 | 163 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
164 MAC16(b2, W7, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
165 MAC16(b2, W3, row[7]); |
2967 | 166 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
167 MAC16(b3, W3, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
168 MAC16(b3, -W1, row[7]); |
2979 | 169 } |
205 | 170 |
2979 | 171 row[0] = (a0 + b0) >> ROW_SHIFT; |
172 row[7] = (a0 - b0) >> ROW_SHIFT; | |
173 row[1] = (a1 + b1) >> ROW_SHIFT; | |
174 row[6] = (a1 - b1) >> ROW_SHIFT; | |
175 row[2] = (a2 + b2) >> ROW_SHIFT; | |
176 row[5] = (a2 - b2) >> ROW_SHIFT; | |
177 row[3] = (a3 + b3) >> ROW_SHIFT; | |
178 row[4] = (a3 - b3) >> ROW_SHIFT; | |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
179 } |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
180 |
2967 | 181 static inline void idctSparseColPut (uint8_t *dest, int line_size, |
1008 | 182 DCTELEM * col) |
205 | 183 { |
2979 | 184 int a0, a1, a2, a3, b0, b1, b2, b3; |
1064 | 185 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
186 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
187 /* XXX: I did that only to give same values as previous code */ |
2979 | 188 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
189 a1 = a0; | |
190 a2 = a0; | |
191 a3 = a0; | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
192 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
193 a0 += + W2*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
194 a1 += + W6*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
195 a2 += - W6*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
196 a3 += - W2*col[8*2]; |
205 | 197 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
198 MUL16(b0, W1, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
199 MUL16(b1, W3, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
200 MUL16(b2, W5, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
201 MUL16(b3, W7, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
202 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
203 MAC16(b0, + W3, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
204 MAC16(b1, - W7, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
205 MAC16(b2, - W1, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
206 MAC16(b3, - W5, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
207 |
2979 | 208 if(col[8*4]){ |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
209 a0 += + W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
210 a1 += - W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
211 a2 += - W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
212 a3 += + W4*col[8*4]; |
2979 | 213 } |
205 | 214 |
2979 | 215 if (col[8*5]) { |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
216 MAC16(b0, + W5, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
217 MAC16(b1, - W1, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
218 MAC16(b2, + W7, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
219 MAC16(b3, + W3, col[8*5]); |
2979 | 220 } |
205 | 221 |
2979 | 222 if(col[8*6]){ |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
223 a0 += + W6*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
224 a1 += - W2*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
225 a2 += + W2*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
226 a3 += - W6*col[8*6]; |
2979 | 227 } |
205 | 228 |
2979 | 229 if (col[8*7]) { |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
230 MAC16(b0, + W7, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
231 MAC16(b1, - W5, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
232 MAC16(b2, + W3, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
233 MAC16(b3, - W1, col[8*7]); |
2979 | 234 } |
205 | 235 |
479 | 236 dest[0] = cm[(a0 + b0) >> COL_SHIFT]; |
237 dest += line_size; | |
238 dest[0] = cm[(a1 + b1) >> COL_SHIFT]; | |
239 dest += line_size; | |
240 dest[0] = cm[(a2 + b2) >> COL_SHIFT]; | |
241 dest += line_size; | |
242 dest[0] = cm[(a3 + b3) >> COL_SHIFT]; | |
243 dest += line_size; | |
244 dest[0] = cm[(a3 - b3) >> COL_SHIFT]; | |
245 dest += line_size; | |
246 dest[0] = cm[(a2 - b2) >> COL_SHIFT]; | |
247 dest += line_size; | |
248 dest[0] = cm[(a1 - b1) >> COL_SHIFT]; | |
249 dest += line_size; | |
250 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; | |
251 } | |
252 | |
2967 | 253 static inline void idctSparseColAdd (uint8_t *dest, int line_size, |
1008 | 254 DCTELEM * col) |
479 | 255 { |
2979 | 256 int a0, a1, a2, a3, b0, b1, b2, b3; |
1064 | 257 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
479 | 258 |
259 /* XXX: I did that only to give same values as previous code */ | |
2979 | 260 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
261 a1 = a0; | |
262 a2 = a0; | |
263 a3 = a0; | |
479 | 264 |
265 a0 += + W2*col[8*2]; | |
266 a1 += + W6*col[8*2]; | |
267 a2 += - W6*col[8*2]; | |
268 a3 += - W2*col[8*2]; | |
269 | |
270 MUL16(b0, W1, col[8*1]); | |
271 MUL16(b1, W3, col[8*1]); | |
272 MUL16(b2, W5, col[8*1]); | |
273 MUL16(b3, W7, col[8*1]); | |
274 | |
275 MAC16(b0, + W3, col[8*3]); | |
276 MAC16(b1, - W7, col[8*3]); | |
277 MAC16(b2, - W1, col[8*3]); | |
278 MAC16(b3, - W5, col[8*3]); | |
279 | |
2979 | 280 if(col[8*4]){ |
479 | 281 a0 += + W4*col[8*4]; |
282 a1 += - W4*col[8*4]; | |
283 a2 += - W4*col[8*4]; | |
284 a3 += + W4*col[8*4]; | |
2979 | 285 } |
479 | 286 |
2979 | 287 if (col[8*5]) { |
479 | 288 MAC16(b0, + W5, col[8*5]); |
289 MAC16(b1, - W1, col[8*5]); | |
290 MAC16(b2, + W7, col[8*5]); | |
291 MAC16(b3, + W3, col[8*5]); | |
2979 | 292 } |
479 | 293 |
2979 | 294 if(col[8*6]){ |
479 | 295 a0 += + W6*col[8*6]; |
296 a1 += - W2*col[8*6]; | |
297 a2 += + W2*col[8*6]; | |
298 a3 += - W6*col[8*6]; | |
2979 | 299 } |
479 | 300 |
2979 | 301 if (col[8*7]) { |
479 | 302 MAC16(b0, + W7, col[8*7]); |
303 MAC16(b1, - W5, col[8*7]); | |
304 MAC16(b2, + W3, col[8*7]); | |
305 MAC16(b3, - W1, col[8*7]); | |
2979 | 306 } |
479 | 307 |
308 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; | |
309 dest += line_size; | |
310 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; | |
311 dest += line_size; | |
312 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; | |
313 dest += line_size; | |
314 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; | |
315 dest += line_size; | |
316 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; | |
317 dest += line_size; | |
318 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; | |
319 dest += line_size; | |
320 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; | |
321 dest += line_size; | |
322 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; | |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
323 } |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
324 |
1008 | 325 static inline void idctSparseCol (DCTELEM * col) |
633 | 326 { |
2979 | 327 int a0, a1, a2, a3, b0, b1, b2, b3; |
633 | 328 |
329 /* XXX: I did that only to give same values as previous code */ | |
2979 | 330 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
331 a1 = a0; | |
332 a2 = a0; | |
333 a3 = a0; | |
633 | 334 |
335 a0 += + W2*col[8*2]; | |
336 a1 += + W6*col[8*2]; | |
337 a2 += - W6*col[8*2]; | |
338 a3 += - W2*col[8*2]; | |
339 | |
340 MUL16(b0, W1, col[8*1]); | |
341 MUL16(b1, W3, col[8*1]); | |
342 MUL16(b2, W5, col[8*1]); | |
343 MUL16(b3, W7, col[8*1]); | |
344 | |
345 MAC16(b0, + W3, col[8*3]); | |
346 MAC16(b1, - W7, col[8*3]); | |
347 MAC16(b2, - W1, col[8*3]); | |
348 MAC16(b3, - W5, col[8*3]); | |
349 | |
2979 | 350 if(col[8*4]){ |
633 | 351 a0 += + W4*col[8*4]; |
352 a1 += - W4*col[8*4]; | |
353 a2 += - W4*col[8*4]; | |
354 a3 += + W4*col[8*4]; | |
2979 | 355 } |
633 | 356 |
2979 | 357 if (col[8*5]) { |
633 | 358 MAC16(b0, + W5, col[8*5]); |
359 MAC16(b1, - W1, col[8*5]); | |
360 MAC16(b2, + W7, col[8*5]); | |
361 MAC16(b3, + W3, col[8*5]); | |
2979 | 362 } |
633 | 363 |
2979 | 364 if(col[8*6]){ |
633 | 365 a0 += + W6*col[8*6]; |
366 a1 += - W2*col[8*6]; | |
367 a2 += + W2*col[8*6]; | |
368 a3 += - W6*col[8*6]; | |
2979 | 369 } |
633 | 370 |
2979 | 371 if (col[8*7]) { |
633 | 372 MAC16(b0, + W7, col[8*7]); |
373 MAC16(b1, - W5, col[8*7]); | |
374 MAC16(b2, + W3, col[8*7]); | |
375 MAC16(b3, - W1, col[8*7]); | |
2979 | 376 } |
633 | 377 |
378 col[0 ] = ((a0 + b0) >> COL_SHIFT); | |
379 col[8 ] = ((a1 + b1) >> COL_SHIFT); | |
380 col[16] = ((a2 + b2) >> COL_SHIFT); | |
381 col[24] = ((a3 + b3) >> COL_SHIFT); | |
382 col[32] = ((a3 - b3) >> COL_SHIFT); | |
383 col[40] = ((a2 - b2) >> COL_SHIFT); | |
384 col[48] = ((a1 - b1) >> COL_SHIFT); | |
385 col[56] = ((a0 - b0) >> COL_SHIFT); | |
386 } | |
387 | |
1064 | 388 void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
389 { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
390 int i; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
391 for(i=0; i<8; i++) |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
392 idctRowCondDC(block + i*8); |
2967 | 393 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
394 for(i=0; i<8; i++) |
479 | 395 idctSparseColPut(dest + i, line_size, block + i); |
396 } | |
397 | |
1064 | 398 void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block) |
479 | 399 { |
400 int i; | |
401 for(i=0; i<8; i++) | |
402 idctRowCondDC(block + i*8); | |
2967 | 403 |
479 | 404 for(i=0; i<8; i++) |
405 idctSparseColAdd(dest + i, line_size, block + i); | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
406 } |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
407 |
1008 | 408 void simple_idct(DCTELEM *block) |
633 | 409 { |
410 int i; | |
411 for(i=0; i<8; i++) | |
412 idctRowCondDC(block + i*8); | |
2967 | 413 |
633 | 414 for(i=0; i<8; i++) |
415 idctSparseCol(block + i); | |
416 } | |
417 | |
719 | 418 /* 2x4x8 idct */ |
419 | |
420 #define CN_SHIFT 12 | |
421 #define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5)) | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
422 #define C1 C_FIX(0.6532814824) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
423 #define C2 C_FIX(0.2705980501) |
719 | 424 |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
425 /* row idct is multiple by 16 * sqrt(2.0), col idct4 is normalized, |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
426 and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
427 #define C_SHIFT (4+1+12) |
719 | 428 |
1064 | 429 static inline void idct4col(uint8_t *dest, int line_size, const DCTELEM *col) |
719 | 430 { |
431 int c0, c1, c2, c3, a0, a1, a2, a3; | |
1064 | 432 const uint8_t *cm = cropTbl + MAX_NEG_CROP; |
719 | 433 |
434 a0 = col[8*0]; | |
435 a1 = col[8*2]; | |
436 a2 = col[8*4]; | |
437 a3 = col[8*6]; | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
438 c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
439 c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
719 | 440 c1 = a1 * C1 + a3 * C2; |
441 c3 = a1 * C2 - a3 * C1; | |
442 dest[0] = cm[(c0 + c1) >> C_SHIFT]; | |
443 dest += line_size; | |
444 dest[0] = cm[(c2 + c3) >> C_SHIFT]; | |
445 dest += line_size; | |
446 dest[0] = cm[(c2 - c3) >> C_SHIFT]; | |
447 dest += line_size; | |
448 dest[0] = cm[(c0 - c1) >> C_SHIFT]; | |
449 } | |
450 | |
/**
 * In-place butterfly on element k of two consecutive rows.
 *
 * Uses the pointer named ptr from the calling scope: ptr[k] is replaced by
 * ptr[k] + ptr[8 + k] and ptr[8 + k] by ptr[k] - ptr[8 + k] (both computed
 * from the values read before either store).
 *
 * The body is wrapped in do { } while (0) so that "BF(k);" is exactly one
 * statement and stays valid in an unbraced if/else.
 */
#define BF(k) \
    do {\
        int a0, a1;\
        a0 = ptr[(k)];\
        a1 = ptr[8 + (k)];\
        ptr[(k)] = a0 + a1;\
        ptr[8 + (k)] = a0 - a1;\
    } while (0)
459 | |
460 /* only used by DV codec. The input must be interlaced. 128 is added | |
461 to the pixels before clamping to avoid systematic error | |
462 (1024*sqrt(2)) offset would be needed otherwise. */ | |
463 /* XXX: I think a 1.0/sqrt(2) normalization should be needed to | |
464 compensate the extra butterfly stage - I don't have the full DV | |
465 specification */ | |
1064 | 466 void simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block) |
719 | 467 { |
468 int i; | |
1008 | 469 DCTELEM *ptr; |
2967 | 470 |
719 | 471 /* butterfly */ |
472 ptr = block; | |
473 for(i=0;i<4;i++) { | |
474 BF(0); | |
475 BF(1); | |
476 BF(2); | |
477 BF(3); | |
478 BF(4); | |
479 BF(5); | |
480 BF(6); | |
481 BF(7); | |
482 ptr += 2 * 8; | |
483 } | |
484 | |
485 /* IDCT8 on each line */ | |
486 for(i=0; i<8; i++) { | |
487 idctRowCondDC(block + i*8); | |
488 } | |
489 | |
490 /* IDCT4 and store */ | |
491 for(i=0;i<8;i++) { | |
492 idct4col(dest + i, 2 * line_size, block + i); | |
493 idct4col(dest + line_size + i, 2 * line_size, block + 8 + i); | |
494 } | |
495 } | |
936 | 496 |
497 /* 8x4 & 4x8 WMV2 IDCT */ | |
498 #undef CN_SHIFT | |
499 #undef C_SHIFT | |
500 #undef C_FIX | |
501 #undef C1 | |
502 #undef C2 | |
503 #define CN_SHIFT 12 | |
504 #define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5)) | |
505 #define C1 C_FIX(0.6532814824) | |
506 #define C2 C_FIX(0.2705980501) | |
507 #define C3 C_FIX(0.5) | |
508 #define C_SHIFT (4+1+12) | |
1064 | 509 static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col) |
936 | 510 { |
511 int c0, c1, c2, c3, a0, a1, a2, a3; | |
1064 | 512 const uint8_t *cm = cropTbl + MAX_NEG_CROP; |
936 | 513 |
514 a0 = col[8*0]; | |
515 a1 = col[8*1]; | |
516 a2 = col[8*2]; | |
517 a3 = col[8*3]; | |
518 c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); | |
519 c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); | |
520 c1 = a1 * C1 + a3 * C2; | |
521 c3 = a1 * C2 - a3 * C1; | |
522 dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)]; | |
523 dest += line_size; | |
524 dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)]; | |
525 dest += line_size; | |
526 dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)]; | |
527 dest += line_size; | |
528 dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)]; | |
529 } | |
530 | |
531 #define RN_SHIFT 15 | |
532 #define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5)) | |
533 #define R1 R_FIX(0.6532814824) | |
534 #define R2 R_FIX(0.2705980501) | |
535 #define R3 R_FIX(0.5) | |
536 #define R_SHIFT 11 | |
1008 | 537 static inline void idct4row(DCTELEM *row) |
936 | 538 { |
539 int c0, c1, c2, c3, a0, a1, a2, a3; | |
1064 | 540 //const uint8_t *cm = cropTbl + MAX_NEG_CROP; |
936 | 541 |
542 a0 = row[0]; | |
543 a1 = row[1]; | |
544 a2 = row[2]; | |
545 a3 = row[3]; | |
546 c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); | |
547 c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); | |
548 c1 = a1 * R1 + a3 * R2; | |
549 c3 = a1 * R2 - a3 * R1; | |
550 row[0]= (c0 + c1) >> R_SHIFT; | |
551 row[1]= (c2 + c3) >> R_SHIFT; | |
552 row[2]= (c2 - c3) >> R_SHIFT; | |
553 row[3]= (c0 - c1) >> R_SHIFT; | |
554 } | |
555 | |
1064 | 556 void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block) |
936 | 557 { |
558 int i; | |
559 | |
560 /* IDCT8 on each line */ | |
561 for(i=0; i<4; i++) { | |
562 idctRowCondDC(block + i*8); | |
563 } | |
564 | |
565 /* IDCT4 and store */ | |
566 for(i=0;i<8;i++) { | |
567 idct4col_add(dest + i, line_size, block + i); | |
568 } | |
569 } | |
570 | |
1064 | 571 void simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block) |
936 | 572 { |
573 int i; | |
574 | |
575 /* IDCT4 on each line */ | |
576 for(i=0; i<8; i++) { | |
577 idct4row(block + i*8); | |
578 } | |
579 | |
580 /* IDCT8 and store */ | |
581 for(i=0; i<4; i++){ | |
582 idctSparseColAdd(dest + i, line_size, block + i); | |
583 } | |
584 } | |
585 |