Mercurial > libavcodec.hg
annotate simple_idct.c @ 5309:0662a270aab7 libavcodec
AC-3 decoder, soc revision 38, Aug 7 00:03:00 2006 UTC by cloud9
major code cleanup.
correct implementation of imdct.
implemented imdct for block switching also.
when coupling is not in use all the
ac3 streams are decoded correctly.
but when coupling is in use there is a bug.
i am currently finding the root of the bug.
if anybody can help.
author | jbr |
---|---|
date | Sat, 14 Jul 2007 15:56:55 +0000 |
parents | 3d2b2a2a664e |
children | eb2b4b1da4cd |
rev | line source |
---|---|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
1 /* |
429 | 2 * Simple IDCT |
3 * | |
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
5 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
6 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
7 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
429 | 9 * modify it under the terms of the GNU Lesser General Public |
10 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
429 | 12 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
429 | 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 * Lesser General Public License for more details. | |
17 * | |
18 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
429 | 21 */ |
2967 | 22 |
1106 | 23 /** |
24 * @file simple_idct.c | |
25 * simpleidct in C. | |
26 */ | |
2967 | 27 |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
28 /* |
429 | 29 based upon some outcommented c code from mpeg2dec (idct_mmx.c |
2967 | 30 written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) |
429 | 31 */ |
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
352
diff
changeset
|
32 #include "avcodec.h" |
479 | 33 #include "dsputil.h" |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
34 #include "simple_idct.h" |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
35 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
36 #if 0 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
37 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
38 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
39 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
40 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
41 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
42 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
43 #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
44 #define ROW_SHIFT 8 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
45 #define COL_SHIFT 17 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
46 #else |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
47 #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
48 #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
49 #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
352
5a8eb5cf9f92
C4=16383 for the c version too and even for some outcommented code
michaelni
parents:
215
diff
changeset
|
50 #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
51 #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
52 #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
53 #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
54 #define ROW_SHIFT 11 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
55 #define COL_SHIFT 20 // 6 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
56 #endif |
205 | 57 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
58 #if defined(ARCH_POWERPC_405) |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
59 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
60 /* signed 16x16 -> 32 multiply add accumulate */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
61 #define MAC16(rt, ra, rb) \ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
62 asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
63 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
64 /* signed 16x16 -> 32 multiply */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
65 #define MUL16(rt, ra, rb) \ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
66 asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb)); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
67 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
68 #else |
205 | 69 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
70 /* signed 16x16 -> 32 multiply add accumulate */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
71 #define MAC16(rt, ra, rb) rt += (ra) * (rb) |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
72 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
73 /* signed 16x16 -> 32 multiply */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
74 #define MUL16(rt, ra, rb) rt = (ra) * (rb) |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
75 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
76 #endif |
205 | 77 |
1008 | 78 static inline void idctRowCondDC (DCTELEM * row) |
205 | 79 { |
2979 | 80 int a0, a1, a2, a3, b0, b1, b2, b3; |
4603 | 81 #ifdef HAVE_FAST_64BIT |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
82 uint64_t temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
83 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
84 uint32_t temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
85 #endif |
205 | 86 |
4603 | 87 #ifdef HAVE_FAST_64BIT |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
88 #ifdef WORDS_BIGENDIAN |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
89 #define ROW0_MASK 0xffff000000000000LL |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
90 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
91 #define ROW0_MASK 0xffffLL |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
92 #endif |
1008 | 93 if(sizeof(DCTELEM)==2){ |
2967 | 94 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | |
1008 | 95 ((uint64_t *)row)[1]) == 0) { |
96 temp = (row[0] << 3) & 0xffff; | |
97 temp += temp << 16; | |
98 temp += temp << 32; | |
99 ((uint64_t *)row)[0] = temp; | |
100 ((uint64_t *)row)[1] = temp; | |
101 return; | |
2979 | 102 } |
1008 | 103 }else{ |
104 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { | |
105 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; | |
106 return; | |
107 } | |
108 } | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
109 #else |
1008 | 110 if(sizeof(DCTELEM)==2){ |
111 if (!(((uint32_t*)row)[1] | | |
112 ((uint32_t*)row)[2] | | |
2967 | 113 ((uint32_t*)row)[3] | |
1008 | 114 row[1])) { |
115 temp = (row[0] << 3) & 0xffff; | |
116 temp += temp << 16; | |
117 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = | |
118 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; | |
119 return; | |
120 } | |
121 }else{ | |
122 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { | |
123 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; | |
124 return; | |
125 } | |
126 } | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
127 #endif |
205 | 128 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
129 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
2979 | 130 a1 = a0; |
131 a2 = a0; | |
132 a3 = a0; | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
133 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
134 /* no need to optimize : gcc does it */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
135 a0 += W2 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
136 a1 += W6 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
137 a2 -= W6 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
138 a3 -= W2 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
139 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
140 MUL16(b0, W1, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
141 MAC16(b0, W3, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
142 MUL16(b1, W3, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
143 MAC16(b1, -W7, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
144 MUL16(b2, W5, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
145 MAC16(b2, -W1, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
146 MUL16(b3, W7, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
147 MAC16(b3, -W5, row[3]); |
205 | 148 |
4603 | 149 #ifdef HAVE_FAST_64BIT |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
150 temp = ((uint64_t*)row)[1]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
151 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
152 temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
153 #endif |
2979 | 154 if (temp != 0) { |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
155 a0 += W4*row[4] + W6*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
156 a1 += - W4*row[4] - W2*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
157 a2 += - W4*row[4] + W2*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
158 a3 += W4*row[4] - W6*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
159 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
160 MAC16(b0, W5, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
161 MAC16(b0, W7, row[7]); |
2967 | 162 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
163 MAC16(b1, -W1, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
164 MAC16(b1, -W5, row[7]); |
2967 | 165 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
166 MAC16(b2, W7, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
167 MAC16(b2, W3, row[7]); |
2967 | 168 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
169 MAC16(b3, W3, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
170 MAC16(b3, -W1, row[7]); |
2979 | 171 } |
205 | 172 |
2979 | 173 row[0] = (a0 + b0) >> ROW_SHIFT; |
174 row[7] = (a0 - b0) >> ROW_SHIFT; | |
175 row[1] = (a1 + b1) >> ROW_SHIFT; | |
176 row[6] = (a1 - b1) >> ROW_SHIFT; | |
177 row[2] = (a2 + b2) >> ROW_SHIFT; | |
178 row[5] = (a2 - b2) >> ROW_SHIFT; | |
179 row[3] = (a3 + b3) >> ROW_SHIFT; | |
180 row[4] = (a3 - b3) >> ROW_SHIFT; | |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
181 } |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
182 |
2967 | 183 static inline void idctSparseColPut (uint8_t *dest, int line_size, |
1008 | 184 DCTELEM * col) |
205 | 185 { |
2979 | 186 int a0, a1, a2, a3, b0, b1, b2, b3; |
4176 | 187 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
188 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
189 /* XXX: I did that only to give same values as previous code */ |
2979 | 190 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
191 a1 = a0; | |
192 a2 = a0; | |
193 a3 = a0; | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
194 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
195 a0 += + W2*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
196 a1 += + W6*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
197 a2 += - W6*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
198 a3 += - W2*col[8*2]; |
205 | 199 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
200 MUL16(b0, W1, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
201 MUL16(b1, W3, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
202 MUL16(b2, W5, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
203 MUL16(b3, W7, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
204 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
205 MAC16(b0, + W3, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
206 MAC16(b1, - W7, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
207 MAC16(b2, - W1, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
208 MAC16(b3, - W5, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
209 |
2979 | 210 if(col[8*4]){ |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
211 a0 += + W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
212 a1 += - W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
213 a2 += - W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
214 a3 += + W4*col[8*4]; |
2979 | 215 } |
205 | 216 |
2979 | 217 if (col[8*5]) { |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
218 MAC16(b0, + W5, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
219 MAC16(b1, - W1, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
220 MAC16(b2, + W7, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
221 MAC16(b3, + W3, col[8*5]); |
2979 | 222 } |
205 | 223 |
2979 | 224 if(col[8*6]){ |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
225 a0 += + W6*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
226 a1 += - W2*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
227 a2 += + W2*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
228 a3 += - W6*col[8*6]; |
2979 | 229 } |
205 | 230 |
2979 | 231 if (col[8*7]) { |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
232 MAC16(b0, + W7, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
233 MAC16(b1, - W5, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
234 MAC16(b2, + W3, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
235 MAC16(b3, - W1, col[8*7]); |
2979 | 236 } |
205 | 237 |
479 | 238 dest[0] = cm[(a0 + b0) >> COL_SHIFT]; |
239 dest += line_size; | |
240 dest[0] = cm[(a1 + b1) >> COL_SHIFT]; | |
241 dest += line_size; | |
242 dest[0] = cm[(a2 + b2) >> COL_SHIFT]; | |
243 dest += line_size; | |
244 dest[0] = cm[(a3 + b3) >> COL_SHIFT]; | |
245 dest += line_size; | |
246 dest[0] = cm[(a3 - b3) >> COL_SHIFT]; | |
247 dest += line_size; | |
248 dest[0] = cm[(a2 - b2) >> COL_SHIFT]; | |
249 dest += line_size; | |
250 dest[0] = cm[(a1 - b1) >> COL_SHIFT]; | |
251 dest += line_size; | |
252 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; | |
253 } | |
254 | |
2967 | 255 static inline void idctSparseColAdd (uint8_t *dest, int line_size, |
1008 | 256 DCTELEM * col) |
479 | 257 { |
2979 | 258 int a0, a1, a2, a3, b0, b1, b2, b3; |
4176 | 259 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
479 | 260 |
261 /* XXX: I did that only to give same values as previous code */ | |
2979 | 262 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
263 a1 = a0; | |
264 a2 = a0; | |
265 a3 = a0; | |
479 | 266 |
267 a0 += + W2*col[8*2]; | |
268 a1 += + W6*col[8*2]; | |
269 a2 += - W6*col[8*2]; | |
270 a3 += - W2*col[8*2]; | |
271 | |
272 MUL16(b0, W1, col[8*1]); | |
273 MUL16(b1, W3, col[8*1]); | |
274 MUL16(b2, W5, col[8*1]); | |
275 MUL16(b3, W7, col[8*1]); | |
276 | |
277 MAC16(b0, + W3, col[8*3]); | |
278 MAC16(b1, - W7, col[8*3]); | |
279 MAC16(b2, - W1, col[8*3]); | |
280 MAC16(b3, - W5, col[8*3]); | |
281 | |
2979 | 282 if(col[8*4]){ |
479 | 283 a0 += + W4*col[8*4]; |
284 a1 += - W4*col[8*4]; | |
285 a2 += - W4*col[8*4]; | |
286 a3 += + W4*col[8*4]; | |
2979 | 287 } |
479 | 288 |
2979 | 289 if (col[8*5]) { |
479 | 290 MAC16(b0, + W5, col[8*5]); |
291 MAC16(b1, - W1, col[8*5]); | |
292 MAC16(b2, + W7, col[8*5]); | |
293 MAC16(b3, + W3, col[8*5]); | |
2979 | 294 } |
479 | 295 |
2979 | 296 if(col[8*6]){ |
479 | 297 a0 += + W6*col[8*6]; |
298 a1 += - W2*col[8*6]; | |
299 a2 += + W2*col[8*6]; | |
300 a3 += - W6*col[8*6]; | |
2979 | 301 } |
479 | 302 |
2979 | 303 if (col[8*7]) { |
479 | 304 MAC16(b0, + W7, col[8*7]); |
305 MAC16(b1, - W5, col[8*7]); | |
306 MAC16(b2, + W3, col[8*7]); | |
307 MAC16(b3, - W1, col[8*7]); | |
2979 | 308 } |
479 | 309 |
310 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; | |
311 dest += line_size; | |
312 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; | |
313 dest += line_size; | |
314 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; | |
315 dest += line_size; | |
316 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; | |
317 dest += line_size; | |
318 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; | |
319 dest += line_size; | |
320 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; | |
321 dest += line_size; | |
322 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; | |
323 dest += line_size; | |
324 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; | |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
325 } |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
326 |
1008 | 327 static inline void idctSparseCol (DCTELEM * col) |
633 | 328 { |
2979 | 329 int a0, a1, a2, a3, b0, b1, b2, b3; |
633 | 330 |
331 /* XXX: I did that only to give same values as previous code */ | |
2979 | 332 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
333 a1 = a0; | |
334 a2 = a0; | |
335 a3 = a0; | |
633 | 336 |
337 a0 += + W2*col[8*2]; | |
338 a1 += + W6*col[8*2]; | |
339 a2 += - W6*col[8*2]; | |
340 a3 += - W2*col[8*2]; | |
341 | |
342 MUL16(b0, W1, col[8*1]); | |
343 MUL16(b1, W3, col[8*1]); | |
344 MUL16(b2, W5, col[8*1]); | |
345 MUL16(b3, W7, col[8*1]); | |
346 | |
347 MAC16(b0, + W3, col[8*3]); | |
348 MAC16(b1, - W7, col[8*3]); | |
349 MAC16(b2, - W1, col[8*3]); | |
350 MAC16(b3, - W5, col[8*3]); | |
351 | |
2979 | 352 if(col[8*4]){ |
633 | 353 a0 += + W4*col[8*4]; |
354 a1 += - W4*col[8*4]; | |
355 a2 += - W4*col[8*4]; | |
356 a3 += + W4*col[8*4]; | |
2979 | 357 } |
633 | 358 |
2979 | 359 if (col[8*5]) { |
633 | 360 MAC16(b0, + W5, col[8*5]); |
361 MAC16(b1, - W1, col[8*5]); | |
362 MAC16(b2, + W7, col[8*5]); | |
363 MAC16(b3, + W3, col[8*5]); | |
2979 | 364 } |
633 | 365 |
2979 | 366 if(col[8*6]){ |
633 | 367 a0 += + W6*col[8*6]; |
368 a1 += - W2*col[8*6]; | |
369 a2 += + W2*col[8*6]; | |
370 a3 += - W6*col[8*6]; | |
2979 | 371 } |
633 | 372 |
2979 | 373 if (col[8*7]) { |
633 | 374 MAC16(b0, + W7, col[8*7]); |
375 MAC16(b1, - W5, col[8*7]); | |
376 MAC16(b2, + W3, col[8*7]); | |
377 MAC16(b3, - W1, col[8*7]); | |
2979 | 378 } |
633 | 379 |
380 col[0 ] = ((a0 + b0) >> COL_SHIFT); | |
381 col[8 ] = ((a1 + b1) >> COL_SHIFT); | |
382 col[16] = ((a2 + b2) >> COL_SHIFT); | |
383 col[24] = ((a3 + b3) >> COL_SHIFT); | |
384 col[32] = ((a3 - b3) >> COL_SHIFT); | |
385 col[40] = ((a2 - b2) >> COL_SHIFT); | |
386 col[48] = ((a1 - b1) >> COL_SHIFT); | |
387 col[56] = ((a0 - b0) >> COL_SHIFT); | |
388 } | |
389 | |
1064 | 390 void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
391 { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
392 int i; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
393 for(i=0; i<8; i++) |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
394 idctRowCondDC(block + i*8); |
2967 | 395 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
396 for(i=0; i<8; i++) |
479 | 397 idctSparseColPut(dest + i, line_size, block + i); |
398 } | |
399 | |
1064 | 400 void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block) |
479 | 401 { |
402 int i; | |
403 for(i=0; i<8; i++) | |
404 idctRowCondDC(block + i*8); | |
2967 | 405 |
479 | 406 for(i=0; i<8; i++) |
407 idctSparseColAdd(dest + i, line_size, block + i); | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
408 } |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
409 |
1008 | 410 void simple_idct(DCTELEM *block) |
633 | 411 { |
412 int i; | |
413 for(i=0; i<8; i++) | |
414 idctRowCondDC(block + i*8); | |
2967 | 415 |
633 | 416 for(i=0; i<8; i++) |
417 idctSparseCol(block + i); | |
418 } | |
419 | |
719 | 420 /* 2x4x8 idct */ |
421 | |
422 #define CN_SHIFT 12 | |
423 #define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5)) | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
424 #define C1 C_FIX(0.6532814824) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
425 #define C2 C_FIX(0.2705980501) |
719 | 426 |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
427 /* row idct is multiple by 16 * sqrt(2.0), col idct4 is normalized, |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
428 and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
429 #define C_SHIFT (4+1+12) |
719 | 430 |
1064 | 431 static inline void idct4col(uint8_t *dest, int line_size, const DCTELEM *col) |
719 | 432 { |
433 int c0, c1, c2, c3, a0, a1, a2, a3; | |
4176 | 434 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
719 | 435 |
436 a0 = col[8*0]; | |
437 a1 = col[8*2]; | |
438 a2 = col[8*4]; | |
439 a3 = col[8*6]; | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
440 c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
441 c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
719 | 442 c1 = a1 * C1 + a3 * C2; |
443 c3 = a1 * C2 - a3 * C1; | |
444 dest[0] = cm[(c0 + c1) >> C_SHIFT]; | |
445 dest += line_size; | |
446 dest[0] = cm[(c2 + c3) >> C_SHIFT]; | |
447 dest += line_size; | |
448 dest[0] = cm[(c2 - c3) >> C_SHIFT]; | |
449 dest += line_size; | |
450 dest[0] = cm[(c0 - c1) >> C_SHIFT]; | |
451 } | |
452 | |
453 #define BF(k) \ | |
454 {\ | |
455 int a0, a1;\ | |
456 a0 = ptr[k];\ | |
457 a1 = ptr[8 + k];\ | |
458 ptr[k] = a0 + a1;\ | |
459 ptr[8 + k] = a0 - a1;\ | |
460 } | |
461 | |
462 /* only used by DV codec. The input must be interlaced. 128 is added | |
463 to the pixels before clamping to avoid systematic error | |
464 (1024*sqrt(2)) offset would be needed otherwise. */ | |
465 /* XXX: I think a 1.0/sqrt(2) normalization should be needed to | |
466 compensate the extra butterfly stage - I don't have the full DV | |
467 specification */ | |
1064 | 468 void simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block) |
719 | 469 { |
470 int i; | |
1008 | 471 DCTELEM *ptr; |
2967 | 472 |
719 | 473 /* butterfly */ |
474 ptr = block; | |
475 for(i=0;i<4;i++) { | |
476 BF(0); | |
477 BF(1); | |
478 BF(2); | |
479 BF(3); | |
480 BF(4); | |
481 BF(5); | |
482 BF(6); | |
483 BF(7); | |
484 ptr += 2 * 8; | |
485 } | |
486 | |
487 /* IDCT8 on each line */ | |
488 for(i=0; i<8; i++) { | |
489 idctRowCondDC(block + i*8); | |
490 } | |
491 | |
492 /* IDCT4 and store */ | |
493 for(i=0;i<8;i++) { | |
494 idct4col(dest + i, 2 * line_size, block + i); | |
495 idct4col(dest + line_size + i, 2 * line_size, block + 8 + i); | |
496 } | |
497 } | |
936 | 498 |
499 /* 8x4 & 4x8 WMV2 IDCT */ | |
500 #undef CN_SHIFT | |
501 #undef C_SHIFT | |
502 #undef C_FIX | |
503 #undef C1 | |
504 #undef C2 | |
505 #define CN_SHIFT 12 | |
506 #define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5)) | |
507 #define C1 C_FIX(0.6532814824) | |
508 #define C2 C_FIX(0.2705980501) | |
509 #define C3 C_FIX(0.5) | |
510 #define C_SHIFT (4+1+12) | |
1064 | 511 static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col) |
936 | 512 { |
513 int c0, c1, c2, c3, a0, a1, a2, a3; | |
4176 | 514 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
936 | 515 |
516 a0 = col[8*0]; | |
517 a1 = col[8*1]; | |
518 a2 = col[8*2]; | |
519 a3 = col[8*3]; | |
520 c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); | |
521 c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); | |
522 c1 = a1 * C1 + a3 * C2; | |
523 c3 = a1 * C2 - a3 * C1; | |
524 dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)]; | |
525 dest += line_size; | |
526 dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)]; | |
527 dest += line_size; | |
528 dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)]; | |
529 dest += line_size; | |
530 dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)]; | |
531 } | |
532 | |
533 #define RN_SHIFT 15 | |
534 #define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5)) | |
535 #define R1 R_FIX(0.6532814824) | |
536 #define R2 R_FIX(0.2705980501) | |
537 #define R3 R_FIX(0.5) | |
538 #define R_SHIFT 11 | |
1008 | 539 static inline void idct4row(DCTELEM *row) |
936 | 540 { |
541 int c0, c1, c2, c3, a0, a1, a2, a3; | |
4176 | 542 //const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
936 | 543 |
544 a0 = row[0]; | |
545 a1 = row[1]; | |
546 a2 = row[2]; | |
547 a3 = row[3]; | |
548 c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); | |
549 c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); | |
550 c1 = a1 * R1 + a3 * R2; | |
551 c3 = a1 * R2 - a3 * R1; | |
552 row[0]= (c0 + c1) >> R_SHIFT; | |
553 row[1]= (c2 + c3) >> R_SHIFT; | |
554 row[2]= (c2 - c3) >> R_SHIFT; | |
555 row[3]= (c0 - c1) >> R_SHIFT; | |
556 } | |
557 | |
1064 | 558 void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block) |
936 | 559 { |
560 int i; | |
561 | |
562 /* IDCT8 on each line */ | |
563 for(i=0; i<4; i++) { | |
564 idctRowCondDC(block + i*8); | |
565 } | |
566 | |
567 /* IDCT4 and store */ | |
568 for(i=0;i<8;i++) { | |
569 idct4col_add(dest + i, line_size, block + i); | |
570 } | |
571 } | |
572 | |
1064 | 573 void simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block) |
936 | 574 { |
575 int i; | |
576 | |
577 /* IDCT4 on each line */ | |
578 for(i=0; i<8; i++) { | |
579 idct4row(block + i*8); | |
580 } | |
581 | |
582 /* IDCT8 and store */ | |
583 for(i=0; i<4; i++){ | |
584 idctSparseColAdd(dest + i, line_size, block + i); | |
585 } | |
586 } | |
587 |