Mercurial > libavcodec.hg
annotate simple_idct.c @ 742:7f77968553de libavcodec
corrected pixel format display
author | bellard |
---|---|
date | Fri, 11 Oct 2002 09:16:35 +0000 |
parents | ff90043f4a2d |
children | 2f7da29ede37 |
rev | line source |
---|---|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
1 /* |
429 | 2 * Simple IDCT |
3 * | |
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
5 * | |
6 * This library is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2 of the License, or (at your option) any later version. | |
10 * | |
11 * This library is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with this library; if not, write to the Free Software | |
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
19 */ | |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
20 /* |
429 | 21 based upon some commented-out C code from mpeg2dec (idct_mmx.c
22 written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) | |
23 */ | |
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
352
diff
changeset
|
24 #include "avcodec.h" |
479 | 25 #include "dsputil.h" |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
26 #include "simple_idct.h" |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
27 |
633 | 28 //#define ARCH_ALPHA |
29 | |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
/*
 * Fixed-point IDCT weights and shift amounts.
 *
 * Per the formulas beside each value, Wi is cos(i*pi/16)*sqrt(2) scaled by a
 * power of two: 2048 in the disabled set, 1 << 14 in the active set.
 * ROW_SHIFT and COL_SHIFT are the right-shift amounts applied to the
 * accumulated sums at the end of the row pass and the column pass.
 */
#if 0
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
/* NOTE: for i = 4 the formula yields 16384; the value used is one less.
 * This looks deliberate (the row DC shortcut comment refers to W4=16383)
 * -- confirm before "correcting" it. */
#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define ROW_SHIFT 11
#define COL_SHIFT 20 // 6
#endif

/* Alpha builds define FAST_64BIT; presumably it selects 64-bit code paths
 * elsewhere in the file (not visible here) -- TODO confirm. */
#ifdef ARCH_ALPHA
#define FAST_64BIT
#endif
205 | 55 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
/*
 * Multiply / multiply-accumulate helpers.
 *
 *   MAC16(rt, ra, rb):  rt += ra * rb
 *   MUL16(rt, ra, rb):  rt  = ra * rb
 *
 * rt must be a modifiable lvalue. Invoke both macros as a statement followed
 * by a semicolon; that is the only form that works on every branch below.
 * On the PowerPC 405 branch the macros expand to inline assembly wrapped in
 * do { } while (0), so the expansion is a single statement and stays safe
 * inside an unbraced if/else. On the generic branch the expansions are fully
 * parenthesised, so neither the arguments nor the surrounding expression can
 * regroup the arithmetic.
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    do { \
        asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); \
    } while (0)

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    do { \
        asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb)); \
    } while (0)

#else

/*
 * Plain C fallback. The product is computed at the operands' promoted width,
 * not truncated to 16 bits; callers presumably pass values that fit in
 * 16 bits so both branches agree -- verify against the call sites.
 */

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif
205 | 75 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
76 #ifdef ARCH_ALPHA |
215
1fe6b64feefb
Small simple idct improvement for Alpha by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
214
diff
changeset
|
77 /* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */ |
464
9b73bce5071a
gcc 3.1 warning fix (patch by Felix Buenemann <atmosfear at users.sourceforge.net>)
michaelni
parents:
440
diff
changeset
|
78 static inline int idctRowCondDC(int16_t *row) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
79 { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
80 int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
81 uint64_t *lrow = (uint64_t *) row; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
82 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
83 if (lrow[1] == 0) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
84 if (lrow[0] == 0) |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
85 return 0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
86 if ((lrow[0] & ~0xffffULL) == 0) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
87 uint64_t v; |
633 | 88 #if 1 //is ok if |a0| < 1024 than theres an +-1 error (for the *W4 case for W4=16383 !!!) |
89 a0 = row[0]<<3; | |
90 #else | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
91 a0 = W4 * row[0]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
92 a0 += 1 << (ROW_SHIFT - 1); |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
93 a0 >>= ROW_SHIFT; |
633 | 94 #endif |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
95 v = (uint16_t) a0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
96 v += v << 16; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
97 v += v << 32; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
98 lrow[0] = v; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
99 lrow[1] = v; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
100 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
101 return 1; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
102 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
103 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
104 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
105 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
106 a1 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
107 a2 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
108 a3 = a0; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
109 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
110 if (row[2]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
111 a0 += W2 * row[2]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
112 a1 += W6 * row[2]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
113 a2 -= W6 * row[2]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
114 a3 -= W2 * row[2]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
115 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
116 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
117 if (row[4]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
118 a0 += W4 * row[4]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
119 a1 -= W4 * row[4]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
120 a2 -= W4 * row[4]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
121 a3 += W4 * row[4]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
122 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
123 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
124 if (row[6]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
125 a0 += W6 * row[6]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
126 a1 -= W2 * row[6]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
127 a2 += W2 * row[6]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
128 a3 -= W6 * row[6]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
129 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
130 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
131 if (row[1]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
132 b0 = W1 * row[1]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
133 b1 = W3 * row[1]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
134 b2 = W5 * row[1]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
135 b3 = W7 * row[1]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
136 } else { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
137 b0 = 0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
138 b1 = 0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
139 b2 = 0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
140 b3 = 0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
141 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
142 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
143 if (row[3]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
144 b0 += W3 * row[3]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
145 b1 -= W7 * row[3]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
146 b2 -= W1 * row[3]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
147 b3 -= W5 * row[3]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
148 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
149 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
150 if (row[5]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
151 b0 += W5 * row[5]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
152 b1 -= W1 * row[5]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
153 b2 += W7 * row[5]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
154 b3 += W3 * row[5]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
155 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
156 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
157 if (row[7]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
158 b0 += W7 * row[7]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
159 b1 -= W5 * row[7]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
160 b2 += W3 * row[7]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
161 b3 -= W1 * row[7]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
162 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
163 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
164 row[0] = (a0 + b0) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
165 row[1] = (a1 + b1) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
166 row[2] = (a2 + b2) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
167 row[3] = (a3 + b3) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
168 row[4] = (a3 - b3) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
169 row[5] = (a2 - b2) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
170 row[6] = (a1 - b1) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
171 row[7] = (a0 - b0) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
172 |
215
1fe6b64feefb
Small simple idct improvement for Alpha by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
214
diff
changeset
|
173 return 2; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
174 } |
503
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
175 |
642 | 176 inline static void idctSparseCol2(int16_t *col) |
503
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
177 { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
178 int a0, a1, a2, a3, b0, b1, b2, b3; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
179 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
180 col[0] += (1 << (COL_SHIFT - 1)) / W4; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
181 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
182 a0 = W4 * col[8 * 0]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
183 a1 = W4 * col[8 * 0]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
184 a2 = W4 * col[8 * 0]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
185 a3 = W4 * col[8 * 0]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
186 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
187 if (col[8 * 2]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
188 a0 += W2 * col[8 * 2]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
189 a1 += W6 * col[8 * 2]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
190 a2 -= W6 * col[8 * 2]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
191 a3 -= W2 * col[8 * 2]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
192 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
193 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
194 if (col[8 * 4]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
195 a0 += W4 * col[8 * 4]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
196 a1 -= W4 * col[8 * 4]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
197 a2 -= W4 * col[8 * 4]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
198 a3 += W4 * col[8 * 4]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
199 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
200 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
201 if (col[8 * 6]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
202 a0 += W6 * col[8 * 6]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
203 a1 -= W2 * col[8 * 6]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
204 a2 += W2 * col[8 * 6]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
205 a3 -= W6 * col[8 * 6]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
206 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
207 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
208 if (col[8 * 1]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
209 b0 = W1 * col[8 * 1]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
210 b1 = W3 * col[8 * 1]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
211 b2 = W5 * col[8 * 1]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
212 b3 = W7 * col[8 * 1]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
213 } else { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
214 b0 = b1 = b2 = b3 = 0; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
215 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
216 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
217 if (col[8 * 3]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
218 b0 += W3 * col[8 * 3]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
219 b1 -= W7 * col[8 * 3]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
220 b2 -= W1 * col[8 * 3]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
221 b3 -= W5 * col[8 * 3]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
222 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
223 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
224 if (col[8 * 5]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
225 b0 += W5 * col[8 * 5]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
226 b1 -= W1 * col[8 * 5]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
227 b2 += W7 * col[8 * 5]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
228 b3 += W3 * col[8 * 5]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
229 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
230 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
231 if (col[8 * 7]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
232 b0 += W7 * col[8 * 7]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
233 b1 -= W5 * col[8 * 7]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
234 b2 += W3 * col[8 * 7]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
235 b3 -= W1 * col[8 * 7]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
236 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
237 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
238 col[8 * 0] = (a0 + b0) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
239 col[8 * 7] = (a0 - b0) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
240 col[8 * 1] = (a1 + b1) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
241 col[8 * 6] = (a1 - b1) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
242 col[8 * 2] = (a2 + b2) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
243 col[8 * 5] = (a2 - b2) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
244 col[8 * 3] = (a3 + b3) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
245 col[8 * 4] = (a3 - b3) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
246 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
247 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
248 #else /* not ARCH_ALPHA */ |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
249 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
250 static inline void idctRowCondDC (int16_t * row) |
205 | 251 { |
252 int a0, a1, a2, a3, b0, b1, b2, b3; | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
253 #ifdef FAST_64BIT |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
254 uint64_t temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
255 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
256 uint32_t temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
257 #endif |
205 | 258 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
259 #ifdef FAST_64BIT |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
260 #ifdef WORDS_BIGENDIAN |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
261 #define ROW0_MASK 0xffff000000000000LL |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
262 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
263 #define ROW0_MASK 0xffffLL |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
264 #endif |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
265 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
266 ((uint64_t *)row)[1]) == 0) { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
267 temp = (row[0] << 3) & 0xffff; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
268 temp += temp << 16; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
269 temp += temp << 32; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
270 ((uint64_t *)row)[0] = temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
271 ((uint64_t *)row)[1] = temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
272 return; |
205 | 273 } |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
274 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
275 if (!(((uint32_t*)row)[1] | |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
276 ((uint32_t*)row)[2] | |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
277 ((uint32_t*)row)[3] | |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
278 row[1])) { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
279 temp = (row[0] << 3) & 0xffff; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
280 temp += temp << 16; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
281 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
282 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
283 return; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
284 } |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
285 #endif |
205 | 286 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
287 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
288 a1 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
289 a2 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
290 a3 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
291 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
292 /* no need to optimize : gcc does it */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
293 a0 += W2 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
294 a1 += W6 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
295 a2 -= W6 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
296 a3 -= W2 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
297 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
298 MUL16(b0, W1, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
299 MAC16(b0, W3, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
300 MUL16(b1, W3, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
301 MAC16(b1, -W7, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
302 MUL16(b2, W5, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
303 MAC16(b2, -W1, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
304 MUL16(b3, W7, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
305 MAC16(b3, -W5, row[3]); |
205 | 306 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
307 #ifdef FAST_64BIT |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
308 temp = ((uint64_t*)row)[1]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
309 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
310 temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
311 #endif |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
312 if (temp != 0) { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
313 a0 += W4*row[4] + W6*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
314 a1 += - W4*row[4] - W2*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
315 a2 += - W4*row[4] + W2*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
316 a3 += W4*row[4] - W6*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
317 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
318 MAC16(b0, W5, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
319 MAC16(b0, W7, row[7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
320 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
321 MAC16(b1, -W1, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
322 MAC16(b1, -W5, row[7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
323 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
324 MAC16(b2, W7, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
325 MAC16(b2, W3, row[7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
326 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
327 MAC16(b3, W3, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
328 MAC16(b3, -W1, row[7]); |
205 | 329 } |
330 | |
331 row[0] = (a0 + b0) >> ROW_SHIFT; | |
332 row[7] = (a0 - b0) >> ROW_SHIFT; | |
333 row[1] = (a1 + b1) >> ROW_SHIFT; | |
334 row[6] = (a1 - b1) >> ROW_SHIFT; | |
335 row[2] = (a2 + b2) >> ROW_SHIFT; | |
336 row[5] = (a2 - b2) >> ROW_SHIFT; | |
337 row[3] = (a3 + b3) >> ROW_SHIFT; | |
338 row[4] = (a3 - b3) >> ROW_SHIFT; | |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
339 } |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
340 #endif /* not ARCH_ALPHA */ |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
341 |
479 | 342 static inline void idctSparseColPut (UINT8 *dest, int line_size, |
343 int16_t * col) | |
205 | 344 { |
345 int a0, a1, a2, a3, b0, b1, b2, b3; | |
479 | 346 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
347 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
348 /* XXX: I did that only to give same values as previous code */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
349 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
350 a1 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
351 a2 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
352 a3 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
353 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
354 a0 += + W2*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
355 a1 += + W6*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
356 a2 += - W6*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
357 a3 += - W2*col[8*2]; |
205 | 358 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
359 MUL16(b0, W1, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
360 MUL16(b1, W3, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
361 MUL16(b2, W5, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
362 MUL16(b3, W7, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
363 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
364 MAC16(b0, + W3, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
365 MAC16(b1, - W7, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
366 MAC16(b2, - W1, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
367 MAC16(b3, - W5, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
368 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
369 if(col[8*4]){ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
370 a0 += + W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
371 a1 += - W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
372 a2 += - W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
373 a3 += + W4*col[8*4]; |
205 | 374 } |
375 | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
376 if (col[8*5]) { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
377 MAC16(b0, + W5, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
378 MAC16(b1, - W1, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
379 MAC16(b2, + W7, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
380 MAC16(b3, + W3, col[8*5]); |
205 | 381 } |
382 | |
383 if(col[8*6]){ | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
384 a0 += + W6*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
385 a1 += - W2*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
386 a2 += + W2*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
387 a3 += - W6*col[8*6]; |
205 | 388 } |
389 | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
390 if (col[8*7]) { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
391 MAC16(b0, + W7, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
392 MAC16(b1, - W5, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
393 MAC16(b2, + W3, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
394 MAC16(b3, - W1, col[8*7]); |
205 | 395 } |
396 | |
479 | 397 dest[0] = cm[(a0 + b0) >> COL_SHIFT]; |
398 dest += line_size; | |
399 dest[0] = cm[(a1 + b1) >> COL_SHIFT]; | |
400 dest += line_size; | |
401 dest[0] = cm[(a2 + b2) >> COL_SHIFT]; | |
402 dest += line_size; | |
403 dest[0] = cm[(a3 + b3) >> COL_SHIFT]; | |
404 dest += line_size; | |
405 dest[0] = cm[(a3 - b3) >> COL_SHIFT]; | |
406 dest += line_size; | |
407 dest[0] = cm[(a2 - b2) >> COL_SHIFT]; | |
408 dest += line_size; | |
409 dest[0] = cm[(a1 - b1) >> COL_SHIFT]; | |
410 dest += line_size; | |
411 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; | |
412 } | |
413 | |
414 static inline void idctSparseColAdd (UINT8 *dest, int line_size, | |
415 int16_t * col) | |
416 { | |
417 int a0, a1, a2, a3, b0, b1, b2, b3; | |
418 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
419 | |
420 /* XXX: I did that only to give same values as previous code */ | |
421 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | |
422 a1 = a0; | |
423 a2 = a0; | |
424 a3 = a0; | |
425 | |
426 a0 += + W2*col[8*2]; | |
427 a1 += + W6*col[8*2]; | |
428 a2 += - W6*col[8*2]; | |
429 a3 += - W2*col[8*2]; | |
430 | |
431 MUL16(b0, W1, col[8*1]); | |
432 MUL16(b1, W3, col[8*1]); | |
433 MUL16(b2, W5, col[8*1]); | |
434 MUL16(b3, W7, col[8*1]); | |
435 | |
436 MAC16(b0, + W3, col[8*3]); | |
437 MAC16(b1, - W7, col[8*3]); | |
438 MAC16(b2, - W1, col[8*3]); | |
439 MAC16(b3, - W5, col[8*3]); | |
440 | |
441 if(col[8*4]){ | |
442 a0 += + W4*col[8*4]; | |
443 a1 += - W4*col[8*4]; | |
444 a2 += - W4*col[8*4]; | |
445 a3 += + W4*col[8*4]; | |
446 } | |
447 | |
448 if (col[8*5]) { | |
449 MAC16(b0, + W5, col[8*5]); | |
450 MAC16(b1, - W1, col[8*5]); | |
451 MAC16(b2, + W7, col[8*5]); | |
452 MAC16(b3, + W3, col[8*5]); | |
453 } | |
454 | |
455 if(col[8*6]){ | |
456 a0 += + W6*col[8*6]; | |
457 a1 += - W2*col[8*6]; | |
458 a2 += + W2*col[8*6]; | |
459 a3 += - W6*col[8*6]; | |
460 } | |
461 | |
462 if (col[8*7]) { | |
463 MAC16(b0, + W7, col[8*7]); | |
464 MAC16(b1, - W5, col[8*7]); | |
465 MAC16(b2, + W3, col[8*7]); | |
466 MAC16(b3, - W1, col[8*7]); | |
467 } | |
468 | |
469 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; | |
470 dest += line_size; | |
471 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; | |
472 dest += line_size; | |
473 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; | |
474 dest += line_size; | |
475 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; | |
476 dest += line_size; | |
477 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; | |
478 dest += line_size; | |
479 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; | |
480 dest += line_size; | |
481 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; | |
482 dest += line_size; | |
483 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; | |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
484 } |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
485 |
633 | 486 static inline void idctSparseCol (int16_t * col) |
487 { | |
488 int a0, a1, a2, a3, b0, b1, b2, b3; | |
489 | |
490 /* XXX: I did that only to give same values as previous code */ | |
491 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | |
492 a1 = a0; | |
493 a2 = a0; | |
494 a3 = a0; | |
495 | |
496 a0 += + W2*col[8*2]; | |
497 a1 += + W6*col[8*2]; | |
498 a2 += - W6*col[8*2]; | |
499 a3 += - W2*col[8*2]; | |
500 | |
501 MUL16(b0, W1, col[8*1]); | |
502 MUL16(b1, W3, col[8*1]); | |
503 MUL16(b2, W5, col[8*1]); | |
504 MUL16(b3, W7, col[8*1]); | |
505 | |
506 MAC16(b0, + W3, col[8*3]); | |
507 MAC16(b1, - W7, col[8*3]); | |
508 MAC16(b2, - W1, col[8*3]); | |
509 MAC16(b3, - W5, col[8*3]); | |
510 | |
511 if(col[8*4]){ | |
512 a0 += + W4*col[8*4]; | |
513 a1 += - W4*col[8*4]; | |
514 a2 += - W4*col[8*4]; | |
515 a3 += + W4*col[8*4]; | |
516 } | |
517 | |
518 if (col[8*5]) { | |
519 MAC16(b0, + W5, col[8*5]); | |
520 MAC16(b1, - W1, col[8*5]); | |
521 MAC16(b2, + W7, col[8*5]); | |
522 MAC16(b3, + W3, col[8*5]); | |
523 } | |
524 | |
525 if(col[8*6]){ | |
526 a0 += + W6*col[8*6]; | |
527 a1 += - W2*col[8*6]; | |
528 a2 += + W2*col[8*6]; | |
529 a3 += - W6*col[8*6]; | |
530 } | |
531 | |
532 if (col[8*7]) { | |
533 MAC16(b0, + W7, col[8*7]); | |
534 MAC16(b1, - W5, col[8*7]); | |
535 MAC16(b2, + W3, col[8*7]); | |
536 MAC16(b3, - W1, col[8*7]); | |
537 } | |
538 | |
539 col[0 ] = ((a0 + b0) >> COL_SHIFT); | |
540 col[8 ] = ((a1 + b1) >> COL_SHIFT); | |
541 col[16] = ((a2 + b2) >> COL_SHIFT); | |
542 col[24] = ((a3 + b3) >> COL_SHIFT); | |
543 col[32] = ((a3 - b3) >> COL_SHIFT); | |
544 col[40] = ((a2 - b2) >> COL_SHIFT); | |
545 col[48] = ((a1 - b1) >> COL_SHIFT); | |
546 col[56] = ((a0 - b0) >> COL_SHIFT); | |
547 } | |
548 | |
549 | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
550 #ifdef ARCH_ALPHA |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
551 /* If all rows but the first one are zero after row transformation, |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
552 all rows will be identical after column transformation. */ |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
553 static inline void idctCol2(int16_t *col) |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
554 { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
555 int i; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
556 uint64_t l, r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
557 uint64_t *lcol = (uint64_t *) col; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
558 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
559 for (i = 0; i < 8; ++i) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
560 int a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
561 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
562 a0 *= W4; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
563 col[0] = a0 >> COL_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
564 ++col; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
565 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
566 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
567 l = lcol[0]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
568 r = lcol[1]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
569 lcol[ 2] = l; lcol[ 3] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
570 lcol[ 4] = l; lcol[ 5] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
571 lcol[ 6] = l; lcol[ 7] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
572 lcol[ 8] = l; lcol[ 9] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
573 lcol[10] = l; lcol[11] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
574 lcol[12] = l; lcol[13] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
575 lcol[14] = l; lcol[15] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
576 } |
205 | 577 |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
/**
 * In-place inverse DCT of one 8x8 coefficient block (this is the variant
 * that uses 64-bit stores to shortcut sparse blocks).
 *
 * The row pass is always run through idctRowCondDC(); its return value is
 * used to pick the cheapest column pass:
 *   - rows 1..7 all reported 0          -> idctCol2() handles the block,
 *   - no row reported 2                 -> only column 0 is transformed and
 *                                          its result is replicated across
 *                                          each row,
 *   - otherwise                         -> every column is transformed.
 *
 * NOTE(review): the meaning of the idctRowCondDC() return codes (0 = empty
 * row, 2 = row with non-constant content) is inferred from how they are
 * tested here -- confirm against its definition.
 *
 * @param block 64 coefficients, row-major, overwritten with the result.
 *              It is accessed through a uint64_t pointer, so it is assumed
 *              to be suitably aligned -- TODO confirm for all callers.
 */
void simple_idct (short *block)
{
    int n;
    int only_row0_used = 1;  /* cleared once any of rows 1..7 reports > 0 */
    int no_full_rows = 1;    /* cleared once any row reports 2 */
    uint64_t *row64 = (uint64_t *) block;

    /* Row pass, collecting the sparseness of each row on the way. */
    for (n = 0; n < 8; n++) {
        const int sparseness = idctRowCondDC(&block[8 * n]);

        if (sparseness == 2)
            no_full_rows = 0;
        if (sparseness > 0 && n > 0)
            only_row0_used = 0;
    }

    if (only_row0_used) {
        idctCol2(block);
        return;
    }

    if (!no_full_rows) {
        /* General case: run the column transform on all eight columns. */
        for (n = 0; n < 8; n++)
            idctSparseCol2(&block[n]);
        return;
    }

    /* Every row is constant: transform column 0 only, then splat the
       resulting value of each row over all eight entries of that row. */
    idctSparseCol2(block);
    for (n = 0; n < 8; n++) {
        /* Go through uint16_t first so a negative sample is not
           sign-extended into the upper lanes. */
        uint64_t splat = (uint16_t) block[8 * n];

        splat |= splat << 16;   /* 2 copies */
        splat |= splat << 32;   /* 4 copies */
        row64[2 * n]     = splat;
        row64[2 * n + 1] = splat;
    }
}
440
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
429
diff
changeset
|
614 |
479 | 615 /* XXX: suppress this mess */ |
616 void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block) | |
617 { | |
618 simple_idct(block); | |
619 put_pixels_clamped(block, dest, line_size); | |
620 } | |
621 | |
622 void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block) | |
623 { | |
624 simple_idct(block); | |
625 add_pixels_clamped(block, dest, line_size); | |
626 } | |
627 | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
628 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
629 |
479 | 630 void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
631 { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
632 int i; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
633 for(i=0; i<8; i++) |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
634 idctRowCondDC(block + i*8); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
635 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
636 for(i=0; i<8; i++) |
479 | 637 idctSparseColPut(dest + i, line_size, block + i); |
638 } | |
639 | |
640 void simple_idct_add(UINT8 *dest, int line_size, INT16 *block) | |
641 { | |
642 int i; | |
643 for(i=0; i<8; i++) | |
644 idctRowCondDC(block + i*8); | |
645 | |
646 for(i=0; i<8; i++) | |
647 idctSparseColAdd(dest + i, line_size, block + i); | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
648 } |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
649 |
633 | 650 void simple_idct(INT16 *block) |
651 { | |
652 int i; | |
653 for(i=0; i<8; i++) | |
654 idctRowCondDC(block + i*8); | |
655 | |
656 for(i=0; i<8; i++) | |
657 idctSparseCol(block + i); | |
658 } | |
659 | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
660 #endif |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
661 |
/* 2x4x8 idct */

/* Number of fractional bits used for the 4-point column constants. */
#define CN_SHIFT 12
/* Convert a real constant to CN_SHIFT-bit fixed point, rounding to nearest
   (the + 0.5 rounds correctly for the positive constants used here). */
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
/* 0.6532814824 = cos(pi/8) / sqrt(2) */
#define C1 C_FIX(0.6532814824)
/* 0.2705980501 = sin(pi/8) / sqrt(2) */
#define C2 C_FIX(0.2705980501)

/* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized,
   and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
/* Final right shift applied to the column results. The 12 matches CN_SHIFT;
   NOTE(review): the 4 and the 1 presumably absorb the row-pass gain and the
   butterfly normalization described above -- confirm the exact split. */
#define C_SHIFT (4+1+12)
719 | 672 |
673 static inline void idct4col(UINT8 *dest, int line_size, const INT16 *col) | |
674 { | |
675 int c0, c1, c2, c3, a0, a1, a2, a3; | |
676 const UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
677 | |
678 a0 = col[8*0]; | |
679 a1 = col[8*2]; | |
680 a2 = col[8*4]; | |
681 a3 = col[8*6]; | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
682 c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
719
diff
changeset
|
683 c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
719 | 684 c1 = a1 * C1 + a3 * C2; |
685 c3 = a1 * C2 - a3 * C1; | |
686 dest[0] = cm[(c0 + c1) >> C_SHIFT]; | |
687 dest += line_size; | |
688 dest[0] = cm[(c2 + c3) >> C_SHIFT]; | |
689 dest += line_size; | |
690 dest[0] = cm[(c2 - c3) >> C_SHIFT]; | |
691 dest += line_size; | |
692 dest[0] = cm[(c0 - c1) >> C_SHIFT]; | |
693 } | |
694 | |
/*
 * Butterfly on element k of two consecutive 8-element rows.
 *
 * Expects a pointer named `ptr` in scope that addresses the first of the
 * two rows. Afterwards ptr[k] holds the sum and ptr[8 + k] the difference
 * of the two original values.
 *
 * Wrapped in do { } while (0) so that `BF(k);` is a single statement and
 * stays correct inside an unbraced if/else; the argument is parenthesized
 * so that expressions can be passed safely.
 */
#define BF(k) \
do {\
    int a0, a1;\
    a0 = ptr[(k)];\
    a1 = ptr[8 + (k)];\
    ptr[(k)] = a0 + a1;\
    ptr[8 + (k)] = a0 - a1;\
} while (0)
703 | |
704 /* only used by DV codec. The input must be interlaced. 128 is added | |
705 to the pixels before clamping to avoid systematic error | |
706 (1024*sqrt(2)) offset would be needed otherwise. */ | |
707 /* XXX: I think a 1.0/sqrt(2) normalization should be needed to | |
708 compensate the extra butterfly stage - I don't have the full DV | |
709 specification */ | |
710 void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block) | |
711 { | |
712 int i; | |
713 INT16 *ptr; | |
714 | |
715 /* butterfly */ | |
716 ptr = block; | |
717 for(i=0;i<4;i++) { | |
718 BF(0); | |
719 BF(1); | |
720 BF(2); | |
721 BF(3); | |
722 BF(4); | |
723 BF(5); | |
724 BF(6); | |
725 BF(7); | |
726 ptr += 2 * 8; | |
727 } | |
728 | |
729 /* IDCT8 on each line */ | |
730 for(i=0; i<8; i++) { | |
731 idctRowCondDC(block + i*8); | |
732 } | |
733 | |
734 /* IDCT4 and store */ | |
735 for(i=0;i<8;i++) { | |
736 idct4col(dest + i, 2 * line_size, block + i); | |
737 idct4col(dest + line_size + i, 2 * line_size, block + 8 + i); | |
738 } | |
739 } |