Mercurial > libavcodec.hg
annotate simple_idct.c @ 554:3eb6fe38019a libavcodec
4mv & qpel edge emu
author | michaelni |
---|---|
date | Mon, 15 Jul 2002 00:25:53 +0000 |
parents | 2bf17a142cf4 |
children | e7b72c1dfa1b |
rev | line source |
---|---|
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
1 /* |
429 | 2 * Simple IDCT |
3 * | |
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
5 * | |
6 * This library is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2 of the License, or (at your option) any later version. | |
10 * | |
11 * This library is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with this library; if not, write to the Free Software | |
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
19 */ | |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
20 /* |
429 | 21 based upon some commented-out C code from mpeg2dec (idct_mmx.c
22 written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) | |
23 */ | |
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
352
diff
changeset
|
24 #include "avcodec.h" |
479 | 25 #include "dsputil.h" |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
26 #include "simple_idct.h" |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
27 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
28 #if 0 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
29 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
30 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
31 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
32 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
33 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
34 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
35 #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
36 #define ROW_SHIFT 8 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
37 #define COL_SHIFT 17 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
38 #else |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
39 #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
40 #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
41 #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
352
5a8eb5cf9f92
C4=16383 for the c version too and even for some outcommented code
michaelni
parents:
215
diff
changeset
|
42 #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 would give 16384 for i=4; 16383 is used intentionally |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
43 #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
44 #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
45 #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
46 #define ROW_SHIFT 11 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
47 #define COL_SHIFT 20 // 6 |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
48 #endif |
205 | 49 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
50 #ifdef ARCH_ALPHA |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
51 #define FAST_64BIT |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
52 #endif |
205 | 53 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
54 #if defined(ARCH_POWERPC_405) |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
55 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
56 /* signed 16x16 -> 32 multiply add accumulate */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
57 #define MAC16(rt, ra, rb) \ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
58 asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
59 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
60 /* signed 16x16 -> 32 multiply */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
61 #define MUL16(rt, ra, rb) \ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
62 asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb)); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
63 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
64 #else |
205 | 65 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
66 /* signed 16x16 -> 32 multiply add accumulate */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
67 #define MAC16(rt, ra, rb) rt += (ra) * (rb) |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
68 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
69 /* signed 16x16 -> 32 multiply */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
70 #define MUL16(rt, ra, rb) rt = (ra) * (rb) |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
71 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
72 #endif |
205 | 73 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
74 #ifdef ARCH_ALPHA |
215
1fe6b64feefb
Small simple idct improvement for Alpha by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
214
diff
changeset
|
75 /* return value: 0 = all entries 0, 1 = only first entry nonzero, 2 = otherwise */ |
464
9b73bce5071a
gcc 3.1 warning fix (patch by Felix Buenemann <atmosfear at users.sourceforge.net>)
michaelni
parents:
440
diff
changeset
|
76 static inline int idctRowCondDC(int16_t *row) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
77 { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
78 int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
79 uint64_t *lrow = (uint64_t *) row; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
80 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
81 if (lrow[1] == 0) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
82 if (lrow[0] == 0) |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
83 return 0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
84 if ((lrow[0] & ~0xffffULL) == 0) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
85 uint64_t v; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
86 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
87 a0 = W4 * row[0]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
88 a0 += 1 << (ROW_SHIFT - 1); |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
89 a0 >>= ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
90 v = (uint16_t) a0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
91 v += v << 16; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
92 v += v << 32; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
93 lrow[0] = v; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
94 lrow[1] = v; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
95 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
96 return 1; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
97 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
98 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
99 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
100 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
101 a1 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
102 a2 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
103 a3 = a0; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
104 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
105 if (row[2]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
106 a0 += W2 * row[2]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
107 a1 += W6 * row[2]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
108 a2 -= W6 * row[2]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
109 a3 -= W2 * row[2]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
110 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
111 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
112 if (row[4]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
113 a0 += W4 * row[4]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
114 a1 -= W4 * row[4]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
115 a2 -= W4 * row[4]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
116 a3 += W4 * row[4]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
117 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
118 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
119 if (row[6]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
120 a0 += W6 * row[6]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
121 a1 -= W2 * row[6]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
122 a2 += W2 * row[6]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
123 a3 -= W6 * row[6]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
124 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
125 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
126 if (row[1]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
127 b0 = W1 * row[1]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
128 b1 = W3 * row[1]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
129 b2 = W5 * row[1]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
130 b3 = W7 * row[1]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
131 } else { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
132 b0 = 0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
133 b1 = 0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
134 b2 = 0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
135 b3 = 0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
136 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
137 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
138 if (row[3]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
139 b0 += W3 * row[3]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
140 b1 -= W7 * row[3]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
141 b2 -= W1 * row[3]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
142 b3 -= W5 * row[3]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
143 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
144 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
145 if (row[5]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
146 b0 += W5 * row[5]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
147 b1 -= W1 * row[5]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
148 b2 += W7 * row[5]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
149 b3 += W3 * row[5]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
150 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
151 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
152 if (row[7]) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
153 b0 += W7 * row[7]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
154 b1 -= W5 * row[7]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
155 b2 += W3 * row[7]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
156 b3 -= W1 * row[7]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
157 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
158 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
159 row[0] = (a0 + b0) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
160 row[1] = (a1 + b1) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
161 row[2] = (a2 + b2) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
162 row[3] = (a3 + b3) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
163 row[4] = (a3 - b3) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
164 row[5] = (a2 - b2) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
165 row[6] = (a1 - b1) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
166 row[7] = (a0 - b0) >> ROW_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
167 |
215
1fe6b64feefb
Small simple idct improvement for Alpha by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
214
diff
changeset
|
168 return 2; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
169 } |
503
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
170 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
171 inline static void idctSparseCol(int16_t *col) |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
172 { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
173 int a0, a1, a2, a3, b0, b1, b2, b3; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
174 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
175 col[0] += (1 << (COL_SHIFT - 1)) / W4; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
176 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
177 a0 = W4 * col[8 * 0]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
178 a1 = W4 * col[8 * 0]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
179 a2 = W4 * col[8 * 0]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
180 a3 = W4 * col[8 * 0]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
181 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
182 if (col[8 * 2]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
183 a0 += W2 * col[8 * 2]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
184 a1 += W6 * col[8 * 2]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
185 a2 -= W6 * col[8 * 2]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
186 a3 -= W2 * col[8 * 2]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
187 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
188 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
189 if (col[8 * 4]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
190 a0 += W4 * col[8 * 4]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
191 a1 -= W4 * col[8 * 4]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
192 a2 -= W4 * col[8 * 4]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
193 a3 += W4 * col[8 * 4]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
194 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
195 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
196 if (col[8 * 6]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
197 a0 += W6 * col[8 * 6]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
198 a1 -= W2 * col[8 * 6]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
199 a2 += W2 * col[8 * 6]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
200 a3 -= W6 * col[8 * 6]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
201 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
202 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
203 if (col[8 * 1]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
204 b0 = W1 * col[8 * 1]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
205 b1 = W3 * col[8 * 1]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
206 b2 = W5 * col[8 * 1]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
207 b3 = W7 * col[8 * 1]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
208 } else { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
209 b0 = b1 = b2 = b3 = 0; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
210 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
211 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
212 if (col[8 * 3]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
213 b0 += W3 * col[8 * 3]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
214 b1 -= W7 * col[8 * 3]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
215 b2 -= W1 * col[8 * 3]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
216 b3 -= W5 * col[8 * 3]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
217 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
218 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
219 if (col[8 * 5]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
220 b0 += W5 * col[8 * 5]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
221 b1 -= W1 * col[8 * 5]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
222 b2 += W7 * col[8 * 5]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
223 b3 += W3 * col[8 * 5]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
224 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
225 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
226 if (col[8 * 7]) { |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
227 b0 += W7 * col[8 * 7]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
228 b1 -= W5 * col[8 * 7]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
229 b2 += W3 * col[8 * 7]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
230 b3 -= W1 * col[8 * 7]; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
231 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
232 |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
233 col[8 * 0] = (a0 + b0) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
234 col[8 * 7] = (a0 - b0) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
235 col[8 * 1] = (a1 + b1) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
236 col[8 * 6] = (a1 - b1) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
237 col[8 * 2] = (a2 + b2) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
238 col[8 * 5] = (a2 - b2) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
239 col[8 * 3] = (a3 + b3) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
240 col[8 * 4] = (a3 - b3) >> COL_SHIFT; |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
241 } |
2bf17a142cf4
Reintroduce lost idctSparseCol for Alpha. Sorry for adding even more
mellum
parents:
479
diff
changeset
|
242 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
243 #else /* not ARCH_ALPHA */ |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
244 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
245 static inline void idctRowCondDC (int16_t * row) |
205 | 246 { |
247 int a0, a1, a2, a3, b0, b1, b2, b3; | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
248 #ifdef FAST_64BIT |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
249 uint64_t temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
250 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
251 uint32_t temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
252 #endif |
205 | 253 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
254 #ifdef FAST_64BIT |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
255 #ifdef WORDS_BIGENDIAN |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
256 #define ROW0_MASK 0xffff000000000000LL |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
257 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
258 #define ROW0_MASK 0xffffLL |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
259 #endif |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
260 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
261 ((uint64_t *)row)[1]) == 0) { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
262 temp = (row[0] << 3) & 0xffff; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
263 temp += temp << 16; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
264 temp += temp << 32; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
265 ((uint64_t *)row)[0] = temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
266 ((uint64_t *)row)[1] = temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
267 return; |
205 | 268 } |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
269 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
270 if (!(((uint32_t*)row)[1] | |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
271 ((uint32_t*)row)[2] | |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
272 ((uint32_t*)row)[3] | |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
273 row[1])) { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
274 temp = (row[0] << 3) & 0xffff; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
275 temp += temp << 16; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
276 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
277 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
278 return; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
279 } |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
280 #endif |
205 | 281 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
282 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
283 a1 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
284 a2 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
285 a3 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
286 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
287 /* no need to optimize : gcc does it */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
288 a0 += W2 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
289 a1 += W6 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
290 a2 -= W6 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
291 a3 -= W2 * row[2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
292 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
293 MUL16(b0, W1, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
294 MAC16(b0, W3, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
295 MUL16(b1, W3, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
296 MAC16(b1, -W7, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
297 MUL16(b2, W5, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
298 MAC16(b2, -W1, row[3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
299 MUL16(b3, W7, row[1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
300 MAC16(b3, -W5, row[3]); |
205 | 301 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
302 #ifdef FAST_64BIT |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
303 temp = ((uint64_t*)row)[1]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
304 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
305 temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
306 #endif |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
307 if (temp != 0) { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
308 a0 += W4*row[4] + W6*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
309 a1 += - W4*row[4] - W2*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
310 a2 += - W4*row[4] + W2*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
311 a3 += W4*row[4] - W6*row[6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
312 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
313 MAC16(b0, W5, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
314 MAC16(b0, W7, row[7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
315 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
316 MAC16(b1, -W1, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
317 MAC16(b1, -W5, row[7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
318 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
319 MAC16(b2, W7, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
320 MAC16(b2, W3, row[7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
321 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
322 MAC16(b3, W3, row[5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
323 MAC16(b3, -W1, row[7]); |
205 | 324 } |
325 | |
326 row[0] = (a0 + b0) >> ROW_SHIFT; | |
327 row[7] = (a0 - b0) >> ROW_SHIFT; | |
328 row[1] = (a1 + b1) >> ROW_SHIFT; | |
329 row[6] = (a1 - b1) >> ROW_SHIFT; | |
330 row[2] = (a2 + b2) >> ROW_SHIFT; | |
331 row[5] = (a2 - b2) >> ROW_SHIFT; | |
332 row[3] = (a3 + b3) >> ROW_SHIFT; | |
333 row[4] = (a3 - b3) >> ROW_SHIFT; | |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
334 } |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
335 #endif /* not ARCH_ALPHA */ |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
336 |
479 | 337 static inline void idctSparseColPut (UINT8 *dest, int line_size, |
338 int16_t * col) | |
205 | 339 { |
340 int a0, a1, a2, a3, b0, b1, b2, b3; | |
479 | 341 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
342 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
343 /* XXX: I did that only to give same values as previous code */ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
344 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
345 a1 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
346 a2 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
347 a3 = a0; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
348 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
349 a0 += + W2*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
350 a1 += + W6*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
351 a2 += - W6*col[8*2]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
352 a3 += - W2*col[8*2]; |
205 | 353 |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
354 MUL16(b0, W1, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
355 MUL16(b1, W3, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
356 MUL16(b2, W5, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
357 MUL16(b3, W7, col[8*1]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
358 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
359 MAC16(b0, + W3, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
360 MAC16(b1, - W7, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
361 MAC16(b2, - W1, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
362 MAC16(b3, - W5, col[8*3]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
363 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
364 if(col[8*4]){ |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
365 a0 += + W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
366 a1 += - W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
367 a2 += - W4*col[8*4]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
368 a3 += + W4*col[8*4]; |
205 | 369 } |
370 | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
371 if (col[8*5]) { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
372 MAC16(b0, + W5, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
373 MAC16(b1, - W1, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
374 MAC16(b2, + W7, col[8*5]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
375 MAC16(b3, + W3, col[8*5]); |
205 | 376 } |
377 | |
378 if(col[8*6]){ | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
379 a0 += + W6*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
380 a1 += - W2*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
381 a2 += + W2*col[8*6]; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
382 a3 += - W6*col[8*6]; |
205 | 383 } |
384 | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
385 if (col[8*7]) { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
386 MAC16(b0, + W7, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
387 MAC16(b1, - W5, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
388 MAC16(b2, + W3, col[8*7]); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
389 MAC16(b3, - W1, col[8*7]); |
205 | 390 } |
391 | |
479 | 392 dest[0] = cm[(a0 + b0) >> COL_SHIFT]; |
393 dest += line_size; | |
394 dest[0] = cm[(a1 + b1) >> COL_SHIFT]; | |
395 dest += line_size; | |
396 dest[0] = cm[(a2 + b2) >> COL_SHIFT]; | |
397 dest += line_size; | |
398 dest[0] = cm[(a3 + b3) >> COL_SHIFT]; | |
399 dest += line_size; | |
400 dest[0] = cm[(a3 - b3) >> COL_SHIFT]; | |
401 dest += line_size; | |
402 dest[0] = cm[(a2 - b2) >> COL_SHIFT]; | |
403 dest += line_size; | |
404 dest[0] = cm[(a1 - b1) >> COL_SHIFT]; | |
405 dest += line_size; | |
406 dest[0] = cm[(a0 - b0) >> COL_SHIFT]; | |
407 } | |
408 | |
409 static inline void idctSparseColAdd (UINT8 *dest, int line_size, | |
410 int16_t * col) | |
411 { | |
412 int a0, a1, a2, a3, b0, b1, b2, b3; | |
413 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
414 | |
415 /* XXX: I did that only to give same values as previous code */ | |
416 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); | |
417 a1 = a0; | |
418 a2 = a0; | |
419 a3 = a0; | |
420 | |
421 a0 += + W2*col[8*2]; | |
422 a1 += + W6*col[8*2]; | |
423 a2 += - W6*col[8*2]; | |
424 a3 += - W2*col[8*2]; | |
425 | |
426 MUL16(b0, W1, col[8*1]); | |
427 MUL16(b1, W3, col[8*1]); | |
428 MUL16(b2, W5, col[8*1]); | |
429 MUL16(b3, W7, col[8*1]); | |
430 | |
431 MAC16(b0, + W3, col[8*3]); | |
432 MAC16(b1, - W7, col[8*3]); | |
433 MAC16(b2, - W1, col[8*3]); | |
434 MAC16(b3, - W5, col[8*3]); | |
435 | |
436 if(col[8*4]){ | |
437 a0 += + W4*col[8*4]; | |
438 a1 += - W4*col[8*4]; | |
439 a2 += - W4*col[8*4]; | |
440 a3 += + W4*col[8*4]; | |
441 } | |
442 | |
443 if (col[8*5]) { | |
444 MAC16(b0, + W5, col[8*5]); | |
445 MAC16(b1, - W1, col[8*5]); | |
446 MAC16(b2, + W7, col[8*5]); | |
447 MAC16(b3, + W3, col[8*5]); | |
448 } | |
449 | |
450 if(col[8*6]){ | |
451 a0 += + W6*col[8*6]; | |
452 a1 += - W2*col[8*6]; | |
453 a2 += + W2*col[8*6]; | |
454 a3 += - W6*col[8*6]; | |
455 } | |
456 | |
457 if (col[8*7]) { | |
458 MAC16(b0, + W7, col[8*7]); | |
459 MAC16(b1, - W5, col[8*7]); | |
460 MAC16(b2, + W3, col[8*7]); | |
461 MAC16(b3, - W1, col[8*7]); | |
462 } | |
463 | |
464 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; | |
465 dest += line_size; | |
466 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; | |
467 dest += line_size; | |
468 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; | |
469 dest += line_size; | |
470 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; | |
471 dest += line_size; | |
472 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; | |
473 dest += line_size; | |
474 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; | |
475 dest += line_size; | |
476 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; | |
477 dest += line_size; | |
478 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; | |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
479 } |
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
480 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
481 #ifdef ARCH_ALPHA |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
482 /* If all rows but the first one are zero after row transformation, |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
483 all rows will be identical after column transformation. */ |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
484 static inline void idctCol2(int16_t *col) |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
485 { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
486 int i; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
487 uint64_t l, r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
488 uint64_t *lcol = (uint64_t *) col; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
489 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
490 for (i = 0; i < 8; ++i) { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
491 int a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
492 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
493 a0 *= W4; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
494 col[0] = a0 >> COL_SHIFT; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
495 ++col; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
496 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
497 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
498 l = lcol[0]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
499 r = lcol[1]; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
500 lcol[ 2] = l; lcol[ 3] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
501 lcol[ 4] = l; lcol[ 5] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
502 lcol[ 6] = l; lcol[ 7] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
503 lcol[ 8] = l; lcol[ 9] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
504 lcol[10] = l; lcol[11] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
505 lcol[12] = l; lcol[13] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
506 lcol[14] = l; lcol[15] = r; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
205
diff
changeset
|
507 } |
205 | 508 |
175
bd77d3cbb233
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff
changeset
|
/*
 * In-place inverse DCT of one 8x8 coefficient block (64 shorts, 8 per row).
 *
 * The row pass calls idctRowCondDC() on each of the 8 rows and looks at the
 * value it returns to choose the cheapest column pass:
 *   - rows 1..7 all returned 0       -> idctCol2() on the whole block
 *   - no row returned 2              -> idctSparseCol() on column 0 only,
 *                                       then each row is filled with the
 *                                       value found in its first element
 *   - otherwise                      -> idctSparseCol() on all 8 columns
 *
 * NOTE(review): presumably idctRowCondDC() returns 0 for an all-zero row,
 * 1 for a row that ends up constant and 2 for a general row -- confirm
 * against its definition.
 */
void simple_idct (short *block)
{
    int row;
    int col;
    int only_row0_nonzero = 1; /* all rows except row 0 zero */
    int rows_are_constant = 1; /* all rows consist of a constant value */
    uint64_t *quads;

    /* Row pass; also gathers the sparseness information. */
    for (row = 0; row < 8; row++) {
        const int kind = idctRowCondDC(block + 8 * row);

        if (kind == 2)
            rows_are_constant = 0;
        if (row != 0 && kind > 0)
            only_row0_nonzero = 0;
    }

    if (only_row0_nonzero) {
        idctCol2(block);
        return;
    }

    if (!rows_are_constant) {
        /* General case: transform every column. */
        for (col = 0; col < 8; col++)
            idctSparseCol(block + col);
        return;
    }

    /*
     * Every row is constant, so all columns are identical: transform
     * column 0 once and replicate each result across its row.
     *
     * NOTE(review): the replication stores 64 bits at a time through a
     * uint64_t pointer cast from `block`; this presumably relies on the
     * caller passing a suitably aligned buffer -- confirm.
     */
    idctSparseCol(block);
    quads = (uint64_t *) block;
    for (row = 0; row < 8; row++) {
        /* Broadcast the 16-bit value into all four lanes of a 64-bit word;
           the shifted copies never overlap, so OR equals addition here. */
        uint64_t fill = (uint16_t) block[8 * row];

        fill |= fill << 16;
        fill |= fill << 32;
        quads[2 * row]     = fill;
        quads[2 * row + 1] = fill;
    }
}
440
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
429
diff
changeset
|
545 |
479 | 546 /* XXX: suppress this mess */ |
547 void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block) | |
548 { | |
549 simple_idct(block); | |
550 put_pixels_clamped(block, dest, line_size); | |
551 } | |
552 | |
553 void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block) | |
554 { | |
555 simple_idct(block); | |
556 add_pixels_clamped(block, dest, line_size); | |
557 } | |
558 | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
559 #else |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
560 |
479 | 561 void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
562 { |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
563 int i; |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
564 for(i=0; i<8; i++) |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
565 idctRowCondDC(block + i*8); |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
566 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
567 for(i=0; i<8; i++) |
479 | 568 idctSparseColPut(dest + i, line_size, block + i); |
569 } | |
570 | |
571 void simple_idct_add(UINT8 *dest, int line_size, INT16 *block) | |
572 { | |
573 int i; | |
574 for(i=0; i<8; i++) | |
575 idctRowCondDC(block + i*8); | |
576 | |
577 for(i=0; i<8; i++) | |
578 idctSparseColAdd(dest + i, line_size, block + i); | |
476
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
579 } |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
580 |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
581 #endif |
ec13b0a726c3
removed unused code - began to merge alpha specific stuff - added mac macros for suitable CPUs
bellard
parents:
464
diff
changeset
|
582 |
440
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
429
diff
changeset
|
583 #undef COL_SHIFT |