annotate i386/simple_idct_mmx.c @ 1795:920e6381e1fe libavcodec

2-byte shorter userdata for MPEG-4: in the past it was startcode,string,00,7F,startcode; now it is startcode,string,startcode. Both are MPEG-4 compliant, as according to the standard the userdata lasts until the next 00 00 01 (startcode prefix), but some very primitive decoders which simply skip until the first 00 byte and then expect the next valid startcode might fail with the old variant. Just a theory though (didn't test if QuickTime can decode it now).
author michael
date Sun, 08 Feb 2004 22:52:35 +0000
parents 43ceb6e34b06
children 3054613980a8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1 /*
429
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
2 * Simple IDCT MMX
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
3 *
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
4 * Copyright (c) 2001, 2002 Michael Niedermayer <michaelni@gmx.at>
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
5 *
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
6 * This library is free software; you can redistribute it and/or
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
8 * License as published by the Free Software Foundation; either
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
9 * version 2 of the License, or (at your option) any later version.
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
10 *
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
11 * This library is distributed in the hope that it will be useful,
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
14 * Lesser General Public License for more details.
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
15 *
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
17 * License along with this library; if not, write to the Free Software
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
19 */
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
20 #include "../dsputil.h"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
21
351
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
22 /*
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
23 23170.475006
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
24 22725.260826
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
25 21406.727617
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
26 19265.545870
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
27 16384.000000
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
28 12872.826198
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
29 8866.956905
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
30 4520.335430
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
31 */
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
32 #define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
33 #define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
34 #define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
35 #define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
351
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
36 #if 0
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
37 #define C4 16384 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
351
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
38 #else
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
39 #define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
40 #endif
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
41 #define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
42 #define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
43 #define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
44
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
45 #define ROW_SHIFT 11
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
46 #define COL_SHIFT 20 // 6
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
47
350
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 213
diff changeset
48 static const uint64_t __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL;
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 213
diff changeset
49 static const uint64_t __attribute__((aligned(8))) d40000= 0x0000000000040000ULL;
1469
43ceb6e34b06 another non const static, maybe its thread save now
michaelni
parents: 1064
diff changeset
50
43ceb6e34b06 another non const static, maybe its thread save now
michaelni
parents: 1064
diff changeset
51 static const int16_t __attribute__((aligned(8))) coeffs[]= {
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
52 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
53 // 1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0,
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
54 // 0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16),
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
55 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0,
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
56 // the 1 = ((1<<(COL_SHIFT-1))/C4)<<ROW_SHIFT :)
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
57 // 0, 0, 0, 0,
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
58 // 0, 0, 0, 0,
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
59
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
60 C4, C4, C4, C4,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
61 C4, -C4, C4, -C4,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
62
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
63 C2, C6, C2, C6,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
64 C6, -C2, C6, -C2,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
65
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
66 C1, C3, C1, C3,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
67 C5, C7, C5, C7,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
68
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
69 C3, -C7, C3, -C7,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
70 -C1, -C5, -C1, -C5,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
71
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
72 C5, -C1, C5, -C1,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
73 C7, C3, C7, C3,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
74
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
75 C7, -C5, C7, -C5,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
76 C3, -C1, C3, -C1
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
77 };
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
78
350
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 213
diff changeset
79 #if 0
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
80 static void unused_var_killer(){
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
81 int a= wm1010 + d40000;
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
82 temp[0]=a;
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
83 }
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
84
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
85 static void inline idctCol (int16_t * col, int16_t *input)
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
86 {
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
87 #undef C0
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
88 #undef C1
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
89 #undef C2
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
90 #undef C3
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
91 #undef C4
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
92 #undef C5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
93 #undef C6
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
94 #undef C7
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
95 int a0, a1, a2, a3, b0, b1, b2, b3;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
96 const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
97 const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
98 const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
99 const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
352
5a8eb5cf9f92 C4=16383 for the c version too and even for some outcommented code
michaelni
parents: 351
diff changeset
100 const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
101 const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
102 const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
103 const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
104 /*
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
105 if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
106 col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
107 col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
108 return;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
109 }*/
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
110
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
111 col[8*0] = input[8*0 + 0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
112 col[8*1] = input[8*2 + 0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
113 col[8*2] = input[8*0 + 1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
114 col[8*3] = input[8*2 + 1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
115 col[8*4] = input[8*4 + 0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
116 col[8*5] = input[8*6 + 0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
117 col[8*6] = input[8*4 + 1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
118 col[8*7] = input[8*6 + 1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
119
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
120 a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6] + (1<<(COL_SHIFT-1));
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
121 a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6] + (1<<(COL_SHIFT-1));
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
122 a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6] + (1<<(COL_SHIFT-1));
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
123 a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6] + (1<<(COL_SHIFT-1));
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
124
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
125 b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
126 b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
127 b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
128 b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
129
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
130 col[8*0] = (a0 + b0) >> COL_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
131 col[8*1] = (a1 + b1) >> COL_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
132 col[8*2] = (a2 + b2) >> COL_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
133 col[8*3] = (a3 + b3) >> COL_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
134 col[8*4] = (a3 - b3) >> COL_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
135 col[8*5] = (a2 - b2) >> COL_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
136 col[8*6] = (a1 - b1) >> COL_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
137 col[8*7] = (a0 - b0) >> COL_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
138 }
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
139
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
140 static void inline idctRow (int16_t * output, int16_t * input)
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
141 {
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
142 int16_t row[8];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
143
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
144 int a0, a1, a2, a3, b0, b1, b2, b3;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
145 const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
146 const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
147 const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
148 const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
352
5a8eb5cf9f92 C4=16383 for the c version too and even for some outcommented code
michaelni
parents: 351
diff changeset
149 const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
150 const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
151 const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
152 const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
153
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
154 row[0] = input[0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
155 row[2] = input[1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
156 row[4] = input[4];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
157 row[6] = input[5];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
158 row[1] = input[8];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
159 row[3] = input[9];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
160 row[5] = input[12];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
161 row[7] = input[13];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
162
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
163 if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7]) ) {
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
164 row[0] = row[1] = row[2] = row[3] = row[4] =
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
165 row[5] = row[6] = row[7] = row[0]<<3;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
166 output[0] = row[0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
167 output[2] = row[1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
168 output[4] = row[2];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
169 output[6] = row[3];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
170 output[8] = row[4];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
171 output[10] = row[5];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
172 output[12] = row[6];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
173 output[14] = row[7];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
174 return;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
175 }
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
176
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
177 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1));
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
178 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1));
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
179 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1));
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
180 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1));
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
181
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
182 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
183 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
184 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
185 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
186
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
187 row[0] = (a0 + b0) >> ROW_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
188 row[1] = (a1 + b1) >> ROW_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
189 row[2] = (a2 + b2) >> ROW_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
190 row[3] = (a3 + b3) >> ROW_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
191 row[4] = (a3 - b3) >> ROW_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
192 row[5] = (a2 - b2) >> ROW_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
193 row[6] = (a1 - b1) >> ROW_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
194 row[7] = (a0 - b0) >> ROW_SHIFT;
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
195
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
196 output[0] = row[0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
197 output[2] = row[1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
198 output[4] = row[2];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
199 output[6] = row[3];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
200 output[8] = row[4];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
201 output[10] = row[5];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
202 output[12] = row[6];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
203 output[14] = row[7];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
204 }
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
205 #endif
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
206
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
207 static inline void idct(int16_t *block)
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
208 {
1469
43ceb6e34b06 another non const static, maybe its thread save now
michaelni
parents: 1064
diff changeset
209 int64_t __attribute__((aligned(8))) align_tmp[16];
43ceb6e34b06 another non const static, maybe its thread save now
michaelni
parents: 1064
diff changeset
210 int16_t * const temp= (int16_t*)align_tmp;
43ceb6e34b06 another non const static, maybe its thread save now
michaelni
parents: 1064
diff changeset
211
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
212 asm volatile(
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
213 #if 0 //Alternative, simpler variant
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
214
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
215 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
216 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
217 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
218 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
219 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
220 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
221 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
222 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
223 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
224 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
225 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
226 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
227 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
228 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
229 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
230 #rounder ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
231 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
232 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
233 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
234 "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
235 "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
236 #rounder ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
237 "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
238 "paddd %%mm0, %%mm0 \n\t" \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
239 "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
240 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
241 "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
242 "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
243 "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
244 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
245 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
246 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
247 "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
248 "psrad $" #shift ", %%mm7 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
249 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
250 "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
251 "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
252 "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
253 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
254 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
255 "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
256 "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
257 "movq %%mm7, " #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
258 "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
259 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
260 "movq %%mm2, 24+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
261 "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
262 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
263 "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
264 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
265 "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
266 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
267 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
268 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
269 "psubd %%mm4, %%mm0 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
270 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
271 "psrad $" #shift ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
272 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
273 "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
274 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
275 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
276 "psrad $" #shift ", %%mm6 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
277 "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
278 "movq %%mm2, 8+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
279 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
280 "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
281 "movq %%mm4, 16+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
282
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
283 #define COL_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
284 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
285 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
286 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
287 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
288 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
289 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
290 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
291 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
292 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
293 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
294 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
295 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
296 #rounder ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
297 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
298 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
299 #rounder ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
300 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
301 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
302 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
303 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
304 "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
305 "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
306 "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
307 "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
308 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
309 "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
310 "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
311 "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
312 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
313 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
314 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
315 "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
316 "psrad $" #shift ", %%mm7 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
317 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
318 "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
319 "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
320 "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
321 "psrad $" #shift ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
322 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
323 "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
324 "movd %%mm7, " #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
325 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
326 "movd %%mm0, 16+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
327 "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
328 "movd %%mm2, 96+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
329 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
330 "movd %%mm4, 112+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
331 "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
332 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
333 "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
334 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
335 "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
336 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
337 "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
338 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
339 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
340 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
341 "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
342 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
343 "psrad $" #shift ", %%mm5 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
344 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
345 "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
346 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
347 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
348 "psrad $" #shift ", %%mm6 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
349 "psrad $" #shift ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
350 "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
351 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
352 "movd %%mm2, 32+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
353 "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
354 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
355 "movd %%mm6, 48+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
356 "movd %%mm4, 64+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
357 "movd %%mm5, 80+" #dst " \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
358
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
359
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
360 #define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
361 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
362 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
363 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
364 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
213
e80ad397d30e Cygwin's mangling by Felix Buenemann <atmosfear@users.sourceforge.net>
nickols_k
parents: 209
diff changeset
365 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
366 "pand %%mm0, %%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
367 "por %%mm1, %%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
368 "por %%mm2, %%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
369 "por %%mm3, %%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
370 "packssdw %%mm4,%%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
371 "movd %%mm4, %%eax \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
372 "orl %%eax, %%eax \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
373 "jz 1f \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
374 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
375 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
376 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
377 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
378 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
379 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
380 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
381 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
382 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
383 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
384 #rounder ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
385 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
386 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
387 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
388 "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
389 "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
390 #rounder ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
391 "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
392 "paddd %%mm0, %%mm0 \n\t" \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
393 "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
394 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
395 "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
396 "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
397 "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
398 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
399 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
400 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
401 "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
402 "psrad $" #shift ", %%mm7 \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
403 "psrad $" #shift ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
404 "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
405 "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
406 "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
407 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
408 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
409 "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
410 "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
411 "movq %%mm7, " #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
412 "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
413 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
414 "movq %%mm2, 24+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
415 "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
416 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
417 "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
418 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
419 "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
420 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
421 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
422 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
423 "psubd %%mm4, %%mm0 \n\t" /* A2-B2 a2-b2 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
424 "psrad $" #shift ", %%mm2 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
425 "psrad $" #shift ", %%mm0 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
426 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
427 "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
428 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
429 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
430 "psrad $" #shift ", %%mm6 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
431 "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
432 "movq %%mm2, 8+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
433 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
434 "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
435 "movq %%mm4, 16+" #dst " \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
436 "jmp 2f \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
437 "1: \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
438 "pslld $16, %%mm0 \n\t"\
213
e80ad397d30e Cygwin's mangling by Felix Buenemann <atmosfear@users.sourceforge.net>
nickols_k
parents: 209
diff changeset
439 "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
440 "psrad $13, %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
441 "packssdw %%mm0, %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
442 "movq %%mm0, " #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
443 "movq %%mm0, 8+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
444 "movq %%mm0, 16+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
445 "movq %%mm0, 24+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
446 "2: \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
447
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
448
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
449 //IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
450 ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
451 /*ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1), paddd (%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
452 ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1), paddd (%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
453 ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1), paddd (%2), 11)*/
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
454
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
455 DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
456 DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
457 DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
458
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
459
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
460 //IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
461 COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
462 COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
463 COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
464 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
465
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
466 #else
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
467
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift)
 *
 * Inline-asm fragment that turns four input quadwords into four output
 * quadwords, with a shortcut for inputs that pass a zero test.
 *
 *   src0, src4, src1, src5  memory operands for the four input quadwords
 *                           (word layout as noted on the movq lines below).
 *   dst       memory operand for the output; stores go to dst, 8+dst,
 *             16+dst and 24+dst, so dst has to start with a displacement
 *             the assembler can add to (the visible caller passes 0(%1)).
 *   rounder   instruction text (mnemonic plus source operand, e.g.
 *             paddd 8(%2)) that is emitted twice, once with mm4 and once
 *             with mm0 as destination. Used on the full path only.
 *   shift     immediate count for the final psrad instructions. Used on
 *             the full path only.
 *
 * Flow: src0 is AND-ed with the constant wm1010 and OR-ed with the other
 * three inputs. If the result is zero, control goes to local label 1: each
 * dword of mm0 is shifted left by 16, the constant d40000 is added, the sum
 * is shifted right arithmetically by the fixed count 13, packed to words,
 * and the same quadword is stored to all four output slots. Otherwise the
 * full computation runs with the coefficient table read through operand %2
 * (byte offsets 16 to 104) and then jumps over the shortcut to local
 * label 2.
 *
 * Scratch: mm0-mm7 and eax. Defines the numeric local labels 1 and 2.
 *
 * NOTE(review): wm1010 and d40000 are defined outside this block.
 * Presumably wm1010 clears the r0/R0 words so that only the remaining
 * coefficients take part in the zero test - confirm against the
 * definitions.
 */
468 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
469 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
470 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
471 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
472 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
213
e80ad397d30e Cygwin's mangling by Felix Buenemann <atmosfear@users.sourceforge.net>
nickols_k
parents: 209
diff changeset
473 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
474 "pand %%mm0, %%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
475 "por %%mm1, %%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
476 "por %%mm2, %%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
477 "por %%mm3, %%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
478 "packssdw %%mm4,%%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
479 "movd %%mm4, %%eax \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
480 "orl %%eax, %%eax \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
481 "jz 1f \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
482 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
483 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
484 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
485 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
486 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
487 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
488 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
489 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
490 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
491 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
492 #rounder ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
493 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
494 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
495 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
496 "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
497 "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
498 #rounder ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
499 "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
500 "paddd %%mm0, %%mm0 \n\t" \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
501 "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
502 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
503 "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
504 "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
505 "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
506 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
507 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
508 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
509 "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
510 "psrad $" #shift ", %%mm7 \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
511 "psrad $" #shift ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
512 "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
513 "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
514 "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
515 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
516 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
517 "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
518 "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
519 "movq %%mm7, " #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
520 "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
521 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
522 "movq %%mm2, 24+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
523 "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
524 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
525 "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
526 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
527 "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
528 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
529 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
530 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
531 "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
532 "psrad $" #shift ", %%mm2 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
533 "psrad $" #shift ", %%mm0 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
534 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
535 "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
536 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
537 "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
538 "psrad $" #shift ", %%mm6 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
539 "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
540 "movq %%mm2, 8+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
541 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
542 "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
543 "movq %%mm4, 16+" #dst " \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
544 "jmp 2f \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
545 "1: \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
546 "pslld $16, %%mm0 \n\t"\
213
e80ad397d30e Cygwin's mangling by Felix Buenemann <atmosfear@users.sourceforge.net>
nickols_k
parents: 209
diff changeset
547 "paddd "MANGLE(d40000)", %%mm0 \n\t" /* add constant d40000 (defined outside this macro) */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
548 "psrad $13, %%mm0 \n\t" /* fixed count 13; the shift argument is not used on this path */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
549 "packssdw %%mm0, %%mm0 \n\t" /* narrow to words; both halves of mm0 are now equal */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
550 "movq %%mm0, " #dst " \n\t" /* same quadword goes to all four output slots */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
551 "movq %%mm0, 8+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
552 "movq %%mm0, 16+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
553 "movq %%mm0, 24+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
554 "2: \n\t" /* common exit of the full path and the shortcut */
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
555
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt)
 *
 * Inline-asm fragment: the ROW_IDCT computation preceded by an all-zero
 * test. The four input quadwords are OR-ed together; when the result is
 * zero, control jumps to the label passed as bt and nothing is written to
 * dst. Otherwise the instruction sequence is the same as in ROW_IDCT.
 *
 *   src0, src4, src1, src5  memory operands for the four input quadwords
 *                           (word layout as noted on the movq lines below).
 *   dst       memory operand for the output; stores go to dst, 8+dst,
 *             16+dst and 24+dst, so dst has to start with a displacement
 *             the assembler can add to (the visible callers pass 32(%1),
 *             64(%1) and 96(%1)).
 *   rounder   instruction text (mnemonic plus source operand, e.g.
 *             paddd (%2)) that is emitted twice, once with mm4 and once
 *             with mm0 as destination.
 *   shift     immediate count for the final psrad instructions.
 *   bt        jump target used when all inputs are zero (e.g. 4f); the
 *             label itself has to be defined by the surrounding code.
 *
 * The coefficient table is read through operand %2 (byte offsets 16 to 104).
 * Scratch: mm0-mm7 and eax.
 */
556 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
557 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
558 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
559 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
560 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
561 "movq %%mm0, %%mm4 \n\t" /* start OR-ing all four inputs together */\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
562 "por %%mm1, %%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
563 "por %%mm2, %%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
564 "por %%mm3, %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
565 "packssdw %%mm4,%%mm4 \n\t" /* saturated copies of both dwords land in the low 32 bits */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
566 "movd %%mm4, %%eax \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
567 "orl %%eax, %%eax \n\t" /* sets ZF when eax is zero */\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
568 "jz " #bt " \n\t" /* all inputs zero: leave via bt, dst is not written */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
569 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
570 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
571 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
572 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
573 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
574 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
575 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
576 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
577 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
578 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
579 #rounder ", %%mm4 \n\t" /* rounder instruction, destination mm4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
580 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
581 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
582 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
583 "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
584 "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
585 #rounder ", %%mm0 \n\t" /* rounder instruction, destination mm0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
586 "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
587 "paddd %%mm0, %%mm0 \n\t" /* doubled so that the psubd below leaves A2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
588 "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
589 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
590 "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
591 "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
592 "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
593 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
594 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
595 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
596 "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
597 "psrad $" #shift ", %%mm7 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
598 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
599 "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
600 "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
601 "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
602 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
603 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
604 "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
605 "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
606 "movq %%mm7, " #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
607 "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
608 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
609 "movq %%mm2, 24+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
610 "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
611 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
612 "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
613 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
614 "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
615 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
616 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
617 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
618 "psubd %%mm4, %%mm0 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
619 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
620 "psrad $" #shift ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
621 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
622 "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
623 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
624 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
625 "psrad $" #shift ", %%mm6 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
626 "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
627 "movq %%mm2, 8+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
628 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
629 "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
630 "movq %%mm4, 16+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
631
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift)
 *
 * Inline-asm fragment that unconditionally turns four input quadwords into
 * four output quadwords; there is no zero test and no branch.
 *
 *   src0, src4, src1, src5  memory operands for the four input quadwords
 *                           (word layout as noted on the movq lines below).
 *   dst       memory operand for the output; stores go to dst, 8+dst,
 *             16+dst and 24+dst, so dst has to start with a displacement
 *             the assembler can add to (e.g. 0(%1)).
 *   rounder   instruction text (mnemonic plus source operand, e.g.
 *             paddd 8(%2)) that is emitted twice, once with mm4 and once
 *             with mm0 as destination.
 *   shift     immediate count for the final psrad instructions.
 *
 * The coefficient table is read through operand %2 (byte offsets 16 to 104).
 * Results are packed to words with signed saturation before being stored.
 * Scratch: mm0-mm7.
 */
632 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
633 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
634 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
635 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
636 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
637 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
638 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
639 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
640 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
641 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
642 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
643 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
644 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
645 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
646 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
647 #rounder ", %%mm4 \n\t" /* rounder instruction, destination mm4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
648 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
649 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
650 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
651 "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
652 "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
653 #rounder ", %%mm0 \n\t" /* rounder instruction, destination mm0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
654 "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
655 "paddd %%mm0, %%mm0 \n\t" /* doubled so that the psubd below leaves A2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
656 "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
657 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
658 "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
659 "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
660 "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
661 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
662 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
663 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
664 "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
665 "psrad $" #shift ", %%mm7 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
666 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
667 "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
668 "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
669 "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
670 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
671 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
672 "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
673 "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
674 "movq %%mm7, " #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
675 "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
676 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
677 "movq %%mm2, 24+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
678 "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
679 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
680 "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
681 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
682 "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
683 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
684 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
685 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
686 "psubd %%mm4, %%mm0 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
687 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
688 "psrad $" #shift ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
689 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
690 "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
691 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
692 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
693 "psrad $" #shift ", %%mm6 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
694 "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
695 "movq %%mm2, 8+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
696 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
697 "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
698 "movq %%mm4, 16+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
699
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
700 //IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
701 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
702 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
703 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
704 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
705
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * IDCT, full variant: all four inputs (src0, src4, src1, src5) are loaded and
 * contribute to the result.
 *
 * The macro expands to a sequence of string literals holding MMX assembly
 * text; src*, dst, rounder and shift are pasted into that text by
 * stringification. Following the register comments below, each input holds
 * two coefficients for each of two transforms that are processed in parallel
 * (upper-case and lower-case names). Constants are read from 16(%2)..104(%2).
 *
 *   even part: A0..A3 from src0 (C4) and src4 (C2/C6), rounder applied
 *   odd part:  B0..B3 from src1 and src5 (C1/C3/C5/C7)
 *
 * Every sum/difference is shifted right arithmetically by shift, packed to
 * 16 bits with signed saturation and written with one 32-bit movd:
 *   dst+0:  A0+B0   dst+16: A1+B1   dst+32: A2+B2   dst+48:  A3+B3
 *   dst+64: A3-B3   dst+80: A2-B2   dst+96: A1-B1   dst+112: A0-B0
 *
 * All of mm0-mm7 are overwritten.
 */
706 #undef IDCT
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
707 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
708 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
709 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
710 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
711 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
712 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
713 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
714 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
715 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
716 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
717 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
718 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
719 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
720 #rounder ", %%mm4 \n\t" /* rounder text pasted as an instruction acting on mm4 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
721 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
722 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
723 #rounder ", %%mm0 \n\t" /* rounder text pasted as an instruction acting on mm0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
724 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
725 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
726 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
727 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
728 "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
729 "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
730 "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
731 "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
732 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
733 "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
734 "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
735 "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
736 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
737 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
738 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0, computed as 2A0-(A0+B0) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
739 "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
740 "psrad $" #shift ", %%mm7 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
741 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
742 "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
743 "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
744 "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
745 "psrad $" #shift ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
746 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
747 "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
748 "movd %%mm7, " #dst " \n\t" /* dst+0: A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
749 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
750 "movd %%mm0, 16+" #dst " \n\t" /* dst+16: A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
751 "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
752 "movd %%mm2, 96+" #dst " \n\t" /* dst+96: A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
753 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
754 "movd %%mm4, 112+" #dst " \n\t" /* dst+112: A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
755 "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 (reloaded, mm2 was reused) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
756 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
757 "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
758 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
759 "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
760 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
761 "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
762 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
763 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
764 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
765 "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
766 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
767 "psrad $" #shift ", %%mm5 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
768 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
769 "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
770 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
771 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
772 "psrad $" #shift ", %%mm6 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
773 "psrad $" #shift ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
774 "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
775 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
776 "movd %%mm2, 32+" #dst " \n\t" /* dst+32: A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
777 "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
778 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
779 "movd %%mm6, 48+" #dst " \n\t" /* dst+48: A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
780 "movd %%mm4, 64+" #dst " \n\t" /* dst+64: A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
781 "movd %%mm5, 80+" #dst " \n\t" /* dst+80: A2-B2 a2-b2 */
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
782
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
783
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * Four expansions of the IDCT variant defined directly above. Each one reads
 * 8 bytes further into the buffer addressed by %1 and writes 4 bytes further
 * into the buffer addressed by %0; shift is 20.
 * NOTE(review): the rounder argument is /nop, so the pasted rounder lines read
 * /nop, %%mmN - presumably the assembler treats a leading slash as a comment
 * and no rounding add is emitted here; confirm for the target assembler.
 */
784 // argument order: IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
785 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
786 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
787 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
788 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
/* Done with this path: jump forward to local label 9 (defined outside this excerpt). */
789 "jmp 9f \n\t"
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
790
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
/*
 * Entry point for local label 4. The alignment directive is emitted with a
 * leading # (presumably so the assembler ignores it - confirm).
 * Z_COND_IDCT is defined outside this excerpt; here it is given four inputs
 * relative to %0, a destination relative to %1, paddd (%2) as rounder, shift
 * 11 and a forward local label. Presumably that label is the branch target
 * taken when the inputs are all zero - confirm against the macro definition.
 */
791 "#.balign 16 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
792 "4: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
793 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
794 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
795
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * IDCT, variant that ignores src1: the src1 parameter is kept in the
 * signature but never pasted into the assembly text, so the odd part B0..B3
 * is built from src5 alone (one pmaddwd of mm3 per term).
 * NOTE(review): presumably used on the path where the src1 inputs are known
 * to be zero - confirm against the branch logic that reaches this code.
 *
 * The even part A0..A3 still uses src0 (C4) and src4 (C2/C6), with rounder
 * applied to both C4 products. Constants are read relative to %2.
 *
 * Every sum/difference is shifted right arithmetically by shift, packed to
 * 16 bits with signed saturation and written with one 32-bit movd:
 *   dst+0:  A0+B0   dst+16: A1+B1   dst+32: A2+B2   dst+48:  A3+B3
 *   dst+64: A3-B3   dst+80: A2-B2   dst+96: A1-B1   dst+112: A0-B0
 *
 * All of mm0-mm7 are overwritten (mm2 only as scratch).
 */
796 #undef IDCT
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
797 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
798 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
799 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
800 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
801 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
802 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
803 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
804 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
805 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
806 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
807 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
808 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
809 #rounder ", %%mm4 \n\t" /* rounder text pasted as an instruction acting on mm4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
810 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
811 #rounder ", %%mm0 \n\t" /* rounder text pasted as an instruction acting on mm0 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
812 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
813 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
814 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
815 "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
816 "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
817 "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
818 "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 (this is all of B0 here) */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
819 "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
820 "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 (this is all of B1 here) */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
821 "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
822 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
823 "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0, computed as 2A0-(A0+B0) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
824 "psrad $" #shift ", %%mm1 \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
825 "psrad $" #shift ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
826 "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
827 "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
828 "psubd %%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
829 "psrad $" #shift ", %%mm0 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
830 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
831 "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
832 "movd %%mm1, " #dst " \n\t" /* dst+0: A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
833 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
834 "movd %%mm0, 16+" #dst " \n\t" /* dst+16: A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
835 "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
836 "movd %%mm2, 96+" #dst " \n\t" /* dst+96: A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
837 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
838 "movd %%mm4, 112+" #dst " \n\t" /* dst+112: A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
839 "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
840 "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 (this is all of B2 here) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
841 "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
842 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 (this is all of B3 here) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
843 "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
844 "psubd %%mm1, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
845 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
846 "psrad $" #shift ", %%mm5 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
847 "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
848 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
849 "psubd %%mm3, %%mm1 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
850 "psrad $" #shift ", %%mm6 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
851 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
852 "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
853 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
854 "movd %%mm2, 32+" #dst " \n\t" /* dst+32: A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
855 "packssdw %%mm1, %%mm1 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
856 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
857 "movd %%mm6, 48+" #dst " \n\t" /* dst+48: A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
858 "movd %%mm1, 64+" #dst " \n\t" /* dst+64: A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
859 "movd %%mm5, 80+" #dst " \n\t" /* dst+80: A2-B2 a2-b2 */
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
860
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * Four expansions of the IDCT variant defined directly above (the one that
 * never reads its src1 argument, so the 32(%1)..56(%1) operands are unused).
 * Each one reads 8 bytes further into the buffer addressed by %1 and writes
 * 4 bytes further into the buffer addressed by %0; shift is 20.
 * NOTE(review): the rounder argument is /nop - presumably the assembler
 * treats the pasted rounder lines as comments; confirm.
 */
861 // argument order: IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
862 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
863 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
864 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
865 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
/* Done with this path: jump forward to local label 9 (defined outside this excerpt). */
866 "jmp 9f \n\t"
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
867
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
/*
 * Entry point for local label 6 (named as the last argument of a Z_COND_IDCT
 * expansion earlier in this statement). The alignment directive is emitted
 * with a leading # (presumably so the assembler ignores it - confirm).
 * Z_COND_IDCT is defined outside this excerpt; presumably its last argument
 * (7f) is the branch target taken when the inputs are all zero - confirm.
 */
868 "#.balign 16 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
869 "6: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
870 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
871
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * IDCT, variant that ignores src4 and src1: both parameters are kept in the
 * signature but never pasted into the assembly text.
 * NOTE(review): presumably used on the path where the src4 and src1 inputs
 * are known to be zero - confirm against the branch logic that reaches this
 * code.
 *
 * Without the src4 term the even part collapses to the two C4 products of
 * src0 (with rounder applied): A0 and A3 are the same value (mm4 copied to
 * mm6), and A1 and A2 are the same value (mm0 copied to mm5). The odd part
 * B0..B3 is built from src5 alone. Constants are read relative to %2.
 *
 * Every sum/difference is shifted right arithmetically by shift, packed to
 * 16 bits with signed saturation and written with one 32-bit movd:
 *   dst+0:  A0+B0   dst+16: A1+B1   dst+32: A2+B2   dst+48:  A3+B3
 *   dst+64: A3-B3   dst+80: A2-B2   dst+96: A1-B1   dst+112: A0-B0
 *
 * All of mm0-mm7 are overwritten.
 */
872 #undef IDCT
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
873 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
874 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
875 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
876 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
877 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
878 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
879 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
880 #rounder ", %%mm4 \n\t" /* rounder text pasted as an instruction acting on mm4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
881 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 (A3 equals A0 here) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
882 #rounder ", %%mm0 \n\t" /* rounder text pasted as an instruction acting on mm0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
883 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 (A2 equals A1 here) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
884 "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
885 "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 (this is all of B0 here) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
886 "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
887 "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 (this is all of B1 here) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
888 "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
889 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
890 "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0, computed as 2A0-(A0+B0) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
891 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
892 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
893 "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
894 "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
895 "psubd %%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
896 "psrad $" #shift ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
897 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
898 "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
899 "movd %%mm1, " #dst " \n\t" /* dst+0: A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
900 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
901 "movd %%mm0, 16+" #dst " \n\t" /* dst+16: A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
902 "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
903 "movd %%mm2, 96+" #dst " \n\t" /* dst+96: A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
904 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
905 "movd %%mm4, 112+" #dst " \n\t" /* dst+112: A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
906 "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
907 "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 (this is all of B2 here) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
908 "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
909 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 (this is all of B3 here) */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
910 "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
911 "psubd %%mm1, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
912 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
913 "psrad $" #shift ", %%mm5 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
914 "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
915 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
916 "psubd %%mm3, %%mm1 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
917 "psrad $" #shift ", %%mm6 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
918 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
919 "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
920 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
921 "movd %%mm2, 32+" #dst " \n\t" /* dst+32: A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
922 "packssdw %%mm1, %%mm1 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
923 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
924 "movd %%mm6, 48+" #dst " \n\t" /* dst+48: A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
925 "movd %%mm1, 64+" #dst " \n\t" /* dst+64: A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
926 "movd %%mm5, 80+" #dst " \n\t" /* dst+80: A2-B2 a2-b2 */
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
927
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
928
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * Four expansions of the IDCT variant defined directly above (the one that
 * never reads its src4 and src1 arguments, so the 64(%1)..88(%1) and
 * 32(%1)..56(%1) operands are unused). Each one reads 8 bytes further into
 * the buffer addressed by %1 and writes 4 bytes further into the buffer
 * addressed by %0; shift is 20.
 * NOTE(review): the rounder argument is /nop - presumably the assembler
 * treats the pasted rounder lines as comments; confirm.
 */
929 // argument order: IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
930 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
931 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
932 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
933 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/* Done with this path: jump forward to local label 9 (defined outside this excerpt). */
934 "jmp 9f \n\t"
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
935
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * Entry point for local label 2 (the jump that targets it is outside this
 * excerpt). The alignment directive is emitted with a leading # (presumably
 * so the assembler ignores it - confirm).
 * Z_COND_IDCT is defined outside this excerpt; presumably its last argument
 * (3f) is the branch target taken when the inputs are all zero - confirm.
 */
936 "#.balign 16 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
937 "2: \n\t"
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
938 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
939
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
940 #undef IDCT
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
941 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
942 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
943 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
944 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
945 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
946 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
947 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
948 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
949 #rounder ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
950 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
951 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
952 #rounder ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
953 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
954 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
955 "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
956 "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
957 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
958 "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
959 "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
960 "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
961 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
962 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
963 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
964 "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
965 "psrad $" #shift ", %%mm7 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
966 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
967 "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
968 "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
969 "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
970 "psrad $" #shift ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
971 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
972 "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
973 "movd %%mm7, " #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
974 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
975 "movd %%mm0, 16+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
976 "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
977 "movd %%mm2, 96+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
978 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
979 "movd %%mm4, 112+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
980 "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
981 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
982 "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
983 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
984 "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
985 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
986 "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
987 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
988 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
989 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
990 "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
991 "psrad $" #shift ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
992 "psrad $" #shift ", %%mm5 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
993 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
994 "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
995 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
996 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
997 "psrad $" #shift ", %%mm6 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
998 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
999 "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1000 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1001 "movd %%mm2, 32+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1002 "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1003 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1004 "movd %%mm6, 48+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1005 "movd %%mm4, 64+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1006 "movd %%mm5, 80+" #dst " \n\t"
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1007
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1008 //IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1009 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1010 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1011 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1012 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1013 "jmp 9f \n\t"
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1014
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1015 "#.balign 16 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1016 "3: \n\t"
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1017 #undef IDCT
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1018 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1019 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1020 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1021 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1022 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1023 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1024 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1025 #rounder ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1026 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1027 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1028 #rounder ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1029 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1030 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1031 "movq 64(%2), %%mm3 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1032 "pmaddwd %%mm2, %%mm3 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1033 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1034 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1035 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1036 "psrad $" #shift ", %%mm7 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1037 "psrad $" #shift ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1038 "movq %%mm0, %%mm1 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1039 "paddd %%mm3, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1040 "psubd %%mm3, %%mm1 \n\t" /* A1-B1 a1-b1 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1041 "psrad $" #shift ", %%mm0 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1042 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1043 "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1044 "movd %%mm7, " #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1045 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1046 "movd %%mm0, 16+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1047 "packssdw %%mm1, %%mm1 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1048 "movd %%mm1, 96+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1049 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1050 "movd %%mm4, 112+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1051 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1052 "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1053 "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1054 "movq %%mm5, %%mm1 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1055 "paddd %%mm4, %%mm1 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1056 "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1057 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1058 "psrad $" #shift ", %%mm5 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1059 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1060 "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1061 "psubd %%mm2, %%mm4 \n\t" /* A3-B3 a3-b3 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1062 "psrad $" #shift ", %%mm6 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1063 "psrad $" #shift ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1064 "packssdw %%mm1, %%mm1 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1065 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1066 "movd %%mm1, 32+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1067 "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1068 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1069 "movd %%mm6, 48+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1070 "movd %%mm4, 64+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1071 "movd %%mm5, 80+" #dst " \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1072
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1073
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1074 //IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1075 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1076 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1077 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1078 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1079 "jmp 9f \n\t"
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1080
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1081 "#.balign 16 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1082 "5: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1083 #undef IDCT
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1084 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1085 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1086 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1087 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1088 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1089 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1090 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1091 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1092 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1093 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1094 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1095 #rounder ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1096 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1097 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1098 #rounder ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1099 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1100 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1101 "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1102 "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1103 "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1104 "movq 8+" #src4 ", %%mm3 \n\t" /* R6 R2 r6 r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1105 "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1106 "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1107 "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1108 "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1109 "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1110 "pmaddwd %%mm3, %%mm7 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1111 "pmaddwd 40(%2), %%mm3 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1112 #rounder ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1113 "paddd %%mm1, %%mm7 \n\t" /* A0 a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1114 "paddd %%mm1, %%mm1 \n\t" /* 2C0 2c0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1115 #rounder ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1116 "psubd %%mm7, %%mm1 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1117 "paddd %%mm2, %%mm3 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1118 "paddd %%mm2, %%mm2 \n\t" /* 2C1 2c1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1119 "psubd %%mm3, %%mm2 \n\t" /* A2 a2 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1120 "psrad $" #shift ", %%mm4 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1121 "psrad $" #shift ", %%mm7 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1122 "psrad $" #shift ", %%mm3 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1123 "packssdw %%mm7, %%mm4 \n\t" /* A0 a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1124 "movq %%mm4, " #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1125 "psrad $" #shift ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1126 "packssdw %%mm3, %%mm0 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1127 "movq %%mm0, 16+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1128 "movq %%mm0, 96+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1129 "movq %%mm4, 112+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1130 "psrad $" #shift ", %%mm5 \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1131 "psrad $" #shift ", %%mm6 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1132 "psrad $" #shift ", %%mm2 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1133 "packssdw %%mm2, %%mm5 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1134 "movq %%mm5, 32+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1135 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1136 "packssdw %%mm1, %%mm6 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1137 "movq %%mm6, 48+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1138 "movq %%mm6, 64+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1139 "movq %%mm5, 80+" #dst " \n\t"
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1140
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1141
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1142 //IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1143 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1144 //IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1145 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1146 //IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1147 "jmp 9f \n\t"
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1148
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1149
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1150 "#.balign 16 \n\t"\
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1151 "1: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1152 #undef IDCT
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1153 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1154 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1155 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1156 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1157 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1158 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1159 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1160 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1161 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1162 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1163 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1164 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1165 #rounder ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1166 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1167 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1168 #rounder ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1169 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1170 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1171 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1172 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1173 "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1174 "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1175 "movq 64(%2), %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1176 "pmaddwd %%mm2, %%mm1 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1177 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1178 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1179 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1180 "psrad $" #shift ", %%mm7 \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1181 "psrad $" #shift ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1182 "movq %%mm0, %%mm3 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1183 "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1184 "psubd %%mm1, %%mm3 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1185 "psrad $" #shift ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1186 "psrad $" #shift ", %%mm3 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1187 "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1188 "movd %%mm7, " #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1189 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1190 "movd %%mm0, 16+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1191 "packssdw %%mm3, %%mm3 \n\t" /* A1-B1 a1-b1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1192 "movd %%mm3, 96+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1193 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1194 "movd %%mm4, 112+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1195 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1196 "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1197 "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1198 "movq %%mm5, %%mm3 \n\t" /* A2 a2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1199 "paddd %%mm4, %%mm3 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1200 "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1201 "psrad $" #shift ", %%mm3 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1202 "psrad $" #shift ", %%mm5 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1203 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1204 "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1205 "psubd %%mm2, %%mm4 \n\t" /* A3-B3 a3-b3 */\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1206 "psrad $" #shift ", %%mm6 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1207 "packssdw %%mm3, %%mm3 \n\t" /* A2+B2 a2+b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1208 "movd %%mm3, 32+" #dst " \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1209 "psrad $" #shift ", %%mm4 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1210 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1211 "movd %%mm6, 48+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1212 "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1213 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1214 "movd %%mm4, 64+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1215 "movd %%mm5, 80+" #dst " \n\t"
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1216
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1217
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1218 //IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1219 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1220 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1221 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1222 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1223 "jmp 9f \n\t"
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1224
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1225
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1226 "#.balign 16 \n\t"
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1227 "7: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * IDCT(src0, src4, src1, src5, dst, rounder, shift) -- redefinition that
 * replaces whatever IDCT was defined as before the #undef.
 *
 * What this variant emits, as visible in the instruction strings below:
 *  - Only src0 is read (8 bytes at src0 and 8 bytes at 8+src0). src4, src1
 *    and src5 are not referenced in the body; NOTE(review): presumably they
 *    are kept so every IDCT() definition shares one argument list -- confirm.
 *  - Each 8-byte load is multiplied with pmaddwd against 16(%2) and 24(%2)
 *    (annotated "C4 C4 C4 C4" and "-C4 C4 -C4 C4"), passed through the
 *    rounder text, and shifted right arithmetically by `shift`.
 *  - The results are packed with packssdw into mm4 ("A0 a0") and mm0
 *    ("A1 a1").
 *  - mm4 is stored at dst+0, dst+48, dst+64 and dst+112;
 *    mm0 is stored at dst+16, dst+32, dst+80 and dst+96.
 *  - mm7 is loaded from 32(%2) ("C6 C2 C6 C2") but is not read again inside
 *    this macro; NOTE(review): looks like a leftover load -- confirm before
 *    removing.
 *
 * `rounder` is stringized and forms the start of an instruction line, so the
 * argument must supply the mnemonic and source operand. The call sites pass
 * "/nop"; NOTE(review): presumably the assembler treats a line starting with
 * '/' as a comment, making the rounding step a no-op -- confirm for the
 * target assembler.
 */
1228 #undef IDCT
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1229 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1230 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1231 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1232 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1233 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1234 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1235 #rounder ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1236 #rounder ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1237 "psrad $" #shift ", %%mm4 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1238 "psrad $" #shift ", %%mm0 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1239 "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1240 "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1241 "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1242 "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1243 "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1244 "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1245 #rounder ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1246 #rounder ", %%mm2 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1247 "psrad $" #shift ", %%mm1 \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1248 "packssdw %%mm1, %%mm4 \n\t" /* A0 a0 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1249 "movq %%mm4, " #dst " \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1250 "psrad $" #shift ", %%mm2 \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1251 "packssdw %%mm2, %%mm0 \n\t" /* A1 a1 */\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1252 "movq %%mm0, 16+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1253 "movq %%mm0, 96+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1254 "movq %%mm4, 112+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1255 "movq %%mm0, 32+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1256 "movq %%mm4, 48+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1257 "movq %%mm4, 64+" #dst " \n\t"\
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1258 "movq %%mm0, 80+" #dst " \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1259
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1260 //IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1261 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1262 //IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1263 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1264 //IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1265
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1266
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1267 #endif
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1268
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1269 /*
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1270 Input
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1271 00 40 04 44 20 60 24 64
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1272 10 30 14 34 50 70 54 74
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1273 01 41 03 43 21 61 23 63
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1274 11 31 13 33 51 71 53 73
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1275 02 42 06 46 22 62 26 66
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1276 12 32 16 36 52 72 56 76
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1277 05 45 07 47 25 65 27 67
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1278 15 35 17 37 55 75 57 77
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1279
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1280 Temp
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1281 00 04 10 14 20 24 30 34
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1282 40 44 50 54 60 64 70 74
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1283 01 03 11 13 21 23 31 33
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1284 41 43 51 53 61 63 71 73
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1285 02 06 12 16 22 26 32 36
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1286 42 46 52 56 62 66 72 76
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1287 05 07 15 17 25 27 35 37
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1288 45 47 55 57 65 67 75 77
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1289 */
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1290
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1291 "9: \n\t"
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1292 :: "r" (block), "r" (temp), "r" (coeffs)
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1293 : "%eax"
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1294 );
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1295 }
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1296
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
/**
 * Public wrapper that runs idct() on the given coefficient block and does
 * nothing else; no pixel data is written here.
 *
 * @param block coefficients handed unchanged to idct().
 *              NOTE(review): presumably 64 int16_t values transformed in
 *              place -- confirm against idct().
 */
1297 void ff_simple_idct_mmx(int16_t *block)
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1298 {
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1299 idct(block);
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1300 }
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1301
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1302 //FIXME merge add/put into the idct
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1303
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 854
diff changeset
/**
 * Runs idct() on block, then passes block, dest and line_size to
 * put_pixels_clamped_mmx().
 *
 * @param dest      destination buffer forwarded to put_pixels_clamped_mmx().
 * @param line_size forwarded unchanged to put_pixels_clamped_mmx().
 *                  NOTE(review): presumably the byte stride between
 *                  destination rows -- confirm.
 * @param block     coefficient block; transformed by idct() first.
 *
 * NOTE(review): put_pixels_clamped_mmx() is defined outside this file's
 * visible part; by its name it presumably overwrites dest with the clamped
 * samples -- confirm.
 */
1304 void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1305 {
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1306 idct(block);
854
3034f1816596 * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 706
diff changeset
1307 put_pixels_clamped_mmx(block, dest, line_size);
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1308 }
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 854
diff changeset
/**
 * Runs idct() on block, then passes block, dest and line_size to
 * add_pixels_clamped_mmx().
 *
 * @param dest      destination buffer forwarded to add_pixels_clamped_mmx().
 * @param line_size forwarded unchanged to add_pixels_clamped_mmx().
 *                  NOTE(review): presumably the byte stride between
 *                  destination rows -- confirm.
 * @param block     coefficient block; transformed by idct() first.
 *
 * NOTE(review): add_pixels_clamped_mmx() is defined outside this file's
 * visible part; by its name it presumably adds the samples to the existing
 * contents of dest with clamping -- confirm.
 */
1309 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1310 {
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1311 idct(block);
854
3034f1816596 * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 706
diff changeset
1312 add_pixels_clamped_mmx(block, dest, line_size);
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1313 }