annotate i386/simple_idct_mmx.c @ 5757:ace63c809071 libavcodec

Remove uses of SIGILL for CPU extension detection, that method is not acceptable in a library. Should not change anything for PPC, the autodetection is currently pointless due to other code being compiled with -maltivec as well (and detection for OSX and AmigaOS remains in place). SPARC binaries built with VIS support can now only run on systems with VIS.
author reimar
date Tue, 02 Oct 2007 18:18:35 +0000
parents d5ba514e3f4a
children f3da7b2592aa
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1 /*
429
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
2 * Simple IDCT MMX
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
3 *
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
4 * Copyright (c) 2001, 2002 Michael Niedermayer <michaelni@gmx.at>
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
5 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
6 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
7 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
8 * FFmpeg is free software; you can redistribute it and/or
429
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
9 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
10 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
11 * version 2.1 of the License, or (at your option) any later version.
429
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
12 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
13 * FFmpeg is distributed in the hope that it will be useful,
429
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
16 * Lesser General Public License for more details.
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
17 *
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
18 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
19 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
429
718a22dc121f license/copyright change
glantau
parents: 352
diff changeset
21 */
5010
d5ba514e3f4a Add libavcodec to compiler include flags in order to simplify header
diego
parents: 3947
diff changeset
22 #include "dsputil.h"
d5ba514e3f4a Add libavcodec to compiler include flags in order to simplify header
diego
parents: 3947
diff changeset
23 #include "simple_idct.h"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
24
351
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
25 /*
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
26 23170.475006
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
27 22725.260826
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
28 21406.727617
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
29 19265.545870
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
30 16384.000000
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
31 12872.826198
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
32 8866.956905
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
33 4520.335430
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
34 */
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
35 #define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
36 #define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
37 #define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
38 #define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
351
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
39 #if 0
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
40 #define C4 16384 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
351
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
41 #else
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
42 #define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
6cef8253faab set C4 to 16383
michaelni
parents: 350
diff changeset
43 #endif
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
44 #define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
45 #define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
46 #define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
47
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
48 #define ROW_SHIFT 11
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
49 #define COL_SHIFT 20 // 6
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
50
1845
3054613980a8 attribute used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents: 1469
diff changeset
51 static const uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL;
3054613980a8 attribute used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents: 1469
diff changeset
52 static const uint64_t attribute_used __attribute__((aligned(8))) d40000= 0x0000000000040000ULL;
1469
43ceb6e34b06 another non const static, maybe its thread save now
michaelni
parents: 1064
diff changeset
53
43ceb6e34b06 another non const static, maybe its thread save now
michaelni
parents: 1064
diff changeset
54 static const int16_t __attribute__((aligned(8))) coeffs[]= {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
55 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
56 // 1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0,
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
57 // 0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16),
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
58 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0,
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
59 // the 1 = ((1<<(COL_SHIFT-1))/C4)<<ROW_SHIFT :)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
60 // 0, 0, 0, 0,
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
61 // 0, 0, 0, 0,
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
62
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
63 C4, C4, C4, C4,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
64 C4, -C4, C4, -C4,
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2024
diff changeset
65
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
66 C2, C6, C2, C6,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
67 C6, -C2, C6, -C2,
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2024
diff changeset
68
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
69 C1, C3, C1, C3,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
70 C5, C7, C5, C7,
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2024
diff changeset
71
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
72 C3, -C7, C3, -C7,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
73 -C1, -C5, -C1, -C5,
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2024
diff changeset
74
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
75 C5, -C1, C5, -C1,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
76 C7, C3, C7, C3,
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2024
diff changeset
77
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
78 C7, -C5, C7, -C5,
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
79 C3, -C1, C3, -C1
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
80 };
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
81
350
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 213
diff changeset
82 #if 0
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
83 static void unused_var_killer(){
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
84 int a= wm1010 + d40000;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
85 temp[0]=a;
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
86 }
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
87
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
88 static void inline idctCol (int16_t * col, int16_t *input)
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
89 {
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
90 #undef C0
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
91 #undef C1
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
92 #undef C2
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
93 #undef C3
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
94 #undef C4
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
95 #undef C5
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
96 #undef C6
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
97 #undef C7
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
98 int a0, a1, a2, a3, b0, b1, b2, b3;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
99 const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
100 const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
101 const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
102 const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
103 const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
104 const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
105 const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
106 const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
107 /*
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
108 if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
109 col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
110 col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
111 return;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
112 }*/
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
113
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
114 col[8*0] = input[8*0 + 0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
115 col[8*1] = input[8*2 + 0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
116 col[8*2] = input[8*0 + 1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
117 col[8*3] = input[8*2 + 1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
118 col[8*4] = input[8*4 + 0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
119 col[8*5] = input[8*6 + 0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
120 col[8*6] = input[8*4 + 1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
121 col[8*7] = input[8*6 + 1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
122
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
123 a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6] + (1<<(COL_SHIFT-1));
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
124 a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6] + (1<<(COL_SHIFT-1));
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
125 a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6] + (1<<(COL_SHIFT-1));
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
126 a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6] + (1<<(COL_SHIFT-1));
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
127
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
128 b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
129 b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
130 b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
131 b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7];
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
132
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
133 col[8*0] = (a0 + b0) >> COL_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
134 col[8*1] = (a1 + b1) >> COL_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
135 col[8*2] = (a2 + b2) >> COL_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
136 col[8*3] = (a3 + b3) >> COL_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
137 col[8*4] = (a3 - b3) >> COL_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
138 col[8*5] = (a2 - b2) >> COL_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
139 col[8*6] = (a1 - b1) >> COL_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
140 col[8*7] = (a0 - b0) >> COL_SHIFT;
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
141 }
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
142
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
143 static void inline idctRow (int16_t * output, int16_t * input)
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
144 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
145 int16_t row[8];
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
146
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
147 int a0, a1, a2, a3, b0, b1, b2, b3;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
148 const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
149 const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
150 const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
151 const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
152 const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
153 const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
154 const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
155 const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
156
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
157 row[0] = input[0];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
158 row[2] = input[1];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
159 row[4] = input[4];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
160 row[6] = input[5];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
161 row[1] = input[8];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
162 row[3] = input[9];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
163 row[5] = input[12];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
164 row[7] = input[13];
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
165
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
166 if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7]) ) {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
167 row[0] = row[1] = row[2] = row[3] = row[4] =
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
168 row[5] = row[6] = row[7] = row[0]<<3;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
169 output[0] = row[0];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
170 output[2] = row[1];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
171 output[4] = row[2];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
172 output[6] = row[3];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
173 output[8] = row[4];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
174 output[10] = row[5];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
175 output[12] = row[6];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
176 output[14] = row[7];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
177 return;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
178 }
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
179
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
180 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1));
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
181 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1));
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
182 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1));
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
183 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1));
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
184
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
185 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
186 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
187 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
188 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
189
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
190 row[0] = (a0 + b0) >> ROW_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
191 row[1] = (a1 + b1) >> ROW_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
192 row[2] = (a2 + b2) >> ROW_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
193 row[3] = (a3 + b3) >> ROW_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
194 row[4] = (a3 - b3) >> ROW_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
195 row[5] = (a2 - b2) >> ROW_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
196 row[6] = (a1 - b1) >> ROW_SHIFT;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
197 row[7] = (a0 - b0) >> ROW_SHIFT;
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
198
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
199 output[0] = row[0];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
200 output[2] = row[1];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
201 output[4] = row[2];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
202 output[6] = row[3];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
203 output[8] = row[4];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
204 output[10] = row[5];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
205 output[12] = row[6];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
206 output[14] = row[7];
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
207 }
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
208 #endif
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
209
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
210 static inline void idct(int16_t *block)
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
211 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
212 int64_t __attribute__((aligned(8))) align_tmp[16];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
213 int16_t * const temp= (int16_t*)align_tmp;
1469
43ceb6e34b06 another non const static, maybe its thread save now
michaelni
parents: 1064
diff changeset
214
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
215 asm volatile(
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
216 #if 0 //Alternative, simpler variant
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
217
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
218 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
219 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
220 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
221 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
222 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
223 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
224 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
225 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
226 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
227 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
228 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
229 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
230 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
231 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
232 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
233 #rounder ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
234 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
235 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
236 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
237 "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
238 "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
239 #rounder ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
240 "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
241 "paddd %%mm0, %%mm0 \n\t" \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
242 "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
243 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
244 "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
245 "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
246 "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
247 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
248 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
249 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
250 "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
251 "psrad $" #shift ", %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
252 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
253 "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
254 "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
255 "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
256 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
257 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
258 "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
259 "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
260 "movq %%mm7, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
261 "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
262 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
263 "movq %%mm2, 24+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
264 "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
265 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
266 "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
267 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
268 "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
269 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
270 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
271 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
272 "psubd %%mm4, %%mm0 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
273 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
274 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
275 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
276 "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
277 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
278 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
279 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
280 "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
281 "movq %%mm2, 8+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
282 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
283 "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
284 "movq %%mm4, 16+" #dst " \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
285
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
286 #define COL_IDCT(src0, src4, src1, src5, dst, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
287 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
288 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
289 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
290 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
291 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
292 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
293 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
294 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
295 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
296 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
297 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
298 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
299 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
300 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
301 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
302 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
303 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
304 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
305 "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
306 "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
307 "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
308 "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
309 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
310 "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
311 "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
312 "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
313 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
314 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
315 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
316 "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
317 "psrad $" #shift ", %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
318 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
319 "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
320 "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
321 "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
322 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
323 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
324 "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
325 "movd %%mm7, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
326 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
327 "movd %%mm0, 16+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
328 "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
329 "movd %%mm2, 96+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
330 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
331 "movd %%mm4, 112+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
332 "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
333 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
334 "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
335 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
336 "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
337 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
338 "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
339 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
340 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
341 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
342 "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
343 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
344 "psrad $" #shift ", %%mm5 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
345 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
346 "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
347 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
348 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
349 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
350 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
351 "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
352 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
353 "movd %%mm2, 32+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
354 "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
355 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
356 "movd %%mm6, 48+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
357 "movd %%mm4, 64+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
358 "movd %%mm5, 80+" #dst " \n\t"\
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
359
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2024
diff changeset
360
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
361 #define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
362 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
363 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
364 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
365 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
366 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
367 "pand %%mm0, %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
368 "por %%mm1, %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
369 "por %%mm2, %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
370 "por %%mm3, %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
371 "packssdw %%mm4,%%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
372 "movd %%mm4, %%eax \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
373 "orl %%eax, %%eax \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
374 "jz 1f \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
375 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
376 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
377 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
378 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
379 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
380 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
381 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
382 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
383 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
384 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
385 #rounder ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
386 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
387 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
388 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
389 "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
390 "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
391 #rounder ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
392 "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
393 "paddd %%mm0, %%mm0 \n\t" \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
394 "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
395 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
396 "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
397 "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
398 "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
399 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
400 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
401 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
402 "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
403 "psrad $" #shift ", %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
404 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
405 "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
406 "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
407 "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
408 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
409 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
410 "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
411 "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
412 "movq %%mm7, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
413 "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
414 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
415 "movq %%mm2, 24+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
416 "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
417 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
418 "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
419 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
420 "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
421 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
422 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
423 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
424 "psubd %%mm4, %%mm0 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
425 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
426 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
427 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
428 "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
429 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
430 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
431 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
432 "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
433 "movq %%mm2, 8+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
434 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
435 "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
436 "movq %%mm4, 16+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
437 "jmp 2f \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
438 "1: \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
439 "pslld $16, %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
440 "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
441 "psrad $13, %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
442 "packssdw %%mm0, %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
443 "movq %%mm0, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
444 "movq %%mm0, 8+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
445 "movq %%mm0, 16+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
446 "movq %%mm0, 24+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
447 "2: \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
448
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
449
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
450 //IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
451 ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
452 /*ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1), paddd (%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
453 ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1), paddd (%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
454 ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1), paddd (%2), 11)*/
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
455
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
456 DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
457 DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
458 DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
459
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
460
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
461 //IDCT( src0, src4, src1, src5, dst, shift)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
462 COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
463 COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
464 COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
465 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
466
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
467 #else
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
468
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * DC_COND_IDCT: one 1-D IDCT pass with a shortcut for (nearly) empty input.
 * Expands to a run of asm string literals, so it is only usable inside an
 * asm statement.
 *
 * src0, src4, src1, src5: memory operands, each loaded as one quadword into
 *     mm0, mm1, mm2, mm3. Per the load comments they carry the coefficient
 *     pairs (R4 R0), (R6 R2), (R3 R1), (R7 R5) of two vectors, written in
 *     upper and lower case.
 * dst:     memory operand; quadwords are stored at dst, 8+dst, 16+dst, 24+dst.
 * rounder: instruction text without its destination; the macro appends
 *          ", %%mm4" and ", %%mm0" to it.
 * shift:   immediate count of the psrad applied to every full-path result.
 *
 * Asm operand %2 is used as the base of the constant table (offsets 16..104
 * are read). The macro overwrites mm0-mm7 and eax and defines the local
 * labels 1 and 2.
 *
 * Full path output layout (from the pack comments):
 *     dst    = A1+B1 a1+b1 A0+B0 a0+b0      8+dst  = A3+B3 a3+b3 A2+B2 a2+b2
 *     16+dst = A2-B2 a2-b2 A3-B3 a3-b3      24+dst = A0-B0 a0-b0 A1-B1 a1-b1
 *
 * Shortcut path (label 1): taken when (src0 & wm1010) | src4 | src1 | src5 is
 * zero. It derives one value per vector from the low word of each src0 dword
 * using the fixed shifts 16 and 13 (independent of rounder and shift) and
 * stores that quadword to all four destinations.
 */
469 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
470 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
471 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
472 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
473 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
474 "movq "MANGLE(wm1010)", %%mm4 \n\t" /* mask defined outside this macro; NOTE(review): presumably clears the R0/r0 words - confirm */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
475 "pand %%mm0, %%mm4 \n\t" /* masked copy of src0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
476 "por %%mm1, %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
477 "por %%mm2, %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
478 "por %%mm3, %%mm4 \n\t" /* OR of everything that must be zero for the shortcut */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
479 "packssdw %%mm4,%%mm4 \n\t" /* saturating pack: both dwords land in the low 32 bits, non-zero stays non-zero */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
480 "movd %%mm4, %%eax \n\t" /* overwrites eax */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
481 "orl %%eax, %%eax \n\t" /* sets ZF when eax is zero */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
482 "jz 1f \n\t" /* nothing but the unmasked src0 words present: take the shortcut */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
483 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
484 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
485 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
486 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
487 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
488 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
489 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
490 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
491 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
492 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
493 #rounder ", %%mm4 \n\t" /* caller-supplied rounding op, destination mm4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
494 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
495 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
496 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
497 "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
498 "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
499 #rounder ", %%mm0 \n\t" /* caller-supplied rounding op, destination mm0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
500 "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
501 "paddd %%mm0, %%mm0 \n\t" /* doubled, so that subtracting A1 below leaves A2 */ \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
502 "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
503 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
504 "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
505 "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
506 "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
507 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
508 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
509 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
510 "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
511 "psrad $" #shift ", %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
512 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
513 "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
514 "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
515 "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
516 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
517 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
518 "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
519 "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
520 "movq %%mm7, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
521 "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
522 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
523 "movq %%mm2, 24+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
524 "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
525 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
526 "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
527 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
528 "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
529 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
530 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
531 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
532 "psubd %%mm4, %%mm0 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
533 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
534 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
535 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
536 "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
537 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
538 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
539 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
540 "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
541 "movq %%mm2, 8+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
542 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
543 "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
544 "movq %%mm4, 16+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
545 "jmp 2f \n\t" /* full path done: skip the shortcut */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
546 "1: \n\t" /* shortcut path; mm0 still holds src0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
547 "pslld $16, %%mm0 \n\t" /* drop the high word of each dword, move the low word (R0 r0) up */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
548 "paddd "MANGLE(d40000)", %%mm0 \n\t" /* constant defined outside this macro; NOTE(review): presumably a rounding bias - confirm */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
549 "psrad $13, %%mm0 \n\t" /* 16 up, 13 down: net left shift by 3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
550 "packssdw %%mm0, %%mm0 \n\t" /* the two results repeated in both halves */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
551 "movq %%mm0, " #dst " \n\t" /* same quadword to all four outputs */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
552 "movq %%mm0, 8+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
553 "movq %%mm0, 16+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
554 "movq %%mm0, 24+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
555 "2: \n\t" /* common exit of both paths */
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
556
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * Z_COND_IDCT: one 1-D IDCT pass that is skipped entirely for all-zero input.
 * Expands to a run of asm string literals, so it is only usable inside an
 * asm statement.
 *
 * src0, src4, src1, src5: memory operands, each loaded as one quadword into
 *     mm0, mm1, mm2, mm3. Per the load comments they carry the coefficient
 *     pairs (R4 R0), (R6 R2), (R3 R1), (R7 R5) of two vectors, written in
 *     upper and lower case.
 * dst:     memory operand; quadwords are stored at dst, 8+dst, 16+dst, 24+dst.
 * rounder: instruction text without its destination; the macro appends
 *          ", %%mm4" and ", %%mm0" to it.
 * shift:   immediate count of the psrad applied to every result.
 * bt:      branch target, pasted verbatim after "jz". It is jumped to when all
 *          four input quadwords are zero; nothing has been written to dst at
 *          that point.
 *
 * Asm operand %2 is used as the base of the constant table (offsets 16..104
 * are read). The macro overwrites mm0-mm7 and eax. It defines no labels of
 * its own and falls through after the last store.
 *
 * Output layout (from the pack comments):
 *     dst    = A1+B1 a1+b1 A0+B0 a0+b0      8+dst  = A3+B3 a3+b3 A2+B2 a2+b2
 *     16+dst = A2-B2 a2-b2 A3-B3 a3-b3      24+dst = A0-B0 a0-b0 A1-B1 a1-b1
 */
557 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
558 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
559 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
560 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
561 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
562 "movq %%mm0, %%mm4 \n\t" /* start of zero test: unmasked copy of src0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
563 "por %%mm1, %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
564 "por %%mm2, %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
565 "por %%mm3, %%mm4 \n\t" /* OR of all four input quadwords */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
566 "packssdw %%mm4,%%mm4 \n\t" /* saturating pack: both dwords land in the low 32 bits, non-zero stays non-zero */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
567 "movd %%mm4, %%eax \n\t" /* overwrites eax */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
568 "orl %%eax, %%eax \n\t" /* sets ZF when eax is zero */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
569 "jz " #bt " \n\t" /* all inputs zero: leave for the caller-supplied label */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
570 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
571 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
572 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
573 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
574 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
575 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
576 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
577 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
578 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
579 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
580 #rounder ", %%mm4 \n\t" /* caller-supplied rounding op, destination mm4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
581 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
582 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
583 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
584 "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
585 "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
586 #rounder ", %%mm0 \n\t" /* caller-supplied rounding op, destination mm0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
587 "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
588 "paddd %%mm0, %%mm0 \n\t" /* doubled, so that subtracting A1 below leaves A2 */ \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
589 "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
590 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
591 "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
592 "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
593 "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
594 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
595 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
596 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
597 "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
598 "psrad $" #shift ", %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
599 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
600 "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
601 "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
602 "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
603 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
604 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
605 "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
606 "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
607 "movq %%mm7, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
608 "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
609 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
610 "movq %%mm2, 24+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
611 "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
612 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
613 "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
614 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
615 "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
616 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
617 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
618 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
619 "psubd %%mm4, %%mm0 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
620 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
621 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
622 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
623 "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
624 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
625 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
626 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
627 "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
628 "movq %%mm2, 8+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
629 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
630 "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
631 "movq %%mm4, 16+" #dst " \n\t" /* NOTE: the trailing backslash continues the macro onto the following empty source line; keep that line empty */\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
632
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
633 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
634 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
635 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
636 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
637 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
638 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
639 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
640 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
641 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
642 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
643 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
644 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
645 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
646 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
647 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
648 #rounder ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
649 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
650 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
651 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
652 "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
653 "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
654 #rounder ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
655 "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
656 "paddd %%mm0, %%mm0 \n\t" \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
657 "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
658 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
659 "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
660 "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
661 "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
662 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
663 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
664 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
665 "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
666 "psrad $" #shift ", %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
667 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
668 "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
669 "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
670 "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
671 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
672 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
673 "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
674 "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
675 "movq %%mm7, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
676 "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
677 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
678 "movq %%mm2, 24+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
679 "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
680 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
681 "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
682 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
683 "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
684 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
685 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
686 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
687 "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
688 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
689 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
690 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
691 "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
692 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
693 "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
694 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
695 "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
696 "movq %%mm2, 8+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
697 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
698 "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
699 "movq %%mm4, 16+" #dst " \n\t"\
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
700
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * First stage: four macro invocations, one per group of four source operands
 * at %0 (groups start at byte offsets 0, 32, 64 and 96).  Each group writes to
 * the same byte offset from %1 and uses shift 11.
 * The first group goes through DC_COND_IDCT with rounder "paddd 8(%2)"; the
 * other three go through Z_COND_IDCT with rounder "paddd (%2)" and one extra
 * argument, a forward local label (4f, 2f, 1f).
 * NOTE(review): DC_COND_IDCT and Z_COND_IDCT are defined elsewhere in the
 * file; the label is presumably the branch target used when the group holds
 * only zeros - confirm against the macro definitions.
 */
701 //IDCT( src0, src4, src1, src5, dst, rounder, shift)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
702 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
703 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
704 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
705 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
706
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * IDCT(src0, src4, src1, src5, dst, shift) - variant that uses all four
 * source operands.
 *
 * Loads src0, src4, src1 and src5 (8 bytes each) into mm0-mm3 and multiplies
 * them with pmaddwd against the constants at 16(%2) .. 104(%2).  The per-line
 * comments name the intermediate values: A0..A3 are built from src0/src4,
 * B0..B3 from src1/src5.  Every sum Ak+Bk and difference Ak-Bk is shifted
 * right arithmetically by `shift`, packed to 16 bit with signed saturation and
 * written with movd (two 16-bit results per store):
 *   dst+0   A0+B0     dst+112  A0-B0
 *   dst+16  A1+B1     dst+96   A1-B1
 *   dst+32  A2+B2     dst+80   A2-B2
 *   dst+48  A3+B3     dst+64   A3-B3
 * This form takes no rounder argument.  All of mm0-mm7 are overwritten.
 */
707 #undef IDCT
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
708 #define IDCT(src0, src4, src1, src5, dst, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
709 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
710 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
711 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
712 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
713 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
714 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
715 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
716 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
717 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
718 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
719 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
720 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
721 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
722 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
723 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
724 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
725 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
726 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
727 "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
728 "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
729 "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
730 "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
731 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
732 "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
733 "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
734 "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
735 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
736 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
737 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
738 "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
739 "psrad $" #shift ", %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
740 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
741 "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
742 "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
743 "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
744 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
745 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
746 "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
747 "movd %%mm7, " #dst " \n\t" /* store A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
748 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
749 "movd %%mm0, 16+" #dst " \n\t" /* store A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
750 "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
751 "movd %%mm2, 96+" #dst " \n\t" /* store A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
752 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
753 "movd %%mm4, 112+" #dst " \n\t" /* store A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
754 "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
755 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
756 "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
757 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
758 "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
759 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
760 "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
761 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
762 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
763 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
764 "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
765 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
766 "psrad $" #shift ", %%mm5 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
767 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
768 "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
769 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
770 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
771 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
772 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
773 "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
774 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
775 "movd %%mm2, 32+" #dst " \n\t" /* store A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
776 "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
777 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
778 "movd %%mm6, 48+" #dst " \n\t" /* store A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
779 "movd %%mm4, 64+" #dst " \n\t" /* store A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
780 "movd %%mm5, 80+" #dst " \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
781
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
782
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
/*
 * Second stage, using the IDCT macro defined immediately above: four
 * invocations with shift 20.  Invocation k (k = 0..3) reads its operands from
 * %1 at byte offsets 8k, 8k+64, 8k+32 and 8k+96 and writes relative to %0 at
 * byte offset 4k.
 */
783 //IDCT( src0, src4, src1, src5, dst, shift)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
784 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
785 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
786 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
787 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
/* Done with this variant: jump forward to local label 9. */
788 "jmp 9f \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
789
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3565
diff changeset
/*
 * The string "#" is pasted directly in front of whatever ASMALIGN(4) expands
 * to.  NOTE(review): presumably this turns the alignment directive into an
 * assembler comment, i.e. the alignment is disabled - confirm against the
 * ASMALIGN definition.
 */
790 "#" ASMALIGN(4) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
/*
 * Local label 4; "4f" is the label argument of the Z_COND_IDCT invocation for
 * source offsets 32..56 earlier in this asm block.
 */
791 "4: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * Remaining two first-stage groups (source offsets 64.. and 96..) for this
 * path, with label arguments 6f and 5f.
 */
792 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
793 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
794
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/*
 * IDCT(src0, src4, src1, src5, dst, shift) - variant that never references
 * src1.
 *
 * The parameter list is kept, but only src0, src4 and src5 are loaded (into
 * mm0, mm1 and mm3).  A0..A3 are built from src0/src4 as named in the per-line
 * comments; B0..B3 come from src5 alone (pmaddwd with the constants at
 * 56(%2), 72(%2), 88(%2) and 104(%2)).
 * NOTE(review): presumably used when the data behind src1 is known to be
 * zero - confirm against the branch that reaches this variant.
 * Every sum Ak+Bk and difference Ak-Bk is shifted right arithmetically by
 * `shift`, packed to 16 bit with signed saturation and written with movd (two
 * 16-bit results per store):
 *   dst+0   A0+B0     dst+112  A0-B0
 *   dst+16  A1+B1     dst+96   A1-B1
 *   dst+32  A2+B2     dst+80   A2-B2
 *   dst+48  A3+B3     dst+64   A3-B3
 * All of mm0-mm7 are overwritten.
 */
795 #undef IDCT
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
796 #define IDCT(src0, src4, src1, src5, dst, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
797 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
798 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
799 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
800 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
801 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
802 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
803 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
804 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
805 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
806 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
807 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
808 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
809 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
810 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
811 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
812 "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
813 "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
814 "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
815 "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
816 "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
817 "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
818 "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
819 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
820 "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
821 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
822 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
823 "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
824 "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
825 "psubd %%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
826 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
827 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
828 "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
829 "movd %%mm1, " #dst " \n\t" /* store A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
830 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
831 "movd %%mm0, 16+" #dst " \n\t" /* store A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
832 "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
833 "movd %%mm2, 96+" #dst " \n\t" /* store A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
834 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
835 "movd %%mm4, 112+" #dst " \n\t" /* store A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
836 "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
837 "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
838 "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
839 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
840 "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
841 "psubd %%mm1, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
842 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
843 "psrad $" #shift ", %%mm5 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
844 "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
845 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
846 "psubd %%mm3, %%mm1 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
847 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
848 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
849 "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
850 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
851 "movd %%mm2, 32+" #dst " \n\t" /* store A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
852 "packssdw %%mm1, %%mm1 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
853 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
854 "movd %%mm6, 48+" #dst " \n\t" /* store A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
855 "movd %%mm1, 64+" #dst " \n\t" /* store A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
856 "movd %%mm5, 80+" #dst " \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
857
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
/*
 * Second stage, using the IDCT macro defined immediately above (the variant
 * that never references its src1 argument): four invocations with shift 20.
 * Invocation k (k = 0..3) is given operands at %1 byte offsets 8k, 8k+64,
 * 8k+32 and 8k+96 and writes relative to %0 at byte offset 4k.
 */
858 //IDCT( src0, src4, src1, src5, dst, shift)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
859 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
860 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
861 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
862 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
/* Done with this variant: jump forward to local label 9. */
863 "jmp 9f \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
864
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3565
diff changeset
/*
 * The string "#" is pasted directly in front of whatever ASMALIGN(4) expands
 * to.  NOTE(review): presumably this turns the alignment directive into an
 * assembler comment, i.e. the alignment is disabled - confirm against the
 * ASMALIGN definition.
 */
865 "#" ASMALIGN(4) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
/*
 * Local label 6; "6f" is the label argument of the Z_COND_IDCT invocation for
 * source offsets 64..88 that follows label 4 earlier in this asm block.
 */
866 "6: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
/* Last first-stage group (source offsets 96..120) for this path, label argument 7f. */
867 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
868
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
869 #undef IDCT
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
870 #define IDCT(src0, src4, src1, src5, dst, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
871 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
872 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
873 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
874 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
875 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
876 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
877 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
878 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
879 "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
880 "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
881 "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
882 "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
883 "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
884 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
885 "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
886 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
887 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
888 "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
889 "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
890 "psubd %%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
891 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
892 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
893 "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
894 "movd %%mm1, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
895 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
896 "movd %%mm0, 16+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
897 "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
898 "movd %%mm2, 96+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
899 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
900 "movd %%mm4, 112+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
901 "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
902 "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
903 "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
904 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
905 "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
906 "psubd %%mm1, %%mm5 \n\t" /* a2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
907 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
908 "psrad $" #shift ", %%mm5 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
909 "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
910 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
911 "psubd %%mm3, %%mm1 \n\t" /* a3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
912 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
913 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
914 "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
915 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
916 "movd %%mm2, 32+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
917 "packssdw %%mm1, %%mm1 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
918 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
919 "movd %%mm6, 48+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
920 "movd %%mm1, 64+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
921 "movd %%mm5, 80+" #dst " \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
922
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
923
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
924 //IDCT( src0, src4, src1, src5, dst, shift)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
925 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
926 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
927 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
928 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
929 "jmp 9f \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
930
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3565
diff changeset
931 "#" ASMALIGN(4) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
932 "2: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
933 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
934
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
935 #undef IDCT
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
/*
 * IDCT variant that reads only src0, src1 and src5. src4 is kept in the
 * parameter list but is never referenced in the body, so the even terms come
 * from the two C4 products alone: mm4 (copied to mm6) is used as A0 and A3,
 * mm0 (copied to mm5) as A1 and A2. The odd terms B0..B3 are built from src1
 * and src5.
 * Every sum/difference is shifted right arithmetically by `shift`, packed
 * with packssdw and written with a 32-bit movd to
 *   dst+0: A0+B0, dst+16: A1+B1, dst+32: A2+B2, dst+48: A3+B3,
 *   dst+64: A3-B3, dst+80: A2-B2, dst+96: A1-B1, dst+112: A0-B0.
 * Constants are loaded from fixed offsets of asm operand %2; all of mm0-mm7
 * are overwritten.
 */
936 #define IDCT(src0, src4, src1, src5, dst, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
937 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
938 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
939 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
940 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
941 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
942 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
943 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
944 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
945 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
946 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
947 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
948 "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
949 "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
950 "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
951 "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
952 "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
953 "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
954 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
955 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
956 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
957 "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
958 "psrad $" #shift ", %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
959 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
960 "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
961 "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
962 "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
963 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
964 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
965 "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
966 "movd %%mm7, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
967 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
968 "movd %%mm0, 16+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
969 "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
970 "movd %%mm2, 96+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
971 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
972 "movd %%mm4, 112+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
973 "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
974 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
975 "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
976 "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
977 "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
978 "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
979 "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
980 "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
981 "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
982 "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
983 "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
984 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
985 "psrad $" #shift ", %%mm5 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
986 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
987 "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
988 "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
989 "psubd %%mm3, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
990 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
991 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
992 "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
993 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
994 "movd %%mm2, 32+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
995 "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
996 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
997 "movd %%mm6, 48+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
998 "movd %%mm4, 64+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
999 "movd %%mm5, 80+" #dst " \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1000
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1001 //IDCT( src0, src4, src1, src5, dst, shift)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1002 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1003 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1004 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1005 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1006 "jmp 9f \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1007
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3565
diff changeset
1008 "#" ASMALIGN(4) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1009 "3: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1010 #undef IDCT
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
/*
 * IDCT variant that reads only src0 and src1. src4 and src5 are kept in the
 * parameter list but are never referenced in the body. The even terms come
 * from the two C4 products alone (mm4/mm6 used as A0/A3, mm0/mm5 as A1/A2);
 * the odd terms are single products of src1:
 *   B0 = C3R3+C1R1, B1 = -C7R3+C3R1, B2 = -C1R3+C5R1, B3 = -C5R3+C7R1.
 * Every sum/difference is shifted right arithmetically by `shift`, packed
 * with packssdw and written with a 32-bit movd to
 *   dst+0: A0+B0, dst+16: A1+B1, dst+32: A2+B2, dst+48: A3+B3,
 *   dst+64: A3-B3, dst+80: A2-B2, dst+96: A1-B1, dst+112: A0-B0.
 * Constants are loaded from fixed offsets of asm operand %2; all of mm0-mm7
 * are overwritten.
 */
1011 #define IDCT(src0, src4, src1, src5, dst, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1012 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1013 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1014 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1015 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1016 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1017 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1018 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1019 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1020 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1021 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1022 "movq 64(%2), %%mm3 \n\t" /* -C7 C3 -C7 C3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1023 "pmaddwd %%mm2, %%mm3 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1024 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1025 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1026 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1027 "psrad $" #shift ", %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1028 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1029 "movq %%mm0, %%mm1 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1030 "paddd %%mm3, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1031 "psubd %%mm3, %%mm1 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1032 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1033 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1034 "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1035 "movd %%mm7, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1036 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1037 "movd %%mm0, 16+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1038 "packssdw %%mm1, %%mm1 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1039 "movd %%mm1, 96+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1040 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1041 "movd %%mm4, 112+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1042 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1043 "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1044 "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1045 "movq %%mm5, %%mm1 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1046 "paddd %%mm4, %%mm1 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1047 "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1048 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1049 "psrad $" #shift ", %%mm5 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1050 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1051 "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1052 "psubd %%mm2, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1053 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1054 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1055 "packssdw %%mm1, %%mm1 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1056 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1057 "movd %%mm1, 32+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1058 "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1059 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1060 "movd %%mm6, 48+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1061 "movd %%mm4, 64+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1062 "movd %%mm5, 80+" #dst " \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1063
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1064
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1065 //IDCT( src0, src4, src1, src5, dst, shift)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1066 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1067 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1068 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1069 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1070 "jmp 9f \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1071
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3565
diff changeset
1072 "#" ASMALIGN(4) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1073 "5: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1074 #undef IDCT
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
/*
 * IDCT variant that reads only src0 and src4, plus the quadwords 8 bytes
 * after each (8+src0, 8+src4). src1 and src5 are kept in the parameter list
 * but are never referenced in the body, so no odd (B) terms are computed.
 * The even terms A0..A3 are computed once for src0/src4 (mm4, mm0, mm5, mm6)
 * and once for the +8 pair (mm7, mm3, mm2, mm1), shifted right arithmetically
 * by `shift`, and the two results for each term are packed together with
 * packssdw so every store is a full 64-bit movq.
 * With no B terms each value is stored twice:
 *   A0 -> dst+0 and dst+112,  A1 -> dst+16 and dst+96,
 *   A2 -> dst+32 and dst+80,  A3 -> dst+48 and dst+64.
 * Constants are loaded from fixed offsets of asm operand %2; all of mm0-mm7
 * are overwritten.
 */
1075 #define IDCT(src0, src4, src1, src5, dst, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1076 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1077 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1078 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1079 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1080 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1081 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1082 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1083 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1084 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1085 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1086 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1087 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1088 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1089 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1090 "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1091 "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1092 "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1093 "movq 8+" #src4 ", %%mm3 \n\t" /* R6 R2 r6 r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1094 "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1095 "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1096 "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1097 "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1098 "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1099 "pmaddwd %%mm3, %%mm7 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1100 "pmaddwd 40(%2), %%mm3 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1101 "paddd %%mm1, %%mm7 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1102 "paddd %%mm1, %%mm1 \n\t" /* 2C0 2c0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1103 "psubd %%mm7, %%mm1 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1104 "paddd %%mm2, %%mm3 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1105 "paddd %%mm2, %%mm2 \n\t" /* 2C1 2c1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1106 "psubd %%mm3, %%mm2 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1107 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1108 "psrad $" #shift ", %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1109 "psrad $" #shift ", %%mm3 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1110 "packssdw %%mm7, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1111 "movq %%mm4, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1112 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1113 "packssdw %%mm3, %%mm0 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1114 "movq %%mm0, 16+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1115 "movq %%mm0, 96+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1116 "movq %%mm4, 112+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1117 "psrad $" #shift ", %%mm5 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1118 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1119 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1120 "packssdw %%mm2, %%mm5 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1121 "movq %%mm5, 32+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1122 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1123 "packssdw %%mm1, %%mm6 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1124 "movq %%mm6, 48+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1125 "movq %%mm6, 64+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1126 "movq %%mm5, 80+" #dst " \n\t"
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2024
diff changeset
1127
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1128
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1129 //IDCT( src0, src4, src1, src5, dst, shift)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1130 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1131 //IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1132 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1133 //IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1134 "jmp 9f \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1135
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1136
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3565
diff changeset
1137 "#" ASMALIGN(4) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1138 "1: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1139 #undef IDCT
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1140 #define IDCT(src0, src4, src1, src5, dst, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1141 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1142 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1143 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1144 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1145 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1146 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1147 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1148 "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1149 "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1150 "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1151 "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1152 "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1153 "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1154 "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1155 "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1156 "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1157 "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1158 "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1159 "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1160 "movq 64(%2), %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1161 "pmaddwd %%mm2, %%mm1 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1162 "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1163 "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1164 "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1165 "psrad $" #shift ", %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1166 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1167 "movq %%mm0, %%mm3 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1168 "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1169 "psubd %%mm1, %%mm3 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1170 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1171 "psrad $" #shift ", %%mm3 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1172 "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1173 "movd %%mm7, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1174 "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1175 "movd %%mm0, 16+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1176 "packssdw %%mm3, %%mm3 \n\t" /* A1-B1 a1-b1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1177 "movd %%mm3, 96+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1178 "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1179 "movd %%mm4, 112+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1180 "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1181 "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1182 "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1183 "movq %%mm5, %%mm3 \n\t" /* A2 a2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1184 "paddd %%mm4, %%mm3 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1185 "psubd %%mm4, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1186 "psrad $" #shift ", %%mm3 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1187 "psrad $" #shift ", %%mm5 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1188 "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1189 "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1190 "psubd %%mm2, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1191 "psrad $" #shift ", %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1192 "packssdw %%mm3, %%mm3 \n\t" /* A2+B2 a2+b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1193 "movd %%mm3, 32+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1194 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1195 "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1196 "movd %%mm6, 48+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1197 "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1198 "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1199 "movd %%mm4, 64+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1200 "movd %%mm5, 80+" #dst " \n\t"
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2024
diff changeset
1201
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1202
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1203 //IDCT( src0, src4, src1, src5, dst, shift)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1204 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1205 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1206 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1207 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1208 "jmp 9f \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1209
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1210
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3565
diff changeset
1211 "#" ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1212 "7: \n\t"
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1213 #undef IDCT
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1214 #define IDCT(src0, src4, src1, src5, dst, shift) \
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1215 "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1216 "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1217 "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1218 "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1219 "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1220 "psrad $" #shift ", %%mm4 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1221 "psrad $" #shift ", %%mm0 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1222 "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1223 "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1224 "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1225 "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1226 "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1227 "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1228 "psrad $" #shift ", %%mm1 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1229 "packssdw %%mm1, %%mm4 \n\t" /* A0 a0 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1230 "movq %%mm4, " #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1231 "psrad $" #shift ", %%mm2 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1232 "packssdw %%mm2, %%mm0 \n\t" /* A1 a1 */\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1233 "movq %%mm0, 16+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1234 "movq %%mm0, 96+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1235 "movq %%mm4, 112+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1236 "movq %%mm0, 32+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1237 "movq %%mm4, 48+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1238 "movq %%mm4, 64+" #dst " \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1239 "movq %%mm0, 80+" #dst " \n\t"
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1240
3565
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1241 //IDCT( src0, src4, src1, src5, dst, shift)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1242 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1243 //IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1244 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
f086f8868bb6 Support for MacIntel, take xx: '/nop' illegal for old versions of GAS
gpoirier
parents: 3036
diff changeset
1245 //IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1246
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1247
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1248 #endif
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1249
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1250 /*
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1251 Input
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1252 00 40 04 44 20 60 24 64
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1253 10 30 14 34 50 70 54 74
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1254 01 41 03 43 21 61 23 63
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1255 11 31 13 33 51 71 53 73
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1256 02 42 06 46 22 62 26 66
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1257 12 32 16 36 52 72 56 76
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1258 05 45 07 47 25 65 27 67
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1259 15 35 17 37 55 75 57 77
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2024
diff changeset
1260
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1261 Temp
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1262 00 04 10 14 20 24 30 34
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1263 40 44 50 54 60 64 70 74
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1264 01 03 11 13 21 23 31 33
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1265 41 43 51 53 61 63 71 73
209
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1266 02 06 12 16 22 26 32 36
c0d8ecae7ac5 (commit by michael)
arpi_esp
parents: 175
diff changeset
1267 42 46 52 56 62 66 72 76
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1268 05 07 15 17 25 27 35 37
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1269 45 47 55 57 65 67 75 77
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1270 */
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1271
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1272 "9: \n\t"
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1273 :: "r" (block), "r" (temp), "r" (coeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1274 : "%eax"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
1275 );
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1276 }
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1277
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
/**
 * Exported wrapper around the file-local idct(): runs the MMX simple IDCT
 * on the given coefficient block and does nothing else.
 *
 * @param block coefficients to transform
 *
 * NOTE(review): the idct() asm binds block as operand %0 and its final
 * IDCT() passes use offsets from %0 as their destination, so the result
 * appears to be left in block itself - confirm against the full idct() body.
 */
1278 void ff_simple_idct_mmx(int16_t *block)
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1279 {
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1280 idct(block);
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1281 }
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1282
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1283 //FIXME merge add/put into the idct
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1284
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 854
diff changeset
/**
 * Runs idct() on block, then passes the block to put_pixels_clamped_mmx()
 * together with dest and line_size.
 *
 * @param dest      destination pixel buffer, forwarded to put_pixels_clamped_mmx()
 * @param line_size forwarded unchanged to put_pixels_clamped_mmx()
 * @param block     coefficient block; transformed by idct() before being handed on
 *
 * NOTE(review): put_pixels_clamped_mmx() is defined elsewhere; presumably it
 * stores the clamped IDCT output to dest (overwriting it) with line_size as
 * the row stride in bytes - confirm against its definition.
 */
1285 void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1286 {
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1287 idct(block);
854
3034f1816596 * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 706
diff changeset
1288 put_pixels_clamped_mmx(block, dest, line_size);
175
bd77d3cbb233 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
diff changeset
1289 }
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 854
diff changeset
/**
 * Runs idct() on block, then passes the block to add_pixels_clamped_mmx()
 * together with dest and line_size.
 *
 * @param dest      destination pixel buffer, forwarded to add_pixels_clamped_mmx()
 * @param line_size forwarded unchanged to add_pixels_clamped_mmx()
 * @param block     coefficient block; transformed by idct() before being handed on
 *
 * NOTE(review): add_pixels_clamped_mmx() is defined elsewhere; presumably it
 * adds the IDCT output to the pixels already in dest and clamps the sum, with
 * line_size as the row stride in bytes - confirm against its definition.
 */
1290 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1291 {
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1292 idct(block);
854
3034f1816596 * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 706
diff changeset
1293 add_pixels_clamped_mmx(block, dest, line_size);
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 429
diff changeset
1294 }