annotate i386/idct_mmx.c @ 5875:5a61e8e2f65c libavcodec

Remove libvorbis Vorbis decoding support. Our native decoder is complete and has no known bugs, any remaining issues will hopefully be uncovered now.
author diego
date Sun, 04 Nov 2007 12:55:32 +0000
parents 0244bba24b43
children 80103098c797
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
1 /*
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
2 * idct_mmx.c
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
3 * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
4 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
5 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
6 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
7 * mpeg2dec is free software; you can redistribute it and/or modify
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
8 * it under the terms of the GNU General Public License as published by
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
9 * the Free Software Foundation; either version 2 of the License, or
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
10 * (at your option) any later version.
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
11 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
12 * mpeg2dec is distributed in the hope that it will be useful,
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
15 * GNU General Public License for more details.
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
16 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
17 * You should have received a copy of the GNU General Public License
4384
65fd98452a4e Fix some more license headers.
diego
parents: 3717
diff changeset
18 * along with mpeg2dec; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
20 */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
21
2817
b128802eb77b libavutil: Utility code from libavcodec moved to a separate library.
al
parents: 2754
diff changeset
22 #include "common.h"
5010
d5ba514e3f4a Add libavcodec to compiler include flags in order to simplify header
diego
parents: 4384
diff changeset
23 #include "dsputil.h"
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
24
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
25 #include "mmx.h"
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
26
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
/* Request `align`-byte alignment for a declaration (GCC attribute syntax). */
#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))

/* Right shift applied to the 32-bit sums at the end of the row pass. */
#define ROW_SHIFT 11
/* Not used in the row routines; presumably the column-pass shift -- confirm
 * against the column code further down the file. */
#define COL_SHIFT 6

/* (bias + 0.5) scaled by 2^ROW_SHIFT and truncated to int. */
#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
/* Brace initializer holding two identical round(bias) values, i.e. one
 * 64-bit operand's worth of 32-bit rounders. */
#define rounder(bias) {round (bias), round (bias)}
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
34
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
35 #if 0
5129
0244bba24b43 misc typo fixes
diego
parents: 5010
diff changeset
36 /* C row IDCT - it is just here to document the MMXEXT and MMX versions */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
37 static inline void idct_row (int16_t * row, int offset,
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
38 int16_t * table, int32_t * rounder)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
39 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
40 int C1, C2, C3, C4, C5, C6, C7;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
41 int a0, a1, a2, a3, b0, b1, b2, b3;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
42
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
43 row += offset;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
44
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
45 C1 = table[1];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
46 C2 = table[2];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
47 C3 = table[3];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
48 C4 = table[4];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
49 C5 = table[5];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
50 C6 = table[6];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
51 C7 = table[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
52
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
53 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
54 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
55 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
56 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
57
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
58 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
59 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
60 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
61 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
62
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
63 row[0] = (a0 + b0) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
64 row[1] = (a1 + b1) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
65 row[2] = (a2 + b2) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
66 row[3] = (a3 + b3) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
67 row[4] = (a3 - b3) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
68 row[5] = (a2 - b2) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
69 row[6] = (a1 - b1) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
70 row[7] = (a0 - b0) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
71 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
72 #endif
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
73
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
74
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
75 /* MMXEXT row IDCT */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
76
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
/*
 * Expands to a 32-entry brace initializer holding the coefficients
 * c1..c7 in the order in which mmxext_row_head(), mmxext_row() and
 * mmxext_row_mid() load them (four 16-bit entries per 64-bit operand).
 *
 * Every parameter is parenthesised so that the unary minus applies to the
 * whole argument even when an expression is passed.
 */
#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) {  (c4),  (c2), -(c4), -(c2), \
                                              (c4),  (c6),  (c4),  (c6), \
                                              (c1),  (c3), -(c1), -(c5), \
                                              (c5),  (c7),  (c3), -(c7), \
                                              (c4), -(c6),  (c4), -(c6), \
                                             -(c4),  (c2),  (c4), -(c2), \
                                              (c5), -(c1),  (c3), -(c1), \
                                              (c7),  (c3),  (c7), -(c5) }
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
85
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
86 static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
87 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
88 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
89
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
90 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
91 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
92
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
93 movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
94 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
95
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
96 movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
97 pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
98
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
99 pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
100 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
101
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
102 static inline void mmxext_row (const int16_t * table, const int32_t * rounder)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
103 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
104 movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
105 pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
106
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
107 pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
108 pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
109
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
110 movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
111 pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
112
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
113 paddd_m2r (*rounder, mm3); // mm3 += rounder
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
114 pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
115
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
116 pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
117 paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
118
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
119 pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
120 movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
121
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
122 pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
123 paddd_r2r (mm7, mm1); // mm1 = b1 b0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
124
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
125 paddd_m2r (*rounder, mm0); // mm0 += rounder
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
126 psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
127
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
128 psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
129 paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
130
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
131 paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
132 psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
133
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
134 paddd_r2r (mm6, mm5); // mm5 = b3 b2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
135 movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
136
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
137 paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
138 psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
139 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
140
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
141 static inline void mmxext_row_tail (int16_t * row, int store)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
142 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
143 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
144
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
145 psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
146
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
147 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
148
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
149 packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
150
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
151 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
152 pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
153
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
154 /* slot */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
155
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
156 movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
157 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
158
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
159 static inline void mmxext_row_mid (int16_t * row, int store,
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
160 int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
161 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
162 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
163 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
164
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
165 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
166 psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
167
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
168 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
169 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
170
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
171 packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
172 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
173
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
174 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
175 pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
176
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
177 movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
178 movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
179
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
180 pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
181
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
182 movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
183 pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
184 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
185
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
186
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
187 /* MMX row IDCT */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
188
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
/*
 * Expands to a 32-entry brace initializer holding the coefficients
 * c1..c7 in the order in which mmx_row_head() and mmx_row() load them
 * (four 16-bit entries per 64-bit operand).
 *
 * Every parameter is parenthesised so that the unary minus applies to the
 * whole argument even when an expression is passed.
 */
#define mmx_table(c1,c2,c3,c4,c5,c6,c7) {  (c4),  (c2),  (c4),  (c6), \
                                           (c4),  (c6), -(c4), -(c2), \
                                           (c1),  (c3),  (c3), -(c7), \
                                           (c5),  (c7), -(c1), -(c5), \
                                           (c4), -(c6),  (c4), -(c2), \
                                          -(c4),  (c2),  (c4), -(c6), \
                                           (c5), -(c1),  (c7), -(c5), \
                                           (c7),  (c3),  (c3), -(c1) }
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
197
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
198 static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
199 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
200 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
201
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
202 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
203 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
204
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
205 movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
206 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
207
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
208 punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
209
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
210 movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
211 pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
212
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
213 movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
214 punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
215 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
216
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
217 static inline void mmx_row (const int16_t * table, const int32_t * rounder)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
218 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
219 pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
220 punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
221
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
222 pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
223 punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
224
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
225 movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
226 pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
227
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
228 paddd_m2r (*rounder, mm3); // mm3 += rounder
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
229 pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
230
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
231 pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
232 paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
233
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
234 pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
235 movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
236
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
237 pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
238 paddd_r2r (mm7, mm1); // mm1 = b1 b0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
239
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
240 paddd_m2r (*rounder, mm0); // mm0 += rounder
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
241 psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
242
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
243 psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
244 paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
245
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
246 paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
247 psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
248
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
249 paddd_r2r (mm6, mm5); // mm5 = b3 b2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
250 movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
251
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
252 paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
253 psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
254 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
255
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
256 static inline void mmx_row_tail (int16_t * row, int store)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
257 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
258 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
259
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
260 psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
261
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
262 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
263
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
264 packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
265
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
266 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
267 movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
268
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
269 pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
270
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
271 psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
272
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
273 por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
274
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
275 /* slot */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
276
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
277 movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
278 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
279
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
/*
 * mmx_row_mid: finish the row that is still pending in the MMX registers and
 * start loading the next one; the two instruction streams are interleaved.
 *
 *   row    - base pointer of the coefficient block
 *   store  - element offset from row where the finished y0..y7 are written
 *   offset - element offset from row of the next row to load
 *   table  - coefficient table for the next row; the quadwords at table,
 *            table+4 and table+8 are loaded into mm3, mm4 and mm1
 *
 * Finishing part: mm0 and mm7 are shifted right by ROW_SHIFT, packed with
 * mm1 and mm3, the upper half (held as y6 y7 y4 y5) is reordered to
 * y7 y6 y5 y4 with a shift/or pair, and both halves are stored.
 * Starting part: on exit mm0 = x2 x0 x2 x0, mm2 = x6 x4 x6 x4,
 * mm5 = mm6 = x7 x5 x3 x1 and mm3 already holds the first pmaddwd product.
 * NOTE(review): the entry/exit register contract is read off the per-line
 * comments; confirm against the row routine that runs before/after this.
 */
280 static inline void mmx_row_mid (int16_t * row, int store,
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
281 int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
282 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
283 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
284 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
285
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
286 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
287 psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
288
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
289 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
290 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
291
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
292 packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
293 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
294
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
295 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
296 movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
297
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
298 punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
299 psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
300
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
301 movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
302 pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
303
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
304 movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
305 por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
306
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
307 movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
308 punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
309
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
310 movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
311 pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
312 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
313
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
314
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
/*
 * Scalar reference for the column pass. It is compiled out (#if 0) and kept
 * only as documentation of the arithmetic the MMX code performs.
 *
 *   col    - base pointer of the coefficient block
 *   offset - index of the column; samples are read from and written back to
 *            col[offset + k*8] for k = 0..7
 *
 * F(c,x) keeps the upper bits of the 16.16 product (the "multiplication as
 * implemented on mmx"); S(x) clamps to the int16_t range, mirroring the
 * saturating adds/subtracts of the MMX version.
 * T1, T2, T3, C4 and COL_SHIFT are not defined inside this block: T1..C4 are
 * #defined inside the MMX idct_col further down, so this code would need
 * them moved (and the function renamed) before it could be enabled.
 * F and S are never #undef'd here.
 */
315 #if 0
5129
0244bba24b43 misc typo fixes
diego
parents: 5010
diff changeset
316 // C column IDCT - it is just here to document the MMXEXT and MMX versions
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
317 static inline void idct_col (int16_t * col, int offset)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
318 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
319 /* multiplication - as implemented on mmx */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
320 #define F(c,x) (((c) * (x)) >> 16)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
321
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
322 /* saturation - it helps us handle torture test cases */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
323 #define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
324
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
325 int16_t x0, x1, x2, x3, x4, x5, x6, x7;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
326 int16_t y0, y1, y2, y3, y4, y5, y6, y7;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
327 int16_t a0, a1, a2, a3, b0, b1, b2, b3;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
328 int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
329
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
330 col += offset;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
331
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
332 x0 = col[0*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
333 x1 = col[1*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
334 x2 = col[2*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
335 x3 = col[3*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
336 x4 = col[4*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
337 x5 = col[5*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
338 x6 = col[6*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
339 x7 = col[7*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
340
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
341 u04 = S (x0 + x4);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
342 v04 = S (x0 - x4);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
343 u26 = S (F (T2, x6) + x2);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
344 v26 = S (F (T2, x2) - x6);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
345
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
346 a0 = S (u04 + u26);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
347 a1 = S (v04 + v26);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
348 a2 = S (v04 - v26);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
349 a3 = S (u04 - u26);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
350
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
351 u17 = S (F (T1, x7) + x1);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
352 v17 = S (F (T1, x1) - x7);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
353 u35 = S (F (T3, x5) + x3);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
354 v35 = S (F (T3, x3) - x5);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
355
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
356 b0 = S (u17 + u35);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
357 b3 = S (v17 - v35);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
358 u12 = S (u17 - u35);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
359 v12 = S (v17 + v35);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
360 u12 = S (2 * F (C4, u12));
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
361 v12 = S (2 * F (C4, v12));
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
362 b1 = S (u12 + v12);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
363 b2 = S (u12 - v12);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
364
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
365 y0 = S (a0 + b0) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
366 y1 = S (a1 + b1) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
367 y2 = S (a2 + b2) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
368 y3 = S (a3 + b3) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
369
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
370 y4 = S (a3 - b3) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
371 y5 = S (a2 - b2) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
372 y6 = S (a1 - b1) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
373 y7 = S (a0 - b0) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
374
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
375 col[0*8] = y0;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
376 col[1*8] = y1;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
377 col[2*8] = y2;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
378 col[3*8] = y3;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
379 col[4*8] = y4;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
380 col[5*8] = y5;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
381 col[6*8] = y6;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
382 col[7*8] = y7;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
383 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
384 #endif
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
385
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
386
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
387 // MMX column IDCT
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
/*
 * MMX column pass, performed in place.
 *
 *   col    - base pointer of the coefficient block
 *   offset - element offset of the first column handled by this call; row k
 *            is addressed as col+offset+k*8. Each movq moves one quadword
 *            (four int16_t), and the constant tables hold four copies of
 *            each factor, so four neighbouring columns are processed at once.
 *
 * T1, T2, T3 and C4 are local #defines (removed again at the end of the
 * function) that are replicated into the 8-byte aligned tables _T1.._C4.
 * T3 (43790) is larger than 32767, so the value held in the short table acts
 * as "T3-1" in the pmulhw fixed-point scale; x3 / x5 are added back after
 * the multiply to obtain T3*x3 and T3*x5.
 * Rows 3 and 5 of the block double as scratch storage for b3 and b0 once
 * x3 and x5 have been loaded; they are overwritten with y3 and y5 later.
 * All results are shifted right by COL_SHIFT before being stored.
 */
388 static inline void idct_col (int16_t * col, int offset)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
389 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
390 #define T1 13036
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
391 #define T2 27146
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
392 #define T3 43790
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
393 #define C4 23170
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
394
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
395 static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
396 static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
397 static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
398 static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
399
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
400 /* column code adapted from peter gubanov */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
401 /* http://www.elecard.com/peter/idct.shtml */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
402
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
403 movq_m2r (*_T1, mm0); // mm0 = T1
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
404
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
405 movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
406 movq_r2r (mm0, mm2); // mm2 = T1
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
407
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
408 movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
409 pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
410
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
411 movq_m2r (*_T3, mm5); // mm5 = T3-1 (T3 does not fit a signed 16-bit word)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
412 pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
413
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
414 movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
415 movq_r2r (mm5, mm7); // mm7 = T3-1
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
416
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
417 movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
418 psubsw_r2r (mm4, mm0); // mm0 = v17
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
419
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
420 movq_m2r (*_T2, mm4); // mm4 = T2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
421 pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
422
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
423 paddsw_r2r (mm2, mm1); // mm1 = u17
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
424 pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
425
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
426 /* slot */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
427
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
428 movq_r2r (mm4, mm2); // mm2 = T2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
429 paddsw_r2r (mm3, mm5); // mm5 = T3*x3
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
430
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
431 pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
432 paddsw_r2r (mm6, mm7); // mm7 = T3*x5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
433
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
434 psubsw_r2r (mm6, mm5); // mm5 = v35
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
435 paddsw_r2r (mm3, mm7); // mm7 = u35
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
436
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
437 movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
438 movq_r2r (mm0, mm6); // mm6 = v17
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
439
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
440 pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
441 psubsw_r2r (mm5, mm0); // mm0 = b3
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
442
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
443 psubsw_r2r (mm3, mm4); // mm4 = v26
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
444 paddsw_r2r (mm6, mm5); // mm5 = v12
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
445
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
446 movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
447 movq_r2r (mm1, mm6); // mm6 = u17
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
448
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
449 paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
450 paddsw_r2r (mm7, mm6); // mm6 = b0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
451
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
452 psubsw_r2r (mm7, mm1); // mm1 = u12
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
453 movq_r2r (mm1, mm7); // mm7 = u12
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
454
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
455 movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
456 paddsw_r2r (mm5, mm1); // mm1 = u12+v12
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
457
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
458 movq_m2r (*_C4, mm0); // mm0 = C4/2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
459 psubsw_r2r (mm5, mm7); // mm7 = u12-v12
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
460
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
461 movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
462 pmulhw_r2r (mm0, mm1); // mm1 = b1/2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
463
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
464 movq_r2r (mm4, mm6); // mm6 = v26
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
465 pmulhw_r2r (mm0, mm7); // mm7 = b2/2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
466
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
467 movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
468 movq_r2r (mm3, mm0); // mm0 = x0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
469
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
470 psubsw_r2r (mm5, mm3); // mm3 = v04
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
471 paddsw_r2r (mm5, mm0); // mm0 = u04
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
472
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
473 paddsw_r2r (mm3, mm4); // mm4 = a1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
474 movq_r2r (mm0, mm5); // mm5 = u04
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
475
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
476 psubsw_r2r (mm6, mm3); // mm3 = a2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
477 paddsw_r2r (mm2, mm5); // mm5 = a0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
478
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
479 paddsw_r2r (mm1, mm1); // mm1 = b1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
480 psubsw_r2r (mm2, mm0); // mm0 = a3
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
481
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
482 paddsw_r2r (mm7, mm7); // mm7 = b2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
483 movq_r2r (mm3, mm2); // mm2 = a2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
484
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
485 movq_r2r (mm4, mm6); // mm6 = a1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
486 paddsw_r2r (mm7, mm3); // mm3 = a2+b2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
487
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
488 psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
489 paddsw_r2r (mm1, mm4); // mm4 = a1+b1
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
490
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
491 psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
492 psubsw_r2r (mm1, mm6); // mm6 = a1-b1
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
493
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
494 movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
495 psubsw_r2r (mm7, mm2); // mm2 = a2-b2
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
496
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
497 psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
498 movq_r2r (mm5, mm7); // mm7 = a0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
499
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
500 movq_r2m (mm4, *(col+offset+1*8)); // save y1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
501 psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
502
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
503 movq_r2m (mm3, *(col+offset+2*8)); // save y2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
504 paddsw_r2r (mm1, mm5); // mm5 = a0+b0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
505
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
506 movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
507 psubsw_r2r (mm1, mm7); // mm7 = a0-b0
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
508
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
509 psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
510 movq_r2r (mm0, mm3); // mm3 = a3
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
511
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
512 movq_r2m (mm2, *(col+offset+5*8)); // save y5
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
513 psubsw_r2r (mm4, mm3); // mm3 = a3-b3
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
514
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
515 psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
516 paddsw_r2r (mm0, mm4); // mm4 = a3+b3
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
517
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
518 movq_r2m (mm5, *(col+offset+0*8)); // save y0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
519 psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
520
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
521 movq_r2m (mm6, *(col+offset+6*8)); // save y6
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
522 psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
523
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
524 movq_r2m (mm7, *(col+offset+7*8)); // save y7
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
525
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
526 movq_r2m (mm3, *(col+offset+4*8)); // save y4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
527
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
528 movq_r2m (mm4, *(col+offset+3*8)); // save y3
436
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
529
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
530 #undef T1
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
531 #undef T2
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
532 #undef T3
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
533 #undef C4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
534 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
535
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
/*
 * Rounding constants handed to the row routines, one 8-byte aligned table
 * per row. declare_idct below passes rounder0 and rounder4 together with
 * table04; presumably the digit in each name is the row index it belongs to
 * (confirm against the remaining idct_row calls).
 * rounder() is a macro that is not defined in this part of the file; the
 * formulas in the trailing comments are the original author's derivation of
 * each argument.
 * rounder0 is the last user of COL_SHIFT, so COL_SHIFT and ROW_SHIFT are
 * #undef'd right after the tables.
 */
536 static const int32_t rounder0[] ATTR_ALIGN(8) =
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
537 rounder ((1 << (COL_SHIFT - 1)) - 0.5);
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
538 static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
539 static const int32_t rounder1[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
540 rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
541 static const int32_t rounder7[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
542 rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
543 static const int32_t rounder2[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
544 rounder (0.60355339059); /* C2 * (C6+C2)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
545 static const int32_t rounder6[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
546 rounder (-0.25); /* C2 * (C6-C2)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
547 static const int32_t rounder3[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
548 rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
549 static const int32_t rounder5[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
550 rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
551
436
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
552 #undef COL_SHIFT
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
553 #undef ROW_SHIFT
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
554
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
555 #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
556 void idct (int16_t * block) \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
557 { \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
558 static const int16_t table04[] ATTR_ALIGN(16) = \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
559 table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
560 static const int16_t table17[] ATTR_ALIGN(16) = \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
561 table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
562 static const int16_t table26[] ATTR_ALIGN(16) = \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
563 table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
564 static const int16_t table35[] ATTR_ALIGN(16) = \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
565 table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
566 \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
567 idct_row_head (block, 0*8, table04); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
568 idct_row (table04, rounder0); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
569 idct_row_mid (block, 0*8, 4*8, table04); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
570 idct_row (table04, rounder4); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
571 idct_row_mid (block, 4*8, 1*8, table17); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
572 idct_row (table17, rounder1); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
573 idct_row_mid (block, 1*8, 7*8, table17); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
574 idct_row (table17, rounder7); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
575 idct_row_mid (block, 7*8, 2*8, table26); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
576 idct_row (table26, rounder2); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
577 idct_row_mid (block, 2*8, 6*8, table26); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
578 idct_row (table26, rounder6); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
579 idct_row_mid (block, 6*8, 3*8, table35); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
580 idct_row (table35, rounder3); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
581 idct_row_mid (block, 3*8, 5*8, table35); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
582 idct_row (table35, rounder5); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
583 idct_row_tail (block, 5*8); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
584 \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
585 idct_col (block, 0); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
586 idct_col (block, 4); \
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
587 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
588
2024
f65d87bfdd5a some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents: 436
diff changeset
589 void ff_mmx_idct(DCTELEM *block);
f65d87bfdd5a some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents: 436
diff changeset
590 void ff_mmxext_idct(DCTELEM *block);
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
591
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
592 declare_idct (ff_mmxext_idct, mmxext_table,
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
593 mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
594
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
595 declare_idct (ff_mmx_idct, mmx_table,
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
596 mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
2745
42d3e9068e32 MMX for H.264 iDCT (adapted from x264)
lorenm
parents: 2024
diff changeset
597