annotate i386/idct_mmx.c @ 2918:13dcd22f0816 libavcodec

Add DTS_INC to the CFLAGS for DTS. This is only set by external configure programs (such as the MPlayer one) and thus somewhat hackish. We already do this for things like MLIB_INC and IPP_INC so it should be acceptable.
author diego
date Sun, 23 Oct 2005 18:16:53 +0000
parents 00ff749b33b6
children bfabfdf9ce55
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
1 /*
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
2 * Note: For libavcodec, this code can also be used under the LGPL license
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
3 */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
4 /*
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
5 * idct_mmx.c
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
6 * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
7 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
8 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
9 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
10 * mpeg2dec is free software; you can redistribute it and/or modify
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
11 * it under the terms of the GNU General Public License as published by
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
12 * the Free Software Foundation; either version 2 of the License, or
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
13 * (at your option) any later version.
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
14 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
15 * mpeg2dec is distributed in the hope that it will be useful,
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
18 * GNU General Public License for more details.
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
19 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
20 * You should have received a copy of the GNU General Public License
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
21 * along with this program; if not, write to the Free Software
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
23 */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
24
2817
b128802eb77b libavutil: Utility code from libavcodec moved to a separate library.
al
parents: 2754
diff changeset
25 #include "common.h"
2024
f65d87bfdd5a some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents: 436
diff changeset
26 #include "../dsputil.h"
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
27
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
28 #include "mmx.h"
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
29
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
30 #define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
31
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
32 #define ROW_SHIFT 11
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
33 #define COL_SHIFT 6
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
34
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
35 #define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
36 #define rounder(bias) {round (bias), round (bias)}
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
37
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
38 #if 0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
39 /* C row IDCT - it's just here to document the MMXEXT and MMX versions */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
40 static inline void idct_row (int16_t * row, int offset,
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
41 int16_t * table, int32_t * rounder)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
42 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
43 int C1, C2, C3, C4, C5, C6, C7;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
44 int a0, a1, a2, a3, b0, b1, b2, b3;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
45
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
46 row += offset;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
47
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
48 C1 = table[1];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
49 C2 = table[2];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
50 C3 = table[3];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
51 C4 = table[4];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
52 C5 = table[5];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
53 C6 = table[6];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
54 C7 = table[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
55
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
56 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
57 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
58 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
59 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
60
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
61 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
62 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
63 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
64 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
65
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
66 row[0] = (a0 + b0) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
67 row[1] = (a1 + b1) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
68 row[2] = (a2 + b2) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
69 row[3] = (a3 + b3) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
70 row[4] = (a3 - b3) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
71 row[5] = (a2 - b2) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
72 row[6] = (a1 - b1) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
73 row[7] = (a0 - b0) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
74 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
75 #endif
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
76
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
77
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
78 /* MMXEXT row IDCT */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
79
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
80 #define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
81 c4, c6, c4, c6, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
82 c1, c3, -c1, -c5, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
83 c5, c7, c3, -c7, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
84 c4, -c6, c4, -c6, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
85 -c4, c2, c4, -c2, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
86 c5, -c1, c3, -c1, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
87 c7, c3, c7, -c5 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
88
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
89 static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
90 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
91 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
92
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
93 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
94 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
95
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
96 movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
97 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
98
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
99 movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
100 pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
101
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
102 pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
103 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
104
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
105 static inline void mmxext_row (const int16_t * table, const int32_t * rounder)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
106 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
107 movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
108 pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
109
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
110 pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
111 pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
112
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
113 movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
114 pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
115
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
116 paddd_m2r (*rounder, mm3); // mm3 += rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
117 pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
118
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
119 pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
120 paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
121
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
122 pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
123 movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
124
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
125 pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
126 paddd_r2r (mm7, mm1); // mm1 = b1 b0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
127
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
128 paddd_m2r (*rounder, mm0); // mm0 += rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
129 psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
130
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
131 psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
132 paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
133
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
134 paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
135 psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
136
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
137 paddd_r2r (mm6, mm5); // mm5 = b3 b2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
138 movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
139
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
140 paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
141 psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
142 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
143
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
144 static inline void mmxext_row_tail (int16_t * row, int store)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
145 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
146 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
147
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
148 psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
149
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
150 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
151
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
152 packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
153
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
154 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
155 pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
156
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
157 /* slot */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
158
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
159 movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
160 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
161
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
162 static inline void mmxext_row_mid (int16_t * row, int store,
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
163 int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
164 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
165 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
166 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
167
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
168 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
169 psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
170
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
171 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
172 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
173
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
174 packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
175 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
176
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
177 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
178 pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
179
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
180 movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
181 movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
182
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
183 pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
184
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
185 movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
186 pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
187 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
188
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
189
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
190 /* MMX row IDCT */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
191
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
192 #define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
193 c4, c6, -c4, -c2, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
194 c1, c3, c3, -c7, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
195 c5, c7, -c1, -c5, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
196 c4, -c6, c4, -c2, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
197 -c4, c2, c4, -c6, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
198 c5, -c1, c7, -c5, \
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
199 c7, c3, c3, -c1 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
200
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
201 static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
202 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
203 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
204
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
205 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
206 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
207
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
208 movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
209 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
210
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
211 punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
212
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
213 movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
214 pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
215
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
216 movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
217 punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
218 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
219
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
220 static inline void mmx_row (const int16_t * table, const int32_t * rounder)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
221 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
222 pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
223 punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
224
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
225 pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
226 punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
227
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
228 movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
229 pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
230
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
231 paddd_m2r (*rounder, mm3); // mm3 += rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
232 pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
233
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
234 pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
235 paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
236
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
237 pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
238 movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
239
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
240 pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
241 paddd_r2r (mm7, mm1); // mm1 = b1 b0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
242
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
243 paddd_m2r (*rounder, mm0); // mm0 += rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
244 psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
245
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
246 psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
247 paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
248
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
249 paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
250 psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
251
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
252 paddd_r2r (mm6, mm5); // mm5 = b3 b2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
253 movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
254
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
255 paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
256 psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
257 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
258
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
259 static inline void mmx_row_tail (int16_t * row, int store)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
260 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
261 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
262
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
263 psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
264
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
265 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
266
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
267 packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
268
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
269 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
270 movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
271
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
272 pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
273
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
274 psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
275
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
276 por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
277
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
278 /* slot */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
279
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
280 movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
281 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
282
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
283 static inline void mmx_row_mid (int16_t * row, int store,
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
284 int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
285 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
286 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
287 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
288
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
289 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
290 psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
291
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
292 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
293 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
294
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
295 packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
296 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
297
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
298 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
299 movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
300
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
301 punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
302 psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
303
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
304 movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
305 pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
306
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
307 movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
308 por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
309
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
310 movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
311 punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
312
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
313 movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
314 pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
315 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
316
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
317
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
#if 0
/*
 * C column IDCT - it is never compiled; it is kept only to document what
 * the MMXEXT and MMX versions compute.  The intermediate names (u04, v26,
 * a0..a3, b0..b3, ...) are the ones used in the comments of the MMX code.
 */
static inline void idct_col (int16_t * col, int offset)
{
/* multiplication - as implemented on mmx (product shifted right by 16) */
#define F(c,x) (((c) * (x)) >> 16)

/* saturation to the int16_t range - it helps us handle torture test cases */
#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))

    int16_t *const p = col + offset;

    /* the eight inputs of one column are 8 elements apart */
    const int16_t x0 = p[0*8];
    const int16_t x1 = p[1*8];
    const int16_t x2 = p[2*8];
    const int16_t x3 = p[3*8];
    const int16_t x4 = p[4*8];
    const int16_t x5 = p[5*8];
    const int16_t x6 = p[6*8];
    const int16_t x7 = p[7*8];

    /* inputs 0, 4, 2, 6 -> a0..a3 */
    const int16_t u04 = S (x0 + x4);
    const int16_t v04 = S (x0 - x4);
    const int16_t u26 = S (F (T2, x6) + x2);
    const int16_t v26 = S (F (T2, x2) - x6);

    const int16_t a0 = S (u04 + u26);
    const int16_t a1 = S (v04 + v26);
    const int16_t a2 = S (v04 - v26);
    const int16_t a3 = S (u04 - u26);

    /* inputs 1, 7, 3, 5 -> b0..b3 */
    const int16_t u17 = S (F (T1, x7) + x1);
    const int16_t v17 = S (F (T1, x1) - x7);
    const int16_t u35 = S (F (T3, x5) + x3);
    const int16_t v35 = S (F (T3, x3) - x5);

    const int16_t b0 = S (u17 + u35);
    const int16_t b3 = S (v17 - v35);

    /* u12/v12 are first formed, then scaled by 2*C4 in place */
    int16_t u12 = S (u17 - u35);
    int16_t v12 = S (v17 + v35);
    u12 = S (2 * F (C4, u12));
    v12 = S (2 * F (C4, v12));

    const int16_t b1 = S (u12 + v12);
    const int16_t b2 = S (u12 - v12);

    /* output butterflies, scaled down by COL_SHIFT and written in place */
    p[0*8] = S (a0 + b0) >> COL_SHIFT;
    p[1*8] = S (a1 + b1) >> COL_SHIFT;
    p[2*8] = S (a2 + b2) >> COL_SHIFT;
    p[3*8] = S (a3 + b3) >> COL_SHIFT;

    p[4*8] = S (a3 - b3) >> COL_SHIFT;
    p[5*8] = S (a2 - b2) >> COL_SHIFT;
    p[6*8] = S (a1 - b1) >> COL_SHIFT;
    p[7*8] = S (a0 - b0) >> COL_SHIFT;
}
#endif
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
388
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
389
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
390 // MMX column IDCT
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
391 static inline void idct_col (int16_t * col, int offset)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
392 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
393 #define T1 13036
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
394 #define T2 27146
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
395 #define T3 43790
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
396 #define C4 23170
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
397
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
398 static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
399 static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
400 static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
401 static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
402
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
403 /* column code adapted from peter gubanov */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
404 /* http://www.elecard.com/peter/idct.shtml */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
405
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
406 movq_m2r (*_T1, mm0); // mm0 = T1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
407
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
408 movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
409 movq_r2r (mm0, mm2); // mm2 = T1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
410
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
411 movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
412 pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
413
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
414 movq_m2r (*_T3, mm5); // mm5 = T3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
415 pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
416
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
417 movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
418 movq_r2r (mm5, mm7); // mm7 = T3-1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
419
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
420 movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
421 psubsw_r2r (mm4, mm0); // mm0 = v17
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
422
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
423 movq_m2r (*_T2, mm4); // mm4 = T2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
424 pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
425
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
426 paddsw_r2r (mm2, mm1); // mm1 = u17
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
427 pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
428
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
429 /* slot */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
430
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
431 movq_r2r (mm4, mm2); // mm2 = T2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
432 paddsw_r2r (mm3, mm5); // mm5 = T3*x3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
433
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
434 pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
435 paddsw_r2r (mm6, mm7); // mm7 = T3*x5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
436
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
437 psubsw_r2r (mm6, mm5); // mm5 = v35
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
438 paddsw_r2r (mm3, mm7); // mm7 = u35
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
439
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
440 movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
441 movq_r2r (mm0, mm6); // mm6 = v17
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
442
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
443 pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
444 psubsw_r2r (mm5, mm0); // mm0 = b3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
445
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
446 psubsw_r2r (mm3, mm4); // mm4 = v26
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
447 paddsw_r2r (mm6, mm5); // mm5 = v12
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
448
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
449 movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
450 movq_r2r (mm1, mm6); // mm6 = u17
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
451
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
452 paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
453 paddsw_r2r (mm7, mm6); // mm6 = b0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
454
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
455 psubsw_r2r (mm7, mm1); // mm1 = u12
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
456 movq_r2r (mm1, mm7); // mm7 = u12
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
457
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
458 movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
459 paddsw_r2r (mm5, mm1); // mm1 = u12+v12
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
460
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
461 movq_m2r (*_C4, mm0); // mm0 = C4/2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
462 psubsw_r2r (mm5, mm7); // mm7 = u12-v12
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
463
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
464 movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
465 pmulhw_r2r (mm0, mm1); // mm1 = b1/2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
466
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
467 movq_r2r (mm4, mm6); // mm6 = v26
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
468 pmulhw_r2r (mm0, mm7); // mm7 = b2/2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
469
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
470 movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
471 movq_r2r (mm3, mm0); // mm0 = x0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
472
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
473 psubsw_r2r (mm5, mm3); // mm3 = v04
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
474 paddsw_r2r (mm5, mm0); // mm0 = u04
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
475
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
476 paddsw_r2r (mm3, mm4); // mm4 = a1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
477 movq_r2r (mm0, mm5); // mm5 = u04
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
478
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
479 psubsw_r2r (mm6, mm3); // mm3 = a2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
480 paddsw_r2r (mm2, mm5); // mm5 = a0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
481
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
482 paddsw_r2r (mm1, mm1); // mm1 = b1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
483 psubsw_r2r (mm2, mm0); // mm0 = a3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
484
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
485 paddsw_r2r (mm7, mm7); // mm7 = b2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
486 movq_r2r (mm3, mm2); // mm2 = a2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
487
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
488 movq_r2r (mm4, mm6); // mm6 = a1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
489 paddsw_r2r (mm7, mm3); // mm3 = a2+b2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
490
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
491 psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
492 paddsw_r2r (mm1, mm4); // mm4 = a1+b1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
493
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
494 psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
495 psubsw_r2r (mm1, mm6); // mm6 = a1-b1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
496
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
497 movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
498 psubsw_r2r (mm7, mm2); // mm2 = a2-b2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
499
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
500 psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
501 movq_r2r (mm5, mm7); // mm7 = a0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
502
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
503 movq_r2m (mm4, *(col+offset+1*8)); // save y1
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
504 psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
505
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
506 movq_r2m (mm3, *(col+offset+2*8)); // save y2
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
507 paddsw_r2r (mm1, mm5); // mm5 = a0+b0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
508
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
509 movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
510 psubsw_r2r (mm1, mm7); // mm7 = a0-b0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
511
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
512 psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
513 movq_r2r (mm0, mm3); // mm3 = a3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
514
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
515 movq_r2m (mm2, *(col+offset+5*8)); // save y5
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
516 psubsw_r2r (mm4, mm3); // mm3 = a3-b3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
517
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
518 psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
519 paddsw_r2r (mm0, mm4); // mm4 = a3+b3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
520
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
521 movq_r2m (mm5, *(col+offset+0*8)); // save y0
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
522 psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
523
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
524 movq_r2m (mm6, *(col+offset+6*8)); // save y6
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
525 psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
526
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
527 movq_r2m (mm7, *(col+offset+7*8)); // save y7
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
528
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
529 movq_r2m (mm3, *(col+offset+4*8)); // save y4
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
530
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
531 movq_r2m (mm4, *(col+offset+3*8)); // save y3
436
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
532
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
533 #undef T1
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
534 #undef T2
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
535 #undef T3
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
536 #undef C4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
537 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
538
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
539 static const int32_t rounder0[] ATTR_ALIGN(8) =
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
540 rounder ((1 << (COL_SHIFT - 1)) - 0.5);
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
541 static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
542 static const int32_t rounder1[] ATTR_ALIGN(8) =
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
543 rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
544 static const int32_t rounder7[] ATTR_ALIGN(8) =
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
545 rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
546 static const int32_t rounder2[] ATTR_ALIGN(8) =
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
547 rounder (0.60355339059); /* C2 * (C6+C2)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
548 static const int32_t rounder6[] ATTR_ALIGN(8) =
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
549 rounder (-0.25); /* C2 * (C6-C2)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
550 static const int32_t rounder3[] ATTR_ALIGN(8) =
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
551 rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
552 static const int32_t rounder5[] ATTR_ALIGN(8) =
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
553 rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
554
436
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
555 #undef COL_SHIFT
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
556 #undef ROW_SHIFT
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
557
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
/*
 * declare_idct (idct, table, idct_row_head, idct_row, idct_row_tail,
 *               idct_row_mid)
 *
 * Expands to the definition of `void idct (int16_t * block)`.
 *
 *   idct           name of the function to define
 *   table          macro that builds a coefficient table from seven constants
 *   idct_row_*     row primitives of the chosen instruction-set flavour
 *
 * The generated function first runs the row primitives over offsets
 * 0, 4, 1, 7, 2, 6, 3, 5 (times 8): idct_row_head receives the first
 * offset, every idct_row_mid receives the previous and the next offset,
 * and idct_row_tail receives the last one.  Offsets that share a
 * coefficient table (0/4, 1/7, 2/6, 3/5) are adjacent.  It then calls
 * idct_col() twice, with offsets 0 and 4.
 */
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
void idct (int16_t * block)                                             \
{                                                                       \
    static const int16_t table04[] ATTR_ALIGN(16) =                     \
        table (22725, 21407, 19266, 16384, 12873,  8867, 4520);         \
    static const int16_t table17[] ATTR_ALIGN(16) =                     \
        table (31521, 29692, 26722, 22725, 17855, 12299, 6270);         \
    static const int16_t table26[] ATTR_ALIGN(16) =                     \
        table (29692, 27969, 25172, 21407, 16819, 11585, 5906);         \
    static const int16_t table35[] ATTR_ALIGN(16) =                     \
        table (26722, 25172, 22654, 19266, 15137, 10426, 5315);         \
                                                                        \
    /* row pass */                                                      \
    idct_row_head (block, 0*8, table04);                                \
    idct_row (table04, rounder0);                                       \
    idct_row_mid (block, 0*8, 4*8, table04);                            \
    idct_row (table04, rounder4);                                       \
    idct_row_mid (block, 4*8, 1*8, table17);                            \
    idct_row (table17, rounder1);                                       \
    idct_row_mid (block, 1*8, 7*8, table17);                            \
    idct_row (table17, rounder7);                                       \
    idct_row_mid (block, 7*8, 2*8, table26);                            \
    idct_row (table26, rounder2);                                       \
    idct_row_mid (block, 2*8, 6*8, table26);                            \
    idct_row (table26, rounder6);                                       \
    idct_row_mid (block, 6*8, 3*8, table35);                            \
    idct_row (table35, rounder3);                                       \
    idct_row_mid (block, 3*8, 5*8, table35);                            \
    idct_row (table35, rounder5);                                       \
    idct_row_tail (block, 5*8);                                         \
                                                                        \
    /* column pass */                                                   \
    idct_col (block, 0);                                                \
    idct_col (block, 4);                                                \
}
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
591
2024
f65d87bfdd5a some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents: 436
diff changeset
592 void ff_mmx_idct(DCTELEM *block);
f65d87bfdd5a some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents: 436
diff changeset
593 void ff_mmxext_idct(DCTELEM *block);
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
594
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
595 declare_idct (ff_mmxext_idct, mmxext_table,
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
596 mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
597
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
598 declare_idct (ff_mmx_idct, mmx_table,
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
599 mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
2745
42d3e9068e32 MMX for H.264 iDCT (adapted from x264)
lorenm
parents: 2024
diff changeset
600