annotate libmpeg2/idct_mmx.c @ 4559:5dc383bb1c82

added mga_top_reserved module parameter to skip a configurable amount of space at the top of video memory. this is needed to prevent corruption of the kernel's console font when using the "fastfont" option with matroxfb.
author rfelker
date Thu, 07 Feb 2002 02:07:29 +0000
parents 4fa90be8da03
children 47984e3f54ce
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
1 /*
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
2 * idct_mmx.c
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
3 * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
4 *
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
5 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
6 *
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
7 * mpeg2dec is free software; you can redistribute it and/or modify
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
8 * it under the terms of the GNU General Public License as published by
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
9 * the Free Software Foundation; either version 2 of the License, or
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
10 * (at your option) any later version.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
11 *
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
12 * mpeg2dec is distributed in the hope that it will be useful,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
15 * GNU General Public License for more details.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
16 *
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
17 * You should have received a copy of the GNU General Public License
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
18 * along with this program; if not, write to the Free Software
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
20 */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
21
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
22 #include "config.h"
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
23
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
24 #ifdef ARCH_X86
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
25
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
26 #include <inttypes.h>
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
27
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
28 #include "mpeg2_internal.h"
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
29 #include "attributes.h"
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
30 #include "mmx.h"
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
31
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
32 #define ROW_SHIFT 11
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
33 #define COL_SHIFT 6
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
34
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
35 #define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
36 #define rounder(bias) {round (bias), round (bias)}
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
37
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
38
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
39 #if 0
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
40 /* C row IDCT - it's just here to document the MMXEXT and MMX versions */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
41 static inline void idct_row (int16_t * row, int offset,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
42 int16_t * table, int32_t * rounder)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
43 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
44 int C1, C2, C3, C4, C5, C6, C7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
45 int a0, a1, a2, a3, b0, b1, b2, b3;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
46
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
47 row += offset;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
48
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
49 C1 = table[1];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
50 C2 = table[2];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
51 C3 = table[3];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
52 C4 = table[4];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
53 C5 = table[5];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
54 C6 = table[6];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
55 C7 = table[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
56
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
57 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
58 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
59 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
60 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
61
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
62 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
63 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
64 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
65 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
66
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
67 row[0] = (a0 + b0) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
68 row[1] = (a1 + b1) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
69 row[2] = (a2 + b2) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
70 row[3] = (a3 + b3) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
71 row[4] = (a3 - b3) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
72 row[5] = (a2 - b2) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
73 row[6] = (a1 - b1) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
74 row[7] = (a0 - b0) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
75 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
76 #endif
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
77
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
78
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
79 /* MMXEXT row IDCT */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
80
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
81 #define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
82 c4, c6, c4, c6, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
83 c1, c3, -c1, -c5, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
84 c5, c7, c3, -c7, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
85 c4, -c6, c4, -c6, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
86 -c4, c2, c4, -c2, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
87 c5, -c1, c3, -c1, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
88 c7, c3, c7, -c5 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
89
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
90 static inline void mmxext_row_head (int16_t * row, int offset, int16_t * table)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
91 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
92 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
93
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
94 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
95 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
96
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
97 movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
98 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
99
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
100 movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
101 pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
102
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
103 pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
104 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
105
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
106 static inline void mmxext_row (int16_t * table, int32_t * rounder)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
107 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
108 movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
109 pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
110
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
111 pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
112 pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
113
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
114 movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
115 pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
116
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
117 paddd_m2r (*rounder, mm3); // mm3 += rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
118 pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
119
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
120 pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
121 paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
122
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
123 pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
124 movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
125
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
126 pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
127 paddd_r2r (mm7, mm1); // mm1 = b1 b0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
128
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
129 paddd_m2r (*rounder, mm0); // mm0 += rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
130 psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
131
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
132 psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
133 paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
134
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
135 paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
136 psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
137
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
138 paddd_r2r (mm6, mm5); // mm5 = b3 b2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
139 movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
140
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
141 paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
142 psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
143 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
144
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
145 static inline void mmxext_row_tail (int16_t * row, int store)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
146 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
147 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
148
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
149 psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
150
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
151 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
152
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
153 packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
154
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
155 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
156 pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
157
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
158 /* slot */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
159
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
160 movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
161 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
162
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
163 static inline void mmxext_row_mid (int16_t * row, int store,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
164 int offset, int16_t * table)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
165 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
166 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
167 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
168
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
169 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
170 psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
171
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
172 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
173 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
174
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
175 packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
176 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
177
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
178 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
179 pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
180
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
181 movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
182 movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
183
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
184 pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
185
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
186 movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
187 pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
188 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
189
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
190
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
191 /* MMX row IDCT */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
192
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
193 #define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
194 c4, c6, -c4, -c2, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
195 c1, c3, c3, -c7, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
196 c5, c7, -c1, -c5, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
197 c4, -c6, c4, -c2, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
198 -c4, c2, c4, -c6, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
199 c5, -c1, c7, -c5, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
200 c7, c3, c3, -c1 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
201
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
202 static inline void mmx_row_head (int16_t * row, int offset, int16_t * table)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
203 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
204 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
205
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
206 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
207 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
208
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
209 movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
210 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
211
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
212 punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
213
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
214 movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
215 pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
216
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
217 movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
218 punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
219 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
220
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
221 static inline void mmx_row (int16_t * table, int32_t * rounder)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
222 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
223 pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
224 punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
225
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
226 pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
227 punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
228
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
229 movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
230 pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
231
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
232 paddd_m2r (*rounder, mm3); // mm3 += rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
233 pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
234
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
235 pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
236 paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
237
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
238 pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
239 movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
240
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
241 pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
242 paddd_r2r (mm7, mm1); // mm1 = b1 b0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
243
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
244 paddd_m2r (*rounder, mm0); // mm0 += rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
245 psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
246
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
247 psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
248 paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
249
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
250 paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
251 psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
252
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
253 paddd_r2r (mm6, mm5); // mm5 = b3 b2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
254 movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
255
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
256 paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
257 psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
258 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
259
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
260 static inline void mmx_row_tail (int16_t * row, int store)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
261 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
262 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
263
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
264 psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
265
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
266 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
267
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
268 packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
269
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
270 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
271 movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
272
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
273 pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
274
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
275 psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
276
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
277 por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
278
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
279 /* slot */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
280
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
281 movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
282 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
283
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
284 static inline void mmx_row_mid (int16_t * row, int store,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
285 int offset, int16_t * table)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
286 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
287 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
288 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
289
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
290 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
291 psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
292
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
293 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
294 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
295
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
296 packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
297 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
298
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
299 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
300 movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
301
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
302 punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
303 psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
304
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
305 movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
306 pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
307
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
308 movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
309 por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
310
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
311 movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
312 punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
313
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
314 movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
315 pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
316 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
317
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
318
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
319 #if 0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
320 // C column IDCT - it's just here to document the MMXEXT and MMX versions
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
321 static inline void idct_col (int16_t * col, int offset)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
322 {
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
323 /* multiplication - as implemented on mmx */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
324 #define F(c,x) (((c) * (x)) >> 16)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
325
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
326 /* saturation - it helps us handle torture test cases */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
327 #define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
328
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
329 int16_t x0, x1, x2, x3, x4, x5, x6, x7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
330 int16_t y0, y1, y2, y3, y4, y5, y6, y7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
331 int16_t a0, a1, a2, a3, b0, b1, b2, b3;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
332 int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
333
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
334 col += offset;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
335
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
336 x0 = col[0*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
337 x1 = col[1*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
338 x2 = col[2*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
339 x3 = col[3*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
340 x4 = col[4*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
341 x5 = col[5*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
342 x6 = col[6*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
343 x7 = col[7*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
344
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
345 u04 = S (x0 + x4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
346 v04 = S (x0 - x4);
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
347 u26 = S (F (T2, x6) + x2);
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
348 v26 = S (F (T2, x2) - x6);
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
349
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
350 a0 = S (u04 + u26);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
351 a1 = S (v04 + v26);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
352 a2 = S (v04 - v26);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
353 a3 = S (u04 - u26);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
354
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
355 u17 = S (F (T1, x7) + x1);
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
356 v17 = S (F (T1, x1) - x7);
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
357 u35 = S (F (T3, x5) + x3);
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
358 v35 = S (F (T3, x3) - x5);
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
359
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
360 b0 = S (u17 + u35);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
361 b3 = S (v17 - v35);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
362 u12 = S (u17 - u35);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
363 v12 = S (v17 + v35);
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
364 u12 = S (2 * F (C4, u12));
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
365 v12 = S (2 * F (C4, v12));
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
366 b1 = S (u12 + v12);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
367 b2 = S (u12 - v12);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
368
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
369 y0 = S (a0 + b0) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
370 y1 = S (a1 + b1) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
371 y2 = S (a2 + b2) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
372 y3 = S (a3 + b3) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
373
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
374 y4 = S (a3 - b3) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
375 y5 = S (a2 - b2) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
376 y6 = S (a1 - b1) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
377 y7 = S (a0 - b0) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
378
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
379 col[0*8] = y0;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
380 col[1*8] = y1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
381 col[2*8] = y2;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
382 col[3*8] = y3;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
383 col[4*8] = y4;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
384 col[5*8] = y5;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
385 col[6*8] = y6;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
386 col[7*8] = y7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
387 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
388 #endif
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
389
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
390
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
391 // MMX column IDCT
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
392 static inline void idct_col (int16_t * col, int offset)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
393 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
394 #define T1 13036
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
395 #define T2 27146
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
396 #define T3 43790
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
397 #define C4 23170
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
398
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
399 static short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
400 static short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
401 static short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
402 static short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
403
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
404 /* column code adapted from peter gubanov */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
405 /* http://www.elecard.com/peter/idct.shtml */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
406
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
407 movq_m2r (*_T1, mm0); // mm0 = T1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
408
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
409 movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
410 movq_r2r (mm0, mm2); // mm2 = T1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
411
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
412 movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
413 pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
414
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
415 movq_m2r (*_T3, mm5); // mm5 = T3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
416 pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
417
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
418 movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
419 movq_r2r (mm5, mm7); // mm7 = T3-1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
420
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
421 movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
422 psubsw_r2r (mm4, mm0); // mm0 = v17
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
423
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
424 movq_m2r (*_T2, mm4); // mm4 = T2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
425 pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
426
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
427 paddsw_r2r (mm2, mm1); // mm1 = u17
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
428 pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
429
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
430 /* slot */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
431
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
432 movq_r2r (mm4, mm2); // mm2 = T2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
433 paddsw_r2r (mm3, mm5); // mm5 = T3*x3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
434
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
435 pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
436 paddsw_r2r (mm6, mm7); // mm7 = T3*x5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
437
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
438 psubsw_r2r (mm6, mm5); // mm5 = v35
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
439 paddsw_r2r (mm3, mm7); // mm7 = u35
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
440
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
441 movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
442 movq_r2r (mm0, mm6); // mm6 = v17
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
443
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
444 pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
445 psubsw_r2r (mm5, mm0); // mm0 = b3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
446
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
447 psubsw_r2r (mm3, mm4); // mm4 = v26
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
448 paddsw_r2r (mm6, mm5); // mm5 = v12
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
449
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
450 movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
451 movq_r2r (mm1, mm6); // mm6 = u17
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
452
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
453 paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
454 paddsw_r2r (mm7, mm6); // mm6 = b0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
455
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
456 psubsw_r2r (mm7, mm1); // mm1 = u12
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
457 movq_r2r (mm1, mm7); // mm7 = u12
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
458
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
459 movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
460 paddsw_r2r (mm5, mm1); // mm1 = u12+v12
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
461
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
462 movq_m2r (*_C4, mm0); // mm0 = C4/2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
463 psubsw_r2r (mm5, mm7); // mm7 = u12-v12
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
464
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
465 movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
466 pmulhw_r2r (mm0, mm1); // mm1 = b1/2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
467
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
468 movq_r2r (mm4, mm6); // mm6 = v26
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
469 pmulhw_r2r (mm0, mm7); // mm7 = b2/2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
470
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
471 movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
472 movq_r2r (mm3, mm0); // mm0 = x0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
473
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
474 psubsw_r2r (mm5, mm3); // mm3 = v04
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
475 paddsw_r2r (mm5, mm0); // mm0 = u04
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
476
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
477 paddsw_r2r (mm3, mm4); // mm4 = a1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
478 movq_r2r (mm0, mm5); // mm5 = u04
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
479
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
480 psubsw_r2r (mm6, mm3); // mm3 = a2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
481 paddsw_r2r (mm2, mm5); // mm5 = a0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
482
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
483 paddsw_r2r (mm1, mm1); // mm1 = b1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
484 psubsw_r2r (mm2, mm0); // mm0 = a3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
485
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
486 paddsw_r2r (mm7, mm7); // mm7 = b2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
487 movq_r2r (mm3, mm2); // mm2 = a2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
488
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
489 movq_r2r (mm4, mm6); // mm6 = a1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
490 paddsw_r2r (mm7, mm3); // mm3 = a2+b2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
491
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
492 psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
493 paddsw_r2r (mm1, mm4); // mm4 = a1+b1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
494
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
495 psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
496 psubsw_r2r (mm1, mm6); // mm6 = a1-b1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
497
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
498 movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
499 psubsw_r2r (mm7, mm2); // mm2 = a2-b2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
500
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
501 psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
502 movq_r2r (mm5, mm7); // mm7 = a0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
503
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
504 movq_r2m (mm4, *(col+offset+1*8)); // save y1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
505 psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
506
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
507 movq_r2m (mm3, *(col+offset+2*8)); // save y2
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
508 paddsw_r2r (mm1, mm5); // mm5 = a0+b0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
509
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
510 movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
511 psubsw_r2r (mm1, mm7); // mm7 = a0-b0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
512
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
513 psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
514 movq_r2r (mm0, mm3); // mm3 = a3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
515
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
516 movq_r2m (mm2, *(col+offset+5*8)); // save y5
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
517 psubsw_r2r (mm4, mm3); // mm3 = a3-b3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
518
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
519 psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
520 paddsw_r2r (mm0, mm4); // mm4 = a3+b3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
521
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
522 movq_r2m (mm5, *(col+offset+0*8)); // save y0
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
523 psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
524
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
525 movq_r2m (mm6, *(col+offset+6*8)); // save y6
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
526 psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
527
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
528 movq_r2m (mm7, *(col+offset+7*8)); // save y7
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
529
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
530 movq_r2m (mm3, *(col+offset+4*8)); // save y4
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
531
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
532 movq_r2m (mm4, *(col+offset+3*8)); // save y3
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
533 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
534
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
535
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
536 static int32_t rounder0[] ATTR_ALIGN(8) =
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
537 rounder ((1 << (COL_SHIFT - 1)) - 0.5);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
538 static int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
539 static int32_t rounder1[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
540 rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
541 static int32_t rounder7[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
542 rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
543 static int32_t rounder2[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
544 rounder (0.60355339059); /* C2 * (C6+C2)/2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
545 static int32_t rounder6[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
546 rounder (-0.25); /* C2 * (C6-C2)/2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
547 static int32_t rounder3[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
548 rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
549 static int32_t rounder5[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
550 rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
551
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
552
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * declare_idct (idct, table, idct_row_head, idct_row, idct_row_tail,
 *               idct_row_mid)
 *
 * Expands to the definition of a public function
 *
 *     void idct (int16_t * block);
 *
 * that performs a full 8x8 inverse DCT on block, in place.
 *
 *   idct           - name of the function to define
 *   table          - macro that turns seven coefficients into the
 *                    initializer of a row coefficient table
 *   idct_row_head  - loads the first row
 *   idct_row       - row transform proper (coefficient table, rounder)
 *   idct_row_mid   - takes (block, 0*8-style offset of the row just done,
 *                    offset of the next row, table of the next row);
 *                    presumably stores the former and loads the latter
 *   idct_row_tail  - finishes the last row
 *
 * Rows are processed in the order 0, 4, 1, 7, 2, 6, 3, 5 so that rows
 * sharing a coefficient table (04, 17, 26, 35) are adjacent.  The two
 * idct_col calls then transform columns 0-3 and 4-7.
 *
 * The function is deliberately not declared "inline": with C99/C11 inline
 * semantics a plain "inline" definition does not emit an external symbol,
 * which breaks both out-of-file users and non-inlined calls in this file.
 */
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
void idct (int16_t * block)						\
{									\
    static int16_t table04[] ATTR_ALIGN(16) =				\
	table (22725, 21407, 19266, 16384, 12873,  8867, 4520);		\
    static int16_t table17[] ATTR_ALIGN(16) =				\
	table (31521, 29692, 26722, 22725, 17855, 12299, 6270);		\
    static int16_t table26[] ATTR_ALIGN(16) =				\
	table (29692, 27969, 25172, 21407, 16819, 11585, 5906);		\
    static int16_t table35[] ATTR_ALIGN(16) =				\
	table (26722, 25172, 22654, 19266, 15137, 10426, 5315);		\
									\
    idct_row_head (block, 0*8, table04);				\
    idct_row (table04, rounder0);					\
    idct_row_mid (block, 0*8, 4*8, table04);				\
    idct_row (table04, rounder4);					\
    idct_row_mid (block, 4*8, 1*8, table17);				\
    idct_row (table17, rounder1);					\
    idct_row_mid (block, 1*8, 7*8, table17);				\
    idct_row (table17, rounder7);					\
    idct_row_mid (block, 7*8, 2*8, table26);				\
    idct_row (table26, rounder2);					\
    idct_row_mid (block, 2*8, 6*8, table26);				\
    idct_row (table26, rounder6);					\
    idct_row_mid (block, 6*8, 3*8, table35);				\
    idct_row (table35, rounder3);					\
    idct_row_mid (block, 3*8, 5*8, table35);				\
    idct_row (table35, rounder5);					\
    idct_row_tail (block, 5*8);						\
									\
    idct_col (block, 0);						\
    idct_col (block, 4);						\
}
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
586
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
587
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * COPY_MMX (offset, r0, r1, r2) - one pipelined step of block_copy.
 *
 * Uses the variables block, dest and stride of the enclosing function.
 *   - loads the eight int16_t values at block+offset into r0 (first four)
 *     and r1 (last four) and packs them into eight unsigned-saturated
 *     bytes in r0;
 *   - advances dest by stride and stores r2 there, r2 being the row that
 *     was packed by the previous step.
 *
 * r0, r1 and r2 are MMX register names (they are pasted into the asm
 * text, so they must not be parenthesized); offset is an ordinary
 * expression.
 */
#define COPY_MMX(offset,r0,r1,r2)		\
do {						\
    movq_m2r (*(block+(offset)), r0);		\
    dest += stride;				\
    movq_m2r (*(block+(offset)+4), r1);		\
    movq_r2m (r2, *dest);			\
    packuswb_r2r (r1, r0);			\
} while (0)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
596
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
597 static void block_copy (int16_t * block, uint8_t * dest, int stride)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
598 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
599 movq_m2r (*(block+0*8), mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
600 movq_m2r (*(block+0*8+4), mm1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
601 movq_m2r (*(block+1*8), mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
602 packuswb_r2r (mm1, mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
603 movq_m2r (*(block+1*8+4), mm3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
604 movq_r2m (mm0, *dest);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
605 packuswb_r2r (mm3, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
606 COPY_MMX (2*8, mm0, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
607 COPY_MMX (3*8, mm2, mm3, mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
608 COPY_MMX (4*8, mm0, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
609 COPY_MMX (5*8, mm2, mm3, mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
610 COPY_MMX (6*8, mm0, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
611 COPY_MMX (7*8, mm2, mm3, mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
612 movq_r2m (mm2, *(dest+stride));
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
613 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
614
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
615
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * ADD_MMX (offset, r1, r2, r3, r4) - one pipelined step of block_add.
 *
 * Uses the variables block, dest and stride of the enclosing function and
 * requires mm0 to be all zeroes.
 *   - packs the previously computed sums r3:r4 into eight
 *     unsigned-saturated bytes, advances dest by stride and stores them;
 *   - loads the eight destination bytes two rows below the old dest,
 *     widens them to 16 bits (r1 = first four, r2 = last four) by
 *     interleaving with mm0, and adds the eight int16_t values at
 *     block+offset with signed saturation.
 *
 * r1..r4 are MMX register names (pasted into the asm text, so they must
 * not be parenthesized); offset is an ordinary expression.
 */
#define ADD_MMX(offset,r1,r2,r3,r4)		\
do {						\
    movq_m2r (*(dest+2*stride), r1);		\
    packuswb_r2r (r4, r3);			\
    movq_r2r (r1, r2);				\
    dest += stride;				\
    movq_r2m (r3, *dest);			\
    punpcklbw_r2r (mm0, r1);			\
    paddsw_m2r (*(block+(offset)), r1);		\
    punpckhbw_r2r (mm0, r2);			\
    paddsw_m2r (*(block+(offset)+4), r2);	\
} while (0)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
628
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
629 static void block_add (int16_t * block, uint8_t * dest, int stride)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
630 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
631 movq_m2r (*dest, mm1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
632 pxor_r2r (mm0, mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
633 movq_m2r (*(dest+stride), mm3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
634 movq_r2r (mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
635 punpcklbw_r2r (mm0, mm1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
636 movq_r2r (mm3, mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
637 paddsw_m2r (*(block+0*8), mm1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
638 punpckhbw_r2r (mm0, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
639 paddsw_m2r (*(block+0*8+4), mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
640 punpcklbw_r2r (mm0, mm3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
641 paddsw_m2r (*(block+1*8), mm3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
642 packuswb_r2r (mm2, mm1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
643 punpckhbw_r2r (mm0, mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
644 movq_r2m (mm1, *dest);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
645 paddsw_m2r (*(block+1*8+4), mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
646 ADD_MMX (2*8, mm1, mm2, mm3, mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
647 ADD_MMX (3*8, mm3, mm4, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
648 ADD_MMX (4*8, mm1, mm2, mm3, mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
649 ADD_MMX (5*8, mm3, mm4, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
650 ADD_MMX (6*8, mm1, mm2, mm3, mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
651 ADD_MMX (7*8, mm3, mm4, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
652 packuswb_r2r (mm4, mm3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
653 movq_r2m (mm3, *(dest+stride));
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
654 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
655
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
656
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
657 declare_idct (mmxext_idct, mmxext_table,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
658 mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
659
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * MMXEXT variant: inverse-transform block in place, then write it to dest
 * as clamped bytes (see block_copy).
 *
 * block  - 8x8 int16_t coefficients; overwritten with the IDCT result
 * dest   - top-left byte of the destination 8x8 area
 * stride - distance in bytes between destination rows
 */
void idct_block_copy_mmxext (int16_t * block, uint8_t * dest, int stride)
{
    mmxext_idct (block);

    block_copy (block, dest, stride);
}
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
665
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * MMXEXT variant: inverse-transform block in place, then add it to the
 * bytes already at dest with clamping (see block_add).
 *
 * block  - 8x8 int16_t coefficients; overwritten with the IDCT result
 * dest   - top-left byte of the 8x8 area to update
 * stride - distance in bytes between rows of dest
 */
void idct_block_add_mmxext (int16_t * block, uint8_t * dest, int stride)
{
    mmxext_idct (block);

    block_add (block, dest, stride);
}
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
671
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
672
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * Invokes the declare_idct macro (defined outside this excerpt) with the
 * plain-MMX table and row helpers.
 * NOTE(review): presumably this expands to the definition of mmx_idct(),
 * which the idct_block_*_mmx wrappers below call -- confirm against the
 * macro body.
 */
673 declare_idct (mmx_idct, mmx_table,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
674 mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
675
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * idct_block_copy_mmx: call mmx_idct() on `block`, then hand the same block
 * to block_copy() together with `dest` and `stride`.
 *
 *   block  - int16_t buffer passed first to mmx_idct, then to block_copy
 *   dest   - uint8_t destination pointer forwarded to block_copy
 *   stride - forwarded unchanged to block_copy
 *
 * Same shape as idct_block_copy_mmxext, but using mmx_idct.
 */
676 void idct_block_copy_mmx (int16_t * block, uint8_t * dest, int stride)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
677 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
678 mmx_idct (block);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
679 block_copy (block, dest, stride);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
680 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
681
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * idct_block_add_mmx: call mmx_idct() on `block`, then hand the same block
 * to block_add() together with `dest` and `stride`.
 *
 *   block  - int16_t buffer passed first to mmx_idct, then to block_add
 *   dest   - uint8_t destination pointer forwarded to block_add
 *   stride - forwarded unchanged to block_add
 *
 * Same shape as idct_block_add_mmxext, but using mmx_idct.
 */
682 void idct_block_add_mmx (int16_t * block, uint8_t * dest, int stride)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
683 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
684 mmx_idct (block);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
685 block_add (block, dest, stride);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
686 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
687
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
688
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * idct_mmx_init: rewrite all 64 entries of the external scan_norm and
 * scan_alt tables in place so that they index the reordered input layout
 * used by the mmx/mmxext idct.
 *
 * Takes no arguments and returns nothing; its only effect is the in-place
 * modification of the two tables.
 *
 * NOTE(review): because each entry is read, permuted and written back, a
 * second call would permute already-permuted entries -- confirm that callers
 * invoke this exactly once.
 */
689 void idct_mmx_init (void)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
690 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
691 extern uint8_t scan_norm[64];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
692 extern uint8_t scan_alt[64];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
693 int i, j;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
694
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
695 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
696
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * Per-entry permutation of the low six bits of j:
 *   (j & 0x38)       keeps bits 3..5 where they are,
 *   ((j & 6) >> 1)   moves bits 1..2 down to bits 0..1,
 *   ((j & 1) << 2)   moves bit 0 up to bit 2.
 * Bits 6..7 of the original entry are discarded by the masks.
 */
697 for (i = 0; i < 64; i++) {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
698 j = scan_norm[i];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
699 scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/* identical permutation applied to the alternate scan table */
700 j = scan_alt[i];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
701 scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
702 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
703 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
704
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
705 #endif