annotate i386/idct_mmx.c @ 6528:28d0e7dac45d libavcodec

The pld instruction is used in libavcodec/armv4l/dsputil_arm_s.S, which can be used on armv4, which doesn't support this instruction. Furthermore, pld is a nop on some armv5 processors like the arm926. Detect if pld is supported and have the preprocessor remove it when it's not supported. Fixes issue 393. Patch by matthieu castet, castet.matthieu free fr
author diego
date Mon, 24 Mar 2008 11:49:59 +0000
parents 0cd10ee0ecf4
children f7cbb7733146
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
1 /*
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
2 * idct_mmx.c
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
3 * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
4 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
5 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
6 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
7 * mpeg2dec is free software; you can redistribute it and/or modify
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
8 * it under the terms of the GNU General Public License as published by
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
9 * the Free Software Foundation; either version 2 of the License, or
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
10 * (at your option) any later version.
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
11 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
12 * mpeg2dec is distributed in the hope that it will be useful,
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
15 * GNU General Public License for more details.
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
16 *
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
17 * You should have received a copy of the GNU General Public License
4384
65fd98452a4e Fix some more license headers.
diego
parents: 3717
diff changeset
18 * along with mpeg2dec; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
20 */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
21
2817
b128802eb77b libavutil: Utility code from libavcodec moved to a separate library.
al
parents: 2754
diff changeset
22 #include "common.h"
5010
d5ba514e3f4a Add libavcodec to compiler include flags in order to simplify header
diego
parents: 4384
diff changeset
23 #include "dsputil.h"
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
24
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
25 #include "mmx.h"
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
26
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
27 #define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
28
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
29 #define ROW_SHIFT 11
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
30 #define COL_SHIFT 6
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
31
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
32 #define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
33 #define rounder(bias) {round (bias), round (bias)}
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
34
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
35 #if 0
5129
0244bba24b43 misc typo fixes
diego
parents: 5010
diff changeset
36 /* C row IDCT - it is just here to document the MMXEXT and MMX versions */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
37 static inline void idct_row (int16_t * row, int offset,
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
38 int16_t * table, int32_t * rounder)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
39 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
40 int C1, C2, C3, C4, C5, C6, C7;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
41 int a0, a1, a2, a3, b0, b1, b2, b3;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
42
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
43 row += offset;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
44
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
45 C1 = table[1];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
46 C2 = table[2];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
47 C3 = table[3];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
48 C4 = table[4];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
49 C5 = table[5];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
50 C6 = table[6];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
51 C7 = table[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
52
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
53 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
54 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
55 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
56 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
57
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
58 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
59 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
60 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
61 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
62
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
63 row[0] = (a0 + b0) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
64 row[1] = (a1 + b1) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
65 row[2] = (a2 + b2) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
66 row[3] = (a3 + b3) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
67 row[4] = (a3 - b3) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
68 row[5] = (a2 - b2) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
69 row[6] = (a1 - b1) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
70 row[7] = (a0 - b0) >> ROW_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
71 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
72 #endif
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
73
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
74
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
75 /* MMXEXT row IDCT */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
76
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
77 #define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
78 c4, c6, c4, c6, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
79 c1, c3, -c1, -c5, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
80 c5, c7, c3, -c7, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
81 c4, -c6, c4, -c6, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
82 -c4, c2, c4, -c2, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
83 c5, -c1, c3, -c1, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
84 c7, c3, c7, -c5 }
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
85
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgotten from some const patch)
michael
parents: 2817
diff changeset
86 static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
87 {
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
88 movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
89
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
90 movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
91 movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
92
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
93 movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
94 movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
95
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
96 movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
97 pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
98
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
99 pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
100 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
101
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgotten from some const patch)
michael
parents: 2817
diff changeset
102 static inline void mmxext_row (const int16_t * table, const int32_t * rounder)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
103 {
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
104 movq_m2r (*(table+8), mm1); /* mm1 = -C5 -C1 C3 C1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
105 pmaddwd_r2r (mm2, mm4); /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
106
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
107 pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
108 pshufw_r2r (mm6, mm6, 0x4e); /* mm6 = x3 x1 x7 x5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
109
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
110 movq_m2r (*(table+12), mm7); /* mm7 = -C7 C3 C7 C5 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
111 pmaddwd_r2r (mm5, mm1); /* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
112
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
113 paddd_m2r (*rounder, mm3); /* mm3 += rounder */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
114 pmaddwd_r2r (mm6, mm7); /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
115
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
116 pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
117 paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
118
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
119 pmaddwd_m2r (*(table+24), mm5); /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
120 movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
121
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
122 pmaddwd_m2r (*(table+28), mm6); /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
123 paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
124
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
125 paddd_m2r (*rounder, mm0); /* mm0 += rounder */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
126 psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
127
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
128 psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
129 paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
130
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
131 paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
132 psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
133
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
134 paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
135 movq_r2r (mm0, mm4); /* mm4 = a3 a2 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
136
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
137 paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
138 psubd_r2r (mm5, mm4); /* mm4 = a3-b3 a2-b2 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
139 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
140
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
141 static inline void mmxext_row_tail (int16_t * row, int store)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
142 {
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
143 psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
144
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
145 psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
146
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
147 packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
148
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
149 packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
150
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
151 movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
152 pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
153
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
154 /* slot */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
155
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
156 movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
157 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
158
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
159 static inline void mmxext_row_mid (int16_t * row, int store,
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
160 int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
161 {
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
162 movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
163 psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
164
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
165 movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
166 psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
167
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
168 packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
169 movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
170
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
171 packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
172 movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
173
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
174 movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
175 pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
176
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
177 movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
178 movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
179
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
180 pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
181
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
182 movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
183 pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
184 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
185
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
186
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
187 /* MMX row IDCT */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
188
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
189 #define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
190 c4, c6, -c4, -c2, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
191 c1, c3, c3, -c7, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
192 c5, c7, -c1, -c5, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
193 c4, -c6, c4, -c2, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
194 -c4, c2, c4, -c6, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
195 c5, -c1, c7, -c5, \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
196 c7, c3, c3, -c1 }
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
197
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgotten from some const patch)
michael
parents: 2817
diff changeset
198 static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
199 {
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
200 movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
201
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
202 movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
203 movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
204
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
205 movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
206 movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
207
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
208 punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
209
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
210 movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
211 pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
212
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
213 movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
214 punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
215 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
216
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgotten from some const patch)
michael
parents: 2817
diff changeset
217 static inline void mmx_row (const int16_t * table, const int32_t * rounder)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
218 {
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
219 pmaddwd_r2r (mm2, mm4); /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
220 punpckldq_r2r (mm5, mm5); /* mm5 = x3 x1 x3 x1 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
221
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
222 pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
223 punpckhdq_r2r (mm6, mm6); /* mm6 = x7 x5 x7 x5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
224
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
225 movq_m2r (*(table+12), mm7); /* mm7 = -C5 -C1 C7 C5 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
226 pmaddwd_r2r (mm5, mm1); /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
227
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
228 paddd_m2r (*rounder, mm3); /* mm3 += rounder */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
229 pmaddwd_r2r (mm6, mm7); /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
230
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
231 pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
232 paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
233
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
234 pmaddwd_m2r (*(table+24), mm5); /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
235 movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
236
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
237 pmaddwd_m2r (*(table+28), mm6); /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
238 paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
239
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
240 paddd_m2r (*rounder, mm0); /* mm0 += rounder */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
241 psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
242
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
243 psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
244 paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
245
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
246 paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
247 psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
248
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
249 paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
250 movq_r2r (mm0, mm7); /* mm7 = a3 a2 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
251
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
252 paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
253 psubd_r2r (mm5, mm7); /* mm7 = a3-b3 a2-b2 + rounder */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
254 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
255
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
256 static inline void mmx_row_tail (int16_t * row, int store)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
257 {
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
258 psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
259
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
260 psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
261
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
262 packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
263
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
264 packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
265
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
266 movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
267 movq_r2r (mm7, mm4); /* mm4 = y6 y7 y4 y5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
268
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
269 pslld_i2r (16, mm7); /* mm7 = y7 0 y5 0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
270
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
271 psrld_i2r (16, mm4); /* mm4 = 0 y6 0 y4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
272
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
273 por_r2r (mm4, mm7); /* mm7 = y7 y6 y5 y4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
274
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
275 /* slot */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
276
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
277 movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
278 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
279
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
280 static inline void mmx_row_mid (int16_t * row, int store,
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
281 int offset, const int16_t * table)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
282 {
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
283 movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
284 psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
285
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
286 movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
287 psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
288
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
289 packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
290 movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
291
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
292 packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
293 movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
294
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
295 movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
296 movq_r2r (mm7, mm1); /* mm1 = y6 y7 y4 y5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
297
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
298 punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
299 psrld_i2r (16, mm7); /* mm7 = 0 y6 0 y4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
300
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
301 movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
302 pslld_i2r (16, mm1); /* mm1 = y7 0 y5 0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
303
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
304 movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
305 por_r2r (mm1, mm7); /* mm7 = y7 y6 y5 y4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
306
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
307 movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
308 punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
309
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
310 movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
311 pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
312 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
313
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
314
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
315 #if 0
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
316 /* C column IDCT - it is just here to document the MMXEXT and MMX versions */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
317 static inline void idct_col (int16_t * col, int offset)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
318 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
319 /* multiplication - as implemented on mmx */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
320 #define F(c,x) (((c) * (x)) >> 16)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
321
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
322 /* saturation - it helps us handle torture test cases */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
323 #define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
324
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
325 int16_t x0, x1, x2, x3, x4, x5, x6, x7;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
326 int16_t y0, y1, y2, y3, y4, y5, y6, y7;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
327 int16_t a0, a1, a2, a3, b0, b1, b2, b3;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
328 int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
329
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
330 col += offset;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
331
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
332 x0 = col[0*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
333 x1 = col[1*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
334 x2 = col[2*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
335 x3 = col[3*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
336 x4 = col[4*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
337 x5 = col[5*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
338 x6 = col[6*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
339 x7 = col[7*8];
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
340
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
341 u04 = S (x0 + x4);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
342 v04 = S (x0 - x4);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
343 u26 = S (F (T2, x6) + x2);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
344 v26 = S (F (T2, x2) - x6);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
345
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
346 a0 = S (u04 + u26);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
347 a1 = S (v04 + v26);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
348 a2 = S (v04 - v26);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
349 a3 = S (u04 - u26);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
350
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
351 u17 = S (F (T1, x7) + x1);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
352 v17 = S (F (T1, x1) - x7);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
353 u35 = S (F (T3, x5) + x3);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
354 v35 = S (F (T3, x3) - x5);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
355
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
356 b0 = S (u17 + u35);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
357 b3 = S (v17 - v35);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
358 u12 = S (u17 - u35);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
359 v12 = S (v17 + v35);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
360 u12 = S (2 * F (C4, u12));
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
361 v12 = S (2 * F (C4, v12));
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
362 b1 = S (u12 + v12);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
363 b2 = S (u12 - v12);
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
364
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
365 y0 = S (a0 + b0) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
366 y1 = S (a1 + b1) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
367 y2 = S (a2 + b2) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
368 y3 = S (a3 + b3) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
369
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
370 y4 = S (a3 - b3) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
371 y5 = S (a2 - b2) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
372 y6 = S (a1 - b1) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
373 y7 = S (a0 - b0) >> COL_SHIFT;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
374
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
375 col[0*8] = y0;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
376 col[1*8] = y1;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
377 col[2*8] = y2;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
378 col[3*8] = y3;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
379 col[4*8] = y4;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
380 col[5*8] = y5;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
381 col[6*8] = y6;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
382 col[7*8] = y7;
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
383 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
384 #endif
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
385
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
386
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
387 /* MMX column IDCT */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
388 static inline void idct_col (int16_t * col, int offset)
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
389 {
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
390 #define T1 13036
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
391 #define T2 27146
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
392 #define T3 43790
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
393 #define C4 23170
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
394
6151
2af9eab80fad Rename illegal identifiers, _ followed by capital is reserved for the system.
diego
parents: 5963
diff changeset
395 static const short t1_vector[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
2af9eab80fad Rename illegal identifiers, _ followed by capital is reserved for the system.
diego
parents: 5963
diff changeset
396 static const short t2_vector[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
2af9eab80fad Rename illegal identifiers, _ followed by capital is reserved for the system.
diego
parents: 5963
diff changeset
397 static const short t3_vector[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
2af9eab80fad Rename illegal identifiers, _ followed by capital is reserved for the system.
diego
parents: 5963
diff changeset
398 static const short c4_vector[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
399
5963
80103098c797 spelling
vitor
parents: 5129
diff changeset
400 /* column code adapted from Peter Gubanov */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
401 /* http://www.elecard.com/peter/idct.shtml */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
402
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
403 movq_m2r (*t1_vector, mm0); /* mm0 = T1 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
404
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
405 movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
406 movq_r2r (mm0, mm2); /* mm2 = T1 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
407
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
408 movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
409 pmulhw_r2r (mm1, mm0); /* mm0 = T1*x1 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
410
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
411 movq_m2r (*t3_vector, mm5); /* mm5 = T3 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
412 pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
413
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
414 movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
415 movq_r2r (mm5, mm7); /* mm7 = T3-1 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
416
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
417 movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
418 psubsw_r2r (mm4, mm0); /* mm0 = v17 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
419
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
420 movq_m2r (*t2_vector, mm4); /* mm4 = T2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
421 pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
422
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
423 paddsw_r2r (mm2, mm1); /* mm1 = u17 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
424 pmulhw_r2r (mm6, mm7); /* mm7 = (T3-1)*x5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
425
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
426 /* slot */
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
427
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
428 movq_r2r (mm4, mm2); /* mm2 = T2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
429 paddsw_r2r (mm3, mm5); /* mm5 = T3*x3 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
430
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
431 pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
432 paddsw_r2r (mm6, mm7); /* mm7 = T3*x5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
433
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
434 psubsw_r2r (mm6, mm5); /* mm5 = v35 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
435 paddsw_r2r (mm3, mm7); /* mm7 = u35 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
436
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
437 movq_m2r (*(col+offset+6*8), mm3); /* mm3 = x6 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
438 movq_r2r (mm0, mm6); /* mm6 = v17 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
439
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
440 pmulhw_r2r (mm3, mm2); /* mm2 = T2*x6 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
441 psubsw_r2r (mm5, mm0); /* mm0 = b3 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
442
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
443 psubsw_r2r (mm3, mm4); /* mm4 = v26 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
444 paddsw_r2r (mm6, mm5); /* mm5 = v12 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
445
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
446 movq_r2m (mm0, *(col+offset+3*8)); /* save b3 in scratch0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
447 movq_r2r (mm1, mm6); /* mm6 = u17 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
448
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
449 paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
450 paddsw_r2r (mm7, mm6); /* mm6 = b0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
451
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
452 psubsw_r2r (mm7, mm1); /* mm1 = u12 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
453 movq_r2r (mm1, mm7); /* mm7 = u12 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
454
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
455 movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
456 paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
457
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
458 movq_m2r (*c4_vector, mm0); /* mm0 = C4/2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
459 psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
460
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
461 movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
462 pmulhw_r2r (mm0, mm1); /* mm1 = b1/2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
463
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
464 movq_r2r (mm4, mm6); /* mm6 = v26 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
465 pmulhw_r2r (mm0, mm7); /* mm7 = b2/2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
466
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
467 movq_m2r (*(col+offset+4*8), mm5); /* mm5 = x4 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
468 movq_r2r (mm3, mm0); /* mm0 = x0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
469
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
470 psubsw_r2r (mm5, mm3); /* mm3 = v04 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
471 paddsw_r2r (mm5, mm0); /* mm0 = u04 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
472
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
473 paddsw_r2r (mm3, mm4); /* mm4 = a1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
474 movq_r2r (mm0, mm5); /* mm5 = u04 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
475
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
476 psubsw_r2r (mm6, mm3); /* mm3 = a2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
477 paddsw_r2r (mm2, mm5); /* mm5 = a0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
478
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
479 paddsw_r2r (mm1, mm1); /* mm1 = b1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
480 psubsw_r2r (mm2, mm0); /* mm0 = a3 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
481
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
482 paddsw_r2r (mm7, mm7); /* mm7 = b2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
483 movq_r2r (mm3, mm2); /* mm2 = a2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
484
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
485 movq_r2r (mm4, mm6); /* mm6 = a1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
486 paddsw_r2r (mm7, mm3); /* mm3 = a2+b2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
487
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
488 psraw_i2r (COL_SHIFT, mm3); /* mm3 = y2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
489 paddsw_r2r (mm1, mm4); /* mm4 = a1+b1 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
490
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
491 psraw_i2r (COL_SHIFT, mm4); /* mm4 = y1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
492 psubsw_r2r (mm1, mm6); /* mm6 = a1-b1 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
493
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
494 movq_m2r (*(col+offset+5*8), mm1); /* mm1 = b0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
495 psubsw_r2r (mm7, mm2); /* mm2 = a2-b2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
496
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
497 psraw_i2r (COL_SHIFT, mm6); /* mm6 = y6 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
498 movq_r2r (mm5, mm7); /* mm7 = a0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
499
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
500 movq_r2m (mm4, *(col+offset+1*8)); /* save y1 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
501 psraw_i2r (COL_SHIFT, mm2); /* mm2 = y5 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
502
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
503 movq_r2m (mm3, *(col+offset+2*8)); /* save y2 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
504 paddsw_r2r (mm1, mm5); /* mm5 = a0+b0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
505
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
506 movq_m2r (*(col+offset+3*8), mm4); /* mm4 = b3 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
507 psubsw_r2r (mm1, mm7); /* mm7 = a0-b0 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
508
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
509 psraw_i2r (COL_SHIFT, mm5); /* mm5 = y0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
510 movq_r2r (mm0, mm3); /* mm3 = a3 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
511
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
512 movq_r2m (mm2, *(col+offset+5*8)); /* save y5 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
513 psubsw_r2r (mm4, mm3); /* mm3 = a3-b3 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
514
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
515 psraw_i2r (COL_SHIFT, mm7); /* mm7 = y7 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
516 paddsw_r2r (mm0, mm4); /* mm4 = a3+b3 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
517
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
518 movq_r2m (mm5, *(col+offset+0*8)); /* save y0 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
519 psraw_i2r (COL_SHIFT, mm3); /* mm3 = y4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
520
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
521 movq_r2m (mm6, *(col+offset+6*8)); /* save y6 */
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
522 psraw_i2r (COL_SHIFT, mm4); /* mm4 = y3 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
523
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
524 movq_r2m (mm7, *(col+offset+7*8)); /* save y7 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
525
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
526 movq_r2m (mm3, *(col+offset+4*8)); /* save y4 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
527
6348
0cd10ee0ecf4 cosmetics: Replace // by /* */ comments.
diego
parents: 6151
diff changeset
528 movq_r2m (mm4, *(col+offset+3*8)); /* save y3 */
436
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
529
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
530 #undef T1
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
531 #undef T2
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
532 #undef T3
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
533 #undef C4
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
534 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
535
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
536 static const int32_t rounder0[] ATTR_ALIGN(8) =
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
537 rounder ((1 << (COL_SHIFT - 1)) - 0.5);
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
538 static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
539 static const int32_t rounder1[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
540 rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
541 static const int32_t rounder7[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
542 rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
543 static const int32_t rounder2[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
544 rounder (0.60355339059); /* C2 * (C6+C2)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
545 static const int32_t rounder6[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
546 rounder (-0.25); /* C2 * (C6-C2)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
547 static const int32_t rounder3[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
548 rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
2854
00ff749b33b6 add consts (this was in my local tree, dunno where it came from, probably forgoten from some const patch)
michael
parents: 2817
diff changeset
549 static const int32_t rounder5[] ATTR_ALIGN(8) =
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
550 rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
551
436
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
552 #undef COL_SHIFT
35de17dd6ed8 * undefine local defines when they are no longer needed
kabi
parents: 76
diff changeset
553 #undef ROW_SHIFT
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
554
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
/*
 * declare_idct(idct, table, idct_row_head, idct_row, idct_row_tail, idct_row_mid)
 *
 * Expands to the definition of `void idct (int16_t * block)`.
 *
 *   idct           name of the function being generated
 *   table          macro that builds each of the four coefficient tables
 *                  (table04, table17, table26, table35) from seven values
 *   idct_row_head  invoked once, naming the first row and its table
 *   idct_row       invoked once per row with that row's table and rounderN
 *   idct_row_mid   invoked between rows, naming the previous row, the next
 *                  row and the next row's table
 *   idct_row_tail  invoked once, naming the last row
 *
 * Rows are visited at offsets 0, 4, 1, 7, 2, 6, 3, 5 (each times 8), so the
 * two rows sharing a table (0/4, 1/7, 2/6, 3/5) are processed back to back.
 * After the row sequence, idct_col is invoked for offsets 0 and 4.
 *
 * NOTE(review): the head/mid/tail macros presumably load and store the rows
 * around each idct_row computation - confirm against their definitions,
 * which are outside this view.
 */
555 #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
556 void idct (int16_t * block) \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
557 { \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
558 static const int16_t table04[] ATTR_ALIGN(16) = \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
559 table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
560 static const int16_t table17[] ATTR_ALIGN(16) = \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
561 table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
562 static const int16_t table26[] ATTR_ALIGN(16) = \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
563 table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
564 static const int16_t table35[] ATTR_ALIGN(16) = \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
565 table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
566 \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
567 idct_row_head (block, 0*8, table04); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
568 idct_row (table04, rounder0); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
569 idct_row_mid (block, 0*8, 4*8, table04); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
570 idct_row (table04, rounder4); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
571 idct_row_mid (block, 4*8, 1*8, table17); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
572 idct_row (table17, rounder1); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
573 idct_row_mid (block, 1*8, 7*8, table17); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
574 idct_row (table17, rounder7); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
575 idct_row_mid (block, 7*8, 2*8, table26); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
576 idct_row (table26, rounder2); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
577 idct_row_mid (block, 2*8, 6*8, table26); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
578 idct_row (table26, rounder6); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
579 idct_row_mid (block, 6*8, 3*8, table35); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
580 idct_row (table35, rounder3); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
581 idct_row_mid (block, 3*8, 5*8, table35); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
582 idct_row (table35, rounder5); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
583 idct_row_tail (block, 5*8); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
584 \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
585 idct_col (block, 0); \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
586 idct_col (block, 4); \
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
587 }
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
588
2024
f65d87bfdd5a some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents: 436
diff changeset
/* Prototypes for the two IDCT entry points that declare_idct generates below.
 * NOTE(review): these take DCTELEM *block while declare_idct emits
 * `int16_t * block`; this only matches if DCTELEM is int16_t - confirm. */
589 void ff_mmx_idct(DCTELEM *block);
f65d87bfdd5a some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents: 436
diff changeset
590 void ff_mmxext_idct(DCTELEM *block);
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
591
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
/* Generate ff_mmxext_idct from declare_idct, using mmxext_table and the
 * mmxext_row_head / mmxext_row / mmxext_row_tail / mmxext_row_mid macros. */
592 declare_idct (ff_mmxext_idct, mmxext_table,
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
593 mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
30
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
594
b8a290072f26 added idct mmx code
glantau
parents:
diff changeset
/* Generate ff_mmx_idct from declare_idct, using mmx_table and the
 * mmx_row_head / mmx_row / mmx_row_tail / mmx_row_mid macros. */
595 declare_idct (ff_mmx_idct, mmx_table,
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2854
diff changeset
596 mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
2745
42d3e9068e32 MMX for H.264 iDCT (adapted from x264)
lorenm
parents: 2024
diff changeset
597