annotate libmpeg2/idct_mmx.c @ 10218:f82646fc1431

Moved video filters to a separate section, moved slave mode section to the tech subdir, random improvements, default indentation reduced. Straight from the LUG camp Felsberg by Jonas and Diego.
author jonas
date Sat, 31 May 2003 16:41:41 +0000
parents 47984e3f54ce
children 2c0b6ec77d39
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
1 /*
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
2 * idct_mmx.c
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
5 *
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
7 * See http://libmpeg2.sourceforge.net/ for updates.
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
8 *
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
9 * mpeg2dec is free software; you can redistribute it and/or modify
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
10 * it under the terms of the GNU General Public License as published by
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
11 * the Free Software Foundation; either version 2 of the License, or
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
12 * (at your option) any later version.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
13 *
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
14 * mpeg2dec is distributed in the hope that it will be useful,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
17 * GNU General Public License for more details.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
18 *
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
19 * You should have received a copy of the GNU General Public License
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
20 * along with this program; if not, write to the Free Software
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
22 */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
23
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
24 #include "config.h"
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
25
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
26 #ifdef ARCH_X86
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
27
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
28 #include <inttypes.h>
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
29
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
30 #include "mpeg2.h"
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
31 #include "mpeg2_internal.h"
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
32 #include "attributes.h"
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
33 #include "mmx.h"
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
34
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
35 #define ROW_SHIFT 11
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
36 #define COL_SHIFT 6
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
37
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
38 #define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
39 #define rounder(bias) {round (bias), round (bias)}
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
40
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
41
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
42 #if 0
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
43 /* C row IDCT - it's just here to document the MMXEXT and MMX versions */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
44 static inline void idct_row (int16_t * row, int offset,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
45 int16_t * table, int32_t * rounder)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
46 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
47 int C1, C2, C3, C4, C5, C6, C7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
48 int a0, a1, a2, a3, b0, b1, b2, b3;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
49
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
50 row += offset;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
51
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
52 C1 = table[1];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
53 C2 = table[2];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
54 C3 = table[3];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
55 C4 = table[4];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
56 C5 = table[5];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
57 C6 = table[6];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
58 C7 = table[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
59
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
60 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
61 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
62 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
63 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
64
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
65 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
66 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
67 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
68 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
69
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
70 row[0] = (a0 + b0) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
71 row[1] = (a1 + b1) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
72 row[2] = (a2 + b2) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
73 row[3] = (a3 + b3) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
74 row[4] = (a3 - b3) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
75 row[5] = (a2 - b2) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
76 row[6] = (a1 - b1) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
77 row[7] = (a0 - b0) >> ROW_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
78 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
79 #endif
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
80
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
81
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
82 /* MMXEXT row IDCT */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
83
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
84 #define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
85 c4, c6, c4, c6, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
86 c1, c3, -c1, -c5, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
87 c5, c7, c3, -c7, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
88 c4, -c6, c4, -c6, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
89 -c4, c2, c4, -c2, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
90 c5, -c1, c3, -c1, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
91 c7, c3, c7, -c5 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
92
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
93 static inline void mmxext_row_head (int16_t * const row, const int offset,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
94 const int16_t * const table)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
95 {
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
96 movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
97
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
98 movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
99 movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
100
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
101 movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
102 movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
103
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
104 movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
105 pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
106
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
107 pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
108 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
109
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
110 static inline void mmxext_row (const int16_t * const table,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
111 const int32_t * const rounder)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
112 {
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
113 movq_m2r (*(table+8), mm1); /* mm1 = -C5 -C1 C3 C1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
114 pmaddwd_r2r (mm2, mm4); /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
115
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
116 pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
117 pshufw_r2r (mm6, mm6, 0x4e); /* mm6 = x3 x1 x7 x5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
118
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
119 movq_m2r (*(table+12), mm7); /* mm7 = -C7 C3 C7 C5 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
120 pmaddwd_r2r (mm5, mm1); /* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
121
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
122 paddd_m2r (*rounder, mm3); /* mm3 += rounder */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
123 pmaddwd_r2r (mm6, mm7); /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
124
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
125 pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
126 paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
127
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
128 pmaddwd_m2r (*(table+24), mm5); /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
129 movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
130
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
131 pmaddwd_m2r (*(table+28), mm6); /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
132 paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
133
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
134 paddd_m2r (*rounder, mm0); /* mm0 += rounder */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
135 psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
136
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
137 psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
138 paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
139
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
140 paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
141 psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
142
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
143 paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
144 movq_r2r (mm0, mm4); /* mm4 = a3 a2 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
145
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
146 paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
147 psubd_r2r (mm5, mm4); /* mm4 = a3-b3 a2-b2 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
148 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
149
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
150 static inline void mmxext_row_tail (int16_t * const row, const int store)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
151 {
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
152 psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
153
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
154 psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
155
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
156 packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
157
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
158 packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
159
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
160 movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
161 pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
162
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
163 /* slot */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
164
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
165 movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
166 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
167
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
168 static inline void mmxext_row_mid (int16_t * const row, const int store,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
169 const int offset,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
170 const int16_t * const table)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
171 {
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
172 movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
173 psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
174
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
175 movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
176 psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
177
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
178 packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
179 movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
180
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
181 packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
182 movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
183
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
184 movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
185 pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
186
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
187 movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
188 movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
189
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
190 pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
191
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
192 movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
193 pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
194 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
195
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
196
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
197 /* MMX row IDCT */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
198
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
199 #define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
200 c4, c6, -c4, -c2, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
201 c1, c3, c3, -c7, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
202 c5, c7, -c1, -c5, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
203 c4, -c6, c4, -c2, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
204 -c4, c2, c4, -c6, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
205 c5, -c1, c7, -c5, \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
206 c7, c3, c3, -c1 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
207
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
208 static inline void mmx_row_head (int16_t * const row, const int offset,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
209 const int16_t * const table)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
210 {
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
211 movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
212
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
213 movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
214 movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
215
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
216 movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
217 movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
218
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
219 punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
220
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
221 movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
222 pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
223
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
224 movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
225 punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
226 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
227
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
228 static inline void mmx_row (const int16_t * const table,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
229 const int32_t * const rounder)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
230 {
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
231 pmaddwd_r2r (mm2, mm4); /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
232 punpckldq_r2r (mm5, mm5); /* mm5 = x3 x1 x3 x1 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
233
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
234 pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
235 punpckhdq_r2r (mm6, mm6); /* mm6 = x7 x5 x7 x5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
236
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
237 movq_m2r (*(table+12), mm7); /* mm7 = -C5 -C1 C7 C5 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
238 pmaddwd_r2r (mm5, mm1); /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
239
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
240 paddd_m2r (*rounder, mm3); /* mm3 += rounder */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
241 pmaddwd_r2r (mm6, mm7); /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
242
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
243 pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
244 paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
245
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
246 pmaddwd_m2r (*(table+24), mm5); /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
247 movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
248
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
249 pmaddwd_m2r (*(table+28), mm6); /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
250 paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
251
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
252 paddd_m2r (*rounder, mm0); /* mm0 += rounder */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
253 psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
254
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
255 psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
256 paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
257
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
258 paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
259 psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
260
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
261 paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
262 movq_r2r (mm0, mm7); /* mm7 = a3 a2 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
263
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
264 paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
265 psubd_r2r (mm5, mm7); /* mm7 = a3-b3 a2-b2 + rounder */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
266 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
267
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
268 static inline void mmx_row_tail (int16_t * const row, const int store)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
269 {
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
270 psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
271
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
272 psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
273
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
274 packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
275
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
276 packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
277
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
278 movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
279 movq_r2r (mm7, mm4); /* mm4 = y6 y7 y4 y5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
280
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
281 pslld_i2r (16, mm7); /* mm7 = y7 0 y5 0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
282
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
283 psrld_i2r (16, mm4); /* mm4 = 0 y6 0 y4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
284
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
285 por_r2r (mm4, mm7); /* mm7 = y7 y6 y5 y4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
286
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
287 /* slot */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
288
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
289 movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
290 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
291
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
/*
 * mmx_row_mid: glue step between two consecutive row transforms.
 * Two jobs are interleaved instruction by instruction:
 *  - finish the row whose results are still in registers: shift mm0/mm7 by
 *    ROW_SHIFT, pack to 16 bits, swap the 16-bit halves so that y4..y7 are
 *    in natural order, and store the 8 outputs at row+store;
 *  - start the next row: load its 8 inputs from row+offset, load the
 *    coefficients at table, table+4 and table+8, and issue the first pmaddwd.
 * row    - base pointer; store and offset are int16_t element offsets into it
 * store  - where the finished row is written (8 values)
 * offset - where the next row is read from (8 values)
 * table  - coefficient table for the next row (table[0..11] are read here)
 * NOTE(review): relies on mm0, mm1, mm3 and mm7 holding the previous row's
 * 32-bit results on entry (only mm0 and mm7 are shifted here); that state is
 * produced outside this block - confirm against the preceding row step.
 */
292 static inline void mmx_row_mid (int16_t * const row, const int store,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
293 const int offset, const int16_t * const table)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
294 {
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
295 movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
296 psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
297
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
298 movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
299 psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
300
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
301 packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
302 movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
303
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
304 packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
305 movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
306
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
307 movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
308 movq_r2r (mm7, mm1); /* mm1 = y6 y7 y4 y5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
309
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
310 punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
311 psrld_i2r (16, mm7); /* mm7 = 0 y6 0 y4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
312
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
313 movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
314 pslld_i2r (16, mm1); /* mm1 = y7 0 y5 0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
315
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
316 movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
317 por_r2r (mm1, mm7); /* mm7 = y7 y6 y5 y4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
318
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
319 movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
320 punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
321
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
322 movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
323 pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
324 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
325
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
326
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
327 #if 0
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
328 /* C column IDCT - it is just here to document the MMXEXT and MMX versions */
/*
 * Reference only: the surrounding #if 0 keeps this from being compiled.
 * Transforms one column in place: after col += offset it reads col[k*8]
 * for k = 0..7 and writes the eight results back to the same slots.
 * F() is a multiply followed by >> 16 and S() clamps to [-32768, 32767];
 * they stand in for the pmulhw and paddsw/psubsw steps of the MMX version.
 * T1, T2, T3, C4 and COL_SHIFT are not defined inside this block.
 * NOTE(review): F and S are never #undef'd, and >> on a negative value is
 * implementation-defined in C - relevant only if this code is ever enabled.
 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
329 static inline void idct_col (int16_t * col, int offset)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
330 {
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
331 /* multiplication - as implemented on mmx */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
332 #define F(c,x) (((c) * (x)) >> 16)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
333
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
334 /* saturation - it helps us handle torture test cases */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
335 #define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
336
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
337 int16_t x0, x1, x2, x3, x4, x5, x6, x7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
338 int16_t y0, y1, y2, y3, y4, y5, y6, y7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
339 int16_t a0, a1, a2, a3, b0, b1, b2, b3;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
340 int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
341
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
342 col += offset;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
343
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
344 x0 = col[0*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
345 x1 = col[1*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
346 x2 = col[2*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
347 x3 = col[3*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
348 x4 = col[4*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
349 x5 = col[5*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
350 x6 = col[6*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
351 x7 = col[7*8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
352
/* even part: a0..a3 are built from x0, x2, x4, x6 */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
353 u04 = S (x0 + x4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
354 v04 = S (x0 - x4);
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
355 u26 = S (F (T2, x6) + x2);
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
356 v26 = S (F (T2, x2) - x6);
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
357
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
358 a0 = S (u04 + u26);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
359 a1 = S (v04 + v26);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
360 a2 = S (v04 - v26);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
361 a3 = S (u04 - u26);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
362
/* odd part: b0..b3 are built from x1, x3, x5, x7 */
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
363 u17 = S (F (T1, x7) + x1);
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
364 v17 = S (F (T1, x1) - x7);
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
365 u35 = S (F (T3, x5) + x3);
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
366 v35 = S (F (T3, x3) - x5);
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
367
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
368 b0 = S (u17 + u35);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
369 b3 = S (v17 - v35);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
370 u12 = S (u17 - u35);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
371 v12 = S (v17 + v35);
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
372 u12 = S (2 * F (C4, u12));
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
373 v12 = S (2 * F (C4, v12));
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
374 b1 = S (u12 + v12);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
375 b2 = S (u12 - v12);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
376
/* outputs: y[k] = a + b for k = 0..3, mirrored as a - b for k = 7..4 */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
377 y0 = S (a0 + b0) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
378 y1 = S (a1 + b1) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
379 y2 = S (a2 + b2) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
380 y3 = S (a3 + b3) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
381
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
382 y4 = S (a3 - b3) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
383 y5 = S (a2 - b2) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
384 y6 = S (a1 - b1) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
385 y7 = S (a0 - b0) >> COL_SHIFT;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
386
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
387 col[0*8] = y0;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
388 col[1*8] = y1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
389 col[2*8] = y2;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
390 col[3*8] = y3;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
391 col[4*8] = y4;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
392 col[5*8] = y5;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
393 col[6*8] = y6;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
394 col[7*8] = y7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
395 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
396 #endif
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
397
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
398
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
399 /* MMX column IDCT */
/*
 * In-place column pass over four adjacent columns at once: every movq moves
 * the four int16_t values at col+offset+k*8, where k = 0..7 selects the row.
 * The register comments use the same names (u.., v.., a., b., y.) as the
 * C reference version kept under #if 0 earlier in this file.
 * Slots 3*8 and 5*8 are reused as scratch for b3 and b0 after x3 and x5 have
 * been loaded, and are overwritten with y3 and y5 before returning.
 * col    - base pointer of the coefficient block
 * offset - int16_t element offset of the first of the four columns
 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
400 static inline void idct_col (int16_t * const col, const int offset)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
401 {
/* fixed point: T1..T3 ~ tan(k*pi/16) * 65536, C4 ~ cos(pi/4) * 32768 */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
402 #define T1 13036
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
403 #define T2 27146
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
404 #define T3 43790
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
405 #define C4 23170
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
406
/*
 * Each constant replicated into four 16-bit lanes so one movq loads it.
 * NOTE(review): T3 (43790) is above 32767, so with a 16-bit short the stored
 * lane is the wrapped value; the "T3-1" comments and the paddsw that adds
 * x3/x5 back below account for that.
 * NOTE(review): identifiers beginning with '_' plus an uppercase letter are
 * reserved in C - consider renaming if this file is ever touched.
 */
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
407 static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
408 static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
409 static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
410 static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
411
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
412 /* column code adapted from peter gubanov */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
413 /* http://www.elecard.com/peter/idct.shtml */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
414
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
415 movq_m2r (*_T1, mm0); /* mm0 = T1 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
416
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
417 movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
418 movq_r2r (mm0, mm2); /* mm2 = T1 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
419
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
420 movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
421 pmulhw_r2r (mm1, mm0); /* mm0 = T1*x1 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
422
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
423 movq_m2r (*_T3, mm5); /* mm5 = T3 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
424 pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
425
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
426 movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
427 movq_r2r (mm5, mm7); /* mm7 = T3-1 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
428
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
429 movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
430 psubsw_r2r (mm4, mm0); /* mm0 = v17 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
431
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
432 movq_m2r (*_T2, mm4); /* mm4 = T2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
433 pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
434
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
435 paddsw_r2r (mm2, mm1); /* mm1 = u17 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
436 pmulhw_r2r (mm6, mm7); /* mm7 = (T3-1)*x5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
437
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
438 /* slot */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
439
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
440 movq_r2r (mm4, mm2); /* mm2 = T2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
441 paddsw_r2r (mm3, mm5); /* mm5 = T3*x3 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
442
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
443 pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
444 paddsw_r2r (mm6, mm7); /* mm7 = T3*x5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
445
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
446 psubsw_r2r (mm6, mm5); /* mm5 = v35 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
447 paddsw_r2r (mm3, mm7); /* mm7 = u35 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
448
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
449 movq_m2r (*(col+offset+6*8), mm3); /* mm3 = x6 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
450 movq_r2r (mm0, mm6); /* mm6 = v17 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
451
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
452 pmulhw_r2r (mm3, mm2); /* mm2 = T2*x6 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
453 psubsw_r2r (mm5, mm0); /* mm0 = b3 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
454
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
455 psubsw_r2r (mm3, mm4); /* mm4 = v26 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
456 paddsw_r2r (mm6, mm5); /* mm5 = v12 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
457
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
458 movq_r2m (mm0, *(col+offset+3*8)); /* save b3 in scratch0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
459 movq_r2r (mm1, mm6); /* mm6 = u17 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
460
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
461 paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
462 paddsw_r2r (mm7, mm6); /* mm6 = b0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
463
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
464 psubsw_r2r (mm7, mm1); /* mm1 = u12 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
465 movq_r2r (mm1, mm7); /* mm7 = u12 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
466
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
467 movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
468 paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
469
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
470 movq_m2r (*_C4, mm0); /* mm0 = C4/2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
471 psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
472
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
473 movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
474 pmulhw_r2r (mm0, mm1); /* mm1 = b1/2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
475
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
476 movq_r2r (mm4, mm6); /* mm6 = v26 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
477 pmulhw_r2r (mm0, mm7); /* mm7 = b2/2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
478
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
479 movq_m2r (*(col+offset+4*8), mm5); /* mm5 = x4 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
480 movq_r2r (mm3, mm0); /* mm0 = x0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
481
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
482 psubsw_r2r (mm5, mm3); /* mm3 = v04 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
483 paddsw_r2r (mm5, mm0); /* mm0 = u04 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
484
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
485 paddsw_r2r (mm3, mm4); /* mm4 = a1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
486 movq_r2r (mm0, mm5); /* mm5 = u04 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
487
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
488 psubsw_r2r (mm6, mm3); /* mm3 = a2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
489 paddsw_r2r (mm2, mm5); /* mm5 = a0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
490
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
491 paddsw_r2r (mm1, mm1); /* mm1 = b1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
492 psubsw_r2r (mm2, mm0); /* mm0 = a3 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
493
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
494 paddsw_r2r (mm7, mm7); /* mm7 = b2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
495 movq_r2r (mm3, mm2); /* mm2 = a2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
496
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
497 movq_r2r (mm4, mm6); /* mm6 = a1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
498 paddsw_r2r (mm7, mm3); /* mm3 = a2+b2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
499
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
500 psraw_i2r (COL_SHIFT, mm3); /* mm3 = y2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
501 paddsw_r2r (mm1, mm4); /* mm4 = a1+b1 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
502
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
503 psraw_i2r (COL_SHIFT, mm4); /* mm4 = y1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
504 psubsw_r2r (mm1, mm6); /* mm6 = a1-b1 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
505
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
506 movq_m2r (*(col+offset+5*8), mm1); /* mm1 = b0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
507 psubsw_r2r (mm7, mm2); /* mm2 = a2-b2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
508
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
509 psraw_i2r (COL_SHIFT, mm6); /* mm6 = y6 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
510 movq_r2r (mm5, mm7); /* mm7 = a0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
511
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
512 movq_r2m (mm4, *(col+offset+1*8)); /* save y1 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
513 psraw_i2r (COL_SHIFT, mm2); /* mm2 = y5 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
514
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
515 movq_r2m (mm3, *(col+offset+2*8)); /* save y2 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
516 paddsw_r2r (mm1, mm5); /* mm5 = a0+b0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
517
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
518 movq_m2r (*(col+offset+3*8), mm4); /* mm4 = b3 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
519 psubsw_r2r (mm1, mm7); /* mm7 = a0-b0 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
520
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
521 psraw_i2r (COL_SHIFT, mm5); /* mm5 = y0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
522 movq_r2r (mm0, mm3); /* mm3 = a3 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
523
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
524 movq_r2m (mm2, *(col+offset+5*8)); /* save y5 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
525 psubsw_r2r (mm4, mm3); /* mm3 = a3-b3 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
526
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
527 psraw_i2r (COL_SHIFT, mm7); /* mm7 = y7 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
528 paddsw_r2r (mm0, mm4); /* mm4 = a3+b3 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
529
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
530 movq_r2m (mm5, *(col+offset+0*8)); /* save y0 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
531 psraw_i2r (COL_SHIFT, mm3); /* mm3 = y4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
532
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
533 movq_r2m (mm6, *(col+offset+6*8)); /* save y6 */
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
534 psraw_i2r (COL_SHIFT, mm4); /* mm4 = y3 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
535
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
536 movq_r2m (mm7, *(col+offset+7*8)); /* save y7 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
537
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
538 movq_r2m (mm3, *(col+offset+4*8)); /* save y4 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
539
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
540 movq_r2m (mm4, *(col+offset+3*8)); /* save y3 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
541 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
542
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
543
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
/*
 * Per-row rounding constants. declare_idct (further down in this file)
 * passes rounderN to the idct_row step that processes row N of the block.
 * The rounder() initializer macro is defined outside this block.
 * NOTE(review): the trailing comments appear to give the closed-form
 * expression each literal was derived from - confirm before editing a value.
 */
544 static const int32_t rounder0[] ATTR_ALIGN(8) =
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
545 rounder ((1 << (COL_SHIFT - 1)) - 0.5);
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
546 static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
547 static const int32_t rounder1[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
548 rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
549 static const int32_t rounder7[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
550 rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
551 static const int32_t rounder2[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
552 rounder (0.60355339059); /* C2 * (C6+C2)/2 */
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
553 static const int32_t rounder6[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
554 rounder (-0.25); /* C2 * (C6-C2)/2 */
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
555 static const int32_t rounder3[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
556 rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
557 static const int32_t rounder5[] ATTR_ALIGN(8) =
36
846535ace7a2 libmpeg2-0.2.0 merge
arpi_esp
parents: 1
diff changeset
558 rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
559
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
560
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * declare_idct: expands to a static inline function named `idct` that
 * transforms an int16_t block in place.
 *   idct          - name of the generated function
 *   table         - macro used to build the four per-row-pair coefficient
 *                   tables from seven literals (defined outside this block)
 *   idct_row_head - starts the first row (row offset 0*8)
 *   idct_row      - core row step, given a table and a rounder
 *   idct_row_mid  - stores the finished row and starts the next one
 *   idct_row_tail - stores the last row (row offset 5*8)
 * Rows are processed in the order 0, 4, 1, 7, 2, 6, 3, 5 so that each pair
 * sharing a table (04, 17, 26, 35) is handled back to back; afterwards
 * idct_col runs twice, at column offsets 0 and 4.
 */
561 #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
562 static inline void idct (int16_t * const block) \
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
563 { \
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
564 static const int16_t table04[] ATTR_ALIGN(16) = \
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
565 table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
566 static const int16_t table17[] ATTR_ALIGN(16) = \
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
567 table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
568 static const int16_t table26[] ATTR_ALIGN(16) = \
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
569 table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
570 static const int16_t table35[] ATTR_ALIGN(16) = \
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
571 table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
572 \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
573 idct_row_head (block, 0*8, table04); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
574 idct_row (table04, rounder0); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
575 idct_row_mid (block, 0*8, 4*8, table04); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
576 idct_row (table04, rounder4); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
577 idct_row_mid (block, 4*8, 1*8, table17); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
578 idct_row (table17, rounder1); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
579 idct_row_mid (block, 1*8, 7*8, table17); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
580 idct_row (table17, rounder7); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
581 idct_row_mid (block, 7*8, 2*8, table26); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
582 idct_row (table26, rounder2); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
583 idct_row_mid (block, 2*8, 6*8, table26); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
584 idct_row (table26, rounder6); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
585 idct_row_mid (block, 6*8, 3*8, table35); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
586 idct_row (table35, rounder3); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
587 idct_row_mid (block, 3*8, 5*8, table35); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
588 idct_row (table35, rounder5); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
589 idct_row_tail (block, 5*8); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
590 \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
591 idct_col (block, 0); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
592 idct_col (block, 4); \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
593 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
594
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
595
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * COPY_MMX - one step of the row pipeline used by block_copy().
 *
 *   offset    index (in int16_t units) of the row to load from block
 *   cur_lo    register receiving words 0-3 of that row, then the packed row
 *   cur_hi    register receiving words 4-7 of that row
 *   prev      register holding the previously packed row, stored to dest
 *
 * The macro advances dest by one stride before storing prev, so the store
 * always lags the load by one row.  packuswb leaves the 8 words of the
 * current row in cur_lo as 8 unsigned-saturated bytes.
 *
 * Expects block, dest and stride to be in scope where it is expanded.
 */
#define COPY_MMX(offset,cur_lo,cur_hi,prev) \
do { \
    movq_m2r (*(block + (offset)), cur_lo); \
    dest += stride; \
    movq_m2r (*(block + (offset) + 4), cur_hi); \
    movq_r2m (prev, *dest); \
    packuswb_r2r (cur_hi, cur_lo); \
} while (0)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
604
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
605 static inline void block_copy (int16_t * const block, uint8_t * dest,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
606 const int stride)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
607 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
608 movq_m2r (*(block+0*8), mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
609 movq_m2r (*(block+0*8+4), mm1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
610 movq_m2r (*(block+1*8), mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
611 packuswb_r2r (mm1, mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
612 movq_m2r (*(block+1*8+4), mm3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
613 movq_r2m (mm0, *dest);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
614 packuswb_r2r (mm3, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
615 COPY_MMX (2*8, mm0, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
616 COPY_MMX (3*8, mm2, mm3, mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
617 COPY_MMX (4*8, mm0, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
618 COPY_MMX (5*8, mm2, mm3, mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
619 COPY_MMX (6*8, mm0, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
620 COPY_MMX (7*8, mm2, mm3, mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
621 movq_r2m (mm2, *(dest+stride));
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
622 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
623
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
624
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
/*
 * ADD_MMX - one step of the row pipeline used by block_add().
 *
 *   offset             index (in int16_t units) of the block row to add
 *   cur_lo, cur_hi     registers receiving the new dest row, widened to
 *                      words, plus the matching block coefficients
 *   prev_lo, prev_hi   registers holding the previous row's word sums;
 *                      they are packed to bytes and stored to dest
 *
 * The new row is fetched from dest + 2*stride before dest is advanced,
 * so after the "dest += stride" the store hits the previous row and the
 * freshly loaded row is the one below it.
 *
 * Expects block, dest and stride to be in scope, and mm0 to hold zero
 * (it is the source for the byte -> word unpacks).
 */
#define ADD_MMX(offset,cur_lo,cur_hi,prev_lo,prev_hi) \
do { \
    movq_m2r (*(dest + 2*stride), cur_lo); \
    packuswb_r2r (prev_hi, prev_lo); \
    movq_r2r (cur_lo, cur_hi); \
    dest += stride; \
    movq_r2m (prev_lo, *dest); \
    punpcklbw_r2r (mm0, cur_lo); \
    paddsw_m2r (*(block + (offset)), cur_lo); \
    punpckhbw_r2r (mm0, cur_hi); \
    paddsw_m2r (*(block + (offset) + 4), cur_hi); \
} while (0)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
637
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
638 static inline void block_add (int16_t * const block, uint8_t * dest,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
639 const int stride)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
640 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
641 movq_m2r (*dest, mm1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
642 pxor_r2r (mm0, mm0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
643 movq_m2r (*(dest+stride), mm3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
644 movq_r2r (mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
645 punpcklbw_r2r (mm0, mm1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
646 movq_r2r (mm3, mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
647 paddsw_m2r (*(block+0*8), mm1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
648 punpckhbw_r2r (mm0, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
649 paddsw_m2r (*(block+0*8+4), mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
650 punpcklbw_r2r (mm0, mm3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
651 paddsw_m2r (*(block+1*8), mm3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
652 packuswb_r2r (mm2, mm1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
653 punpckhbw_r2r (mm0, mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
654 movq_r2m (mm1, *dest);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
655 paddsw_m2r (*(block+1*8+4), mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
656 ADD_MMX (2*8, mm1, mm2, mm3, mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
657 ADD_MMX (3*8, mm3, mm4, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
658 ADD_MMX (4*8, mm1, mm2, mm3, mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
659 ADD_MMX (5*8, mm3, mm4, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
660 ADD_MMX (6*8, mm1, mm2, mm3, mm4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
661 ADD_MMX (7*8, mm3, mm4, mm1, mm2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
662 packuswb_r2r (mm4, mm3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
663 movq_r2m (mm3, *(dest+stride));
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
664 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
665
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
666
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
667 static inline void block_zero (int16_t * const block)
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
668 {
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
669 pxor_r2r (mm0, mm0);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
670 movq_r2m (mm0, *(block+0*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
671 movq_r2m (mm0, *(block+1*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
672 movq_r2m (mm0, *(block+2*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
673 movq_r2m (mm0, *(block+3*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
674 movq_r2m (mm0, *(block+4*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
675 movq_r2m (mm0, *(block+5*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
676 movq_r2m (mm0, *(block+6*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
677 movq_r2m (mm0, *(block+7*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
678 movq_r2m (mm0, *(block+8*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
679 movq_r2m (mm0, *(block+9*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
680 movq_r2m (mm0, *(block+10*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
681 movq_r2m (mm0, *(block+11*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
682 movq_r2m (mm0, *(block+12*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
683 movq_r2m (mm0, *(block+13*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
684 movq_r2m (mm0, *(block+14*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
685 movq_r2m (mm0, *(block+15*4));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
686 }
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
687
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
688
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
/* Values for the "cpu" argument of block_add_DC(), tested by dup4(). */
#define CPU_MMXEXT 0
#define CPU_MMX 1

/*
 * dup4 - replicate the low 16-bit word of reg into all four words of reg.
 *
 * With CPU_MMXEXT a single pshufw does the job; otherwise the same
 * result is built from two unpack steps (word -> dword -> qword).
 * Expects a variable named cpu to be in scope where it is expanded.
 */
#define dup4(reg) \
do { \
    if (cpu == CPU_MMXEXT) { \
        pshufw_r2r (reg, reg, 0x00); \
    } else { \
        punpcklwd_r2r (reg, reg); \
        punpckldq_r2r (reg, reg); \
    } \
} while (0)
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
700
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
701 static inline void block_add_DC (int16_t * const block, uint8_t * dest,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
702 const int stride, const int cpu)
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
703 {
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
704 movd_v2r ((block[0] + 4) >> 3, mm0);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
705 pxor_r2r (mm1, mm1);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
706 movq_m2r (*dest, mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
707 dup4 (mm0);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
708 psubsw_r2r (mm0, mm1);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
709 packuswb_r2r (mm0, mm0);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
710 paddusb_r2r (mm0, mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
711 packuswb_r2r (mm1, mm1);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
712 movq_m2r (*(dest + stride), mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
713 psubusb_r2r (mm1, mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
714 block[0] = 0;
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
715 paddusb_r2r (mm0, mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
716 movq_r2m (mm2, *dest);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
717 psubusb_r2r (mm1, mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
718 movq_m2r (*(dest + 2*stride), mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
719 dest += stride;
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
720 movq_r2m (mm3, *dest);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
721 paddusb_r2r (mm0, mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
722 movq_m2r (*(dest + 2*stride), mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
723 psubusb_r2r (mm1, mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
724 dest += stride;
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
725 paddusb_r2r (mm0, mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
726 movq_r2m (mm2, *dest);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
727 psubusb_r2r (mm1, mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
728 movq_m2r (*(dest + 2*stride), mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
729 dest += stride;
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
730 movq_r2m (mm3, *dest);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
731 paddusb_r2r (mm0, mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
732 movq_m2r (*(dest + 2*stride), mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
733 psubusb_r2r (mm1, mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
734 dest += stride;
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
735 paddusb_r2r (mm0, mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
736 movq_r2m (mm2, *dest);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
737 psubusb_r2r (mm1, mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
738 movq_m2r (*(dest + 2*stride), mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
739 dest += stride;
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
740 movq_r2m (mm3, *dest);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
741 paddusb_r2r (mm0, mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
742 movq_m2r (*(dest + 2*stride), mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
743 psubusb_r2r (mm1, mm2);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
744 block[63] = 0;
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
745 paddusb_r2r (mm0, mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
746 movq_r2m (mm2, *(dest + stride));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
747 psubusb_r2r (mm1, mm3);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
748 movq_r2m (mm3, *(dest + 2*stride));
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
749 }
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
750
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
751
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
752 declare_idct (mmxext_idct, mmxext_table,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
753 mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
754
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
/*
 * MMXEXT "copy" entry point: transform block in place with mmxext_idct(),
 * write the clamped result to dest (rows stride bytes apart), then clear
 * block so it is all-zero when the function returns.
 */
void mpeg2_idct_copy_mmxext (int16_t * const block, uint8_t * const dest,
                             const int stride)
{
    mmxext_idct (block);		/* in-place inverse transform */
    block_copy (block, dest, stride);	/* saturate to bytes and store */
    block_zero (block);			/* leave the coefficient buffer empty */
}
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
762
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
763 void mpeg2_idct_add_mmxext (const int last, int16_t * const block,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
764 uint8_t * const dest, const int stride)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
765 {
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
766 if (last != 129 || (block[0] & 7) == 4) {
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
767 mmxext_idct (block);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
768 block_add (block, dest, stride);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
769 block_zero (block);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
770 } else
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
771 block_add_DC (block, dest, stride, CPU_MMXEXT);
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
772 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
773
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
774
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
775 declare_idct (mmx_idct, mmx_table,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
776 mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
777
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
/*
 * Plain-MMX "copy" entry point: transform block in place with mmx_idct(),
 * write the clamped result to dest (rows stride bytes apart), then clear
 * block so it is all-zero when the function returns.
 */
void mpeg2_idct_copy_mmx (int16_t * const block, uint8_t * const dest,
                          const int stride)
{
    mmx_idct (block);			/* in-place inverse transform */
    block_copy (block, dest, stride);	/* saturate to bytes and store */
    block_zero (block);			/* leave the coefficient buffer empty */
}
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
785
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
786 void mpeg2_idct_add_mmx (const int last, int16_t * const block,
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
787 uint8_t * const dest, const int stride)
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
788 {
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
789 if (last != 129 || (block[0] & 7) == 4) {
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
790 mmx_idct (block);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
791 block_add (block, dest, stride);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
792 block_zero (block);
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
793 } else
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
794 block_add_DC (block, dest, stride, CPU_MMX);
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
795 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
796
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
797
9852
47984e3f54ce Importing libmpeg2 from mpeg2dec-0.3.1
arpi
parents: 1437
diff changeset
/*
 * Patch the global scan tables for the MMX/MMXEXT IDCT, which expects its
 * input coefficients in a reordered layout.
 *
 * Every table entry keeps bits 3-5 and has its low three bits rotated
 * right by one (bits 2:1 move to 1:0, bit 0 moves to bit 2).
 *
 * The tables are rewritten in place, so running this twice would apply
 * the permutation twice; it is meant to be called once.
 */
void mpeg2_idct_mmx_init (void)
{
    extern uint8_t mpeg2_scan_norm[64];
    extern uint8_t mpeg2_scan_alt[64];
    int idx, pos;

    for (idx = 0; idx < 64; idx++) {
        pos = mpeg2_scan_norm[idx];
        mpeg2_scan_norm[idx] =
            (pos & 0x38) | ((pos >> 1) & 3) | ((pos & 1) << 2);

        pos = mpeg2_scan_alt[idx];
        mpeg2_scan_alt[idx] =
            (pos & 0x38) | ((pos >> 1) & 3) | ((pos & 1) << 2);
    }
}
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
813
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
814 #endif