annotate x86/idct_mmx.c @ 12494:94eaea836bf4 libavcodec

Check avctx width/height more thoroughly (e.g. all values 0 except width would have been accepted before). Also do not fail if they are invalid but instead override them to 0. This allows decoding e.g. MPEG video when only the container values are corrupted. For encoding a value of 0,0 of course makes no sense, but was allowed through before and will be caught by an extra check in the encode function.
author reimar
date Wed, 15 Sep 2010 04:46:55 +0000
parents be9129c5503e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1 /*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2 * idct_mmx.c
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
3 * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
4 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
5 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
6 * See http://libmpeg2.sourceforge.net/ for updates.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
7 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
8 * mpeg2dec is free software; you can redistribute it and/or modify
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
9 * it under the terms of the GNU General Public License as published by
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
10 * the Free Software Foundation; either version 2 of the License, or
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
11 * (at your option) any later version.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
12 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
13 * mpeg2dec is distributed in the hope that it will be useful,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
16 * GNU General Public License for more details.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
17 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
18 * You should have received a copy of the GNU General Public License
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
19 * along with mpeg2dec; if not, write to the Free Software
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
21 */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
22
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
23 #include "libavutil/common.h"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
24 #include "libavcodec/dsputil.h"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
25
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
26 #include "libavutil/x86_cpu.h"
11381
f5ccf2e590d6 x86: move function prototypes to header files
mru
parents: 8430
diff changeset
27 #include "dsputil_mmx.h"
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
28
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/* Right-shift amount applied to the row IDCT results (the psrad and
 * ">> ROW_SHIFT" uses further down in this file). */
29 #define ROW_SHIFT 11
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/* NOTE(review): not referenced in the part of the file visible here;
 * presumably the column-pass counterpart of ROW_SHIFT - confirm. */
30 #define COL_SHIFT 6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
31
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/* Adds 0.5 to bias, scales the sum by 2^ROW_SHIFT and truncates it to int.
 * NOTE(review): the name is the same as the C library round(); presumably
 * <math.h> is never in scope in this file - confirm before adding includes. */
32 #define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/* Brace initializer holding two copies of round(bias). Presumably used to
 * build the two-dword rounder operands that the row helpers add with
 * "paddd (%1)" - the definition sites are outside this view. */
33 #define rounder(bias) {round (bias), round (bias)}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
34
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
35
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
36 #if 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
37 /* C row IDCT - it is just here to document the MMXEXT and MMX versions */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/*
 * Reference 8-point row IDCT in plain C. It sits inside "#if 0", so it is
 * never compiled.
 *
 * row     - coefficient buffer; the eight values at row + offset are
 *           transformed in place
 * offset  - index, in int16_t elements, added to row before any access
 * table   - coefficients; table[1]..table[7] are read as C1..C7
 * rounder - pointer to the bias added to each of the four even-part sums
 *
 * Each output is a sum or difference of one even-part and one odd-part term,
 * shifted right by ROW_SHIFT.
 */
38 static inline void idct_row (int16_t * row, int offset,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
39 int16_t * table, int32_t * rounder)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
40 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
41 int C1, C2, C3, C4, C5, C6, C7;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
42 int a0, a1, a2, a3, b0, b1, b2, b3;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
43
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
44 row += offset;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
45
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/* table[0] is not read; table[k] supplies Ck for k = 1..7 */
46 C1 = table[1];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
47 C2 = table[2];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
48 C3 = table[3];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
49 C4 = table[4];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
50 C5 = table[5];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
51 C6 = table[6];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
52 C7 = table[7];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
53
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/* even part: inputs 0, 2, 4, 6 plus the rounding bias */
54 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
55 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
56 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
57 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
58
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/* odd part: inputs 1, 3, 5, 7 (no bias) */
59 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
60 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
61 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
62 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
63
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/* outputs k and 7-k are (ak + bk) and (ak - bk), each shifted by ROW_SHIFT */
64 row[0] = (a0 + b0) >> ROW_SHIFT;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
65 row[1] = (a1 + b1) >> ROW_SHIFT;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
66 row[2] = (a2 + b2) >> ROW_SHIFT;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
67 row[3] = (a3 + b3) >> ROW_SHIFT;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
68 row[4] = (a3 - b3) >> ROW_SHIFT;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
69 row[5] = (a2 - b2) >> ROW_SHIFT;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
70 row[6] = (a1 - b1) >> ROW_SHIFT;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
71 row[7] = (a0 - b0) >> ROW_SHIFT;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
72 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
73 #endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
74
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
75
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
76 /* MMXEXT row IDCT */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
77
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/*
 * Expands to a brace-enclosed initializer of 32 values, written as eight
 * groups of four, built from the coefficients c1..c7 and their negations.
 * The MMXEXT row helpers below take a "const int16_t *" table and address it
 * in 8-byte steps: mmxext_row_head() and mmxext_row_mid() read byte offsets
 * 0 and 8, mmxext_row() reads offsets 16 through 56. Presumably those tables
 * are built with this macro - the definition sites are outside this view.
 */
78 #define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
79 c4, c6, c4, c6, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
80 c1, c3, -c1, -c5, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
81 c5, c7, c3, -c7, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
82 c4, -c6, c4, -c6, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
83 -c4, c2, c4, -c2, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
84 c5, -c1, c3, -c1, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
85 c7, c3, c7, -c5 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
86
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/*
 * First stage of the MMXEXT row IDCT: load one row and start the even part.
 *
 * row    - base of the coefficient block; the eight 16-bit values at
 *          row + offset are read as two quadwords
 * offset - index, in int16_t elements, of the row to load
 * table  - coefficient table; only its first 16 bytes are read here
 *
 * Nothing is written to memory. The results stay in MMX registers:
 *   mm0, mm2 - first quadword (mm2 with its dword halves swapped by
 *              pshufw $0x4e)
 *   mm5, mm6 - second quadword
 *   mm3      - pmaddwd of the first quadword with the first table quadword
 *   mm4      - second table quadword
 * mmxext_row() uses these registers without loading them itself.
 *
 * NOTE(review): the asm statement declares no outputs or clobbers, so this
 * register hand-off is not visible to the compiler; presumably callers issue
 * the row helpers back to back - confirm before reordering anything.
 */
87 static inline void mmxext_row_head (int16_t * const row, const int offset,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
88 const int16_t * const table)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
89 {
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
90 __asm__ volatile(
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
91 "movq (%0), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
92
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
93 "movq 8(%0), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
94 "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
95
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
96 "movq (%1), %%mm3 \n\t" /* mm3 = -C2 -C4 C2 C4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
97 "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
98
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
99 "movq 8(%1), %%mm4 \n\t" /* mm4 = C6 C4 C6 C4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
100 "pmaddwd %%mm0, %%mm3 \n\t" /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
101
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
102 "pshufw $0x4e, %%mm2, %%mm2 \n\t" /* mm2 = x2 x0 x6 x4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
103 :: "r" ((row+offset)), "r" (table)
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
104 );
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
105 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
106
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/*
 * Main arithmetic of the MMXEXT row IDCT.
 *
 * table   - coefficient table; byte offsets 16 through 63 are read here
 * rounder - pointer to a quadword (two 32-bit values) that is added to the
 *           even-part sums in mm3 and mm0
 *
 * Expects mm0, mm2, mm3, mm4, mm5 and mm6 to hold what mmxext_row_head() or
 * mmxext_row_mid() leaves in them; mm1 and mm7 are loaded here. Nothing is
 * written to memory. On exit, following the per-instruction comments:
 *   mm1 = y1 y0 and mm3 = y6 y7, already shifted right by ROW_SHIFT
 *   mm0 = a3+b3 a2+b2 and mm4 = a3-b3 a2-b2, rounder included, not yet
 *         shifted (mmxext_row_tail() / mmxext_row_mid() apply that shift)
 *
 * NOTE(review): no outputs or clobbers are declared on the asm statement;
 * the register contract above exists only by convention between the helpers.
 */
107 static inline void mmxext_row (const int16_t * const table,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
108 const int32_t * const rounder)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
109 {
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
110 __asm__ volatile (
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
111 "movq 16(%0), %%mm1 \n\t" /* mm1 = -C5 -C1 C3 C1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
112 "pmaddwd %%mm2, %%mm4 \n\t" /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
113
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
114 "pmaddwd 32(%0), %%mm0 \n\t" /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
115 "pshufw $0x4e, %%mm6, %%mm6 \n\t" /* mm6 = x3 x1 x7 x5 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
116
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
117 "movq 24(%0), %%mm7 \n\t" /* mm7 = -C7 C3 C7 C5 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
118 "pmaddwd %%mm5, %%mm1 \n\t" /* mm1= -C1*x5-C5*x7 C1*x1+C3*x3 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
119
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
120 "paddd (%1), %%mm3 \n\t" /* mm3 += rounder */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
121 "pmaddwd %%mm6, %%mm7 \n\t" /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
122
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
123 "pmaddwd 40(%0), %%mm2 \n\t" /* mm2= C4*x0-C2*x2 -C4*x4+C2*x6 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
124 "paddd %%mm4, %%mm3 \n\t" /* mm3 = a1 a0 + rounder */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
125
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
126 "pmaddwd 48(%0), %%mm5 \n\t" /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
127 "movq %%mm3, %%mm4 \n\t" /* mm4 = a1 a0 + rounder */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
128
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
129 "pmaddwd 56(%0), %%mm6 \n\t" /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
130 "paddd %%mm7, %%mm1 \n\t" /* mm1 = b1 b0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
131
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
132 "paddd (%1), %%mm0 \n\t" /* mm0 += rounder */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
133 "psubd %%mm1, %%mm3 \n\t" /* mm3 = a1-b1 a0-b0 + rounder */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
134
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
135 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm3 \n\t" /* mm3 = y6 y7 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
136 "paddd %%mm4, %%mm1 \n\t" /* mm1 = a1+b1 a0+b0 + rounder */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
137
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
138 "paddd %%mm2, %%mm0 \n\t" /* mm0 = a3 a2 + rounder */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
139 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm1 \n\t" /* mm1 = y1 y0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
140
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
141 "paddd %%mm6, %%mm5 \n\t" /* mm5 = b3 b2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
142 "movq %%mm0, %%mm4 \n\t" /* mm4 = a3 a2 + rounder */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
143
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
144 "paddd %%mm5, %%mm0 \n\t" /* mm0 = a3+b3 a2+b2 + rounder */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
145 "psubd %%mm5, %%mm4 \n\t" /* mm4 = a3-b3 a2-b2 + rounder */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
146 : : "r" (table), "r" (rounder));
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
147 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
148
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/*
 * Last stage of the MMXEXT row IDCT: finish the shifts, pack and store.
 *
 * row   - base of the coefficient block
 * store - index, in int16_t elements, at which the eight results are written
 *         (16 bytes starting at row + store)
 *
 * Expects mm0, mm1, mm3 and mm4 as left by mmxext_row(). mm0 and mm4 receive
 * the right shift by ROW_SHIFT that mmxext_row() has not applied to them,
 * each register pair is packed into four 16-bit words with packssdw, and
 * pshufw $0xb1 swaps the two words inside each dword of mm4 so the second
 * quadword is stored in the order y7 y6 y5 y4.
 *
 * NOTE(review): the asm statement writes through %0 but declares neither an
 * output operand nor a "memory" clobber - confirm this is intentional.
 */
149 static inline void mmxext_row_tail (int16_t * const row, const int store)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
150 {
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
151 __asm__ volatile (
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
152 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
153
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
154 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm4 \n\t" /* mm4 = y4 y5 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
155
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
156 "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
157
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
158 "packssdw %%mm3, %%mm4 \n\t" /* mm4 = y6 y7 y4 y5 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
159
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
160 "movq %%mm1, (%0) \n\t" /* save y3 y2 y1 y0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
161 "pshufw $0xb1, %%mm4, %%mm4 \n\t" /* mm4 = y7 y6 y5 y4 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
162
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
163 /* slot */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
164
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
165 "movq %%mm4, 8(%0) \n\t" /* save y7 y6 y5 y4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
166 :: "r" (row+store)
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
167 );
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
168 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
169
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/*
 * Fused tail-and-head step of the MMXEXT row IDCT: it stores the row that
 * mmxext_row() has just computed and loads the next one, interleaving the
 * instructions of mmxext_row_tail() and mmxext_row_head().
 *
 * row    - base of the coefficient block
 * store  - index, in int16_t elements, where the finished row is written
 * offset - index, in int16_t elements, of the next row to load
 * table  - coefficient table for the next row; its first 16 bytes are read
 *
 * store and offset are doubled before being passed to the asm because they
 * are used as byte index registers in the (%0,%1) and (%0,%2) addresses.
 * Both loads from row + offset are issued before either store to row + store.
 * mm0 is overwritten with new input only after it has been packed into mm1,
 * and mm3/mm4 are reloaded from the table only after being packed/stored.
 *
 * On exit the registers are in the state mmxext_row_head() produces, ready
 * for the next mmxext_row().
 *
 * NOTE(review): the asm statement stores to memory and carries live values
 * in MMX registers but declares no outputs or clobbers - confirm intended.
 */
170 static inline void mmxext_row_mid (int16_t * const row, const int store,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
171 const int offset,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
172 const int16_t * const table)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
173 {
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
174 __asm__ volatile (
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
175 "movq (%0,%1), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
176 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
177
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
178 "movq 8(%0,%1), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
179 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm4 \n\t" /* mm4 = y4 y5 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
180
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
181 "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
182 "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
183
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
184 "packssdw %%mm3, %%mm4 \n\t" /* mm4 = y6 y7 y4 y5 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
185 "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
186
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
187 "movq %%mm1, (%0,%2) \n\t" /* save y3 y2 y1 y0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
188 "pshufw $0xb1, %%mm4, %%mm4\n\t" /* mm4 = y7 y6 y5 y4 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
189
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
190 "movq (%3), %%mm3 \n\t" /* mm3 = -C2 -C4 C2 C4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
191 "movq %%mm4, 8(%0,%2) \n\t" /* save y7 y6 y5 y4 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
192
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
193 "pmaddwd %%mm0, %%mm3 \n\t" /* mm3= -C4*x4-C2*x6 C4*x0+C2*x2 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
194
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
195 "movq 8(%3), %%mm4 \n\t" /* mm4 = C6 C4 C6 C4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
196 "pshufw $0x4e, %%mm2, %%mm2\n\t" /* mm2 = x2 x0 x6 x4 */
12297
be9129c5503e Fix compilation in x86_64. I broke it with r24580.
vitor
parents: 12296
diff changeset
197 :: "r" (row), "r" ((x86_reg) (2*offset)), "r" ((x86_reg) (2*store)), "r" (table)
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
198 );
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
199 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
200
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
201
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
202 /* MMX row IDCT */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
203
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/*
 * Expands to a brace-enclosed initializer of 32 values, written as eight
 * groups of four, built from the coefficients c1..c7 and their negations.
 * It holds the same coefficients as mmxext_table in a different order.
 * The plain-MMX row helpers take a "const int16_t *" table and address it in
 * 8-byte steps: mmx_row_head() reads byte offsets 0, 8 and 16, mmx_row()
 * reads offsets from 24 upwards. Presumably those tables are built with this
 * macro - the definition sites are outside this view.
 */
204 #define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
205 c4, c6, -c4, -c2, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
206 c1, c3, c3, -c7, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
207 c5, c7, -c1, -c5, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
208 c4, -c6, c4, -c2, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
209 -c4, c2, c4, -c6, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
210 c5, -c1, c7, -c5, \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
211 c7, c3, c3, -c1 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
212
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/*
 * First stage of the plain-MMX row IDCT: load one row and start the even
 * part. Where the MMXEXT variant swaps register halves with pshufw, this one
 * duplicates a dword half with punpckldq / punpckhdq.
 *
 * row    - base of the coefficient block; the eight 16-bit values at
 *          row + offset are read as two quadwords
 * offset - index, in int16_t elements, of the row to load
 * table  - coefficient table; its first 24 bytes are read here
 *
 * Nothing is written to memory. The results stay in MMX registers:
 *   mm0      - low dword of the first quadword, duplicated (x2 x0 x2 x0)
 *   mm2      - high dword of the first quadword, duplicated (x6 x4 x6 x4)
 *   mm5, mm6 - second quadword
 *   mm3      - pmaddwd of mm0 with the first table quadword
 *   mm4, mm1 - second and third table quadwords
 * mmx_row() uses these registers without loading them itself.
 *
 * NOTE(review): the asm statement declares no outputs or clobbers, so this
 * register hand-off is not visible to the compiler; presumably callers issue
 * the row helpers back to back - confirm before reordering anything.
 */
213 static inline void mmx_row_head (int16_t * const row, const int offset,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
214 const int16_t * const table)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
215 {
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
216 __asm__ volatile (
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
217 "movq (%0), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
218
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
219 "movq 8(%0), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
220 "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
221
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
222 "movq (%1), %%mm3 \n\t" /* mm3 = C6 C4 C2 C4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
223 "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
224
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
225 "punpckldq %%mm0, %%mm0 \n\t" /* mm0 = x2 x0 x2 x0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
226
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
227 "movq 8(%1), %%mm4 \n\t" /* mm4 = -C2 -C4 C6 C4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
228 "pmaddwd %%mm0, %%mm3 \n\t" /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
229
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
230 "movq 16(%1), %%mm1 \n\t" /* mm1 = -C7 C3 C3 C1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
231 "punpckhdq %%mm2, %%mm2 \n\t" /* mm2 = x6 x4 x6 x4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
232 :: "r" ((row+offset)), "r" (table)
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
233 );
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
234 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
235
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
236 static inline void mmx_row (const int16_t * const table,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
237 const int32_t * const rounder)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
238 {
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
239 __asm__ volatile (
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
240 "pmaddwd %%mm2, %%mm4 \n\t" /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
241 "punpckldq %%mm5, %%mm5 \n\t" /* mm5 = x3 x1 x3 x1 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
242
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
243 "pmaddwd 32(%0), %%mm0 \n\t" /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
244 "punpckhdq %%mm6, %%mm6 \n\t" /* mm6 = x7 x5 x7 x5 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
245
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
246 "movq 24(%0), %%mm7 \n\t" /* mm7 = -C5 -C1 C7 C5 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
247 "pmaddwd %%mm5, %%mm1 \n\t" /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
248
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
249 "paddd (%1), %%mm3 \n\t" /* mm3 += rounder */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
250 "pmaddwd %%mm6, %%mm7 \n\t" /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
251
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
252 "pmaddwd 40(%0), %%mm2 \n\t" /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
253 "paddd %%mm4, %%mm3 \n\t" /* mm3 = a1 a0 + rounder */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
254
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
255 "pmaddwd 48(%0), %%mm5 \n\t" /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
256 "movq %%mm3, %%mm4 \n\t" /* mm4 = a1 a0 + rounder */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
257
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
258 "pmaddwd 56(%0), %%mm6 \n\t" /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
259 "paddd %%mm7, %%mm1 \n\t" /* mm1 = b1 b0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
260
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
261 "paddd (%1), %%mm0 \n\t" /* mm0 += rounder */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
262 "psubd %%mm1, %%mm3 \n\t" /* mm3 = a1-b1 a0-b0 + rounder */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
263
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
264 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm3 \n\t" /* mm3 = y6 y7 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
265 "paddd %%mm4, %%mm1 \n\t" /* mm1 = a1+b1 a0+b0 + rounder */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
266
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
267 "paddd %%mm2, %%mm0 \n\t" /* mm0 = a3 a2 + rounder */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
268 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm1 \n\t" /* mm1 = y1 y0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
269
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
270 "paddd %%mm6, %%mm5 \n\t" /* mm5 = b3 b2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
271 "movq %%mm0, %%mm7 \n\t" /* mm7 = a3 a2 + rounder */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
272
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
273 "paddd %%mm5, %%mm0 \n\t" /* mm0 = a3+b3 a2+b2 + rounder */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
274 "psubd %%mm5, %%mm7 \n\t" /* mm7 = a3-b3 a2-b2 + rounder */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
275 :: "r" (table), "r" (rounder)
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
276 );
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
277 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
278
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
279 static inline void mmx_row_tail (int16_t * const row, const int store)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
280 {
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
281 __asm__ volatile (
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
282 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
283
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
284 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm7 \n\t" /* mm7 = y4 y5 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
285
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
286 "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
287
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
288 "packssdw %%mm3, %%mm7 \n\t" /* mm7 = y6 y7 y4 y5 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
289
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
290 "movq %%mm1, (%0) \n\t" /* save y3 y2 y1 y0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
291 "movq %%mm7, %%mm4 \n\t" /* mm4 = y6 y7 y4 y5 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
292
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
293 "pslld $16, %%mm7 \n\t" /* mm7 = y7 0 y5 0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
294
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
295 "psrld $16, %%mm4 \n\t" /* mm4 = 0 y6 0 y4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
297 "por %%mm4, %%mm7 \n\t" /* mm7 = y7 y6 y5 y4 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
298
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
299 /* slot */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
300
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
301 "movq %%mm7, 8(%0) \n\t" /* save y7 y6 y5 y4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
302 :: "r" (row+store)
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
303 );
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
304 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
305
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
306 static inline void mmx_row_mid (int16_t * const row, const int store,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
307 const int offset, const int16_t * const table)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
308 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
309
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
310 __asm__ volatile (
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
311 "movq (%0,%1), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
312 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
313
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
314 "movq 8(%0,%1), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
315 "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm7 \n\t" /* mm7 = y4 y5 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
316
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
317 "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
318 "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
319
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
320 "packssdw %%mm3, %%mm7 \n\t" /* mm7 = y6 y7 y4 y5 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
321 "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
322
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
323 "movq %%mm1, (%0,%2) \n\t" /* save y3 y2 y1 y0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
324 "movq %%mm7, %%mm1 \n\t" /* mm1 = y6 y7 y4 y5 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
325
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
326 "punpckldq %%mm0, %%mm0 \n\t" /* mm0 = x2 x0 x2 x0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
327 "psrld $16, %%mm7 \n\t" /* mm7 = 0 y6 0 y4 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
328
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
329 "movq (%3), %%mm3 \n\t" /* mm3 = C6 C4 C2 C4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
330 "pslld $16, %%mm1 \n\t" /* mm1 = y7 0 y5 0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
331
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
332 "movq 8(%3), %%mm4 \n\t" /* mm4 = -C2 -C4 C6 C4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
333 "por %%mm1, %%mm7 \n\t" /* mm7 = y7 y6 y5 y4 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
334
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
335 "movq 16(%3), %%mm1 \n\t" /* mm1 = -C7 C3 C3 C1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
336 "punpckhdq %%mm2, %%mm2 \n\t" /* mm2 = x6 x4 x6 x4 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
337
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
338 "movq %%mm7, 8(%0,%2) \n\t" /* save y7 y6 y5 y4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
339 "pmaddwd %%mm0, %%mm3 \n\t" /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
12297
be9129c5503e Fix compilation in x86_64. I broke it with r24580.
vitor
parents: 12296
diff changeset
340 : : "r" (row), "r" ((x86_reg) (2*offset)), "r" ((x86_reg) (2*store)), "r" (table)
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
341 );
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
342 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
343
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
344
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
#if 0
/*
 * Plain C column IDCT. It is never compiled: it is kept only as a readable
 * description of what the MMXEXT and MMX column code computes.
 */
static inline void idct_col (int16_t * col, int offset)
{
/* multiplication - as implemented on mmx */
#define F(c,x) (((c) * (x)) >> 16)

/* saturation - it helps us handle torture test cases */
#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))

    /* the column starts offset elements into the block; rows are 8 apart */
    int16_t * const base = col + offset;

    /* inputs */
    const int16_t x0 = base[0*8];
    const int16_t x1 = base[1*8];
    const int16_t x2 = base[2*8];
    const int16_t x3 = base[3*8];
    const int16_t x4 = base[4*8];
    const int16_t x5 = base[5*8];
    const int16_t x6 = base[6*8];
    const int16_t x7 = base[7*8];

    /* even part */
    const int16_t u04 = S (x0 + x4);
    const int16_t v04 = S (x0 - x4);
    const int16_t u26 = S (F (T2, x6) + x2);
    const int16_t v26 = S (F (T2, x2) - x6);

    const int16_t a0 = S (u04 + u26);
    const int16_t a1 = S (v04 + v26);
    const int16_t a2 = S (v04 - v26);
    const int16_t a3 = S (u04 - u26);

    /* odd part */
    const int16_t u17 = S (F (T1, x7) + x1);
    const int16_t v17 = S (F (T1, x1) - x7);
    const int16_t u35 = S (F (T3, x5) + x3);
    const int16_t v35 = S (F (T3, x3) - x5);

    const int16_t b0 = S (u17 + u35);
    const int16_t b3 = S (v17 - v35);
    /* u12 and v12 before scaling by C4 */
    const int16_t u12_raw = S (u17 - u35);
    const int16_t v12_raw = S (v17 + v35);
    const int16_t u12 = S (2 * F (C4, u12_raw));
    const int16_t v12 = S (2 * F (C4, v12_raw));
    const int16_t b1 = S (u12 + v12);
    const int16_t b2 = S (u12 - v12);

    /* outputs: sums give y0..y3, differences give y7..y4 */
    const int16_t y0 = S (a0 + b0) >> COL_SHIFT;
    const int16_t y1 = S (a1 + b1) >> COL_SHIFT;
    const int16_t y2 = S (a2 + b2) >> COL_SHIFT;
    const int16_t y3 = S (a3 + b3) >> COL_SHIFT;

    const int16_t y4 = S (a3 - b3) >> COL_SHIFT;
    const int16_t y5 = S (a2 - b2) >> COL_SHIFT;
    const int16_t y6 = S (a1 - b1) >> COL_SHIFT;
    const int16_t y7 = S (a0 - b0) >> COL_SHIFT;

    base[0*8] = y0;
    base[1*8] = y1;
    base[2*8] = y2;
    base[3*8] = y3;
    base[4*8] = y4;
    base[5*8] = y5;
    base[6*8] = y6;
    base[7*8] = y7;
}
#endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
415
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
416
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
417 /* MMX column IDCT */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
418 static inline void idct_col (int16_t * const col, const int offset)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
419 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
420 #define T1 13036
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
421 #define T2 27146
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
422 #define T3 43790
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
423 #define C4 23170
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
424
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
425 DECLARE_ALIGNED(8, static const short, t1_vector)[] = {
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
426 T1,T1,T1,T1,
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
427 T2,T2,T2,T2,
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
428 T3,T3,T3,T3,
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
429 C4,C4,C4,C4
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
430 };
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
431
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
432 /* column code adapted from Peter Gubanov */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
433 /* http://www.elecard.com/peter/idct.shtml */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
434
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
435 __asm__ volatile (
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
436 "movq (%0), %%mm0 \n\t" /* mm0 = T1 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
437
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
438 "movq 2*8(%1), %%mm1 \n\t" /* mm1 = x1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
439 "movq %%mm0, %%mm2 \n\t" /* mm2 = T1 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
440
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
441 "movq 7*2*8(%1), %%mm4 \n\t" /* mm4 = x7 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
442 "pmulhw %%mm1, %%mm0 \n\t" /* mm0 = T1*x1 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
443
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
444 "movq 16(%0), %%mm5 \n\t" /* mm5 = T3 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
445 "pmulhw %%mm4, %%mm2 \n\t" /* mm2 = T1*x7 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
446
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
447 "movq 2*5*8(%1), %%mm6 \n\t" /* mm6 = x5 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
448 "movq %%mm5, %%mm7 \n\t" /* mm7 = T3-1 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
449
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
450 "movq 3*8*2(%1), %%mm3 \n\t" /* mm3 = x3 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
451 "psubsw %%mm4, %%mm0 \n\t" /* mm0 = v17 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
452
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
453 "movq 8(%0), %%mm4 \n\t" /* mm4 = T2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
454 "pmulhw %%mm3, %%mm5 \n\t" /* mm5 = (T3-1)*x3 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
455
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
456 "paddsw %%mm2, %%mm1 \n\t" /* mm1 = u17 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
457 "pmulhw %%mm6, %%mm7 \n\t" /* mm7 = (T3-1)*x5 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
458
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
459 /* slot */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
460
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
461 "movq %%mm4, %%mm2 \n\t" /* mm2 = T2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
462 "paddsw %%mm3, %%mm5 \n\t" /* mm5 = T3*x3 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
463
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
464 "pmulhw 2*8*2(%1), %%mm4 \n\t" /* mm4 = T2*x2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
465 "paddsw %%mm6, %%mm7 \n\t" /* mm7 = T3*x5 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
466
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
467 "psubsw %%mm6, %%mm5 \n\t" /* mm5 = v35 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
468 "paddsw %%mm3, %%mm7 \n\t" /* mm7 = u35 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
469
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
470 "movq 6*8*2(%1), %%mm3 \n\t" /* mm3 = x6 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
471 "movq %%mm0, %%mm6 \n\t" /* mm6 = v17 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
472
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
473 "pmulhw %%mm3, %%mm2 \n\t" /* mm2 = T2*x6 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
474 "psubsw %%mm5, %%mm0 \n\t" /* mm0 = b3 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
475
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
476 "psubsw %%mm3, %%mm4 \n\t" /* mm4 = v26 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
477 "paddsw %%mm6, %%mm5 \n\t" /* mm5 = v12 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
478
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
479 "movq %%mm0, 3*8*2(%1)\n\t" /* save b3 in scratch0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
480 "movq %%mm1, %%mm6 \n\t" /* mm6 = u17 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
481
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
482 "paddsw 2*8*2(%1), %%mm2 \n\t" /* mm2 = u26 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
483 "paddsw %%mm7, %%mm6 \n\t" /* mm6 = b0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
484
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
485 "psubsw %%mm7, %%mm1 \n\t" /* mm1 = u12 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
486 "movq %%mm1, %%mm7 \n\t" /* mm7 = u12 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
487
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
488 "movq 0*8(%1), %%mm3 \n\t" /* mm3 = x0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
489 "paddsw %%mm5, %%mm1 \n\t" /* mm1 = u12+v12 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
490
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
491 "movq 24(%0), %%mm0 \n\t" /* mm0 = C4/2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
492 "psubsw %%mm5, %%mm7 \n\t" /* mm7 = u12-v12 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
493
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
494 "movq %%mm6, 5*8*2(%1)\n\t" /* save b0 in scratch1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
495 "pmulhw %%mm0, %%mm1 \n\t" /* mm1 = b1/2 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
496
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
497 "movq %%mm4, %%mm6 \n\t" /* mm6 = v26 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
498 "pmulhw %%mm0, %%mm7 \n\t" /* mm7 = b2/2 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
499
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
500 "movq 4*8*2(%1), %%mm5 \n\t" /* mm5 = x4 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
501 "movq %%mm3, %%mm0 \n\t" /* mm0 = x0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
502
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
503 "psubsw %%mm5, %%mm3 \n\t" /* mm3 = v04 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
504 "paddsw %%mm5, %%mm0 \n\t" /* mm0 = u04 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
505
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
506 "paddsw %%mm3, %%mm4 \n\t" /* mm4 = a1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
507 "movq %%mm0, %%mm5 \n\t" /* mm5 = u04 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
508
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
509 "psubsw %%mm6, %%mm3 \n\t" /* mm3 = a2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
510 "paddsw %%mm2, %%mm5 \n\t" /* mm5 = a0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
511
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
512 "paddsw %%mm1, %%mm1 \n\t" /* mm1 = b1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
513 "psubsw %%mm2, %%mm0 \n\t" /* mm0 = a3 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
514
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
515 "paddsw %%mm7, %%mm7 \n\t" /* mm7 = b2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
516 "movq %%mm3, %%mm2 \n\t" /* mm2 = a2 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
517
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
518 "movq %%mm4, %%mm6 \n\t" /* mm6 = a1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
519 "paddsw %%mm7, %%mm3 \n\t" /* mm3 = a2+b2 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
520
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
521 "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm3\n\t" /* mm3 = y2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
522 "paddsw %%mm1, %%mm4\n\t" /* mm4 = a1+b1 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
523
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
524 "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm4\n\t" /* mm4 = y1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
525 "psubsw %%mm1, %%mm6 \n\t" /* mm6 = a1-b1 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
526
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
527 "movq 5*8*2(%1), %%mm1 \n\t" /* mm1 = b0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
528 "psubsw %%mm7, %%mm2 \n\t" /* mm2 = a2-b2 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
529
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
530 "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm6\n\t" /* mm6 = y6 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
531 "movq %%mm5, %%mm7 \n\t" /* mm7 = a0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
532
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
533 "movq %%mm4, 1*8*2(%1)\n\t" /* save y1 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
534 "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm2\n\t" /* mm2 = y5 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
535
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
536 "movq %%mm3, 2*8*2(%1)\n\t" /* save y2 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
537 "paddsw %%mm1, %%mm5 \n\t" /* mm5 = a0+b0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
538
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
539 "movq 3*8*2(%1), %%mm4 \n\t" /* mm4 = b3 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
540 "psubsw %%mm1, %%mm7 \n\t" /* mm7 = a0-b0 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
541
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
542 "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm5\n\t" /* mm5 = y0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
543 "movq %%mm0, %%mm3 \n\t" /* mm3 = a3 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
544
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
545 "movq %%mm2, 5*8*2(%1)\n\t" /* save y5 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
546 "psubsw %%mm4, %%mm3 \n\t" /* mm3 = a3-b3 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
547
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
548 "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm7\n\t" /* mm7 = y7 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
549 "paddsw %%mm0, %%mm4 \n\t" /* mm4 = a3+b3 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
550
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
551 "movq %%mm5, 0*8*2(%1)\n\t" /* save y0 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
552 "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm3\n\t" /* mm3 = y4 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
553
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
554 "movq %%mm6, 6*8*2(%1)\n\t" /* save y6 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
555 "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm4\n\t" /* mm4 = y3 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
556
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
557 "movq %%mm7, 7*8*2(%1)\n\t" /* save y7 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
558
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
559 "movq %%mm3, 4*8*2(%1)\n\t" /* save y4 */
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
560
12296
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
561 "movq %%mm4, 3*8*2(%1)\n\t" /* save y3 */
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
562 :: "r" (t1_vector), "r" (col+offset)
34d95bdfd38c Translate libmpeg2 MMX IDCT to plain asm
vitor
parents: 11508
diff changeset
563 );
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
564
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
565 #undef T1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
566 #undef T2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
567 #undef T3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
568 #undef C4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
569 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
570
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
571
11508
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
/*
 * Rounding/bias constants rounder0 .. rounder7, each an 8-byte-aligned
 * int32_t array. declare_idct passes one of them as the second argument of
 * every idct_row() call; the digit in the name matches the N*8 row offset
 * that the call is paired with there.
 * The initializers are produced by rounder(), which is defined outside this
 * excerpt. The trailing comments record the formula behind each literal as
 * given by the original author.
 */
572 DECLARE_ALIGNED(8, static const int32_t, rounder0)[] =
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
573 rounder ((1 << (COL_SHIFT - 1)) - 0.5);
11508
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
574 DECLARE_ALIGNED(8, static const int32_t, rounder4)[] = rounder (0);
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
575 DECLARE_ALIGNED(8, static const int32_t, rounder1)[] =
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
576 rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
11508
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
577 DECLARE_ALIGNED(8, static const int32_t, rounder7)[] =
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
578 rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
11508
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
579 DECLARE_ALIGNED(8, static const int32_t, rounder2)[] =
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
580 rounder (0.60355339059); /* C2 * (C6+C2)/2 */
11508
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
581 DECLARE_ALIGNED(8, static const int32_t, rounder6)[] =
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
582 rounder (-0.25); /* C2 * (C6-C2)/2 */
11508
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
583 DECLARE_ALIGNED(8, static const int32_t, rounder3)[] =
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
584 rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
11508
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
585 DECLARE_ALIGNED(8, static const int32_t, rounder5)[] =
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
586 rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
587
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
588 #undef COL_SHIFT
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
589 #undef ROW_SHIFT
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
590
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/*
 * declare_idct: expand to the definition of one IDCT entry point,
 *     void idct (int16_t * const block)
 *
 * Macro parameters:
 *   idct           name of the generated function
 *   table          initializer helper that builds a coefficient table from
 *                  seven constants
 *   idct_row_head  invoked once before the first row: (block, offset, table)
 *   idct_row       per-row step: (table, rounder)
 *   idct_row_tail  invoked once after the last row: (block, offset)
 *   idct_row_mid   invoked between two rows:
 *                  (block, previous offset, next offset, table for next row)
 *
 * The generated body declares four 16-byte-aligned tables, each named after
 * the pair of rows that use it (table04, table17, table26, table35). It then
 * walks the rows in the order 0, 4, 1, 7, 2, 6, 3, 5, so rows sharing a
 * table are adjacent, pairing each row with the rounderN constant of the
 * same index. Finally it calls idct_col() at column offsets 0 and 4.
 *
 * NOTE(review): block is presumably an 8x8 coefficient array transformed in
 * place -- confirm against the row and column helpers.
 */
591 #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
592 void idct (int16_t * const block) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
593 { \
11508
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
594 DECLARE_ALIGNED(16, static const int16_t, table04)[] = \
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
595 table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
11508
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
596 DECLARE_ALIGNED(16, static const int16_t, table17)[] = \
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
597 table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
11508
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
598 DECLARE_ALIGNED(16, static const int16_t, table26)[] = \
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
599 table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
11508
7be32921237f Replace remaining uses of ATTR_ALIGNED with DECLARE_ALIGNED
mru
parents: 11381
diff changeset
600 DECLARE_ALIGNED(16, static const int16_t, table35)[] = \
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
601 table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
602 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
603 idct_row_head (block, 0*8, table04); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
604 idct_row (table04, rounder0); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
605 idct_row_mid (block, 0*8, 4*8, table04); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
606 idct_row (table04, rounder4); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
607 idct_row_mid (block, 4*8, 1*8, table17); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
608 idct_row (table17, rounder1); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
609 idct_row_mid (block, 1*8, 7*8, table17); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
610 idct_row (table17, rounder7); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
611 idct_row_mid (block, 7*8, 2*8, table26); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
612 idct_row (table26, rounder2); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
613 idct_row_mid (block, 2*8, 6*8, table26); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
614 idct_row (table26, rounder6); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
615 idct_row_mid (block, 6*8, 3*8, table35); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
616 idct_row (table35, rounder3); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
617 idct_row_mid (block, 3*8, 5*8, table35); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
618 idct_row (table35, rounder5); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
619 idct_row_tail (block, 5*8); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
620 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
621 idct_col (block, 0); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
622 idct_col (block, 4); \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
623 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
624
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/*
 * Define void ff_mmxext_idct (int16_t * const block) by expanding
 * declare_idct with the mmxext_* table and row helpers.
 */
625 declare_idct (ff_mmxext_idct, mmxext_table,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
626 mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
627
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
/*
 * Define void ff_mmx_idct (int16_t * const block) by expanding
 * declare_idct with the mmx_* table and row helpers.
 */
628 declare_idct (ff_mmx_idct, mmx_table,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
629 mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
630