annotate x86/vp3dsp.asm @ 12506:747e5f278c4b libavcodec

The debug text output of macroblocks can indicate MB_TYPE_INTERLACED, but it used to do it only for h264 codec. Allow it for other codecs, as mpeg2 and mpeg4 also set this flag.
author iive
date Tue, 21 Sep 2010 22:44:27 +0000
parents 2982071047a2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12436
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
1 ;******************************************************************************
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
2 ;* MMX/SSE2-optimized functions for the VP3 decoder
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
3 ;* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
4 ;*
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
5 ;* This file is part of FFmpeg.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
6 ;*
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
7 ;* FFmpeg is free software; you can redistribute it and/or
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
8 ;* modify it under the terms of the GNU Lesser General Public
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
9 ;* License as published by the Free Software Foundation; either
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
10 ;* version 2.1 of the License, or (at your option) any later version.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
11 ;*
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
12 ;* FFmpeg is distributed in the hope that it will be useful,
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
15 ;* Lesser General Public License for more details.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
16 ;*
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
17 ;* You should have received a copy of the GNU Lesser General Public
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
18 ;* License along with FFmpeg; if not, write to the Free Software
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
20 ;******************************************************************************
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
21
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
22 %include "x86inc.asm"
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
23 %include "x86util.asm"
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
24
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
25 ; MMX-optimized functions cribbed from the original VP3 source code.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
26
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
27 SECTION_RODATA
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
28
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
29 vp3_idct_data: times 8 dw 64277
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
30 times 8 dw 60547
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
31 times 8 dw 54491
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
32 times 8 dw 46341
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
33 times 8 dw 36410
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
34 times 8 dw 25080
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
35 times 8 dw 12785
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
36
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
37 cextern pb_1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
38 cextern pb_3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
39 cextern pb_7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
40 cextern pb_1F
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
41 cextern pb_81
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
42
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
43 cextern pw_8
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
44
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
45 cextern put_signed_pixels_clamped_mmx
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
46 cextern add_pixels_clamped_mmx
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
47
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
48 SECTION .text
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
49
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
50 ; this is off by one or two for some cases when filter_limit is greater than 63
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
51 ; in: p0 in mm6, p1 in mm4, p2 in mm2, p3 in mm1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
52 ; out: p1 in mm4, p2 in mm3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
53 %macro VP3_LOOP_FILTER 0
; Filters one edge, eight pixels at a time (one pixel per byte lane).
; Inputs:  p0 in m6, p1 in m4, p2 in m2, p3 in m1; r2 points at a table whose
;          8 bytes at offset 516 are loaded as the filter limit ("flim").
; Outputs: filtered p1 in m4, filtered p2 in m3.
; Clobbers m0, m1, m2, m5, m6, m7.
; NOTE(review): the byte constants pb_1/pb_3/pb_7/pb_1F/pb_81 are external;
; the comments below assume each holds the value in its name in every byte.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
54 movq m7, m6 ; keep a second copy of p0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
55 pand m6, [pb_7] ; p0&7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
56 psrlw m7, 3 ; word-wise shift; bits crossing byte lanes are masked off next
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
57 pand m7, [pb_1F] ; p0>>3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
58 movq m3, m2 ; p2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
59 pxor m2, m4 ; p2^p1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
60 pand m2, [pb_1] ; (p2^p1)&1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
61 movq m5, m2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
62 paddb m2, m2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
63 paddb m2, m5 ; 3*((p2^p1)&1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
64 paddb m2, m6 ; extra bits lost in shifts
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
65 pcmpeqb m0, m0 ; m0 = all ones (0xFF in every byte)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
66 pxor m1, m0 ; 255 - p3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
67 pavgb m1, m2 ; (256 - p3 + extrabits) >> 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
68 pxor m0, m4 ; 255 - p1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
69 pavgb m0, m3 ; (256 + p2-p1) >> 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
70 paddb m1, [pb_3] ; rounding bias for the two averages below
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
71 pavgb m1, m0 ; 128+2+( p2-p1 - p3) >> 2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
72 pavgb m1, m0 ; 128+1+(3*(p2-p1) - p3) >> 3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
73 paddusb m7, m1 ; d+128+1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
74 movq m6, [pb_81]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
75 psubusb m6, m7 ; m6 = max(0, 0x81 - (d+0x81)): magnitude of d where d < 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
76 psubusb m7, [pb_81] ; m7 = max(0, (d+0x81) - 0x81): magnitude of d where d > 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
77
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
78 movq m5, [r2+516] ; flim
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
79 pminub m6, m5 ; a = min(|d|, flim), negative side
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
80 pminub m7, m5 ; a = min(|d|, flim), positive side
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
81 movq m0, m6 ; save a (negative side)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
82 movq m1, m7 ; save a (positive side)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
83 paddb m6, m6 ; 2*a
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
84 paddb m7, m7 ; 2*a
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
85 pminub m6, m5 ; min(2*a, flim)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
86 pminub m7, m5 ; min(2*a, flim)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
87 psubb m6, m0 ; min(2*a, flim) - a: limited correction, negative side
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
88 psubb m7, m1 ; min(2*a, flim) - a: limited correction, positive side
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
89 paddusb m4, m7 ; p1 += positive correction (saturating)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
90 psubusb m4, m6 ; p1 -= negative correction (saturating)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
91 psubusb m3, m7 ; p2 -= positive correction (saturating)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
92 paddusb m3, m6 ; p2 += negative correction (saturating)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
93 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
94
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
95 %macro STORE_4_WORDS 1
; Writes the four 16-bit words of MMX register %1, lowest first, to
; [r0-1], [r0+r1-1], [r0+r1*2-1] and [r0+r3-1].
; Clobbers r2 and leaves %1 shifted right by 32 bits.
12457
2982071047a2 Use "d" suffix for general-purpose registers used with movd.
reimar
parents: 12436
diff changeset
96 movd r2d, %1 ; r2d = words 0 and 1 of %1
12436
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
97 mov [r0 -1], r2w ; store word 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
98 psrlq %1, 32 ; bring words 2 and 3 down into the low dword of %1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
99 shr r2, 16 ; r2w = word 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
100 mov [r0+r1 -1], r2w ; store word 1
12457
2982071047a2 Use "d" suffix for general-purpose registers used with movd.
reimar
parents: 12436
diff changeset
101 movd r2d, %1 ; r2d = words 2 and 3
12436
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
102 mov [r0+r1*2-1], r2w ; store word 2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
103 shr r2, 16 ; r2w = word 3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
104 mov [r0+r3 -1], r2w ; store word 3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
105 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
106
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
107 INIT_MMX
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
108 cglobal vp3_v_loop_filter_mmx2, 3, 4
; Loop filter across a horizontal edge, 8 pixels wide.
; r0 = address of the row just below the edge, r1 = 32-bit signed row stride,
; r2 = table pointer consumed by VP3_LOOP_FILTER (reads [r2+516]).
; Reads rows -2, -1, 0, +1 relative to r0 and rewrites rows -1 and 0.
; NOTE(review): "3, 4" presumably means 3 arguments / 4 GPRs per x86inc's cglobal - confirm.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
109 %ifdef ARCH_X86_64
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
110 movsxd r1, r1d ; sign-extend the 32-bit stride to pointer width
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
111 %endif
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
112 mov r3, r1 ; r3 = +stride
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
113 neg r1 ; r1 = -stride
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
114 movq m6, [r0+r1*2] ; p0 = row -2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
115 movq m4, [r0+r1 ] ; p1 = row -1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
116 movq m2, [r0 ] ; p2 = row 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
117 movq m1, [r0+r3 ] ; p3 = row +1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
118
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
119 VP3_LOOP_FILTER
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
120
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
121 movq [r0+r1], m4 ; write filtered p1 back to row -1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
122 movq [r0 ], m3 ; write filtered p2 back to row 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
123 RET
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
124
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
125 cglobal vp3_h_loop_filter_mmx2, 3, 4
; Loop filter across a vertical edge, 8 rows tall.
; r0 = address of the pixel just right of the edge in the first row,
; r1 = 32-bit signed row stride, r2 = table pointer consumed by
; VP3_LOOP_FILTER (reads [r2+516]); r2 is later reused as scratch by STORE_4_WORDS.
; Reads 4 bytes starting at column -2 from each of 8 rows and rewrites
; columns -1 and 0 of each row. On return r0 has been advanced by 4 rows.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
126 %ifdef ARCH_X86_64
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
127 movsxd r1, r1d ; sign-extend the 32-bit stride to pointer width
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
128 %endif
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
129 lea r3, [r1*3] ; r3 = 3*stride
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
130
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
131 movd m6, [r0 -2] ; row 0, columns -2..+1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
132 movd m4, [r0+r1 -2] ; row 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
133 movd m2, [r0+r1*2-2] ; row 2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
134 movd m1, [r0+r3 -2] ; row 3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
135 lea r0, [r0+r1*4 ] ; advance r0 to row 4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
136 punpcklbw m6, [r0 -2] ; interleave row 0 bytes with row 4 bytes
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
137 punpcklbw m4, [r0+r1 -2] ; rows 1 and 5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
138 punpcklbw m2, [r0+r1*2-2] ; rows 2 and 6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
139 punpcklbw m1, [r0+r3 -2] ; rows 3 and 7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
140 sub r0, r3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
141 sub r0, r1 ; r0 -= 4*stride in total: back to row 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
142
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
143 TRANSPOSE4x4B 6, 4, 2, 1, 0 ; presumably leaves columns p0..p3 in m6, m4, m2, m1 (macro defined elsewhere) - confirm
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
144 VP3_LOOP_FILTER
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
145 SBUTTERFLY bw, 4, 3, 5 ; presumably interleaves filtered p1/p2 bytes into per-row words, m5 as temp - confirm
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
146
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
147 STORE_4_WORDS m4 ; write columns -1..0 of rows 0-3 (clobbers r2)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
148 lea r0, [r0+r1*4 ] ; advance r0 to row 4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
149 STORE_4_WORDS m3 ; write columns -1..0 of rows 4-7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
150 RET
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
151
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
152 ; from original comments: The Macro does IDct on 4 1-D Dcts
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
153 %macro BeginIDCT 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
154 movq m2, I(3)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
155 movq m6, C(3)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
156 movq m4, m2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
157 movq m7, J(5)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
158 pmulhw m4, m6 ; r4 = c3*i3 - i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
159 movq m1, C(5)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
160 pmulhw m6, m7 ; r6 = c3*i5 - i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
161 movq m5, m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
162 pmulhw m1, m2 ; r1 = c5*i3 - i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
163 movq m3, I(1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
164 pmulhw m5, m7 ; r5 = c5*i5 - i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
165 movq m0, C(1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
166 paddw m4, m2 ; r4 = c3*i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
167 paddw m6, m7 ; r6 = c3*i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
168 paddw m2, m1 ; r2 = c5*i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
169 movq m1, J(7)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
170 paddw m7, m5 ; r7 = c5*i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
171 movq m5, m0 ; r5 = c1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
172 pmulhw m0, m3 ; r0 = c1*i1 - i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
173 paddsw m4, m7 ; r4 = C = c3*i3 + c5*i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
174 pmulhw m5, m1 ; r5 = c1*i7 - i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
175 movq m7, C(7)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
176 psubsw m6, m2 ; r6 = D = c3*i5 - c5*i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
177 paddw m0, m3 ; r0 = c1*i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
178 pmulhw m3, m7 ; r3 = c7*i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
179 movq m2, I(2)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
180 pmulhw m7, m1 ; r7 = c7*i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
181 paddw m5, m1 ; r5 = c1*i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
182 movq m1, m2 ; r1 = i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
183 pmulhw m2, C(2) ; r2 = c2*i2 - i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
184 psubsw m3, m5 ; r3 = B = c7*i1 - c1*i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
185 movq m5, J(6)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
186 paddsw m0, m7 ; r0 = A = c1*i1 + c7*i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
187 movq m7, m5 ; r7 = i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
188 psubsw m0, m4 ; r0 = A - C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
189 pmulhw m5, C(2) ; r5 = c2*i6 - i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
190 paddw m2, m1 ; r2 = c2*i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
191 pmulhw m1, C(6) ; r1 = c6*i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
192 paddsw m4, m4 ; r4 = C + C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
193 paddsw m4, m0 ; r4 = C. = A + C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
194 psubsw m3, m6 ; r3 = B - D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
195 paddw m5, m7 ; r5 = c2*i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
196 paddsw m6, m6 ; r6 = D + D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
197 pmulhw m7, C(6) ; r7 = c6*i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
198 paddsw m6, m3 ; r6 = D. = B + D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
199 movq I(1), m4 ; save C. at I(1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
200 psubsw m1, m5 ; r1 = H = c6*i2 - c2*i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
201 movq m4, C(4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
202 movq m5, m3 ; r5 = B - D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
203 pmulhw m3, m4 ; r3 = (c4 - 1) * (B - D)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
204 paddsw m7, m2 ; r7 = G = c6*i6 + c2*i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
205 movq I(2), m6 ; save D. at I(2)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
206 movq m2, m0 ; r2 = A - C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
207 movq m6, I(0)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
208 pmulhw m0, m4 ; r0 = (c4 - 1) * (A - C)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
209 paddw m5, m3 ; r5 = B. = c4 * (B - D)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
210 movq m3, J(4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
211 psubsw m5, m1 ; r5 = B.. = B. - H
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
212 paddw m2, m0 ; r2 = A. = c4 * (A - C)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
213 psubsw m6, m3 ; r6 = i0 - i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
214 movq m0, m6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
215 pmulhw m6, m4 ; r6 = (c4 - 1) * (i0 - i4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
216 paddsw m3, m3 ; r3 = i4 + i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
217 paddsw m1, m1 ; r1 = H + H
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
218 paddsw m3, m0 ; r3 = i0 + i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
219 paddsw m1, m5 ; r1 = H. = B. + H
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
220 pmulhw m4, m3 ; r4 = (c4 - 1) * (i0 + i4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
221 paddsw m6, m0 ; r6 = F = c4 * (i0 - i4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
222 psubsw m6, m2 ; r6 = F. = F - A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
223 paddsw m2, m2 ; r2 = A. + A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
224 movq m0, I(1) ; r0 = C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
225 paddsw m2, m6 ; r2 = A.. = F + A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
226 paddw m4, m3 ; r4 = E = c4 * (i0 + i4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
227 psubsw m2, m1 ; r2 = R2 = A.. - H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
228 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
229
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
230 ; RowIDCT gets ready to transpose
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
231 %macro RowIDCT 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
232 BeginIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
233 movq m3, I(2) ; r3 = D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
234 psubsw m4, m7 ; r4 = E. = E - G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
235 paddsw m1, m1 ; r1 = H. + H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
236 paddsw m7, m7 ; r7 = G + G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
237 paddsw m1, m2 ; r1 = R1 = A.. + H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
238 paddsw m7, m4 ; r7 = G. = E + G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
239 psubsw m4, m3 ; r4 = R4 = E. - D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
240 paddsw m3, m3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
241 psubsw m6, m5 ; r6 = R6 = F. - B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
242 paddsw m5, m5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
243 paddsw m3, m4 ; r3 = R3 = E. + D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
244 paddsw m5, m6 ; r5 = R5 = F. + B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
245 psubsw m7, m0 ; r7 = R7 = G. - C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
246 paddsw m0, m0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
247 movq I(1), m1 ; save R1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
248 paddsw m0, m7 ; r0 = R0 = G. + C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
249 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
250
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
251 ; Column IDCT normalizes and stores final results
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
252 %macro ColumnIDCT 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
253 BeginIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
254 paddsw m2, OC_8 ; adjust R2 (and R1) for shift
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
255 paddsw m1, m1 ; r1 = H. + H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
256 paddsw m1, m2 ; r1 = R1 = A.. + H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
257 psraw m2, 4 ; r2 = NR2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
258 psubsw m4, m7 ; r4 = E. = E - G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
259 psraw m1, 4 ; r1 = NR1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
260 movq m3, I(2) ; r3 = D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
261 paddsw m7, m7 ; r7 = G + G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
262 movq I(2), m2 ; store NR2 at I2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
263 paddsw m7, m4 ; r7 = G. = E + G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
264 movq I(1), m1 ; store NR1 at I1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
265 psubsw m4, m3 ; r4 = R4 = E. - D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
266 paddsw m4, OC_8 ; adjust R4 (and R3) for shift
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
267 paddsw m3, m3 ; r3 = D. + D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
268 paddsw m3, m4 ; r3 = R3 = E. + D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
269 psraw m4, 4 ; r4 = NR4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
270 psubsw m6, m5 ; r6 = R6 = F. - B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
271 psraw m3, 4 ; r3 = NR3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
272 paddsw m6, OC_8 ; adjust R6 (and R5) for shift
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
273 paddsw m5, m5 ; r5 = B.. + B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
274 paddsw m5, m6 ; r5 = R5 = F. + B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
275 psraw m6, 4 ; r6 = NR6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
276 movq J(4), m4 ; store NR4 at J4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
277 psraw m5, 4 ; r5 = NR5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
278 movq I(3), m3 ; store NR3 at I3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
279 psubsw m7, m0 ; r7 = R7 = G. - C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
280 paddsw m7, OC_8 ; adjust R7 (and R0) for shift
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
281 paddsw m0, m0 ; r0 = C. + C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
282 paddsw m0, m7 ; r0 = R0 = G. + C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
283 psraw m7, 4 ; r7 = NR7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
284 movq J(6), m6 ; store NR6 at J6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
285 psraw m0, 4 ; r0 = NR0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
286 movq J(5), m5 ; store NR5 at J5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
287 movq J(7), m7 ; store NR7 at J7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
288 movq I(0), m0 ; store NR0 at I0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
289 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
290
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
291 ; Following macro does two 4x4 transposes in place.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
292 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
293 ; At entry (we assume):
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
294 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
295 ; r0 = a3 a2 a1 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
296 ; I(1) = b3 b2 b1 b0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
297 ; r2 = c3 c2 c1 c0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
298 ; r3 = d3 d2 d1 d0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
299 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
300 ; r4 = e3 e2 e1 e0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
301 ; r5 = f3 f2 f1 f0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
302 ; r6 = g3 g2 g1 g0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
303 ; r7 = h3 h2 h1 h0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
304 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
305 ; At exit, we have:
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
306 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
307 ; I(0) = d0 c0 b0 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
308 ; I(1) = d1 c1 b1 a1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
309 ; I(2) = d2 c2 b2 a2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
310 ; I(3) = d3 c3 b3 a3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
311 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
312 ; J(4) = h0 g0 f0 e0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
313 ; J(5) = h1 g1 f1 e1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
314 ; J(6) = h2 g2 f2 e2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
315 ; J(7) = h3 g3 f3 e3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
316 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
317 ; I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
318 ; J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
319 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
320 ; Since r1 is free at entry, we calculate the Js first.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
321 %macro Transpose 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
322 movq m1, m4 ; r1 = e3 e2 e1 e0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
323 punpcklwd m4, m5 ; r4 = f1 e1 f0 e0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
324 movq I(0), m0 ; save a3 a2 a1 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
325 punpckhwd m1, m5 ; r1 = f3 e3 f2 e2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
326 movq m0, m6 ; r0 = g3 g2 g1 g0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
327 punpcklwd m6, m7 ; r6 = h1 g1 h0 g0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
328 movq m5, m4 ; r5 = f1 e1 f0 e0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
329 punpckldq m4, m6 ; r4 = h0 g0 f0 e0 = R4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
330 punpckhdq m5, m6 ; r5 = h1 g1 f1 e1 = R5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
331 movq m6, m1 ; r6 = f3 e3 f2 e2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
332 movq J(4), m4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
333 punpckhwd m0, m7 ; r0 = h3 g3 h2 g2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
334 movq J(5), m5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
335 punpckhdq m6, m0 ; r6 = h3 g3 f3 e3 = R7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
336 movq m4, I(0) ; r4 = a3 a2 a1 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
337 punpckldq m1, m0 ; r1 = h2 g2 f2 e2 = R6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
338 movq m5, I(1) ; r5 = b3 b2 b1 b0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
339 movq m0, m4 ; r0 = a3 a2 a1 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
340 movq J(7), m6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
341 punpcklwd m0, m5 ; r0 = b1 a1 b0 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
342 movq J(6), m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
343 punpckhwd m4, m5 ; r4 = b3 a3 b2 a2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
344 movq m5, m2 ; r5 = c3 c2 c1 c0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
345 punpcklwd m2, m3 ; r2 = d1 c1 d0 c0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
346 movq m1, m0 ; r1 = b1 a1 b0 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
347 punpckldq m0, m2 ; r0 = d0 c0 b0 a0 = R0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
348 punpckhdq m1, m2 ; r1 = d1 c1 b1 a1 = R1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
349 movq m2, m4 ; r2 = b3 a3 b2 a2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
350 movq I(0), m0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
351 punpckhwd m5, m3 ; r5 = d3 c3 d2 c2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
352 movq I(1), m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
353 punpckhdq m4, m5 ; r4 = d3 c3 b3 a3 = R3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
354 punpckldq m2, m5 ; r2 = d2 c2 b2 a2 = R2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
355 movq I(3), m4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
356 movq I(2), m2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
357 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
358
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
359 %macro VP3_IDCT_mmx 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
360 ; eax = quantized input
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
361 ; ebx = dequantizer matrix
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
362 ; ecx = IDCT constants
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
363 ; M(I) = ecx + MaskOffset(0) + I * 8
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
364 ; C(I) = vp3_idct_data + (I-1) * 16
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
365 ; edx = output
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
366 ; r0..r7 = mm0..mm7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
367 %define OC_8 [pw_8]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
368 %define C(x) [vp3_idct_data+16*(x-1)]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
369
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
370 ; at this point, function has completed dequantization + dezigzag +
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
371 ; partial transposition; now do the idct itself
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
372 %define I(x) [%1+16* x ]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
373 %define J(x) [%1+16*(x-4)+8]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
374 RowIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
375 Transpose
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
376
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
377 %define I(x) [%1+16* x +64]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
378 %define J(x) [%1+16*(x-4)+72]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
379 RowIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
380 Transpose
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
381
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
382 %define I(x) [%1+16*x]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
383 %define J(x) [%1+16*x]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
384 ColumnIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
385
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
386 %define I(x) [%1+16*x+8]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
387 %define J(x) [%1+16*x+8]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
388 ColumnIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
389 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
390
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
391 %macro VP3_1D_IDCT_SSE2 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
392 movdqa m2, I(3) ; xmm2 = i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
393 movdqa m6, C(3) ; xmm6 = c3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
394 movdqa m4, m2 ; xmm4 = i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
395 movdqa m7, I(5) ; xmm7 = i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
396 pmulhw m4, m6 ; xmm4 = c3 * i3 - i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
397 movdqa m1, C(5) ; xmm1 = c5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
398 pmulhw m6, m7 ; xmm6 = c3 * i5 - i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
399 movdqa m5, m1 ; xmm5 = c5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
400 pmulhw m1, m2 ; xmm1 = c5 * i3 - i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
401 movdqa m3, I(1) ; xmm3 = i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
402 pmulhw m5, m7 ; xmm5 = c5 * i5 - i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
403 movdqa m0, C(1) ; xmm0 = c1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
404 paddw m4, m2 ; xmm4 = c3 * i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
405 paddw m6, m7 ; xmm6 = c3 * i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
406 paddw m2, m1 ; xmm2 = c5 * i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
407 movdqa m1, I(7) ; xmm1 = i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
408 paddw m7, m5 ; xmm7 = c5 * i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
409 movdqa m5, m0 ; xmm5 = c1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
410 pmulhw m0, m3 ; xmm0 = c1 * i1 - i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
411 paddsw m4, m7 ; xmm4 = c3 * i3 + c5 * i5 = C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
412 pmulhw m5, m1 ; xmm5 = c1 * i7 - i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
413 movdqa m7, C(7) ; xmm7 = c7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
414 psubsw m6, m2 ; xmm6 = c3 * i5 - c5 * i3 = D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
415 paddw m0, m3 ; xmm0 = c1 * i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
416 pmulhw m3, m7 ; xmm3 = c7 * i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
417 movdqa m2, I(2) ; xmm2 = i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
418 pmulhw m7, m1 ; xmm7 = c7 * i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
419 paddw m5, m1 ; xmm5 = c1 * i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
420 movdqa m1, m2 ; xmm1 = i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
421 pmulhw m2, C(2) ; xmm2 = i2 * c2 -i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
422 psubsw m3, m5 ; xmm3 = c7 * i1 - c1 * i7 = B
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
423 movdqa m5, I(6) ; xmm5 = i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
424 paddsw m0, m7 ; xmm0 = c1 * i1 + c7 * i7 = A
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
425 movdqa m7, m5 ; xmm7 = i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
426 psubsw m0, m4 ; xmm0 = A - C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
427 pmulhw m5, C(2) ; xmm5 = c2 * i6 - i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
428 paddw m2, m1 ; xmm2 = i2 * c2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
429 pmulhw m1, C(6) ; xmm1 = c6 * i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
430 paddsw m4, m4 ; xmm4 = C + C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
431 paddsw m4, m0 ; xmm4 = A + C = C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
432 psubsw m3, m6 ; xmm3 = B - D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
433 paddw m5, m7 ; xmm5 = c2 * i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
434 paddsw m6, m6 ; xmm6 = D + D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
435 pmulhw m7, C(6) ; xmm7 = c6 * i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
436 paddsw m6, m3 ; xmm6 = B + D = D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
437 movdqa I(1), m4 ; Save C. at I(1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
438 psubsw m1, m5 ; xmm1 = c6 * i2 - c2 * i6 = H
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
439 movdqa m4, C(4) ; xmm4 = C4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
440 movdqa m5, m3 ; xmm5 = B - D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
441 pmulhw m3, m4 ; xmm3 = ( c4 -1 ) * ( B - D )
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
442 paddsw m7, m2 ; xmm7 = c2 * i2 + c6 * i6 = G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
443 movdqa I(2), m6 ; save D. at I(2)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
444 movdqa m2, m0 ; xmm2 = A - C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
445 movdqa m6, I(0) ; xmm6 = i0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
446 pmulhw m0, m4 ; xmm0 = ( c4 - 1 ) * ( A - C ) = A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
447 paddw m5, m3 ; xmm5 = c4 * ( B - D ) = B.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
448 movdqa m3, I(4) ; xmm3 = i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
449 psubsw m5, m1 ; xmm5 = B. - H = B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
450 paddw m2, m0 ; xmm2 = c4 * ( A - C) = A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
451 psubsw m6, m3 ; xmm6 = i0 - i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
452 movdqa m0, m6 ; xmm0 = i0 - i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
453 pmulhw m6, m4 ; xmm6 = (c4 - 1) * (i0 - i4) = F
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
454 paddsw m3, m3 ; xmm3 = i4 + i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
455 paddsw m1, m1 ; xmm1 = H + H
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
456 paddsw m3, m0 ; xmm3 = i0 + i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
457 paddsw m1, m5 ; xmm1 = B. + H = H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
458 pmulhw m4, m3 ; xmm4 = ( c4 - 1 ) * ( i0 + i4 )
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
459 paddw m6, m0 ; xmm6 = c4 * ( i0 - i4 )
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
460 psubsw m6, m2 ; xmm6 = F - A. = F.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
461 paddsw m2, m2 ; xmm2 = A. + A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
462 movdqa m0, I(1) ; Load C. from I(1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
463 paddsw m2, m6 ; xmm2 = F + A. = A..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
464 paddw m4, m3 ; xmm4 = c4 * ( i0 + i4 ) = E
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
465 psubsw m2, m1 ; xmm2 = A.. - H. = R2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
466 ADD(m2) ; Adjust R2 and R1 before shifting
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
467 paddsw m1, m1 ; xmm1 = H. + H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
468 paddsw m1, m2 ; xmm1 = A.. + H. = R1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
469 SHIFT(m2) ; xmm2 = op2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
470 psubsw m4, m7 ; xmm4 = E - G = E.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
471 SHIFT(m1) ; xmm1 = op1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
472 movdqa m3, I(2) ; Load D. from I(2)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
473 paddsw m7, m7 ; xmm7 = G + G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
474 paddsw m7, m4 ; xmm7 = E + G = G.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
475 psubsw m4, m3 ; xmm4 = E. - D. = R4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
476 ADD(m4) ; Adjust R4 and R3 before shifting
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
477 paddsw m3, m3 ; xmm3 = D. + D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
478 paddsw m3, m4 ; xmm3 = E. + D. = R3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
479 SHIFT(m4) ; xmm4 = op4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
480 psubsw m6, m5 ; xmm6 = F. - B..= R6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
481 SHIFT(m3) ; xmm3 = op3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
482 ADD(m6) ; Adjust R6 and R5 before shifting
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
483 paddsw m5, m5 ; xmm5 = B.. + B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
484 paddsw m5, m6 ; xmm5 = F. + B.. = R5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
485 SHIFT(m6) ; xmm6 = op6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
486 SHIFT(m5) ; xmm5 = op5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
487 psubsw m7, m0 ; xmm7 = G. - C. = R7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
488 ADD(m7) ; Adjust R7 and R0 before shifting
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
489 paddsw m0, m0 ; xmm0 = C. + C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
490 paddsw m0, m7 ; xmm0 = G. + C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
491 SHIFT(m7) ; xmm7 = op7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
492 SHIFT(m0) ; xmm0 = op0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
493 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
494
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; PUT_BLOCK a, b, c, d, e, f, g, h
; Stores vector registers m<a> .. m<h> to output rows O(0) .. O(7), in
; argument order (first argument -> O(0), last argument -> O(7)).
; O(x) must have been %define'd by the invoking macro. movdqa is an aligned
; store, so every O(x) address has to be 16-byte aligned.
%macro PUT_BLOCK 8
%assign %%row 0
%rep 8
    movdqa      O(%%row), m%1
    ; bring the next register number into %1 and advance to the next row
%rotate 1
%assign %%row %%row+1
%endrep
%endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
505
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; VP3_IDCT_sse2 ptr
; Runs the SSE2 inverse DCT in place on the eight 16-byte rows starting at
; %1: I(x) and O(x) both resolve to [%1+16*x], so input rows are overwritten
; by the result.
;
; Sequence:
;   1. 1-D pass with ADD/SHIFT defined empty (no rounding, no scaling)
;   2. 8x8 word transpose of m0..m7
;   3. store m0..m7 back to the block
;   4. 1-D pass where ADD adds [pw_8] (saturating) and SHIFT is an
;      arithmetic right shift by 4
;   5. store m0..m7 back to the block
;
; C(x) indexes 16-byte entries of vp3_idct_data, 1-based (C(1) is entry 0).
%macro VP3_IDCT_sse2 1
    ; ---- first pass: ADD and SHIFT expand to nothing ----
%define SHIFT(x)
%define ADD(x)
%define C(x) [vp3_idct_data+16*(x-1)]
%define O(x) [%1+16*x]
%define I(x) [%1+16*x]
    VP3_1D_IDCT_SSE2
%ifndef ARCH_X86_64
    ; the first two rows of the block itself are handed to the transpose
    ; in place of a ninth vector register
    TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%1], [%1+16]
%else
    ; register 8 is passed as the extra operand on x86-64
    TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
%endif
    PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7

    ; ---- second pass: add [pw_8], then shift right by 4 ----
%define ADD(x) paddsw x, [pw_8]
%define SHIFT(x) psraw x, 4
    VP3_1D_IDCT_SSE2
    PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7
%endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
525
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; VP3_IDCT_TAILCALL target
; Shared tail of the put/add wrappers. Rotates the three incoming arguments
; so that the callee receives (old arg2, old arg0, old arg1), then transfers
; control to %1.
;   x86-64: arguments live in r0..r2; r3 is used as scratch for the rotation.
;   x86-32: the rotated values are written back to the stack slots r0m..r2m.
; On WIN64 the target is called and followed by RET instead of being jumped
; to -- presumably so the function epilogue still runs; confirm against
; x86inc.
; NOTE(review): nothing here reserves Win64 shadow space or re-aligns the
; stack before the call -- verify that the cglobal prologue covers this.
%macro VP3_IDCT_TAILCALL 1
%ifdef ARCH_X86_64
    mov         r3, r2
    mov         r2, r1
    mov         r1, r0
    mov         r0, r3
%else
    mov         r0m, r2
    mov         r1m, r0
    mov         r2m, r1
%endif
%ifdef WIN64
    call        %1
    RET
%else
    jmp         %1
%endif
%endmacro

; vp3_idct_funcs suffix, xmm_count, gpr_count
; Emits three entry points for one instruction-set flavour:
;   vp3_idct_<suffix>      IDCT on the block addressed by argument 0
;   vp3_idct_put_<suffix>  IDCT on the block addressed by argument 2, then
;                          hands over to put_signed_pixels_clamped_mmx
;   vp3_idct_add_<suffix>  IDCT on the block addressed by argument 2, then
;                          hands over to add_pixels_clamped_mmx
; %1 selects the VP3_IDCT_<suffix> macro and names the symbols, %2 is passed
; through as the last cglobal operand, %3 is the general-purpose register
; count requested for the put/add variants.
%macro vp3_idct_funcs 3
cglobal vp3_idct_%1, 1, 1, %2
    VP3_IDCT_%1 r0
    RET

cglobal vp3_idct_put_%1, 3, %3, %2
    VP3_IDCT_%1 r2
    VP3_IDCT_TAILCALL put_signed_pixels_clamped_mmx

cglobal vp3_idct_add_%1, 3, %3, %2
    VP3_IDCT_%1 r2
    VP3_IDCT_TAILCALL add_pixels_clamped_mmx
%endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
569
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; General-purpose register count requested by the put/add wrappers.
; The x86-64 path uses r3 as scratch while rotating its register arguments,
; so it needs four; the 32-bit path only touches r0..r2.
%ifndef ARCH_X86_64
%define REGS 3
%else
%define REGS 4
%endif

; MMX flavour: 0 is forwarded as the last cglobal operand.
INIT_MMX
vp3_idct_funcs mmx, 0, REGS

; SSE2 flavour: 9 is forwarded as the last cglobal operand (the x86-64
; transpose in VP3_IDCT_sse2 names register 8 in addition to 0..7).
INIT_XMM
vp3_idct_funcs sse2, 9, REGS
%undef REGS
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
580
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; DC_ADD
; Applies a saturating add followed by a saturating subtract to four rows of
; pixels, in place.
;   in:  r0 = address of the first row
;        r1 = row stride
;        r3 = offset of the fourth row (the caller in this file sets r1*3)
;        m0 = packed unsigned bytes to add       (paddusb)
;        m1 = packed unsigned bytes to subtract  (psubusb)
;   clobbers: m2, m3, m4, m5
; Each row is one movq-sized load/store. All four rows are read before any
; row is written.
%macro DC_ADD 0
    ; load rows 0..3
    movq        m2, [r0     ]
    movq        m3, [r0+r1  ]
    movq        m4, [r0+r1*2]
    movq        m5, [r0+r3  ]

    ; row += m0, clamped at 255
    paddusb     m2, m0
    paddusb     m3, m0
    paddusb     m4, m0
    paddusb     m5, m0

    ; row -= m1, clamped at 0
    psubusb     m2, m1
    psubusb     m3, m1
    psubusb     m4, m1
    psubusb     m5, m1

    ; write rows 0..3 back
    movq        [r0     ], m2
    movq        [r0+r1  ], m3
    movq        [r0+r1*2], m4
    movq        [r0+r3  ], m5
%endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
599
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; vp3_idct_dc_add_mmx2(arg0, arg1, arg2)
;   r0 = address of the first pixel row to modify
;   r1 = row stride (sign-extended from 32 bits on x86-64)
;   r2 = pointer to a 16-bit signed value; only that first word is read
; Computes dc = (word[r2] + 15) >> 5 (arithmetic shift) and applies it to
; eight rows via two DC_ADD invocations of four rows each.
; The signed DC is split into two unsigned byte vectors so the update can be
; done with unsigned-saturating byte arithmetic:
;   m0 = bytes of  dc, saturated to 0..255 (zero when dc is negative)
;   m1 = bytes of -dc, saturated to 0..255 (zero when dc is positive)
; Requests 3 arguments and 4 general-purpose registers; r3 holds stride*3.
INIT_MMX
cglobal vp3_idct_dc_add_mmx2, 3, 4
%ifdef ARCH_X86_64
    movsxd      r1, r1d
%endif
    ; scalar DC term
    movsx       r2, word [r2]
    add         r2, 15
    sar         r2, 5
    ; offset of the fourth row, used by DC_ADD
    lea         r3, [r1*3]

    pxor        m1, m1
    movd        m0, r2d
    pshufw      m0, m0, 0x0         ; broadcast the low word to all four words
    psubw       m1, m0              ; m1 = -dc in every word
    packuswb    m0, m0              ; words -> unsigned-saturated bytes
    packuswb    m1, m1

    DC_ADD                          ; rows 0..3
    lea         r0, [r0+r1*4]
    DC_ADD                          ; rows 4..7
    RET