annotate x86/vp3dsp.asm @ 12475:9fef0a8ddd63 libavcodec

Move mm_support() from libavcodec to libavutil, make it a public function and rename it to av_get_cpu_flags().
author stefano
date Wed, 08 Sep 2010 15:07:14 +0000
parents 2982071047a2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12436
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
1 ;******************************************************************************
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
2 ;* MMX/SSE2-optimized functions for the VP3 decoder
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
3 ;* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
4 ;*
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
5 ;* This file is part of FFmpeg.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
6 ;*
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
7 ;* FFmpeg is free software; you can redistribute it and/or
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
8 ;* modify it under the terms of the GNU Lesser General Public
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
9 ;* License as published by the Free Software Foundation; either
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
10 ;* version 2.1 of the License, or (at your option) any later version.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
11 ;*
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
12 ;* FFmpeg is distributed in the hope that it will be useful,
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
15 ;* Lesser General Public License for more details.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
16 ;*
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
17 ;* You should have received a copy of the GNU Lesser General Public
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
18 ;* License along with FFmpeg; if not, write to the Free Software
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
20 ;******************************************************************************
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
21
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
22 %include "x86inc.asm"
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
23 %include "x86util.asm"
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
24
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
25 ; MMX-optimized functions cribbed from the original VP3 source code.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
26
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
27 SECTION_RODATA
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
28
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
29 vp3_idct_data: times 8 dw 64277 ; round(65536*cos(1*pi/16)), replicated 8 times; as a signed word this is c-1, hence the "c*i - i" pmulhw comments below
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
30 times 8 dw 60547 ; round(65536*cos(2*pi/16))
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
31 times 8 dw 54491 ; round(65536*cos(3*pi/16))
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
32 times 8 dw 46341 ; round(65536*cos(4*pi/16))
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
33 times 8 dw 36410 ; round(65536*cos(5*pi/16))
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
34 times 8 dw 25080 ; round(65536*cos(6*pi/16)); fits a positive signed word, so pmulhw yields c*i directly
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
35 times 8 dw 12785 ; round(65536*cos(7*pi/16))
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
36
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
37 cextern pb_1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
38 cextern pb_3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
39 cextern pb_7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
40 cextern pb_1F
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
41 cextern pb_81
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
42
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
43 cextern pw_8
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
44
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
45 cextern put_signed_pixels_clamped_mmx
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
46 cextern add_pixels_clamped_mmx
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
47
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
48 SECTION .text
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
49
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
50 ; this is off by one or two for some cases when filter_limit is greater than 63
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
51 ; in: p0 in mm6, p1 in mm4, p2 in mm2, p3 in mm1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
52 ; out: p1 in mm4, p2 in mm3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
53 %macro VP3_LOOP_FILTER 0 ; overwrites all of m0-m7; loads the filter limit from [r2+516]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
54 movq m7, m6 ; keep a copy of p0 before m6 is masked
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
55 pand m6, [pb_7] ; p0&7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
56 psrlw m7, 3 ; 16-bit shift, so bits cross byte lanes; the pand below cleans that up per byte
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
57 pand m7, [pb_1F] ; p0>>3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
58 movq m3, m2 ; p2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
59 pxor m2, m4 ; p2^p1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
60 pand m2, [pb_1] ; (p2^p1)&1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
61 movq m5, m2 ; copy, added back below to form the *3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
62 paddb m2, m2 ; 2*((p2^p1)&1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
63 paddb m2, m5 ; 3*(p2^p1)&1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
64 paddb m2, m6 ; extra bits lost in shifts
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
65 pcmpeqb m0, m0 ; m0 = all ones (0xFF in every byte)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
66 pxor m1, m0 ; 255 - p3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
67 pavgb m1, m2 ; (256 - p3 + extrabits) >> 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
68 pxor m0, m4 ; 255 - p1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
69 pavgb m0, m3 ; (256 + p2-p1) >> 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
70 paddb m1, [pb_3] ; add the pb_3 constant (presumably 3 in every byte) before the two averaging steps
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
71 pavgb m1, m0 ; 128+2+( p2-p1 - p3) >> 2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
72 pavgb m1, m0 ; 128+1+(3*(p2-p1) - p3) >> 3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
73 paddusb m7, m1 ; d+128+1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
74 movq m6, [pb_81] ; pb_81 is presumably the 128+1 bias in every byte
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
75 psubusb m6, m7 ; m6 = pb_81 - m7, saturating at 0 per byte
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
76 psubusb m7, [pb_81] ; m7 = m7 - pb_81, saturating at 0 per byte
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
77
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
78 movq m5, [r2+516] ; flim
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
79 pminub m6, m5 ; m6 = min(m6, flim)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
80 pminub m7, m5 ; m7 = min(m7, flim)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
81 movq m0, m6 ; save the clamped m6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
82 movq m1, m7 ; save the clamped m7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
83 paddb m6, m6 ; double it (wrapping byte add)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
84 paddb m7, m7 ; double it (wrapping byte add)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
85 pminub m6, m5 ; clamp the doubled value to flim again
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
86 pminub m7, m5 ; clamp the doubled value to flim again
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
87 psubb m6, m0 ; m6 = min(2*x, flim) - x, where x is the value saved in m0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
88 psubb m7, m1 ; m7 = min(2*x, flim) - x, where x is the value saved in m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
89 paddusb m4, m7 ; p1 += m7 (unsigned saturating)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
90 psubusb m4, m6 ; p1 -= m6 (unsigned saturating)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
91 psubusb m3, m7 ; p2 -= m7 (unsigned saturating)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
92 paddusb m3, m6 ; p2 += m6 (unsigned saturating)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
93 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
94
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
95 %macro STORE_4_WORDS 1 ; %1 = vector register holding four 16-bit words; one word is stored per row at byte offset -1; clobbers r2 and %1
12457
2982071047a2 Use "d" suffix for general-purpose registers used with movd.
reimar
parents: 12436
diff changeset
96 movd r2d, %1 ; r2d = low 32 bits of %1 (words 0 and 1)
12436
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
97 mov [r0 -1], r2w ; word 0 -> [r0-1]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
98 psrlq %1, 32 ; move words 2 and 3 down into the low dword of %1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
99 shr r2, 16 ; r2w = word 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
100 mov [r0+r1 -1], r2w ; word 1 -> [r0+r1-1]
12457
2982071047a2 Use "d" suffix for general-purpose registers used with movd.
reimar
parents: 12436
diff changeset
101 movd r2d, %1 ; r2d = words 2 and 3
12436
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
102 mov [r0+r1*2-1], r2w ; word 2 -> [r0+r1*2-1]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
103 shr r2, 16 ; r2w = word 3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
104 mov [r0+r3 -1], r2w ; word 3 -> [r0+r3-1]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
105 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
106
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
107 INIT_MMX
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
108 cglobal vp3_v_loop_filter_mmx2, 3, 4 ; filters across a horizontal edge, 8 bytes wide; r0/r1/r2 presumably pixels/stride/bounding-values table -- confirm against the C prototype
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
109 %ifdef ARCH_X86_64
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
110 movsxd r1, r1d ; sign-extend the 32-bit r1 argument to 64 bits before using it in addresses
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
111 %endif
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
112 mov r3, r1 ; r3 = +r1 (offset to the row after r0)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
113 neg r1 ; r1 = -r1 (offset to the row before r0)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
114 movq m6, [r0+r1*2] ; p0: 8 bytes two rows before r0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
115 movq m4, [r0+r1 ] ; p1: one row before r0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
116 movq m2, [r0 ] ; p2: the row at r0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
117 movq m1, [r0+r3 ] ; p3: one row after r0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
118
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
119 VP3_LOOP_FILTER ; leaves filtered p1 in m4 and filtered p2 in m3; reads [r2+516]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
120
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
121 movq [r0+r1], m4 ; write filtered p1 back to the row before r0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
122 movq [r0 ], m3 ; write filtered p2 back to the row at r0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
123 RET
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
124
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
125 cglobal vp3_h_loop_filter_mmx2, 3, 4 ; filters across a vertical edge over 8 rows; r0/r1/r2 presumably pixels/stride/bounding-values table -- confirm against the C prototype
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
126 %ifdef ARCH_X86_64
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
127 movsxd r1, r1d ; sign-extend the 32-bit r1 argument to 64 bits before using it in addresses
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
128 %endif
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
129 lea r3, [r1*3] ; r3 = 3*r1 (offset of the fourth row)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
130
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
131 movd m6, [r0 -2] ; row 0: 4 bytes starting 2 bytes before r0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
132 movd m4, [r0+r1 -2] ; row 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
133 movd m2, [r0+r1*2-2] ; row 2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
134 movd m1, [r0+r3 -2] ; row 3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
135 lea r0, [r0+r1*4 ] ; advance r0 by four rows
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
136 punpcklbw m6, [r0 -2] ; interleave row 0 bytes with row 4 bytes
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
137 punpcklbw m4, [r0+r1 -2] ; interleave row 1 bytes with row 5 bytes
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
138 punpcklbw m2, [r0+r1*2-2] ; interleave row 2 bytes with row 6 bytes
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
139 punpcklbw m1, [r0+r3 -2] ; interleave row 3 bytes with row 7 bytes
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
140 sub r0, r3 ; undo the four-row advance in two steps:
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
141 sub r0, r1 ; r3 + r1 = 4*r1, so r0 is back at row 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
142
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
143 TRANSPOSE4x4B 6, 4, 2, 1, 0 ; macro defined outside this file (presumably x86util.asm); presumably leaves one pixel column per register (p0..p3 in m6/m4/m2/m1) with m0 as scratch -- confirm
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
144 VP3_LOOP_FILTER ; leaves filtered p1 in m4 and filtered p2 in m3; reads [r2+516]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
145 SBUTTERFLY bw, 4, 3, 5 ; macro defined outside this file; presumably interleaves the p1/p2 bytes into one (p1,p2) word per row, low half in m4 and high half in m3, m5 as scratch -- confirm
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
146
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
147 STORE_4_WORDS m4 ; one word per row at offset -1 for the first four rows; clobbers r2 and m4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
148 lea r0, [r0+r1*4 ] ; advance r0 to the next four rows
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
149 STORE_4_WORDS m3 ; same for the remaining four rows
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
150 RET
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
151
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
152 ; from original comments: The Macro does IDct on 4 1-D Dcts
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
153 %macro BeginIDCT 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
154 movq m2, I(3)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
155 movq m6, C(3)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
156 movq m4, m2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
157 movq m7, J(5)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
158 pmulhw m4, m6 ; r4 = c3*i3 - i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
159 movq m1, C(5)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
160 pmulhw m6, m7 ; r6 = c3*i5 - i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
161 movq m5, m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
162 pmulhw m1, m2 ; r1 = c5*i3 - i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
163 movq m3, I(1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
164 pmulhw m5, m7 ; r5 = c5*i5 - i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
165 movq m0, C(1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
166 paddw m4, m2 ; r4 = c3*i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
167 paddw m6, m7 ; r6 = c3*i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
168 paddw m2, m1 ; r2 = c5*i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
169 movq m1, J(7)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
170 paddw m7, m5 ; r7 = c5*i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
171 movq m5, m0 ; r5 = c1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
172 pmulhw m0, m3 ; r0 = c1*i1 - i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
173 paddsw m4, m7 ; r4 = C = c3*i3 + c5*i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
174 pmulhw m5, m1 ; r5 = c1*i7 - i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
175 movq m7, C(7)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
176 psubsw m6, m2 ; r6 = D = c3*i5 - c5*i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
177 paddw m0, m3 ; r0 = c1*i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
178 pmulhw m3, m7 ; r3 = c7*i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
179 movq m2, I(2)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
180 pmulhw m7, m1 ; r7 = c7*i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
181 paddw m5, m1 ; r5 = c1*i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
182 movq m1, m2 ; r1 = i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
183 pmulhw m2, C(2) ; r2 = c2*i2 - i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
184 psubsw m3, m5 ; r3 = B = c7*i1 - c1*i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
185 movq m5, J(6)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
186 paddsw m0, m7 ; r0 = A = c1*i1 + c7*i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
187 movq m7, m5 ; r7 = i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
188 psubsw m0, m4 ; r0 = A - C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
189 pmulhw m5, C(2) ; r5 = c2*i6 - i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
190 paddw m2, m1 ; r2 = c2*i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
191 pmulhw m1, C(6) ; r1 = c6*i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
192 paddsw m4, m4 ; r4 = C + C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
193 paddsw m4, m0 ; r4 = C. = A + C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
194 psubsw m3, m6 ; r3 = B - D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
195 paddw m5, m7 ; r5 = c2*i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
196 paddsw m6, m6 ; r6 = D + D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
197 pmulhw m7, C(6) ; r7 = c6*i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
198 paddsw m6, m3 ; r6 = D. = B + D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
199 movq I(1), m4 ; save C. at I(1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
200 psubsw m1, m5 ; r1 = H = c6*i2 - c2*i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
201 movq m4, C(4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
202 movq m5, m3 ; r5 = B - D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
203 pmulhw m3, m4 ; r3 = (c4 - 1) * (B - D)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
204 paddsw m7, m2 ; r7 = G = c6*i6 + c2*i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
205 movq I(2), m6 ; save D. at I(2)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
206 movq m2, m0 ; r2 = A - C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
207 movq m6, I(0)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
208 pmulhw m0, m4 ; r0 = (c4 - 1) * (A - C)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
209 paddw m5, m3 ; r5 = B. = c4 * (B - D)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
210 movq m3, J(4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
211 psubsw m5, m1 ; r5 = B.. = B. - H
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
212 paddw m2, m0 ; r2 = A. = c4 * (A - C)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
213 psubsw m6, m3 ; r6 = i0 - i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
214 movq m0, m6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
215 pmulhw m6, m4 ; r6 = (c4 - 1) * (i0 - i4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
216 paddsw m3, m3 ; r3 = i4 + i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
217 paddsw m1, m1 ; r1 = H + H
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
218 paddsw m3, m0 ; r3 = i0 + i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
219 paddsw m1, m5 ; r1 = H. = B. + H
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
220 pmulhw m4, m3 ; r4 = (c4 - 1) * (i0 + i4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
221 paddsw m6, m0 ; r6 = F = c4 * (i0 - i4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
222 psubsw m6, m2 ; r6 = F. = F - A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
223 paddsw m2, m2 ; r2 = A. + A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
224 movq m0, I(1) ; r0 = C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
225 paddsw m2, m6 ; r2 = A.. = F + A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
226 paddw m4, m3 ; r4 = E = c4 * (i0 + i4)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
227 psubsw m2, m1 ; r2 = R2 = A.. - H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
228 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
229
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
230 ; RowIDCT gets ready to transpose
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
231 %macro RowIDCT 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
232 BeginIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
233 movq m3, I(2) ; r3 = D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
234 psubsw m4, m7 ; r4 = E. = E - G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
235 paddsw m1, m1 ; r1 = H. + H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
236 paddsw m7, m7 ; r7 = G + G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
237 paddsw m1, m2 ; r1 = R1 = A.. + H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
238 paddsw m7, m4 ; r7 = G. = E + G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
239 psubsw m4, m3 ; r4 = R4 = E. - D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
240 paddsw m3, m3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
241 psubsw m6, m5 ; r6 = R6 = F. - B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
242 paddsw m5, m5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
243 paddsw m3, m4 ; r3 = R3 = E. + D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
244 paddsw m5, m6 ; r5 = R5 = F. + B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
245 psubsw m7, m0 ; r7 = R7 = G. - C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
246 paddsw m0, m0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
247 movq I(1), m1 ; save R1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
248 paddsw m0, m7 ; r0 = R0 = G. + C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
249 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
250
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
251 ; Column IDCT normalizes and stores final results
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
252 %macro ColumnIDCT 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
253 BeginIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
254 paddsw m2, OC_8 ; adjust R2 (and R1) for shift
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
255 paddsw m1, m1 ; r1 = H. + H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
256 paddsw m1, m2 ; r1 = R1 = A.. + H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
257 psraw m2, 4 ; r2 = NR2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
258 psubsw m4, m7 ; r4 = E. = E - G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
259 psraw m1, 4 ; r1 = NR1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
260 movq m3, I(2) ; r3 = D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
261 paddsw m7, m7 ; r7 = G + G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
262 movq I(2), m2 ; store NR2 at I2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
263 paddsw m7, m4 ; r7 = G. = E + G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
264 movq I(1), m1 ; store NR1 at I1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
265 psubsw m4, m3 ; r4 = R4 = E. - D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
266 paddsw m4, OC_8 ; adjust R4 (and R3) for shift
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
267 paddsw m3, m3 ; r3 = D. + D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
268 paddsw m3, m4 ; r3 = R3 = E. + D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
269 psraw m4, 4 ; r4 = NR4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
270 psubsw m6, m5 ; r6 = R6 = F. - B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
271 psraw m3, 4 ; r3 = NR3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
272 paddsw m6, OC_8 ; adjust R6 (and R5) for shift
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
273 paddsw m5, m5 ; r5 = B.. + B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
274 paddsw m5, m6 ; r5 = R5 = F. + B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
275 psraw m6, 4 ; r6 = NR6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
276 movq J(4), m4 ; store NR4 at J4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
277 psraw m5, 4 ; r5 = NR5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
278 movq I(3), m3 ; store NR3 at I3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
279 psubsw m7, m0 ; r7 = R7 = G. - C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
280 paddsw m7, OC_8 ; adjust R7 (and R0) for shift
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
281 paddsw m0, m0 ; r0 = C. + C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
282 paddsw m0, m7 ; r0 = R0 = G. + C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
283 psraw m7, 4 ; r7 = NR7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
284 movq J(6), m6 ; store NR6 at J6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
285 psraw m0, 4 ; r0 = NR0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
286 movq J(5), m5 ; store NR5 at J5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
287 movq J(7), m7 ; store NR7 at J7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
288 movq I(0), m0 ; store NR0 at I0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
289 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
290
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
291 ; Following macro does two 4x4 transposes in place.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
292 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
293 ; At entry (we assume):
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
294 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
295 ; r0 = a3 a2 a1 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
296 ; I(1) = b3 b2 b1 b0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
297 ; r2 = c3 c2 c1 c0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
298 ; r3 = d3 d2 d1 d0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
299 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
300 ; r4 = e3 e2 e1 e0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
301 ; r5 = f3 f2 f1 f0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
302 ; r6 = g3 g2 g1 g0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
303 ; r7 = h3 h2 h1 h0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
304 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
305 ; At exit, we have:
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
306 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
307 ; I(0) = d0 c0 b0 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
308 ; I(1) = d1 c1 b1 a1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
309 ; I(2) = d2 c2 b2 a2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
310 ; I(3) = d3 c3 b3 a3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
311 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
312 ; J(4) = h0 g0 f0 e0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
313 ; J(5) = h1 g1 f1 e1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
314 ; J(6) = h2 g2 f2 e2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
315 ; J(7) = h3 g3 f3 e3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
316 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
317 ; I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
318 ; J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
319 ;
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
320 ; Since r1 is free at entry, we calculate the Js first.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
321 %macro Transpose 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
322 movq m1, m4 ; r1 = e3 e2 e1 e0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
323 punpcklwd m4, m5 ; r4 = f1 e1 f0 e0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
324 movq I(0), m0 ; save a3 a2 a1 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
325 punpckhwd m1, m5 ; r1 = f3 e3 f2 e2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
326 movq m0, m6 ; r0 = g3 g2 g1 g0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
327 punpcklwd m6, m7 ; r6 = h1 g1 h0 g0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
328 movq m5, m4 ; r5 = f1 e1 f0 e0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
329 punpckldq m4, m6 ; r4 = h0 g0 f0 e0 = R4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
330 punpckhdq m5, m6 ; r5 = h1 g1 f1 e1 = R5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
331 movq m6, m1 ; r6 = f3 e3 f2 e2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
332 movq J(4), m4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
333 punpckhwd m0, m7 ; r0 = h3 g3 h2 g2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
334 movq J(5), m5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
335 punpckhdq m6, m0 ; r6 = h3 g3 f3 e3 = R7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
336 movq m4, I(0) ; r4 = a3 a2 a1 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
337 punpckldq m1, m0 ; r1 = h2 g2 f2 e2 = R6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
338 movq m5, I(1) ; r5 = b3 b2 b1 b0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
339 movq m0, m4 ; r0 = a3 a2 a1 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
340 movq J(7), m6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
341 punpcklwd m0, m5 ; r0 = b1 a1 b0 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
342 movq J(6), m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
343 punpckhwd m4, m5 ; r4 = b3 a3 b2 a2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
344 movq m5, m2 ; r5 = c3 c2 c1 c0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
345 punpcklwd m2, m3 ; r2 = d1 c1 d0 c0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
346 movq m1, m0 ; r1 = b1 a1 b0 a0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
347 punpckldq m0, m2 ; r0 = d0 c0 b0 a0 = R0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
348 punpckhdq m1, m2 ; r1 = d1 c1 b1 a1 = R1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
349 movq m2, m4 ; r2 = b3 a3 b2 a2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
350 movq I(0), m0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
351 punpckhwd m5, m3 ; r5 = d3 c3 d2 c2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
352 movq I(1), m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
353 punpckhdq m4, m5 ; r4 = d3 c3 b3 a3 = R3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
354 punpckldq m2, m5 ; r2 = d2 c2 b2 a2 = R2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
355 movq I(3), m4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
356 movq I(2), m2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
357 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
358
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
359 %macro VP3_IDCT_mmx 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
360 ; eax = quantized input
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
361 ; ebx = dequantizer matrix
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
362 ; ecx = IDCT constants
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
363 ; M(I) = ecx + MaskOffset(0) + I * 8
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
364 ; C(I) = vp3_idct_data + 16 * (I-1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
365 ; edx = output
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
366 ; r0..r7 = mm0..mm7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
367 %define OC_8 [pw_8]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
368 %define C(x) [vp3_idct_data+16*(x-1)]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
369
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
370 ; at this point, function has completed dequantization + dezigzag +
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
371 ; partial transposition; now do the idct itself
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
372 %define I(x) [%1+16* x ]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
373 %define J(x) [%1+16*(x-4)+8]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
374 RowIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
375 Transpose
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
376
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
377 %define I(x) [%1+16* x +64]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
378 %define J(x) [%1+16*(x-4)+72]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
379 RowIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
380 Transpose
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
381
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
382 %define I(x) [%1+16*x]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
383 %define J(x) [%1+16*x]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
384 ColumnIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
385
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
386 %define I(x) [%1+16*x+8]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
387 %define J(x) [%1+16*x+8]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
388 ColumnIDCT
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
389 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
390
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
391 %macro VP3_1D_IDCT_SSE2 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
392 movdqa m2, I(3) ; xmm2 = i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
393 movdqa m6, C(3) ; xmm6 = c3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
394 movdqa m4, m2 ; xmm4 = i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
395 movdqa m7, I(5) ; xmm7 = i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
396 pmulhw m4, m6 ; xmm4 = c3 * i3 - i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
397 movdqa m1, C(5) ; xmm1 = c5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
398 pmulhw m6, m7 ; xmm6 = c3 * i5 - i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
399 movdqa m5, m1 ; xmm5 = c5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
400 pmulhw m1, m2 ; xmm1 = c5 * i3 - i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
401 movdqa m3, I(1) ; xmm3 = i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
402 pmulhw m5, m7 ; xmm5 = c5 * i5 - i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
403 movdqa m0, C(1) ; xmm0 = c1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
404 paddw m4, m2 ; xmm4 = c3 * i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
405 paddw m6, m7 ; xmm6 = c3 * i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
406 paddw m2, m1 ; xmm2 = c5 * i3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
407 movdqa m1, I(7) ; xmm1 = i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
408 paddw m7, m5 ; xmm7 = c5 * i5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
409 movdqa m5, m0 ; xmm5 = c1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
410 pmulhw m0, m3 ; xmm0 = c1 * i1 - i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
411 paddsw m4, m7 ; xmm4 = c3 * i3 + c5 * i5 = C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
412 pmulhw m5, m1 ; xmm5 = c1 * i7 - i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
413 movdqa m7, C(7) ; xmm7 = c7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
414 psubsw m6, m2 ; xmm6 = c3 * i5 - c5 * i3 = D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
415 paddw m0, m3 ; xmm0 = c1 * i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
416 pmulhw m3, m7 ; xmm3 = c7 * i1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
417 movdqa m2, I(2) ; xmm2 = i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
418 pmulhw m7, m1 ; xmm7 = c7 * i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
419 paddw m5, m1 ; xmm5 = c1 * i7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
420 movdqa m1, m2 ; xmm1 = i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
421 pmulhw m2, C(2) ; xmm2 = i2 * c2 -i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
422 psubsw m3, m5 ; xmm3 = c7 * i1 - c1 * i7 = B
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
423 movdqa m5, I(6) ; xmm5 = i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
424 paddsw m0, m7 ; xmm0 = c1 * i1 + c7 * i7 = A
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
425 movdqa m7, m5 ; xmm7 = i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
426 psubsw m0, m4 ; xmm0 = A - C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
427 pmulhw m5, C(2) ; xmm5 = c2 * i6 - i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
428 paddw m2, m1 ; xmm2 = i2 * c2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
429 pmulhw m1, C(6) ; xmm1 = c6 * i2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
430 paddsw m4, m4 ; xmm4 = C + C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
431 paddsw m4, m0 ; xmm4 = A + C = C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
432 psubsw m3, m6 ; xmm3 = B - D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
433 paddw m5, m7 ; xmm5 = c2 * i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
434 paddsw m6, m6 ; xmm6 = D + D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
435 pmulhw m7, C(6) ; xmm7 = c6 * i6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
436 paddsw m6, m3 ; xmm6 = B + D = D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
437 movdqa I(1), m4 ; Save C. at I(1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
438 psubsw m1, m5 ; xmm1 = c6 * i2 - c2 * i6 = H
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
439 movdqa m4, C(4) ; xmm4 = C4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
440 movdqa m5, m3 ; xmm5 = B - D
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
441 pmulhw m3, m4 ; xmm3 = ( c4 -1 ) * ( B - D )
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
442 paddsw m7, m2 ; xmm7 = c2 * i2 + c6 * i6 = G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
443 movdqa I(2), m6 ; save D. at I(2)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
444 movdqa m2, m0 ; xmm2 = A - C
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
445 movdqa m6, I(0) ; xmm6 = i0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
446 pmulhw m0, m4 ; xmm0 = ( c4 - 1 ) * ( A - C ) = A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
447 paddw m5, m3 ; xmm5 = c4 * ( B - D ) = B.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
448 movdqa m3, I(4) ; xmm3 = i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
449 psubsw m5, m1 ; xmm5 = B. - H = B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
450 paddw m2, m0 ; xmm2 = c4 * ( A - C) = A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
451 psubsw m6, m3 ; xmm6 = i0 - i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
452 movdqa m0, m6 ; xmm0 = i0 - i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
453 pmulhw m6, m4 ; xmm6 = (c4 - 1) * (i0 - i4) = F
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
454 paddsw m3, m3 ; xmm3 = i4 + i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
455 paddsw m1, m1 ; xmm1 = H + H
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
456 paddsw m3, m0 ; xmm3 = i0 + i4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
457 paddsw m1, m5 ; xmm1 = B. + H = H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
458 pmulhw m4, m3 ; xmm4 = ( c4 - 1 ) * ( i0 + i4 )
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
459 paddw m6, m0 ; xmm6 = c4 * ( i0 - i4 )
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
460 psubsw m6, m2 ; xmm6 = F - A. = F.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
461 paddsw m2, m2 ; xmm2 = A. + A.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
462 movdqa m0, I(1) ; Load C. from I(1)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
463 paddsw m2, m6 ; xmm2 = F + A. = A..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
464 paddw m4, m3 ; xmm4 = c4 * ( i0 + i4 ) = E
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
465 psubsw m2, m1 ; xmm2 = A.. - H. = R2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
466 ADD(m2) ; Adjust R2 and R1 before shifting
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
467 paddsw m1, m1 ; xmm1 = H. + H.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
468 paddsw m1, m2 ; xmm1 = A.. + H. = R1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
469 SHIFT(m2) ; xmm2 = op2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
470 psubsw m4, m7 ; xmm4 = E - G = E.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
471 SHIFT(m1) ; xmm1 = op1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
472 movdqa m3, I(2) ; Load D. from I(2)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
473 paddsw m7, m7 ; xmm7 = G + G
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
474 paddsw m7, m4 ; xmm7 = E + G = G.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
475 psubsw m4, m3 ; xmm4 = E. - D. = R4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
476 ADD(m4) ; Adjust R4 and R3 before shifting
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
477 paddsw m3, m3 ; xmm3 = D. + D.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
478 paddsw m3, m4 ; xmm3 = E. + D. = R3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
479 SHIFT(m4) ; xmm4 = op4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
480 psubsw m6, m5 ; xmm6 = F. - B..= R6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
481 SHIFT(m3) ; xmm3 = op3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
482 ADD(m6) ; Adjust R6 and R5 before shifting
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
483 paddsw m5, m5 ; xmm5 = B.. + B..
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
484 paddsw m5, m6 ; xmm5 = F. + B.. = R5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
485 SHIFT(m6) ; xmm6 = op6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
486 SHIFT(m5) ; xmm5 = op5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
487 psubsw m7, m0 ; xmm7 = G. - C. = R7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
488 ADD(m7) ; Adjust R7 and R0 before shifting
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
489 paddsw m0, m0 ; xmm0 = C. + C.
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
490 paddsw m0, m7 ; xmm0 = G. + C. = R0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
491 SHIFT(m7) ; xmm7 = op7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
492 SHIFT(m0) ; xmm0 = op0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
493 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
494
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; PUT_BLOCK: store eight vector registers to the output rows O(0)..O(7).
; The eight macro arguments are register indices (expanded as m%1 .. m%8);
; argument k is written to row k-1.
; O(x) is not defined here: whoever expands this macro must %define it first.
; movdqa is an aligned 16-byte store, so every O(x) must be 16-byte aligned.
495 %macro PUT_BLOCK 8
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
496 movdqa O(0), m%1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
497 movdqa O(1), m%2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
498 movdqa O(2), m%3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
499 movdqa O(3), m%4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
500 movdqa O(4), m%5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
501 movdqa O(5), m%6
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
502 movdqa O(6), m%7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
503 movdqa O(7), m%8
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
504 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
505
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; VP3_IDCT_sse2: full 8x8 inverse DCT built from two runs of VP3_1D_IDCT_SSE2
; with a word transpose in between.
;   %1 = register holding the address of the coefficient block.
; I(x) and O(x) expand to the same address, so the block is transformed in
; place. Rows are 16 bytes apart and accessed with movdqa, so the block must
; be 16-byte aligned.
; The macro (re)defines I, O, C, SHIFT and ADD and leaves them defined.
506 %macro VP3_IDCT_sse2 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; I(x): input row x of the block
507 %define I(x) [%1+16*x]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; O(x): output row x (same memory as I(x))
508 %define O(x) [%1+16*x]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; C(x): constant number x, 1-based, 16 bytes per entry in vp3_idct_data
509 %define C(x) [vp3_idct_data+16*(x-1)]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; First pass: SHIFT and ADD expand to nothing, so no rounding or scaling yet.
510 %define SHIFT(x)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
511 %define ADD(x)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
512 VP3_1D_IDCT_SSE2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; Transpose the 8x8 words held in m0-m7 so the second pass works on the other
; dimension. TRANSPOSE8x8W is defined elsewhere; presumably the trailing
; argument(s) name its scratch space (register 8 on x86_64, the first two
; block rows in memory otherwise) -- TODO confirm against its definition.
513 %ifdef ARCH_X86_64
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
514 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
515 %else
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
516 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%1], [%1+16]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
517 %endif
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; Write the transposed rows back so the second pass can reload them via I(x).
518 PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
519
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; Second pass: each result gets [pw_8] added (saturating) and is then shifted
; right arithmetically by 4. pw_8 is defined elsewhere; presumably packed
; words of 8, giving round-to-nearest for the >> 4 -- TODO confirm.
520 %define SHIFT(x) psraw x, 4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
521 %define ADD(x) paddsw x, [pw_8]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
522 VP3_1D_IDCT_SSE2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; Final store: m0..m7 hold op0..op7 after the 1-D pass.
523 PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
524 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
525
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; vp3_idct_funcs: emit the three IDCT entry points for one instruction-set
; flavour.
;   %1 = flavour suffix; pasted into the function names and into VP3_IDCT_%1
;   %2 = forwarded as the last cglobal argument (the vector-register count in
;        x86inc's cglobal convention; cglobal is defined outside this chunk)
;   %3 = forwarded as the register-count argument of the put/add variants
; Functions produced:
;   vp3_idct_%1      - runs VP3_IDCT_%1 on the block addressed by argument 0
;   vp3_idct_put_%1  - runs VP3_IDCT_%1 on the block addressed by argument 2,
;                      then hands off to put_signed_pixels_clamped_mmx
;   vp3_idct_add_%1  - same, but hands off to add_pixels_clamped_mmx
; Before the hand-off the arguments are rotated (a0, a1, a2) -> (a2, a0, a1),
; i.e. the block pointer becomes the first argument.
526 %macro vp3_idct_funcs 3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
527 cglobal vp3_idct_%1, 1, 1, %2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
528 VP3_IDCT_%1 r0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
529 RET
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
530
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
531 cglobal vp3_idct_put_%1, 3, %3, %2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
532 VP3_IDCT_%1 r2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; x86_64: rotate the register arguments, using r3 as the temporary.
533 %ifdef ARCH_X86_64
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
534 mov r3, r2 ; r3 = block pointer (old argument 2)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
535 mov r2, r1 ; new argument 2 = old argument 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
536 mov r1, r0 ; new argument 1 = old argument 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
537 mov r0, r3 ; new argument 0 = block pointer
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; Otherwise: write the same rotation into r0m/r1m/r2m (presumably the
; in-memory argument slots in x86inc naming -- TODO confirm).
538 %else
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
539 mov r0m, r2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
540 mov r1m, r0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
541 mov r2m, r1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
542 %endif
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; WIN64 uses call + RET, every other target tail-jumps. Presumably RET has
; epilogue work to do on WIN64 that a bare jmp would skip -- TODO confirm.
; NOTE(review): nothing visible here reserves the 32-byte WIN64 shadow space
; before the call; confirm that cglobal's prologue accounts for it.
543 %ifdef WIN64
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
544 call put_signed_pixels_clamped_mmx
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
545 RET
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
546 %else
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
547 jmp put_signed_pixels_clamped_mmx
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
548 %endif
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
549
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; The add variant mirrors the put variant; only the hand-off target differs.
550 cglobal vp3_idct_add_%1, 3, %3, %2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
551 VP3_IDCT_%1 r2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
552 %ifdef ARCH_X86_64
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
553 mov r3, r2 ; r3 = block pointer (old argument 2)
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
554 mov r2, r1 ; new argument 2 = old argument 1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
555 mov r1, r0 ; new argument 1 = old argument 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
556 mov r0, r3 ; new argument 0 = block pointer
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
557 %else
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
558 mov r0m, r2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
559 mov r1m, r0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
560 mov r2m, r1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
561 %endif
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
562 %ifdef WIN64
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
563 call add_pixels_clamped_mmx
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
564 RET
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
565 %else
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
566 jmp add_pixels_clamped_mmx
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
567 %endif
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
568 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
569
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; Instantiate the IDCT entry points for the mmx and sse2 flavours.
; REGS is the register count handed to the put/add variants: x86_64 needs a
; fourth register because vp3_idct_funcs uses r3 as the temporary when
; rotating the arguments; the other path only touches r0-r2.
570 %ifdef ARCH_X86_64
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
571 %define REGS 4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
572 %else
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
573 %define REGS 3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
574 %endif
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; mmx flavour: expands VP3_IDCT_mmx (defined outside this chunk); second
; argument 0 is the vector-register count forwarded to cglobal.
575 INIT_MMX
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
576 vp3_idct_funcs mmx, 0, REGS
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; sse2 flavour: 9 vector registers, i.e. m0-m7 plus index 8, which
; VP3_IDCT_sse2 hands to TRANSPOSE8x8W on x86_64.
577 INIT_XMM
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
578 vp3_idct_funcs sse2, 9, REGS
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
579 %undef REGS
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
580
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; DC_ADD: apply a DC offset to four rows of pixels, in place.
; Expected on entry (set up by the code that expands this macro):
;   r0 = address of the first row
;   r1 = distance between rows
;   r3 = offset of the fourth row (the visible user sets r3 = 3 * r1)
;   m0 = bytes to add, m1 = bytes to subtract
; Each row is loaded with movq, gets m0 added and m1 subtracted with unsigned
; byte saturation, and is stored back. Clobbers m2-m5; leaves r0, r1, r3,
; m0 and m1 untouched. Loads, arithmetic and stores are interleaved.
581 %macro DC_ADD 0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
582 movq m2, [r0 ]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
583 movq m3, [r0+r1 ]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
584 paddusb m2, m0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
585 movq m4, [r0+r1*2]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
586 paddusb m3, m0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
587 movq m5, [r0+r3 ]
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
588 paddusb m4, m0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
589 paddusb m5, m0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
590 psubusb m2, m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
591 psubusb m3, m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
592 movq [r0 ], m2
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
593 psubusb m4, m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
594 movq [r0+r1 ], m3
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
595 psubusb m5, m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
596 movq [r0+r1*2], m4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
597 movq [r0+r3 ], m5
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
598 %endmacro
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
599
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; vp3_idct_dc_add_mmx2: DC-only shortcut. Reads one signed 16-bit value,
; computes dc = (value + 15) >> 5 and adds it, with clamping to 0..255, to
; an 8-row by 8-byte area of pixels.
; Arguments as used by the code below:
;   r0 = address of the first pixel row (read and written)
;   r1 = distance between rows; sign-extended from 32 bits on x86_64
;   r2 = address of the 16-bit value
; r3 is a scratch register (3 * r1). Uses m0-m5 (via DC_ADD).
600 INIT_MMX
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
601 cglobal vp3_idct_dc_add_mmx2, 3, 4
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
602 %ifdef ARCH_X86_64
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
603 movsxd r1, r1d ; widen the 32-bit row distance for 64-bit addressing
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
604 %endif
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
605 lea r3, [r1*3] ; r3 = offset of the fourth row, as DC_ADD expects
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
606 movsx r2, word [r2] ; r2 = sign-extended 16-bit input value
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
607 add r2, 15
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
608 sar r2, 5 ; r2 = dc = (value + 15) >> 5, arithmetic shift
12457
2982071047a2 Use "d" suffix for general-purpose registers used with movd.
reimar
parents: 12436
diff changeset
609 movd m0, r2d
12436
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
610 pshufw m0, m0, 0x0 ; broadcast the low word: m0 = dc in all four words
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
611 pxor m1, m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
612 psubw m1, m0 ; m1 = -dc in all four words
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
; Pack with unsigned saturation: negative words become 0, so m0 carries the
; offset when dc > 0 and m1 carries its magnitude when dc < 0. Adding m0 and
; subtracting m1 with unsigned saturation then applies the signed dc.
613 packuswb m0, m0
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
614 packuswb m1, m1
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
615 DC_ADD
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
616 lea r0, [r0+r1*4] ; advance to the fifth row
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
617 DC_ADD
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff changeset
618 RET