Mercurial > libavcodec.hg
annotate x86/vp3dsp.asm @ 12483:0159a19bfff7 libavcodec
aacdec: Rework channel mapping compatibility hacks.
For a PCE based configuration map the channels solely based on tags.
For an indexed configuration map the channels solely based on position.
This works with all known exotic samples including al17, elem_id0, bad_concat,
and lfe_is_sce.
author | alexc |
---|---|
date | Fri, 10 Sep 2010 18:01:48 +0000 |
parents | 2982071047a2 |
children |
rev | line source |
---|---|
12436
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
1 ;****************************************************************************** |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
2 ;* MMX/SSE2-optimized functions for the VP3 decoder |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
3 ;* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org> |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
4 ;* |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
5 ;* This file is part of FFmpeg. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
6 ;* |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
7 ;* FFmpeg is free software; you can redistribute it and/or |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
8 ;* modify it under the terms of the GNU Lesser General Public |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
9 ;* License as published by the Free Software Foundation; either |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
10 ;* version 2.1 of the License, or (at your option) any later version. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
11 ;* |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
12 ;* FFmpeg is distributed in the hope that it will be useful, |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
15 ;* Lesser General Public License for more details. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
16 ;* |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
17 ;* You should have received a copy of the GNU Lesser General Public |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
18 ;* License along with FFmpeg; if not, write to the Free Software |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
20 ;****************************************************************************** |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
21 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
22 %include "x86inc.asm" |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
23 %include "x86util.asm" |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
24 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
25 ; MMX-optimized functions cribbed from the original VP3 source code. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
26 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
27 SECTION_RODATA |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
28 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
29 vp3_idct_data: times 8 dw 64277 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
30 times 8 dw 60547 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
31 times 8 dw 54491 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
32 times 8 dw 46341 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
33 times 8 dw 36410 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
34 times 8 dw 25080 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
35 times 8 dw 12785 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
36 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
37 cextern pb_1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
38 cextern pb_3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
39 cextern pb_7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
40 cextern pb_1F |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
41 cextern pb_81 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
42 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
43 cextern pw_8 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
44 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
45 cextern put_signed_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
46 cextern add_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
47 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
48 SECTION .text |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
49 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
50 ; this is off by one or two for some cases when filter_limit is greater than 63 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
51 ; in: p0 in mm6, p1 in mm4, p2 in mm2, p3 in mm1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
52 ; out: p1 in mm4, p2 in mm3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
53 %macro VP3_LOOP_FILTER 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
54 movq m7, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
55 pand m6, [pb_7] ; p0&7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
56 psrlw m7, 3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
57 pand m7, [pb_1F] ; p0>>3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
58 movq m3, m2 ; p2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
59 pxor m2, m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
60 pand m2, [pb_1] ; (p2^p1)&1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
61 movq m5, m2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
62 paddb m2, m2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
63 paddb m2, m5 ; 3*(p2^p1)&1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
64 paddb m2, m6 ; extra bits lost in shifts |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
65 pcmpeqb m0, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
66 pxor m1, m0 ; 255 - p3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
67 pavgb m1, m2 ; (256 - p3 + extrabits) >> 1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
68 pxor m0, m4 ; 255 - p1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
69 pavgb m0, m3 ; (256 + p2-p1) >> 1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
70 paddb m1, [pb_3] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
71 pavgb m1, m0 ; 128+2+( p2-p1 - p3) >> 2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
72 pavgb m1, m0 ; 128+1+(3*(p2-p1) - p3) >> 3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
73 paddusb m7, m1 ; d+128+1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
74 movq m6, [pb_81] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
75 psubusb m6, m7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
76 psubusb m7, [pb_81] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
77 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
78 movq m5, [r2+516] ; flim |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
79 pminub m6, m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
80 pminub m7, m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
81 movq m0, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
82 movq m1, m7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
83 paddb m6, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
84 paddb m7, m7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
85 pminub m6, m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
86 pminub m7, m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
87 psubb m6, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
88 psubb m7, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
89 paddusb m4, m7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
90 psubusb m4, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
91 psubusb m3, m7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
92 paddusb m3, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
93 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
94 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
95 %macro STORE_4_WORDS 1 |
12457
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12436
diff
changeset
|
96 movd r2d, %1 |
12436
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
97 mov [r0 -1], r2w |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
98 psrlq %1, 32 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
99 shr r2, 16 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
100 mov [r0+r1 -1], r2w |
12457
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12436
diff
changeset
|
101 movd r2d, %1 |
12436
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
102 mov [r0+r1*2-1], r2w |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
103 shr r2, 16 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
104 mov [r0+r3 -1], r2w |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
105 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
106 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
107 INIT_MMX |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
108 cglobal vp3_v_loop_filter_mmx2, 3, 4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
109 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
110 movsxd r1, r1d |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
111 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
112 mov r3, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
113 neg r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
114 movq m6, [r0+r1*2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
115 movq m4, [r0+r1 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
116 movq m2, [r0 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
117 movq m1, [r0+r3 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
118 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
119 VP3_LOOP_FILTER |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
120 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
121 movq [r0+r1], m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
122 movq [r0 ], m3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
123 RET |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
124 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
125 cglobal vp3_h_loop_filter_mmx2, 3, 4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
126 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
127 movsxd r1, r1d |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
128 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
129 lea r3, [r1*3] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
130 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
131 movd m6, [r0 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
132 movd m4, [r0+r1 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
133 movd m2, [r0+r1*2-2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
134 movd m1, [r0+r3 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
135 lea r0, [r0+r1*4 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
136 punpcklbw m6, [r0 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
137 punpcklbw m4, [r0+r1 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
138 punpcklbw m2, [r0+r1*2-2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
139 punpcklbw m1, [r0+r3 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
140 sub r0, r3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
141 sub r0, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
142 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
143 TRANSPOSE4x4B 6, 4, 2, 1, 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
144 VP3_LOOP_FILTER |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
145 SBUTTERFLY bw, 4, 3, 5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
146 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
147 STORE_4_WORDS m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
148 lea r0, [r0+r1*4 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
149 STORE_4_WORDS m3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
150 RET |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
151 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
152 ; from original comments: The Macro does IDct on 4 1-D Dcts |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
153 %macro BeginIDCT 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
154 movq m2, I(3) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
155 movq m6, C(3) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
156 movq m4, m2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
157 movq m7, J(5) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
158 pmulhw m4, m6 ; r4 = c3*i3 - i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
159 movq m1, C(5) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
160 pmulhw m6, m7 ; r6 = c3*i5 - i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
161 movq m5, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
162 pmulhw m1, m2 ; r1 = c5*i3 - i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
163 movq m3, I(1) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
164 pmulhw m5, m7 ; r5 = c5*i5 - i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
165 movq m0, C(1) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
166 paddw m4, m2 ; r4 = c3*i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
167 paddw m6, m7 ; r6 = c3*i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
168 paddw m2, m1 ; r2 = c5*i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
169 movq m1, J(7) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
170 paddw m7, m5 ; r7 = c5*i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
171 movq m5, m0 ; r5 = c1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
172 pmulhw m0, m3 ; r0 = c1*i1 - i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
173 paddsw m4, m7 ; r4 = C = c3*i3 + c5*i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
174 pmulhw m5, m1 ; r5 = c1*i7 - i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
175 movq m7, C(7) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
176 psubsw m6, m2 ; r6 = D = c3*i5 - c5*i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
177 paddw m0, m3 ; r0 = c1*i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
178 pmulhw m3, m7 ; r3 = c7*i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
179 movq m2, I(2) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
180 pmulhw m7, m1 ; r7 = c7*i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
181 paddw m5, m1 ; r5 = c1*i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
182 movq m1, m2 ; r1 = i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
183 pmulhw m2, C(2) ; r2 = c2*i2 - i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
184 psubsw m3, m5 ; r3 = B = c7*i1 - c1*i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
185 movq m5, J(6) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
186 paddsw m0, m7 ; r0 = A = c1*i1 + c7*i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
187 movq m7, m5 ; r7 = i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
188 psubsw m0, m4 ; r0 = A - C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
189 pmulhw m5, C(2) ; r5 = c2*i6 - i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
190 paddw m2, m1 ; r2 = c2*i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
191 pmulhw m1, C(6) ; r1 = c6*i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
192 paddsw m4, m4 ; r4 = C + C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
193 paddsw m4, m0 ; r4 = C. = A + C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
194 psubsw m3, m6 ; r3 = B - D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
195 paddw m5, m7 ; r5 = c2*i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
196 paddsw m6, m6 ; r6 = D + D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
197 pmulhw m7, C(6) ; r7 = c6*i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
198 paddsw m6, m3 ; r6 = D. = B + D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
199 movq I(1), m4 ; save C. at I(1) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
200 psubsw m1, m5 ; r1 = H = c6*i2 - c2*i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
201 movq m4, C(4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
202 movq m5, m3 ; r5 = B - D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
203 pmulhw m3, m4 ; r3 = (c4 - 1) * (B - D) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
204 paddsw m7, m2 ; r7 = G = c6*i6 + c2*i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
205 movq I(2), m6 ; save D. at I(2) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
206 movq m2, m0 ; r2 = A - C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
207 movq m6, I(0) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
208 pmulhw m0, m4 ; r0 = (c4 - 1) * (A - C) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
209 paddw m5, m3 ; r5 = B. = c4 * (B - D) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
210 movq m3, J(4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
211 psubsw m5, m1 ; r5 = B.. = B. - H |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
212 paddw m2, m0 ; r2 = A. = c4 * (A - C) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
213 psubsw m6, m3 ; r6 = i0 - i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
214 movq m0, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
215 pmulhw m6, m4 ; r6 = (c4 - 1) * (i0 - i4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
216 paddsw m3, m3 ; r3 = i4 + i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
217 paddsw m1, m1 ; r1 = H + H |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
218 paddsw m3, m0 ; r3 = i0 + i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
219 paddsw m1, m5 ; r1 = H. = B. + H |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
220 pmulhw m4, m3 ; r4 = (c4 - 1) * (i0 + i4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
221 paddsw m6, m0 ; r6 = F = c4 * (i0 - i4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
222 psubsw m6, m2 ; r6 = F. = F - A. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
223 paddsw m2, m2 ; r2 = A. + A. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
224 movq m0, I(1) ; r0 = C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
225 paddsw m2, m6 ; r2 = A.. = F + A. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
226 paddw m4, m3 ; r4 = E = c4 * (i0 + i4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
227 psubsw m2, m1 ; r2 = R2 = A.. - H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
228 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
229 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
230 ; RowIDCT gets ready to transpose |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
231 %macro RowIDCT 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
232 BeginIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
233 movq m3, I(2) ; r3 = D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
234 psubsw m4, m7 ; r4 = E. = E - G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
235 paddsw m1, m1 ; r1 = H. + H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
236 paddsw m7, m7 ; r7 = G + G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
237 paddsw m1, m2 ; r1 = R1 = A.. + H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
238 paddsw m7, m4 ; r7 = G. = E + G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
239 psubsw m4, m3 ; r4 = R4 = E. - D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
240 paddsw m3, m3 ; r3 = D. + D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
241 psubsw m6, m5 ; r6 = R6 = F. - B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
242 paddsw m5, m5 ; r5 = B.. + B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
243 paddsw m3, m4 ; r3 = R3 = E. + D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
244 paddsw m5, m6 ; r5 = R5 = F. + B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
245 psubsw m7, m0 ; r7 = R7 = G. - C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
246 paddsw m0, m0 ; r0 = C. + C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
247 movq I(1), m1 ; save R1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
248 paddsw m0, m7 ; r0 = R0 = G. + C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
249 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
250 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
251 ; Column IDCT normalizes and stores final results |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
252 %macro ColumnIDCT 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
253 BeginIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
254 paddsw m2, OC_8 ; adjust R2 (and R1) for shift |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
255 paddsw m1, m1 ; r1 = H. + H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
256 paddsw m1, m2 ; r1 = R1 = A.. + H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
257 psraw m2, 4 ; r2 = NR2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
258 psubsw m4, m7 ; r4 = E. = E - G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
259 psraw m1, 4 ; r1 = NR1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
260 movq m3, I(2) ; r3 = D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
261 paddsw m7, m7 ; r7 = G + G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
262 movq I(2), m2 ; store NR2 at I2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
263 paddsw m7, m4 ; r7 = G. = E + G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
264 movq I(1), m1 ; store NR1 at I1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
265 psubsw m4, m3 ; r4 = R4 = E. - D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
266 paddsw m4, OC_8 ; adjust R4 (and R3) for shift |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
267 paddsw m3, m3 ; r3 = D. + D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
268 paddsw m3, m4 ; r3 = R3 = E. + D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
269 psraw m4, 4 ; r4 = NR4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
270 psubsw m6, m5 ; r6 = R6 = F. - B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
271 psraw m3, 4 ; r3 = NR3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
272 paddsw m6, OC_8 ; adjust R6 (and R5) for shift |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
273 paddsw m5, m5 ; r5 = B.. + B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
274 paddsw m5, m6 ; r5 = R5 = F. + B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
275 psraw m6, 4 ; r6 = NR6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
276 movq J(4), m4 ; store NR4 at J4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
277 psraw m5, 4 ; r5 = NR5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
278 movq I(3), m3 ; store NR3 at I3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
279 psubsw m7, m0 ; r7 = R7 = G. - C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
280 paddsw m7, OC_8 ; adjust R7 (and R0) for shift |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
281 paddsw m0, m0 ; r0 = C. + C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
282 paddsw m0, m7 ; r0 = R0 = G. + C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
283 psraw m7, 4 ; r7 = NR7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
284 movq J(6), m6 ; store NR6 at J6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
285 psraw m0, 4 ; r0 = NR0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
286 movq J(5), m5 ; store NR5 at J5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
287 movq J(7), m7 ; store NR7 at J7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
288 movq I(0), m0 ; store NR0 at I0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
289 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
290 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
291 ; Following macro does two 4x4 transposes in place. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
292 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
293 ; At entry (we assume): |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
294 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
295 ; r0 = a3 a2 a1 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
296 ; I(1) = b3 b2 b1 b0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
297 ; r2 = c3 c2 c1 c0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
298 ; r3 = d3 d2 d1 d0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
299 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
300 ; r4 = e3 e2 e1 e0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
301 ; r5 = f3 f2 f1 f0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
302 ; r6 = g3 g2 g1 g0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
303 ; r7 = h3 h2 h1 h0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
304 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
305 ; At exit, we have: |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
306 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
307 ; I(0) = d0 c0 b0 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
308 ; I(1) = d1 c1 b1 a1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
309 ; I(2) = d2 c2 b2 a2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
310 ; I(3) = d3 c3 b3 a3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
311 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
312 ; J(4) = h0 g0 f0 e0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
313 ; J(5) = h1 g1 f1 e1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
314 ; J(6) = h2 g2 f2 e2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
315 ; J(7) = h3 g3 f3 e3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
316 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
317 ; I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
318 ; J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
319 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
320 ; Since r1 is free at entry, we calculate the Js first. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
321 %macro Transpose 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
322 movq m1, m4 ; r1 = e3 e2 e1 e0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
323 punpcklwd m4, m5 ; r4 = f1 e1 f0 e0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
324 movq I(0), m0 ; save a3 a2 a1 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
325 punpckhwd m1, m5 ; r1 = f3 e3 f2 e2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
326 movq m0, m6 ; r0 = g3 g2 g1 g0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
327 punpcklwd m6, m7 ; r6 = h1 g1 h0 g0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
328 movq m5, m4 ; r5 = f1 e1 f0 e0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
329 punpckldq m4, m6 ; r4 = h0 g0 f0 e0 = R4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
330 punpckhdq m5, m6 ; r5 = h1 g1 f1 e1 = R5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
331 movq m6, m1 ; r6 = f3 e3 f2 e2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
332 movq J(4), m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
333 punpckhwd m0, m7 ; r0 = h3 g3 h2 g2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
334 movq J(5), m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
335 punpckhdq m6, m0 ; r6 = h3 g3 f3 e3 = R7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
336 movq m4, I(0) ; r4 = a3 a2 a1 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
337 punpckldq m1, m0 ; r1 = h2 g2 f2 e2 = R6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
338 movq m5, I(1) ; r5 = b3 b2 b1 b0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
339 movq m0, m4 ; r0 = a3 a2 a1 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
340 movq J(7), m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
341 punpcklwd m0, m5 ; r0 = b1 a1 b0 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
342 movq J(6), m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
343 punpckhwd m4, m5 ; r4 = b3 a3 b2 a2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
344 movq m5, m2 ; r5 = c3 c2 c1 c0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
345 punpcklwd m2, m3 ; r2 = d1 c1 d0 c0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
346 movq m1, m0 ; r1 = b1 a1 b0 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
347 punpckldq m0, m2 ; r0 = d0 c0 b0 a0 = R0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
348 punpckhdq m1, m2 ; r1 = d1 c1 b1 a1 = R1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
349 movq m2, m4 ; r2 = b3 a3 b2 a2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
350 movq I(0), m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
351 punpckhwd m5, m3 ; r5 = d3 c3 d2 c2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
352 movq I(1), m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
353 punpckhdq m4, m5 ; r4 = d3 c3 b3 a3 = R3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
354 punpckldq m2, m5 ; r2 = d2 c2 b2 a2 = R2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
355 movq I(3), m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
356 movq I(2), m2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
357 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
358 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
359 %macro VP3_IDCT_mmx 1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
360 ; eax = quantized input |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
361 ; ebx = dequantizer matrix |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
362 ; ecx = IDCT constants |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
363 ; M(I) = ecx + MaskOffset(0) + I * 8 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
364 ; C(I) = vp3_idct_data + (I-1) * 16 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
365 ; edx = output |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
366 ; r0..r7 = mm0..mm7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
367 %define OC_8 [pw_8] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
368 %define C(x) [vp3_idct_data+16*(x-1)] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
369 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
370 ; at this point, function has completed dequantization + dezigzag + |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
371 ; partial transposition; now do the idct itself |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
372 %define I(x) [%1+16* x ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
373 %define J(x) [%1+16*(x-4)+8] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
374 RowIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
375 Transpose |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
376 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
377 %define I(x) [%1+16* x +64] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
378 %define J(x) [%1+16*(x-4)+72] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
379 RowIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
380 Transpose |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
381 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
382 %define I(x) [%1+16*x] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
383 %define J(x) [%1+16*x] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
384 ColumnIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
385 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
386 %define I(x) [%1+16*x+8] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
387 %define J(x) [%1+16*x+8] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
388 ColumnIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
389 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
390 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
391 %macro VP3_1D_IDCT_SSE2 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
392 movdqa m2, I(3) ; xmm2 = i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
393 movdqa m6, C(3) ; xmm6 = c3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
394 movdqa m4, m2 ; xmm4 = i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
395 movdqa m7, I(5) ; xmm7 = i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
396 pmulhw m4, m6 ; xmm4 = c3 * i3 - i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
397 movdqa m1, C(5) ; xmm1 = c5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
398 pmulhw m6, m7 ; xmm6 = c3 * i5 - i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
399 movdqa m5, m1 ; xmm5 = c5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
400 pmulhw m1, m2 ; xmm1 = c5 * i3 - i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
401 movdqa m3, I(1) ; xmm3 = i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
402 pmulhw m5, m7 ; xmm5 = c5 * i5 - i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
403 movdqa m0, C(1) ; xmm0 = c1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
404 paddw m4, m2 ; xmm4 = c3 * i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
405 paddw m6, m7 ; xmm6 = c3 * i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
406 paddw m2, m1 ; xmm2 = c5 * i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
407 movdqa m1, I(7) ; xmm1 = i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
408 paddw m7, m5 ; xmm7 = c5 * i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
409 movdqa m5, m0 ; xmm5 = c1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
410 pmulhw m0, m3 ; xmm0 = c1 * i1 - i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
411 paddsw m4, m7 ; xmm4 = c3 * i3 + c5 * i5 = C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
412 pmulhw m5, m1 ; xmm5 = c1 * i7 - i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
413 movdqa m7, C(7) ; xmm7 = c7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
414 psubsw m6, m2 ; xmm6 = c3 * i5 - c5 * i3 = D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
415 paddw m0, m3 ; xmm0 = c1 * i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
416 pmulhw m3, m7 ; xmm3 = c7 * i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
417 movdqa m2, I(2) ; xmm2 = i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
418 pmulhw m7, m1 ; xmm7 = c7 * i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
419 paddw m5, m1 ; xmm5 = c1 * i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
420 movdqa m1, m2 ; xmm1 = i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
421 pmulhw m2, C(2) ; xmm2 = i2 * c2 -i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
422 psubsw m3, m5 ; xmm3 = c7 * i1 - c1 * i7 = B |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
423 movdqa m5, I(6) ; xmm5 = i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
424 paddsw m0, m7 ; xmm0 = c1 * i1 + c7 * i7 = A |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
425 movdqa m7, m5 ; xmm7 = i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
426 psubsw m0, m4 ; xmm0 = A - C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
427 pmulhw m5, C(2) ; xmm5 = c2 * i6 - i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
428 paddw m2, m1 ; xmm2 = i2 * c2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
429 pmulhw m1, C(6) ; xmm1 = c6 * i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
430 paddsw m4, m4 ; xmm4 = C + C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
431 paddsw m4, m0 ; xmm4 = A + C = C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
432 psubsw m3, m6 ; xmm3 = B - D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
433 paddw m5, m7 ; xmm5 = c2 * i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
434 paddsw m6, m6 ; xmm6 = D + D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
435 pmulhw m7, C(6) ; xmm7 = c6 * i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
436 paddsw m6, m3 ; xmm6 = B + D = D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
437 movdqa I(1), m4 ; Save C. at I(1) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
438 psubsw m1, m5 ; xmm1 = c6 * i2 - c2 * i6 = H |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
439 movdqa m4, C(4) ; xmm4 = C4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
440 movdqa m5, m3 ; xmm5 = B - D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
441 pmulhw m3, m4 ; xmm3 = ( c4 -1 ) * ( B - D ) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
442 paddsw m7, m2 ; xmm7 = c2 * i2 + c6 * i6 = G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
443 movdqa I(2), m6 ; save D. at I(2) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
444 movdqa m2, m0 ; xmm2 = A - C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
445 movdqa m6, I(0) ; xmm6 = i0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
446 pmulhw m0, m4 ; xmm0 = ( c4 - 1 ) * ( A - C ) = A. - ( A - C ) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
447 paddw m5, m3 ; xmm5 = c4 * ( B - D ) = B. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
448 movdqa m3, I(4) ; xmm3 = i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
449 psubsw m5, m1 ; xmm5 = B. - H = B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
450 paddw m2, m0 ; xmm2 = c4 * ( A - C) = A. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
451 psubsw m6, m3 ; xmm6 = i0 - i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
452 movdqa m0, m6 ; xmm0 = i0 - i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
453 pmulhw m6, m4 ; xmm6 = (c4 - 1) * (i0 - i4) = F - (i0 - i4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
454 paddsw m3, m3 ; xmm3 = i4 + i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
455 paddsw m1, m1 ; xmm1 = H + H |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
456 paddsw m3, m0 ; xmm3 = i0 + i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
457 paddsw m1, m5 ; xmm1 = B. + H = H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
458 pmulhw m4, m3 ; xmm4 = ( c4 - 1 ) * ( i0 + i4 ) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
459 paddw m6, m0 ; xmm6 = c4 * ( i0 - i4 ) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
460 psubsw m6, m2 ; xmm6 = F - A. = F. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
461 paddsw m2, m2 ; xmm2 = A. + A. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
462 movdqa m0, I(1) ; Load C. from I(1) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
463 paddsw m2, m6 ; xmm2 = F + A. = A.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
464 paddw m4, m3 ; xmm4 = c4 * ( i0 + i4 ) = E |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
465 psubsw m2, m1 ; xmm2 = A.. - H. = R2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
466 ADD(m2) ; Adjust R2 and R1 before shifting |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
467 paddsw m1, m1 ; xmm1 = H. + H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
468 paddsw m1, m2 ; xmm1 = A.. + H. = R1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
469 SHIFT(m2) ; xmm2 = op2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
470 psubsw m4, m7 ; xmm4 = E - G = E. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
471 SHIFT(m1) ; xmm1 = op1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
472 movdqa m3, I(2) ; Load D. from I(2) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
473 paddsw m7, m7 ; xmm7 = G + G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
474 paddsw m7, m4 ; xmm7 = E + G = G. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
475 psubsw m4, m3 ; xmm4 = E. - D. = R4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
476 ADD(m4) ; Adjust R4 and R3 before shifting |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
477 paddsw m3, m3 ; xmm3 = D. + D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
478 paddsw m3, m4 ; xmm3 = E. + D. = R3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
479 SHIFT(m4) ; xmm4 = op4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
480 psubsw m6, m5 ; xmm6 = F. - B..= R6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
481 SHIFT(m3) ; xmm3 = op3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
482 ADD(m6) ; Adjust R6 and R5 before shifting |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
483 paddsw m5, m5 ; xmm5 = B.. + B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
484 paddsw m5, m6 ; xmm5 = F. + B.. = R5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
485 SHIFT(m6) ; xmm6 = op6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
486 SHIFT(m5) ; xmm5 = op5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
487 psubsw m7, m0 ; xmm7 = G. - C. = R7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
488 ADD(m7) ; Adjust R7 and R0 before shifting |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
489 paddsw m0, m0 ; xmm0 = C. + C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
490 paddsw m0, m7 ; xmm0 = G. + C. = R0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
491 SHIFT(m7) ; xmm7 = op7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
492 SHIFT(m0) ; xmm0 = op0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
493 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
494 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
495 %macro PUT_BLOCK 8 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
496 movdqa O(0), m%1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
497 movdqa O(1), m%2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
498 movdqa O(2), m%3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
499 movdqa O(3), m%4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
500 movdqa O(4), m%5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
501 movdqa O(5), m%6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
502 movdqa O(6), m%7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
503 movdqa O(7), m%8 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
504 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
505 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
506 %macro VP3_IDCT_sse2 1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
507 %define I(x) [%1+16*x] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
508 %define O(x) [%1+16*x] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
509 %define C(x) [vp3_idct_data+16*(x-1)] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
510 %define SHIFT(x) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
511 %define ADD(x) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
512 VP3_1D_IDCT_SSE2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
513 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
514 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
515 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
516 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%1], [%1+16] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
517 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
518 PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
519 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
520 %define SHIFT(x) psraw x, 4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
521 %define ADD(x) paddsw x, [pw_8] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
522 VP3_1D_IDCT_SSE2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
523 PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
524 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
525 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
526 %macro vp3_idct_funcs 3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
527 cglobal vp3_idct_%1, 1, 1, %2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
528 VP3_IDCT_%1 r0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
529 RET |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
530 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
531 cglobal vp3_idct_put_%1, 3, %3, %2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
532 VP3_IDCT_%1 r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
533 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
534 mov r3, r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
535 mov r2, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
536 mov r1, r0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
537 mov r0, r3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
538 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
539 mov r0m, r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
540 mov r1m, r0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
541 mov r2m, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
542 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
543 %ifdef WIN64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
544 call put_signed_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
545 RET |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
546 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
547 jmp put_signed_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
548 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
549 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
550 cglobal vp3_idct_add_%1, 3, %3, %2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
551 VP3_IDCT_%1 r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
552 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
553 mov r3, r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
554 mov r2, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
555 mov r1, r0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
556 mov r0, r3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
557 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
558 mov r0m, r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
559 mov r1m, r0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
560 mov r2m, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
561 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
562 %ifdef WIN64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
563 call add_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
564 RET |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
565 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
566 jmp add_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
567 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
568 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
569 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
570 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
571 %define REGS 4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
572 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
573 %define REGS 3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
574 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
575 INIT_MMX |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
576 vp3_idct_funcs mmx, 0, REGS |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
577 INIT_XMM |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
578 vp3_idct_funcs sse2, 9, REGS |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
579 %undef REGS |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
580 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
581 %macro DC_ADD 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
582 movq m2, [r0 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
583 movq m3, [r0+r1 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
584 paddusb m2, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
585 movq m4, [r0+r1*2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
586 paddusb m3, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
587 movq m5, [r0+r3 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
588 paddusb m4, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
589 paddusb m5, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
590 psubusb m2, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
591 psubusb m3, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
592 movq [r0 ], m2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
593 psubusb m4, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
594 movq [r0+r1 ], m3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
595 psubusb m5, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
596 movq [r0+r1*2], m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
597 movq [r0+r3 ], m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
598 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
599 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
600 INIT_MMX |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
601 cglobal vp3_idct_dc_add_mmx2, 3, 4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
602 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
603 movsxd r1, r1d |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
604 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
605 lea r3, [r1*3] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
606 movsx r2, word [r2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
607 add r2, 15 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
608 sar r2, 5 |
12457
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12436
diff
changeset
|
609 movd m0, r2d |
12436
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
610 pshufw m0, m0, 0x0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
611 pxor m1, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
612 psubw m1, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
613 packuswb m0, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
614 packuswb m1, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
615 DC_ADD |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
616 lea r0, [r0+r1*4] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
617 DC_ADD |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
618 RET |