Mercurial > libavcodec.hg
annotate x86/vp3dsp.asm @ 12515:307776e26174 libavcodec
Support deinterlacing of YUVJ422P in old deinterlacer.
Patch by Maksym Veremeyenko verem at m1stereo tv.
author | banan |
---|---|
date | Sat, 25 Sep 2010 14:37:54 +0000 |
parents | 2982071047a2 |
children |
rev | line source |
---|---|
12436
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
1 ;****************************************************************************** |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
2 ;* MMX/SSE2-optimized functions for the VP3 decoder |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
3 ;* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org> |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
4 ;* |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
5 ;* This file is part of FFmpeg. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
6 ;* |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
7 ;* FFmpeg is free software; you can redistribute it and/or |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
8 ;* modify it under the terms of the GNU Lesser General Public |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
9 ;* License as published by the Free Software Foundation; either |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
10 ;* version 2.1 of the License, or (at your option) any later version. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
11 ;* |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
12 ;* FFmpeg is distributed in the hope that it will be useful, |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
15 ;* Lesser General Public License for more details. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
16 ;* |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
17 ;* You should have received a copy of the GNU Lesser General Public |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
18 ;* License along with FFmpeg; if not, write to the Free Software |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
20 ;****************************************************************************** |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
21 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
22 %include "x86inc.asm" |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
23 %include "x86util.asm" |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
24 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
25 ; MMX-optimized functions cribbed from the original VP3 source code. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
26 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
27 SECTION_RODATA |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
28 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
29 vp3_idct_data: times 8 dw 64277 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
30 times 8 dw 60547 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
31 times 8 dw 54491 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
32 times 8 dw 46341 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
33 times 8 dw 36410 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
34 times 8 dw 25080 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
35 times 8 dw 12785 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
36 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
37 cextern pb_1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
38 cextern pb_3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
39 cextern pb_7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
40 cextern pb_1F |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
41 cextern pb_81 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
42 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
43 cextern pw_8 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
44 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
45 cextern put_signed_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
46 cextern add_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
47 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
48 SECTION .text |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
49 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
50 ; this is off by one or two for some cases when filter_limit is greater than 63 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
51 ; in: p0 in mm6, p1 in mm4, p2 in mm2, p3 in mm1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
52 ; out: p1 in mm4, p2 in mm3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
53 %macro VP3_LOOP_FILTER 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
54 movq m7, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
55 pand m6, [pb_7] ; p0&7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
56 psrlw m7, 3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
57 pand m7, [pb_1F] ; p0>>3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
58 movq m3, m2 ; p2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
59 pxor m2, m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
60 pand m2, [pb_1] ; (p2^p1)&1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
61 movq m5, m2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
62 paddb m2, m2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
63 paddb m2, m5 ; 3*(p2^p1)&1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
64 paddb m2, m6 ; extra bits lost in shifts |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
65 pcmpeqb m0, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
66 pxor m1, m0 ; 255 - p3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
67 pavgb m1, m2 ; (256 - p3 + extrabits) >> 1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
68 pxor m0, m4 ; 255 - p1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
69 pavgb m0, m3 ; (256 + p2-p1) >> 1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
70 paddb m1, [pb_3] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
71 pavgb m1, m0 ; 128+2+( p2-p1 - p3) >> 2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
72 pavgb m1, m0 ; 128+1+(3*(p2-p1) - p3) >> 3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
73 paddusb m7, m1 ; d+128+1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
74 movq m6, [pb_81] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
75 psubusb m6, m7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
76 psubusb m7, [pb_81] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
77 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
78 movq m5, [r2+516] ; flim |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
79 pminub m6, m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
80 pminub m7, m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
81 movq m0, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
82 movq m1, m7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
83 paddb m6, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
84 paddb m7, m7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
85 pminub m6, m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
86 pminub m7, m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
87 psubb m6, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
88 psubb m7, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
89 paddusb m4, m7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
90 psubusb m4, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
91 psubusb m3, m7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
92 paddusb m3, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
93 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
94 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
95 %macro STORE_4_WORDS 1 |
12457
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12436
diff
changeset
|
96 movd r2d, %1 |
12436
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
97 mov [r0 -1], r2w |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
98 psrlq %1, 32 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
99 shr r2, 16 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
100 mov [r0+r1 -1], r2w |
12457
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12436
diff
changeset
|
101 movd r2d, %1 |
12436
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
102 mov [r0+r1*2-1], r2w |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
103 shr r2, 16 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
104 mov [r0+r3 -1], r2w |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
105 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
106 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
107 INIT_MMX |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
108 cglobal vp3_v_loop_filter_mmx2, 3, 4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
109 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
110 movsxd r1, r1d |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
111 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
112 mov r3, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
113 neg r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
114 movq m6, [r0+r1*2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
115 movq m4, [r0+r1 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
116 movq m2, [r0 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
117 movq m1, [r0+r3 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
118 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
119 VP3_LOOP_FILTER |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
120 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
121 movq [r0+r1], m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
122 movq [r0 ], m3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
123 RET |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
124 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
125 cglobal vp3_h_loop_filter_mmx2, 3, 4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
126 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
127 movsxd r1, r1d |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
128 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
129 lea r3, [r1*3] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
130 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
131 movd m6, [r0 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
132 movd m4, [r0+r1 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
133 movd m2, [r0+r1*2-2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
134 movd m1, [r0+r3 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
135 lea r0, [r0+r1*4 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
136 punpcklbw m6, [r0 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
137 punpcklbw m4, [r0+r1 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
138 punpcklbw m2, [r0+r1*2-2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
139 punpcklbw m1, [r0+r3 -2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
140 sub r0, r3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
141 sub r0, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
142 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
143 TRANSPOSE4x4B 6, 4, 2, 1, 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
144 VP3_LOOP_FILTER |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
145 SBUTTERFLY bw, 4, 3, 5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
146 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
147 STORE_4_WORDS m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
148 lea r0, [r0+r1*4 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
149 STORE_4_WORDS m3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
150 RET |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
151 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
152 ; from original comments: The Macro does IDct on 4 1-D Dcts |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
153 %macro BeginIDCT 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
154 movq m2, I(3) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
155 movq m6, C(3) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
156 movq m4, m2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
157 movq m7, J(5) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
158 pmulhw m4, m6 ; r4 = c3*i3 - i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
159 movq m1, C(5) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
160 pmulhw m6, m7 ; r6 = c3*i5 - i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
161 movq m5, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
162 pmulhw m1, m2 ; r1 = c5*i3 - i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
163 movq m3, I(1) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
164 pmulhw m5, m7 ; r5 = c5*i5 - i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
165 movq m0, C(1) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
166 paddw m4, m2 ; r4 = c3*i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
167 paddw m6, m7 ; r6 = c3*i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
168 paddw m2, m1 ; r2 = c5*i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
169 movq m1, J(7) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
170 paddw m7, m5 ; r7 = c5*i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
171 movq m5, m0 ; r5 = c1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
172 pmulhw m0, m3 ; r0 = c1*i1 - i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
173 paddsw m4, m7 ; r4 = C = c3*i3 + c5*i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
174 pmulhw m5, m1 ; r5 = c1*i7 - i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
175 movq m7, C(7) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
176 psubsw m6, m2 ; r6 = D = c3*i5 - c5*i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
177 paddw m0, m3 ; r0 = c1*i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
178 pmulhw m3, m7 ; r3 = c7*i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
179 movq m2, I(2) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
180 pmulhw m7, m1 ; r7 = c7*i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
181 paddw m5, m1 ; r5 = c1*i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
182 movq m1, m2 ; r1 = i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
183 pmulhw m2, C(2) ; r2 = c2*i2 - i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
184 psubsw m3, m5 ; r3 = B = c7*i1 - c1*i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
185 movq m5, J(6) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
186 paddsw m0, m7 ; r0 = A = c1*i1 + c7*i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
187 movq m7, m5 ; r7 = i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
188 psubsw m0, m4 ; r0 = A - C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
189 pmulhw m5, C(2) ; r5 = c2*i6 - i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
190 paddw m2, m1 ; r2 = c2*i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
191 pmulhw m1, C(6) ; r1 = c6*i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
192 paddsw m4, m4 ; r4 = C + C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
193 paddsw m4, m0 ; r4 = C. = A + C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
194 psubsw m3, m6 ; r3 = B - D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
195 paddw m5, m7 ; r5 = c2*i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
196 paddsw m6, m6 ; r6 = D + D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
197 pmulhw m7, C(6) ; r7 = c6*i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
198 paddsw m6, m3 ; r6 = D. = B + D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
199 movq I(1), m4 ; save C. at I(1) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
200 psubsw m1, m5 ; r1 = H = c6*i2 - c2*i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
201 movq m4, C(4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
202 movq m5, m3 ; r5 = B - D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
203 pmulhw m3, m4 ; r3 = (c4 - 1) * (B - D) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
204 paddsw m7, m2 ; r7 = G = c6*i6 + c2*i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
205 movq I(2), m6 ; save D. at I(2) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
206 movq m2, m0 ; r2 = A - C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
207 movq m6, I(0) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
208 pmulhw m0, m4 ; r0 = (c4 - 1) * (A - C) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
209 paddw m5, m3 ; r5 = B. = c4 * (B - D) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
210 movq m3, J(4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
211 psubsw m5, m1 ; r5 = B.. = B. - H |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
212 paddw m2, m0 ; r2 = A. = c4 * (A - C) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
213 psubsw m6, m3 ; r6 = i0 - i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
214 movq m0, m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
215 pmulhw m6, m4 ; r6 = (c4 - 1) * (i0 - i4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
216 paddsw m3, m3 ; r3 = i4 + i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
217 paddsw m1, m1 ; r1 = H + H |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
218 paddsw m3, m0 ; r3 = i0 + i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
219 paddsw m1, m5 ; r1 = H. = B. + H |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
220 pmulhw m4, m3 ; r4 = (c4 - 1) * (i0 + i4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
221 paddsw m6, m0 ; r6 = F = c4 * (i0 - i4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
222 psubsw m6, m2 ; r6 = F. = F - A. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
223 paddsw m2, m2 ; r2 = A. + A. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
224 movq m0, I(1) ; r0 = C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
225 paddsw m2, m6 ; r2 = A.. = F + A. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
226 paddw m4, m3 ; r4 = E = c4 * (i0 + i4) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
227 psubsw m2, m1 ; r2 = R2 = A.. - H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
228 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
229 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
230 ; RowIDCT gets ready to transpose |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
231 %macro RowIDCT 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
232 BeginIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
233 movq m3, I(2) ; r3 = D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
234 psubsw m4, m7 ; r4 = E. = E - G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
235 paddsw m1, m1 ; r1 = H. + H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
236 paddsw m7, m7 ; r7 = G + G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
237 paddsw m1, m2 ; r1 = R1 = A.. + H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
238 paddsw m7, m4 ; r7 = G. = E + G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
239 psubsw m4, m3 ; r4 = R4 = E. - D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
240 paddsw m3, m3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
241 psubsw m6, m5 ; r6 = R6 = F. - B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
242 paddsw m5, m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
243 paddsw m3, m4 ; r3 = R3 = E. + D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
244 paddsw m5, m6 ; r5 = R5 = F. + B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
245 psubsw m7, m0 ; r7 = R7 = G. - C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
246 paddsw m0, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
247 movq I(1), m1 ; save R1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
248 paddsw m0, m7 ; r0 = R0 = G. + C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
249 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
250 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
251 ; Column IDCT normalizes and stores final results |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
252 %macro ColumnIDCT 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
253 BeginIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
254 paddsw m2, OC_8 ; adjust R2 (and R1) for shift |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
255 paddsw m1, m1 ; r1 = H. + H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
256 paddsw m1, m2 ; r1 = R1 = A.. + H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
257 psraw m2, 4 ; r2 = NR2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
258 psubsw m4, m7 ; r4 = E. = E - G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
259 psraw m1, 4 ; r1 = NR1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
260 movq m3, I(2) ; r3 = D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
261 paddsw m7, m7 ; r7 = G + G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
262 movq I(2), m2 ; store NR2 at I2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
263 paddsw m7, m4 ; r7 = G. = E + G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
264 movq I(1), m1 ; store NR1 at I1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
265 psubsw m4, m3 ; r4 = R4 = E. - D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
266 paddsw m4, OC_8 ; adjust R4 (and R3) for shift |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
267 paddsw m3, m3 ; r3 = D. + D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
268 paddsw m3, m4 ; r3 = R3 = E. + D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
269 psraw m4, 4 ; r4 = NR4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
270 psubsw m6, m5 ; r6 = R6 = F. - B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
271 psraw m3, 4 ; r3 = NR3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
272 paddsw m6, OC_8 ; adjust R6 (and R5) for shift |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
273 paddsw m5, m5 ; r5 = B.. + B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
274 paddsw m5, m6 ; r5 = R5 = F. + B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
275 psraw m6, 4 ; r6 = NR6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
276 movq J(4), m4 ; store NR4 at J4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
277 psraw m5, 4 ; r5 = NR5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
278 movq I(3), m3 ; store NR3 at I3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
279 psubsw m7, m0 ; r7 = R7 = G. - C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
280 paddsw m7, OC_8 ; adjust R7 (and R0) for shift |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
281 paddsw m0, m0 ; r0 = C. + C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
282 paddsw m0, m7 ; r0 = R0 = G. + C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
283 psraw m7, 4 ; r7 = NR7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
284 movq J(6), m6 ; store NR6 at J6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
285 psraw m0, 4 ; r0 = NR0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
286 movq J(5), m5 ; store NR5 at J5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
287 movq J(7), m7 ; store NR7 at J7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
288 movq I(0), m0 ; store NR0 at I0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
289 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
290 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
291 ; Following macro does two 4x4 transposes in place. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
292 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
293 ; At entry (we assume): |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
294 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
295 ; r0 = a3 a2 a1 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
296 ; I(1) = b3 b2 b1 b0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
297 ; r2 = c3 c2 c1 c0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
298 ; r3 = d3 d2 d1 d0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
299 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
300 ; r4 = e3 e2 e1 e0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
301 ; r5 = f3 f2 f1 f0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
302 ; r6 = g3 g2 g1 g0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
303 ; r7 = h3 h2 h1 h0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
304 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
305 ; At exit, we have: |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
306 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
307 ; I(0) = d0 c0 b0 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
308 ; I(1) = d1 c1 b1 a1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
309 ; I(2) = d2 c2 b2 a2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
310 ; I(3) = d3 c3 b3 a3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
311 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
312 ; J(4) = h0 g0 f0 e0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
313 ; J(5) = h1 g1 f1 e1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
314 ; J(6) = h2 g2 f2 e2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
315 ; J(7) = h3 g3 f3 e3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
316 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
317 ; I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
318 ; J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
319 ; |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
320 ; Since r1 is free at entry, we calculate the Js first. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
321 %macro Transpose 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
322 movq m1, m4 ; r1 = e3 e2 e1 e0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
323 punpcklwd m4, m5 ; r4 = f1 e1 f0 e0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
324 movq I(0), m0 ; save a3 a2 a1 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
325 punpckhwd m1, m5 ; r1 = f3 e3 f2 e2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
326 movq m0, m6 ; r0 = g3 g2 g1 g0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
327 punpcklwd m6, m7 ; r6 = h1 g1 h0 g0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
328 movq m5, m4 ; r5 = f1 e1 f0 e0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
329 punpckldq m4, m6 ; r4 = h0 g0 f0 e0 = R4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
330 punpckhdq m5, m6 ; r5 = h1 g1 f1 e1 = R5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
331 movq m6, m1 ; r6 = f3 e3 f2 e2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
332 movq J(4), m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
333 punpckhwd m0, m7 ; r0 = h3 g3 h2 g2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
334 movq J(5), m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
335 punpckhdq m6, m0 ; r6 = h3 g3 f3 e3 = R7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
336 movq m4, I(0) ; r4 = a3 a2 a1 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
337 punpckldq m1, m0 ; r1 = h2 g2 f2 e2 = R6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
338 movq m5, I(1) ; r5 = b3 b2 b1 b0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
339 movq m0, m4 ; r0 = a3 a2 a1 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
340 movq J(7), m6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
341 punpcklwd m0, m5 ; r0 = b1 a1 b0 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
342 movq J(6), m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
343 punpckhwd m4, m5 ; r4 = b3 a3 b2 a2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
344 movq m5, m2 ; r5 = c3 c2 c1 c0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
345 punpcklwd m2, m3 ; r2 = d1 c1 d0 c0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
346 movq m1, m0 ; r1 = b1 a1 b0 a0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
347 punpckldq m0, m2 ; r0 = d0 c0 b0 a0 = R0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
348 punpckhdq m1, m2 ; r1 = d1 c1 b1 a1 = R1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
349 movq m2, m4 ; r2 = b3 a3 b2 a2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
350 movq I(0), m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
351 punpckhwd m5, m3 ; r5 = d3 c3 d2 c2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
352 movq I(1), m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
353 punpckhdq m4, m5 ; r4 = d3 c3 b3 a3 = R3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
354 punpckldq m2, m5 ; r2 = d2 c2 b2 a2 = R2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
355 movq I(3), m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
356 movq I(2), m2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
357 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
358 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
359 %macro VP3_IDCT_mmx 1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
360 ; eax = quantized input |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
361 ; ebx = dequantizer matrix |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
362 ; ecx = IDCT constants |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
363 ; M(I) = ecx + MaskOffset(0) + I * 8 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
364 ; C(I) = ecx + CosineOffset(32) + (I-1) * 8 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
365 ; edx = output |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
366 ; r0..r7 = mm0..mm7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
367 %define OC_8 [pw_8] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
368 %define C(x) [vp3_idct_data+16*(x-1)] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
369 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
370 ; at this point, function has completed dequantization + dezigzag + |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
371 ; partial transposition; now do the idct itself |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
372 %define I(x) [%1+16* x ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
373 %define J(x) [%1+16*(x-4)+8] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
374 RowIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
375 Transpose |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
376 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
377 %define I(x) [%1+16* x +64] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
378 %define J(x) [%1+16*(x-4)+72] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
379 RowIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
380 Transpose |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
381 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
382 %define I(x) [%1+16*x] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
383 %define J(x) [%1+16*x] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
384 ColumnIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
385 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
386 %define I(x) [%1+16*x+8] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
387 %define J(x) [%1+16*x+8] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
388 ColumnIDCT |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
389 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
390 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
391 %macro VP3_1D_IDCT_SSE2 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
392 movdqa m2, I(3) ; xmm2 = i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
393 movdqa m6, C(3) ; xmm6 = c3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
394 movdqa m4, m2 ; xmm4 = i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
395 movdqa m7, I(5) ; xmm7 = i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
396 pmulhw m4, m6 ; xmm4 = c3 * i3 - i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
397 movdqa m1, C(5) ; xmm1 = c5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
398 pmulhw m6, m7 ; xmm6 = c3 * i5 - i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
399 movdqa m5, m1 ; xmm5 = c5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
400 pmulhw m1, m2 ; xmm1 = c5 * i3 - i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
401 movdqa m3, I(1) ; xmm3 = i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
402 pmulhw m5, m7 ; xmm5 = c5 * i5 - i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
403 movdqa m0, C(1) ; xmm0 = c1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
404 paddw m4, m2 ; xmm4 = c3 * i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
405 paddw m6, m7 ; xmm6 = c3 * i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
406 paddw m2, m1 ; xmm2 = c5 * i3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
407 movdqa m1, I(7) ; xmm1 = i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
408 paddw m7, m5 ; xmm7 = c5 * i5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
409 movdqa m5, m0 ; xmm5 = c1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
410 pmulhw m0, m3 ; xmm0 = c1 * i1 - i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
411 paddsw m4, m7 ; xmm4 = c3 * i3 + c5 * i5 = C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
412 pmulhw m5, m1 ; xmm5 = c1 * i7 - i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
413 movdqa m7, C(7) ; xmm7 = c7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
414 psubsw m6, m2 ; xmm6 = c3 * i5 - c5 * i3 = D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
415 paddw m0, m3 ; xmm0 = c1 * i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
416 pmulhw m3, m7 ; xmm3 = c7 * i1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
417 movdqa m2, I(2) ; xmm2 = i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
418 pmulhw m7, m1 ; xmm7 = c7 * i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
419 paddw m5, m1 ; xmm5 = c1 * i7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
420 movdqa m1, m2 ; xmm1 = i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
421 pmulhw m2, C(2) ; xmm2 = i2 * c2 -i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
422 psubsw m3, m5 ; xmm3 = c7 * i1 - c1 * i7 = B |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
423 movdqa m5, I(6) ; xmm5 = i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
424 paddsw m0, m7 ; xmm0 = c1 * i1 + c7 * i7 = A |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
425 movdqa m7, m5 ; xmm7 = i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
426 psubsw m0, m4 ; xmm0 = A - C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
427 pmulhw m5, C(2) ; xmm5 = c2 * i6 - i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
428 paddw m2, m1 ; xmm2 = i2 * c2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
429 pmulhw m1, C(6) ; xmm1 = c6 * i2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
430 paddsw m4, m4 ; xmm4 = C + C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
431 paddsw m4, m0 ; xmm4 = A + C = C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
432 psubsw m3, m6 ; xmm3 = B - D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
433 paddw m5, m7 ; xmm5 = c2 * i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
434 paddsw m6, m6 ; xmm6 = D + D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
435 pmulhw m7, C(6) ; xmm7 = c6 * i6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
436 paddsw m6, m3 ; xmm6 = B + D = D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
437 movdqa I(1), m4 ; Save C. at I(1) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
438 psubsw m1, m5 ; xmm1 = c6 * i2 - c2 * i6 = H |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
439 movdqa m4, C(4) ; xmm4 = C4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
440 movdqa m5, m3 ; xmm5 = B - D |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
441 pmulhw m3, m4 ; xmm3 = ( c4 -1 ) * ( B - D ) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
442 paddsw m7, m2 ; xmm7 = c2 * i2 + c6 * i6 = G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
443 movdqa I(2), m6 ; save D. at I(2) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
444 movdqa m2, m0 ; xmm2 = A - C |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
445 movdqa m6, I(0) ; xmm6 = i0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
446 pmulhw m0, m4 ; xmm0 = ( c4 - 1 ) * ( A - C ) = A. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
447 paddw m5, m3 ; xmm5 = c4 * ( B - D ) = B. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
448 movdqa m3, I(4) ; xmm3 = i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
449 psubsw m5, m1 ; xmm5 = B. - H = B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
450 paddw m2, m0 ; xmm2 = c4 * ( A - C) = A. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
451 psubsw m6, m3 ; xmm6 = i0 - i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
452 movdqa m0, m6 ; xmm0 = i0 - i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
453 pmulhw m6, m4 ; xmm6 = (c4 - 1) * (i0 - i4) = F |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
454 paddsw m3, m3 ; xmm3 = i4 + i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
455 paddsw m1, m1 ; xmm1 = H + H |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
456 paddsw m3, m0 ; xmm3 = i0 + i4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
457 paddsw m1, m5 ; xmm1 = B. + H = H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
458 pmulhw m4, m3 ; xmm4 = ( c4 - 1 ) * ( i0 + i4 ) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
459 paddw m6, m0 ; xmm6 = c4 * ( i0 - i4 ) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
460 psubsw m6, m2 ; xmm6 = F - A. = F. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
461 paddsw m2, m2 ; xmm2 = A. + A. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
462 movdqa m0, I(1) ; Load C. from I(1) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
463 paddsw m2, m6 ; xmm2 = F + A. = A.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
464 paddw m4, m3 ; xmm4 = c4 * ( i0 + i4 ) = E |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
465 psubsw m2, m1 ; xmm2 = A.. - H. = R2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
466 ADD(m2) ; Adjust R2 and R1 before shifting |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
467 paddsw m1, m1 ; xmm1 = H. + H. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
468 paddsw m1, m2 ; xmm1 = A.. + H. = R1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
469 SHIFT(m2) ; xmm2 = op2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
470 psubsw m4, m7 ; xmm4 = E - G = E. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
471 SHIFT(m1) ; xmm1 = op1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
472 movdqa m3, I(2) ; Load D. from I(2) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
473 paddsw m7, m7 ; xmm7 = G + G |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
474 paddsw m7, m4 ; xmm7 = E + G = G. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
475 psubsw m4, m3 ; xmm4 = E. - D. = R4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
476 ADD(m4) ; Adjust R4 and R3 before shifting |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
477 paddsw m3, m3 ; xmm3 = D. + D. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
478 paddsw m3, m4 ; xmm3 = E. + D. = R3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
479 SHIFT(m4) ; xmm4 = op4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
480 psubsw m6, m5 ; xmm6 = F. - B..= R6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
481 SHIFT(m3) ; xmm3 = op3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
482 ADD(m6) ; Adjust R6 and R5 before shifting |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
483 paddsw m5, m5 ; xmm5 = B.. + B.. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
484 paddsw m5, m6 ; xmm5 = F. + B.. = R5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
485 SHIFT(m6) ; xmm6 = op6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
486 SHIFT(m5) ; xmm5 = op5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
487 psubsw m7, m0 ; xmm7 = G. - C. = R7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
488 ADD(m7) ; Adjust R7 and R0 before shifting |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
489 paddsw m0, m0 ; xmm0 = C. + C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
490 paddsw m0, m7 ; xmm0 = G. + C. |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
491 SHIFT(m7) ; xmm7 = op7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
492 SHIFT(m0) ; xmm0 = op0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
493 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
494 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; PUT_BLOCK a, b, c, d, e, f, g, h
; Stores the eight SIMD registers m<a>..m<h> to the output slots O(0)..O(7),
; in that order. O() is not defined here; the invoking macro must %define it
; first. movdqa requires every slot to be 16-byte aligned.
495 %macro PUT_BLOCK 8 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
496 movdqa O(0), m%1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
497 movdqa O(1), m%2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
498 movdqa O(2), m%3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
499 movdqa O(3), m%4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
500 movdqa O(4), m%5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
501 movdqa O(5), m%6 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
502 movdqa O(6), m%7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
503 movdqa O(7), m%8 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
504 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
505 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; VP3_IDCT_sse2 ptr
; Runs VP3_1D_IDCT_SSE2 twice over the block addressed by ptr (%1), with a
; transpose after the first pass, and stores the result back over the input.
;   I(x), O(x) - x-th 16-byte slot of the block; both map to the same
;                address, so the transform works in place
;   C(x)       - x-th 16-byte entry of vp3_idct_data, counted from 1
;   SHIFT, ADD - hooks expanded inside VP3_1D_IDCT_SSE2
506 %macro VP3_IDCT_sse2 1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
507 %define I(x) [%1+16*x] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
508 %define O(x) [%1+16*x] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
509 %define C(x) [vp3_idct_data+16*(x-1)] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; First pass: SHIFT and ADD expand to nothing, so no rounding or scaling.
510 %define SHIFT(x) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
511 %define ADD(x) |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
512 VP3_1D_IDCT_SSE2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; Transpose registers 0-7. The trailing operands (register 8 on x86-64, the
; first two block slots otherwise) are presumably scratch space for the
; transpose -- confirm against the TRANSPOSE8x8W definition.
513 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
514 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
515 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
516 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%1], [%1+16] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
517 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; Write the transposed rows back so the second pass can read them via I().
518 PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
519 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; Second pass: ADD adds [pw_8] with signed saturation and SHIFT shifts right
; arithmetically by 4.
520 %define SHIFT(x) psraw x, 4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
521 %define ADD(x) paddsw x, [pw_8] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
522 VP3_1D_IDCT_SSE2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
523 PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
524 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
525 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; vp3_idct_funcs suffix, xmmregs, gprs
;   %1 = suffix: selects the VP3_IDCT_%1 macro and names the emitted functions
;   %2 = forwarded as the last cglobal argument (the XMM register count in
;        the x86inc convention -- confirm against x86inc.asm)
;   %3 = general-purpose register count requested for the put/add variants
; Emits three functions:
;   vp3_idct_%1     - runs VP3_IDCT_%1 on the block addressed by r0
;   vp3_idct_put_%1 - runs VP3_IDCT_%1 on the block addressed by r2, then
;                     hands over to put_signed_pixels_clamped_mmx
;   vp3_idct_add_%1 - same, but hands over to add_pixels_clamped_mmx
; Before the hand-over the three arguments are rotated: the old r2 becomes
; argument 0, the old r0 argument 1 and the old r1 argument 2.
526 %macro vp3_idct_funcs 3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
527 cglobal vp3_idct_%1, 1, 1, %2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
528 VP3_IDCT_%1 r0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
529 RET |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
530 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
531 cglobal vp3_idct_put_%1, 3, %3, %2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
532 VP3_IDCT_%1 r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; x86-64: rotate the argument registers, using r3 as a temporary.
533 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
534 mov r3, r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
535 mov r2, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
536 mov r1, r0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
537 mov r0, r3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; Otherwise: write the rotated values to r0m..r2m (presumably the memory
; slots of the incoming arguments -- confirm in x86inc.asm).
538 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
539 mov r0m, r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
540 mov r1m, r0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
541 mov r2m, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
542 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; WIN64 calls the helper and returns through RET; every other target
; tail-jumps to it.
; NOTE(review): no shadow space or stack realignment is set up here before the
; WIN64 call -- confirm that the cglobal prologue provides what the callee needs.
543 %ifdef WIN64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
544 call put_signed_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
545 RET |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
546 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
547 jmp put_signed_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
548 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
549 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; Same sequence as vp3_idct_put_%1, with add_pixels_clamped_mmx as the target.
550 cglobal vp3_idct_add_%1, 3, %3, %2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
551 VP3_IDCT_%1 r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
552 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
553 mov r3, r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
554 mov r2, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
555 mov r1, r0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
556 mov r0, r3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
557 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
558 mov r0m, r2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
559 mov r1m, r0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
560 mov r2m, r1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
561 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
562 %ifdef WIN64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
563 call add_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
564 RET |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
565 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
566 jmp add_pixels_clamped_mmx |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
567 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
568 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
569 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; REGS = general-purpose register count passed to vp3_idct_funcs for the
; put/add functions: 4 on x86-64, where r3 serves as a temporary for the
; argument rotation, 3 otherwise.
570 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
571 %define REGS 4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
572 %else |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
573 %define REGS 3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
574 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; MMX variants: 0 is forwarded as the last cglobal argument.
575 INIT_MMX |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
576 vp3_idct_funcs mmx, 0, REGS |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; SSE2 variants: 9 is forwarded as the last cglobal argument (the x86-64 path
; of VP3_IDCT_sse2 names register 8 in its transpose, i.e. m0-m8).
577 INIT_XMM |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
578 vp3_idct_funcs sse2, 9, REGS |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
579 %undef REGS |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
580 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; DC_ADD
; Processes four rows at [r0], [r0+r1], [r0+r1*2] and [r0+r3]: each row is
; loaded with movq, m0 is added and then m1 subtracted (both byte-wise with
; unsigned saturation), and the result is stored back to the same address.
;   Inputs:   r0, r1, r3, m0, m1 (all left unchanged)
;   Clobbers: m2, m3, m4, m5
; The invocations visible in this file set r3 = 3*r1 beforehand, making the
; fourth address the fourth consecutive row.
; The loads, adds, subtracts and stores of the four rows are interleaved.
581 %macro DC_ADD 0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
582 movq m2, [r0 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
583 movq m3, [r0+r1 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
584 paddusb m2, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
585 movq m4, [r0+r1*2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
586 paddusb m3, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
587 movq m5, [r0+r3 ] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
588 paddusb m4, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
589 paddusb m5, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
590 psubusb m2, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
591 psubusb m3, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
592 movq [r0 ], m2 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
593 psubusb m4, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
594 movq [r0+r1 ], m3 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
595 psubusb m5, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
596 movq [r0+r1*2], m4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
597 movq [r0+r3 ], m5 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
598 %endmacro |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
599 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; vp3_idct_dc_add_mmx2
; Adds one rounded, scaled 16-bit value to every byte of an 8-row by 8-byte
; region, with unsigned saturation.
;   r0 = base address of the region (read and written in place)
;   r1 = byte distance between rows (sign-extended from 32 bits on x86-64)
;   r2 = address of a signed 16-bit input value; presumably the block's DC
;        coefficient -- confirm against the C caller
; cglobal declares 3 arguments and 4 general-purpose registers; r3 is the
; extra one and holds 3*r1.
600 INIT_MMX |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
601 cglobal vp3_idct_dc_add_mmx2, 3, 4 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
602 %ifdef ARCH_X86_64 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
603 movsxd r1, r1d |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
604 %endif |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; r3 = 3*r1, the offset of the fourth row used by DC_ADD.
605 lea r3, [r1*3] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; r2 = (sign-extended input word + 15) >> 5, using an arithmetic shift.
606 movsx r2, word [r2] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
607 add r2, 15 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
608 sar r2, 5 |
12457
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12436
diff
changeset
|
; Broadcast the low 16 bits of r2 to all four words of m0.
609 movd m0, r2d |
12436
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
610 pshufw m0, m0, 0x0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; m1 = -m0 per word.
611 pxor m1, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
612 psubw m1, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; Pack words to bytes with unsigned saturation. Negative words become 0, so
; m0 keeps only a positive offset and m1 only the magnitude of a negative
; one; DC_ADD then adds m0 and subtracts m1.
613 packuswb m0, m0 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
614 packuswb m1, m1 |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; Rows 0-3.
615 DC_ADD |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
; Advance r0 by four rows, then process rows 4-7.
616 lea r0, [r0+r1*4] |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
617 DC_ADD |
d6d0a43848b4
Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents:
diff
changeset
|
618 RET |