annotate x86/h264_chromamc.asm @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218 r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents b242eb86ea9a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12437
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
1 ;******************************************************************************
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
2 ;* MMX/SSSE3-optimized functions for H264 chroma MC
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
3 ;* Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
4 ;* 2005-2008 Loren Merritt
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
5 ;*
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
6 ;* This file is part of FFmpeg.
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
7 ;*
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
8 ;* FFmpeg is free software; you can redistribute it and/or
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
9 ;* modify it under the terms of the GNU Lesser General Public
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
10 ;* License as published by the Free Software Foundation; either
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
11 ;* version 2.1 of the License, or (at your option) any later version.
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
12 ;*
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
13 ;* FFmpeg is distributed in the hope that it will be useful,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
16 ;* Lesser General Public License for more details.
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
17 ;*
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
18 ;* You should have received a copy of the GNU Lesser General Public
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
19 ;* License along with FFmpeg; if not, write to the Free Software
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
21 ;******************************************************************************
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
22
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
23 %include "x86inc.asm"
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
24 %include "x86util.asm"
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
25
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
26 SECTION_RODATA
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
27
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
28 rnd_rv40_2d_tbl: times 4 dw 0 ; 16 entries, each a single word value repeated 4 times (8 bytes, i.e. one movq load). Referenced through rnd_2d_rv40; presumably indexed like the 1-D table below (its use is not visible in this chunk) - TODO confirm
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
29 times 4 dw 16
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
30 times 4 dw 32
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
31 times 4 dw 16
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
32 times 4 dw 32
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
33 times 4 dw 28
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
34 times 4 dw 32
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
35 times 4 dw 28
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
36 times 4 dw 0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
37 times 4 dw 32
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
38 times 4 dw 16
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
39 times 4 dw 32
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
40 times 4 dw 32
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
41 times 4 dw 28
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
42 times 4 dw 32
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
43 times 4 dw 28
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
44 rnd_rv40_1d_tbl: times 4 dw 0 ; same layout of 16 entries x 8 bytes; the rv40 1-D filter path loads [rnd_1d_rv40 + rnd_bias*8] with rnd_bias = ((r5 & 6)*4 + r4) >> 1 (r5 = my, r4 = mx)
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
45 times 4 dw 2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
46 times 4 dw 4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
47 times 4 dw 2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
48 times 4 dw 4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
49 times 4 dw 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
50 times 4 dw 4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
51 times 4 dw 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
52 times 4 dw 0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
53 times 4 dw 4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
54 times 4 dw 2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
55 times 4 dw 4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
56 times 4 dw 4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
57 times 4 dw 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
58 times 4 dw 4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
59 times 4 dw 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
60
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
61 cextern pw_3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
62 cextern pw_4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
63 cextern pw_8
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
64 cextern pw_28
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
65 cextern pw_32
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
66 cextern pw_64
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
67
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
68 SECTION .text
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
69
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
70 %macro mv0_pixels_mc8 0 ; unfiltered path (used when mx == 0 and my == 0): moves 8-byte rows from [r1] to [r0], four rows per loop iteration; takes no macro arguments
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
71 lea r4, [r2*2 ] ; r4 = 2 * r2, the step for two rows; the previous r4 value is overwritten
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
72 .next4rows
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
73 movq mm0, [r1 ] ; rows 0 and 1: load 8 bytes each from r1 and r1+r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
74 movq mm1, [r1+r2]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
75 CHROMAMC_AVG mm0, [r0 ] ; CHROMAMC_AVG is defined outside this view; presumably a no-op for put and an average with the destination for avg - TODO confirm
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
76 CHROMAMC_AVG mm1, [r0+r2]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
77 movq [r0 ], mm0 ; store rows 0 and 1 to r0 and r0+r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
78 movq [r0+r2], mm1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
79 add r0, r4 ; advance both pointers by two rows
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
80 add r1, r4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
81 movq mm0, [r1 ] ; rows 2 and 3: same load / CHROMAMC_AVG / store sequence
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
82 movq mm1, [r1+r2]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
83 CHROMAMC_AVG mm0, [r0 ]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
84 CHROMAMC_AVG mm1, [r0+r2]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
85 add r1, r4 ; r1 is already advanced here, before the stores below; r0 is advanced after them
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
86 movq [r0 ], mm0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
87 movq [r0+r2], mm1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
88 add r0, r4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
89 sub r3d, 4 ; four rows handled in this iteration
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
90 jne .next4rows ; exits only when r3d reaches exactly 0, so r3d must start as a non-zero multiple of 4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
91 %endmacro
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
92
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
93 %macro chroma_mc8_mmx_func 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
94 ; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
95 ; int stride, int h, int mx, int my)
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
96 cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
97 %ifdef ARCH_X86_64
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
98 movsxd r2, r2d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
99 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
100 mov r6d, r5d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
101 or r6d, r4d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
102 jne .at_least_one_non_zero
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
103 ; mx == 0 AND my == 0 - no filter needed
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
104 mv0_pixels_mc8
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
105 REP_RET
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
106
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
107 .at_least_one_non_zero
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
108 %ifidn %2, rv40
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
109 %ifdef PIC
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
110 %define rnd_1d_rv40 r11
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
111 %define rnd_2d_rv40 r11
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
112 %else ; no-PIC
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
113 %define rnd_1d_rv40 rnd_rv40_1d_tbl
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
114 %define rnd_2d_rv40 rnd_rv40_2d_tbl
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
115 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
116 %ifdef ARCH_X86_64
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
117 mov r10, r5
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
118 and r10, 6 ; &~1 for mx/my=[0,7]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
119 lea r10, [r10*4+r4]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
120 sar r10d, 1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
121 %define rnd_bias r10
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
122 %define dest_reg r0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
123 %else ; x86-32
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
124 mov r0, r5
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
125 and r0, 6 ; &~1 for mx/my=[0,7]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
126 lea r0, [r0*4+r4]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
127 sar r0d, 1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
128 %define rnd_bias r0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
129 %define dest_reg r5
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
130 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
131 %else ; vc1, h264
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
132 %define rnd_bias 0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
133 %define dest_reg r0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
134 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
135
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
136 test r5d, r5d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
137 mov r6, 1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
138 je .my_is_zero
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
139 test r4d, r4d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
140 mov r6, r2 ; dxy = x ? 1 : stride
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
141 jne .both_non_zero
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
142 .my_is_zero
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
143 ; mx == 0 XOR my == 0 - 1 dimensional filter only
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
144 or r4d, r5d ; x + y
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
145
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
146 %ifidn %2, rv40
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
147 %ifdef PIC
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
148 lea r11, [rnd_rv40_1d_tbl]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
149 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
150 %ifndef ARCH_X86_64
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
151 mov r5, r0m
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
152 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
153 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
154
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
155 movd m5, r4d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
156 movq m4, [pw_8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
157 movq m6, [rnd_1d_%2+rnd_bias*8] ; mm6 = rnd >> 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
158 punpcklwd m5, m5
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
159 punpckldq m5, m5 ; mm5 = B = x
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
160 pxor m7, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
161 psubw m4, m5 ; mm4 = A = 8-x
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
162
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
163 .next1drow
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
164 movq m0, [r1 ] ; mm0 = src[0..7]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
165 movq m2, [r1+r6] ; mm1 = src[1..8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
166
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
167 movq m1, m0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
168 movq m3, m2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
169 punpcklbw m0, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
170 punpckhbw m1, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
171 punpcklbw m2, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
172 punpckhbw m3, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
173 pmullw m0, m4 ; [mm0,mm1] = A * src[0..7]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
174 pmullw m1, m4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
175 pmullw m2, m5 ; [mm2,mm3] = B * src[1..8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
176 pmullw m3, m5
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
177
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
178 paddw m0, m6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
179 paddw m1, m6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
180 paddw m0, m2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
181 paddw m1, m3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
182 psrlw m0, 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
183 psrlw m1, 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
184 packuswb m0, m1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
185 CHROMAMC_AVG m0, [dest_reg]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
186 movq [dest_reg], m0 ; dst[0..7] = (A * src[0..7] + B * src[1..8] + (rnd >> 3)) >> 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
187
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
188 add dest_reg, r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
189 add r1, r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
190 dec r3d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
191 jne .next1drow
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
192 REP_RET
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
193
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
194 .both_non_zero ; general case, bilinear
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
195 movd m4, r4d ; x
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
196 movd m6, r5d ; y
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
197 %ifidn %2, rv40
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
198 %ifdef PIC
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
199 lea r11, [rnd_rv40_2d_tbl]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
200 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
201 %ifndef ARCH_X86_64
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
202 mov r5, r0m
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
203 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
204 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
205 mov r6, rsp ; backup stack pointer
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
206 and rsp, ~(mmsize-1) ; align stack
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
207 sub rsp, 16 ; AA and DD
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
208
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
209 punpcklwd m4, m4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
210 punpcklwd m6, m6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
211 punpckldq m4, m4 ; mm4 = x words
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
212 punpckldq m6, m6 ; mm6 = y words
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
213 movq m5, m4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
214 pmullw m4, m6 ; mm4 = x * y
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
215 psllw m5, 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
216 psllw m6, 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
217 movq m7, m5
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
218 paddw m7, m6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
219 movq [rsp+8], m4 ; DD = x * y
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
220 psubw m5, m4 ; mm5 = B = 8x - xy
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
221 psubw m6, m4 ; mm6 = C = 8y - xy
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
222 paddw m4, [pw_64]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
223 psubw m4, m7 ; mm4 = A = xy - (8x+8y) + 64
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
224 pxor m7, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
225 movq [rsp ], m4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
226
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
227 movq m0, [r1 ] ; mm0 = src[0..7]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
228 movq m1, [r1+1] ; mm1 = src[1..8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
229 .next2drow
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
230 add r1, r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
231
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
232 movq m2, m0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
233 movq m3, m1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
234 punpckhbw m0, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
235 punpcklbw m1, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
236 punpcklbw m2, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
237 punpckhbw m3, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
238 pmullw m0, [rsp]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
239 pmullw m2, [rsp]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
240 pmullw m1, m5
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
241 pmullw m3, m5
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
242 paddw m2, m1 ; mm2 = A * src[0..3] + B * src[1..4]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
243 paddw m3, m0 ; mm3 = A * src[4..7] + B * src[5..8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
244
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
245 movq m0, [r1]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
246 movq m1, m0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
247 punpcklbw m0, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
248 punpckhbw m1, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
249 pmullw m0, m6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
250 pmullw m1, m6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
251 paddw m2, m0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
252 paddw m3, m1 ; [mm2,mm3] += C * src[0..7]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
253
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
254 movq m1, [r1+1]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
255 movq m0, m1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
256 movq m4, m1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
257 punpcklbw m0, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
258 punpckhbw m4, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
259 pmullw m0, [rsp+8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
260 pmullw m4, [rsp+8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
261 paddw m2, m0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
262 paddw m3, m4 ; [mm2,mm3] += D * src[1..8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
263 movq m0, [r1]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
264
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
265 paddw m2, [rnd_2d_%2+rnd_bias*8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
266 paddw m3, [rnd_2d_%2+rnd_bias*8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
267 psrlw m2, 6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
268 psrlw m3, 6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
269 packuswb m2, m3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
270 CHROMAMC_AVG m2, [dest_reg]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
271 movq [dest_reg], m2 ; dst[0..7] = ([mm2,mm3] + rnd) >> 6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
272
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
273 add dest_reg, r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
274 dec r3d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
275 jne .next2drow
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
276 mov rsp, r6 ; restore stack pointer
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
277 RET
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
278 %endmacro
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
279
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
; chroma_mc4_mmx_func prefix, codec, suffix
; Expands to the function %1_%2_chroma_mc4_%3: a 4-pixel-wide bilinear chroma
; filter that produces two output rows per loop iteration.
; Registers as used by the body:
;   r0  = destination pointer, r1 = source pointer
;   r2  = stride, added to both r0 and r1 (sign-extended on x86-64)
;   r3d = rows left, r4d = x fraction, r5d = y fraction
;   m2/m3 = x/y broadcast to four words, m4/m5 = [pw_8] - x / [pw_8] - y
;   m7  = 0, used to unpack bytes to words
; Each source row is filtered horizontally as m4*src[i] + m2*src[i+1]. The
; horizontal result of the previous row (kept in m6 or m0) is weighted by m5,
; the current row by m3, the rounding constant is added and the sum is
; shifted right by 6.
; The rounding constant is [rnd_2d_%2 + rnd_bias*8]. rnd_bias is 0 except for
; rv40, where it is ((y & 6)*4 + x) >> 1 and selects an 8-byte entry of
; rnd_rv40_2d_tbl (whose address is first loaded into r11 when PIC is defined).
; The loop subtracts 2 from r3d and exits only on exactly zero, so the row
; count is presumably always even and nonzero -- TODO confirm against callers.
; CHROMAMC_AVG4 must be %defined before expansion; the register passed as its
; second argument holds no live value at either call site.
280 %macro chroma_mc4_mmx_func 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
281 cglobal %1_%2_chroma_mc4_%3, 6, 6, 0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
282 %ifdef ARCH_X86_64
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
283 movsxd r2, r2d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
284 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
285 pxor m7, m7 ; m7 = 0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
286 movd m2, r4d ; x
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
287 movd m3, r5d ; y
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
288 movq m4, [pw_8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
289 movq m5, [pw_8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
290 punpcklwd m2, m2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
291 punpcklwd m3, m3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
292 punpcklwd m2, m2 ; m2 = x in all four words
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
293 punpcklwd m3, m3 ; m3 = y in all four words
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
294 psubw m4, m2 ; m4 = [pw_8] - x
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
295 psubw m5, m3 ; m5 = [pw_8] - y
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
296
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
297 %ifidn %2, rv40
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
298 %ifdef PIC
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
299 lea r11, [rnd_rv40_2d_tbl]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
300 %define rnd_2d_rv40 r11
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
301 %else
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
302 %define rnd_2d_rv40 rnd_rv40_2d_tbl
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
303 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
304 and r5, 6 ; &~1 for mx/my=[0,7]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
305 lea r5, [r5*4+r4]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
306 sar r5d, 1 ; r5 = ((y & 6)*4 + x) >> 1 = rounding table index
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
307 %define rnd_bias r5
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
308 %else ; vc1, h264
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
309 %define rnd_bias 0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
310 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
311
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
312 movd m0, [r1 ]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
313 movd m6, [r1+1]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
314 add r1, r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
315 punpcklbw m0, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
316 punpcklbw m6, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
317 pmullw m0, m4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
318 pmullw m6, m2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
319 paddw m6, m0 ; m6 = horizontally filtered first row
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
320
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
321 .next2rows
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
322 movd m0, [r1 ]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
323 movd m1, [r1+1]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
324 add r1, r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
325 punpcklbw m0, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
326 punpcklbw m1, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
327 pmullw m0, m4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
328 pmullw m1, m2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
329 paddw m1, m0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
330 movq m0, m1 ; keep this row's horizontal result for the second half
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
331
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
332 pmullw m6, m5
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
333 pmullw m1, m3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
334 paddw m6, [rnd_2d_%2+rnd_bias*8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
335 paddw m1, m6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
336 psrlw m1, 6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
337 packuswb m1, m1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
338 CHROMAMC_AVG4 m1, m6, [r0]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
339 movd [r0], m1 ; store 4 output bytes (first row of the pair)
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
340 add r0, r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
341
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
342 movd m6, [r1 ]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
343 movd m1, [r1+1]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
344 add r1, r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
345 punpcklbw m6, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
346 punpcklbw m1, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
347 pmullw m6, m4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
348 pmullw m1, m2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
349 paddw m1, m6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
350 movq m6, m1 ; keep this row's horizontal result for the next iteration
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
351 pmullw m0, m5
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
352 pmullw m1, m3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
353 paddw m0, [rnd_2d_%2+rnd_bias*8]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
354 paddw m1, m0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
355 psrlw m1, 6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
356 packuswb m1, m1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
357 CHROMAMC_AVG4 m1, m0, [r0]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
358 movd [r0], m1 ; store 4 output bytes (second row of the pair)
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
359 add r0, r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
360 sub r3d, 2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
361 jnz .next2rows
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
362 REP_RET
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
363 %endmacro
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
364
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
; chroma_mc2_mmx_func prefix, codec, suffix
; Expands to the function %1_%2_chroma_mc2_%3: a 2-pixel-wide bilinear chroma
; filter that produces one output row per loop iteration.
; Registers as used by the body:
;   r0  = destination pointer, r1 = source pointer
;   r2  = stride, added to both r0 and r1 (sign-extended on x86-64)
;   r3d = rows left, r4d = x fraction, r5d = y fraction, r6d = scratch
; The four weights are built in general-purpose registers as packed 16-bit
; pairs and then duplicated into MMX registers:
;   A = (8-x)*(8-y), B = x*(8-y)  ->  m5 = {A,B,A,B}
;   C = (8-x)*y,     D = x*y      ->  m6 = {C,D,C,D}
; NOTE(review): the packed arithmetic assumes 0 <= x, y <= 8 so the 16-bit
; halves never carry or borrow into each other -- confirm against callers.
; Each source row is unpacked to words and shuffled to src[0,1,1,2], so one
; pmaddwd yields the horizontal sums for both output pixels as two dwords.
; m2 carries the shuffled previous row from one iteration to the next.
; The rounding constant is [rnd_2d_%2]; unlike the mc4 macro there is no
; rnd_bias term here.
; Only two bytes are stored per row (via r5w). CHROMAMC_AVG4 is handed [r0];
; with COPY_AVG as defined in this file that is a 4-byte movd load.
365 %macro chroma_mc2_mmx_func 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
366 cglobal %1_%2_chroma_mc2_%3, 6, 7, 0
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
367 %ifdef ARCH_X86_64
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
368 movsxd r2, r2d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
369 %endif
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
370
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
371 mov r6d, r4d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
372 shl r4d, 16
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
373 sub r4d, r6d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
374 add r4d, 8 ; r4d = x<<16 | (8-x)
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
375 imul r5d, r4d ; x*y<<16 | y*(8-x)
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
376 shl r4d, 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
377 sub r4d, r5d ; x*(8-y)<<16 | (8-x)*(8-y)
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
378
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
379 movd m5, r4d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
380 movd m6, r5d
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
381 punpckldq m5, m5 ; mm5 = {A,B,A,B}
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
382 punpckldq m6, m6 ; mm6 = {C,D,C,D}
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
383 pxor m7, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
384 movd m2, [r1]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
385 punpcklbw m2, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
386 pshufw m2, m2, 0x94 ; mm2 = src[0,1,1,2] (first row)
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
387
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
388 .nextrow
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
389 add r1, r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
390 movq m1, m2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
391 pmaddwd m1, m5 ; mm1 = A * src[0,1] + B * src[1,2]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
392 movd m0, [r1]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
393 punpcklbw m0, m7
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
394 pshufw m0, m0, 0x94 ; mm0 = src[0,1,1,2]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
395 movq m2, m0 ; keep the shuffled row for the next iteration
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
396 pmaddwd m0, m6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
397 paddw m1, [rnd_2d_%2]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
398 paddw m1, m0 ; mm1 += C * src[0,1] + D * src[1,2]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
399 psrlw m1, 6
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
400 packssdw m1, m7 ; dwords -> words
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
401 packuswb m1, m7 ; words -> bytes
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
402 CHROMAMC_AVG4 m1, m3, [r0]
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
403 movd r5d, m1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
404 mov [r0], r5w ; store the two output bytes
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
405 add r0, r2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
406 sub r3d, 1
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
407 jnz .nextrow
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
408 REP_RET
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
409 %endmacro
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
410
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
; Per-codec rounding constants. The filter macros select one by pasting their
; codec argument into the name (e.g. rnd_2d_%2). The pw_* symbols are defined
; outside this part of the file.
411 %define rnd_1d_h264 pw_4
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
412 %define rnd_2d_h264 pw_32
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
413 %define rnd_1d_vc1 pw_3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
414 %define rnd_2d_vc1 pw_28
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
415
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
; NOTHING: expands to no code. It accepts 2 or 3 arguments so it can stand in
; for both CHROMAMC_AVG (2 arguments) and CHROMAMC_AVG4 (3 arguments) when the
; "put" variants are generated.
416 %macro NOTHING 2-3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
417 %endmacro
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
; DIRECT_AVG dst, src: averages %2 into %1 with a single PAVG instruction.
; PAVG must be %defined to the actual averaging mnemonic before this macro is
; expanded.
418 %macro DIRECT_AVG 2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
419 PAVG %1, %2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
420 %endmacro
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
; COPY_AVG dst, tmp, src: loads 32 bits from %3 into the scratch register %2
; with movd, then averages %2 into %1 with PAVG. %2 is clobbered.
; PAVG must be %defined to the actual averaging mnemonic before this macro is
; expanded.
421 %macro COPY_AVG 3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
422 movd %2, %3
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
423 PAVG %1, %2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
424 %endmacro
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
425
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
; "put" variants: both averaging hooks are mapped to NOTHING, so the filter
; result is stored without being averaged with the destination.
426 INIT_MMX
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
427 %define CHROMAMC_AVG NOTHING
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
428 %define CHROMAMC_AVG4 NOTHING
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
429 chroma_mc8_mmx_func put, h264, mmx_rnd
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
430 chroma_mc8_mmx_func put, vc1, mmx_nornd
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
431 chroma_mc8_mmx_func put, rv40, mmx
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
432 chroma_mc4_mmx_func put, h264, mmx
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
433 chroma_mc4_mmx_func put, rv40, mmx
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
434 chroma_mc2_mmx_func put, h264, mmx2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
435
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
; "avg" variants with the mmx2 suffix: the averaging hooks now emit code, and
; PAVG is mapped to pavgb.
436 %define CHROMAMC_AVG DIRECT_AVG
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
437 %define CHROMAMC_AVG4 COPY_AVG
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
438 %define PAVG pavgb
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
439 chroma_mc8_mmx_func avg, h264, mmx2_rnd
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
440 chroma_mc8_mmx_func avg, vc1, mmx2_nornd
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
441 chroma_mc8_mmx_func avg, rv40, mmx2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
442 chroma_mc4_mmx_func avg, h264, mmx2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
443 chroma_mc4_mmx_func avg, rv40, mmx2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
444 chroma_mc2_mmx_func avg, h264, mmx2
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
445
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
; "avg" variants with the 3dnow suffix: only PAVG is redefined (to pavgusb);
; CHROMAMC_AVG and CHROMAMC_AVG4 keep their most recent definitions from
; earlier in the file. No mc2 variant is generated for this suffix.
446 %define PAVG pavgusb
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
447 chroma_mc8_mmx_func avg, h264, 3dnow_rnd
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
448 chroma_mc8_mmx_func avg, vc1, 3dnow_nornd
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
449 chroma_mc8_mmx_func avg, rv40, 3dnow
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
450 chroma_mc4_mmx_func avg, h264, 3dnow
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
451 chroma_mc4_mmx_func avg, rv40, 3dnow
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
452
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
;-----------------------------------------------------------------------------
; chroma_mc8_ssse3_func op, codec, suffix
;   %1 = put | avg   (avg also loads the destination rows and applies
;                     CHROMAMC_AVG before storing)
;   %2 = codec tag; selects the rounding constants rnd_2d_%2 / rnd_1d_%2
;   %3 = suffix of the emitted symbol %1_%2_chroma_mc8_%3
;
; Register roles as used by the body (cglobal: 6 args, 7 GPRs, 8 vector regs):
;   r0 = destination pointer      r1 = source pointer
;   r2 = stride (used for both)   r3 = rows remaining
;   r4 = x fraction (mx)          r5 = y fraction (my)
;   r6 = scratch
; NOTE(review): presumably called as (dst, src, stride, h, mx, my) with
; mx, my in 0..7 and h even - confirm against the C prototype.
;
; Four paths: plain copy (mx == my == 0), full bilinear, horizontal-only
; (my == 0) and vertical-only (mx == 0). Every filter loop produces two
; 8-pixel rows per iteration and decrements r3d by 2.
; Must be expanded under INIT_XMM (uses movdqa / pshuflw / movlhps on mN).
;-----------------------------------------------------------------------------
%macro chroma_mc8_ssse3_func 3
cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
%ifdef ARCH_X86_64
    movsxd        r2, r2d         ; widen the 32-bit stride for 64-bit addressing
%endif
    mov          r6d, r5d
    or           r6d, r4d
    jne .at_least_one_non_zero
    ; mx == 0 AND my == 0 - no filter needed
    mv0_pixels_mc8
    REP_RET

.at_least_one_non_zero:
    test         r5d, r5d
    je .my_is_zero
    test         r4d, r4d
    je .mx_is_zero

    ; general case, bilinear
    ; Build two 16-bit words, each holding a pair of byte coefficients that
    ; pmaddubsw applies to an interleaved (pixel, right neighbour) byte pair.
    mov          r6d, r4d
    shl          r4d, 8
    sub           r4, r6
    add           r4, 8           ; x*255+8 = x<<8 | (8-x)
    mov           r6, 8
    sub          r6d, r5d
    imul          r6, r4          ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
    imul         r4d, r5d         ;    y *(x*255+8) =    y *x<<8 |    y *(8-x)

    movd          m7, r6d         ; m7 = weights for the upper row of each output
    movd          m6, r4d         ; m6 = weights for the lower row of each output
    movdqa        m5, [rnd_2d_%2] ; rounding term added before the >>6
    pshuflw       m7, m7, 0       ; broadcast the low word across the low half...
    pshuflw       m6, m6, 0
    movlhps       m7, m7          ; ...and duplicate it into the high half
    movlhps       m6, m6

    ; Prime m0 with the first source row, interleaved with itself shifted by
    ; one pixel; m0 carries the "previous row" from one iteration to the next.
    movq          m0, [r1     ]
    movq          m1, [r1   +1]
    punpcklbw     m0, m1
    add           r1, r2
.next2rows:
    movq          m1, [r1     ]
    movq          m2, [r1   +1]
    movq          m3, [r1+r2  ]
    movq          m4, [r1+r2+1]
    lea           r1, [r1+r2*2]
    punpcklbw     m1, m2          ; m1 = row n+1, interleaved
    punpcklbw     m3, m4          ; m3 = row n+2, interleaved
    movdqa        m2, m1          ; row n+1 is also the upper row of output 1
    movdqa        m4, m3          ; keep row n+2 for the next iteration
    pmaddubsw     m0, m7          ; output 0: upper-row contribution (row n)
    pmaddubsw     m1, m6          ; output 0: lower-row contribution (row n+1)
    pmaddubsw     m2, m7          ; output 1: upper-row contribution (row n+1)
    pmaddubsw     m3, m6          ; output 1: lower-row contribution (row n+2)
    paddw         m0, m5
    paddw         m2, m5
    paddw         m1, m0
    paddw         m3, m2
    movdqa        m0, m4          ; row n+2 becomes the next "previous row"
    psrlw         m1, 6           ; weights sum to 64
    psrlw         m3, 6
%ifidn %1, avg
    movq          m2, [r0   ]     ; existing destination rows, low/high halves
    movhps        m2, [r0+r2]
%endif
    packuswb      m1, m3          ; two output rows packed into one register
    CHROMAMC_AVG  m1, m2
    movq     [r0   ], m1
    movhps   [r0+r2], m1
    sub          r3d, 2
    lea           r0, [r0+r2*2]   ; lea leaves the flags from sub intact
    jg .next2rows
    REP_RET

.my_is_zero:
    ; horizontal-only filter: weights (8-x, x) on (pixel, right neighbour)
    mov          r5d, r4d         ; r5 (my == 0 here) reused as scratch
    shl          r4d, 8
    add           r4, 8
    sub           r4, r5          ; 255*x+8 = x<<8 | (8-x)
    movd          m7, r4d
    movq          m6, [rnd_1d_%2] ; rounding term added before the >>3
    pshuflw       m7, m7, 0
    movlhps       m6, m6
    movlhps       m7, m7

.next2xrows:
    movq          m0, [r1     ]
    movq          m1, [r1   +1]
    movq          m2, [r1+r2  ]
    movq          m3, [r1+r2+1]
    punpcklbw     m0, m1
    punpcklbw     m2, m3
    pmaddubsw     m0, m7
    pmaddubsw     m2, m7
%ifidn %1, avg
    movq          m4, [r0   ]
    movhps        m4, [r0+r2]
%endif
    paddw         m0, m6
    paddw         m2, m6
    psrlw         m0, 3           ; weights sum to 8
    psrlw         m2, 3
    packuswb      m0, m2
    CHROMAMC_AVG  m0, m4
    movq     [r0   ], m0
    movhps   [r0+r2], m0
    sub          r3d, 2
    lea           r0, [r0+r2*2]
    lea           r1, [r1+r2*2]
    jg .next2xrows
    REP_RET

.mx_is_zero:
    ; vertical-only filter: weights (8-y, y) on (pixel, pixel one row below)
    mov          r4d, r5d         ; r4 (mx == 0 here) reused as scratch
    shl          r5d, 8
    add           r5, 8
    sub           r5, r4          ; 255*y+8 = y<<8 | (8-y)
    movd          m7, r5d
    movq          m6, [rnd_1d_%2]
    pshuflw       m7, m7, 0
    movlhps       m6, m6
    movlhps       m7, m7

.next2yrows:
    movq          m0, [r1     ]
    movq          m1, [r1+r2  ]
    movdqa        m2, m1          ; middle row feeds both output rows
    movq          m3, [r1+r2*2]
    punpcklbw     m0, m1
    punpcklbw     m2, m3
    pmaddubsw     m0, m7
    pmaddubsw     m2, m7
%ifidn %1, avg
    movq          m4, [r0   ]
    movhps        m4, [r0+r2]
%endif
    paddw         m0, m6
    paddw         m2, m6
    psrlw         m0, 3
    psrlw         m2, 3
    packuswb      m0, m2
    CHROMAMC_AVG  m0, m4
    movq     [r0   ], m0
    movhps   [r0+r2], m0
    sub          r3d, 2
    lea           r0, [r0+r2*2]
    lea           r1, [r1+r2*2]
    jg .next2yrows
    REP_RET
%endmacro
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
603
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
;-----------------------------------------------------------------------------
; chroma_mc4_ssse3_func op, codec, suffix
;   %1 = put | avg,  %2 = codec tag,  %3 = symbol suffix
;   Emits %1_%2_chroma_mc4_%3.
;
; Register roles as used by the body (cglobal: 6 args, 7 GPRs, 0 XMM regs):
;   r0 = destination pointer      r1 = source pointer
;   r2 = stride (used for both)   r3 = rows remaining
;   r4 = x fraction (mx)          r5 = y fraction (my)
;   r6 = scratch
; NOTE(review): presumably called as (dst, src, stride, h, mx, my) with
; mx, my in 0..7 and h even - confirm against the C prototype.
;
; One bilinear path handles every mx/my combination, including zero.
; Two 4-pixel rows are written per iteration. Must be expanded under
; INIT_MMX: the body uses pshufw and 64-bit movq between mN registers.
; No emms is issued here.
;-----------------------------------------------------------------------------
%macro chroma_mc4_ssse3_func 3
cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
%ifdef ARCH_X86_64
    movsxd        r2, r2d         ; widen the 32-bit stride for 64-bit addressing
%endif
    mov           r6, r4
    shl          r4d, 8
    sub          r4d, r6d
    add          r4d, 8           ; x*255+8 = x<<8 | (8-x)
    mov           r6, 8
    sub          r6d, r5d
    imul         r6d, r4d         ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
    imul         r4d, r5d         ;    y *(x*255+8) =    y *x<<8 |    y *(8-x)

    movd          m7, r6d         ; m7 = weights for the upper row of each output
    movd          m6, r4d         ; m6 = weights for the lower row of each output
    movq          m5, [pw_32]     ; rounding term added before the >>6
    pshufw        m7, m7, 0       ; broadcast the low word to all four words
    pshufw        m6, m6, 0

    ; Prime m0 with the first source row interleaved with its right
    ; neighbours; m0 carries the "previous row" across iterations.
    movd          m0, [r1     ]
    punpcklbw     m0, [r1   +1]
    add           r1, r2
.next2rows:
    movd          m1, [r1     ]
    movd          m3, [r1+r2  ]
    punpcklbw     m1, [r1   +1]   ; m1 = row n+1, interleaved
    punpcklbw     m3, [r1+r2+1]   ; m3 = row n+2, interleaved
    lea           r1, [r1+r2*2]
    movq          m2, m1          ; row n+1 is also the upper row of output 1
    movq          m4, m3          ; keep row n+2 for the next iteration
    pmaddubsw     m0, m7
    pmaddubsw     m1, m6
    pmaddubsw     m2, m7
    pmaddubsw     m3, m6
    paddw         m0, m5
    paddw         m2, m5
    paddw         m1, m0
    paddw         m3, m2
    movq          m0, m4          ; row n+2 becomes the next "previous row"
    psrlw         m1, 6           ; weights sum to 64
    psrlw         m3, 6
    packuswb      m1, m1
    packuswb      m3, m3
    CHROMAMC_AVG  m1, [r0   ]
    CHROMAMC_AVG  m3, [r0+r2]
    movd     [r0   ], m1
    movd     [r0+r2], m3
    sub          r3d, 2
    lea           r0, [r0+r2*2]   ; lea leaves the flags from sub intact
    jg .next2rows
    REP_RET
%endmacro
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
657
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff changeset
; SSSE3 instantiations of the macros chroma_mc8_ssse3_func and
; chroma_mc4_ssse3_func.
;
; "put" group: CHROMAMC_AVG is mapped to NOTHING for these expansions.
; NOTE(review): NOTHING / DIRECT_AVG are defined elsewhere in this file;
; presumably NOTHING expands to no instructions - confirm.
; The 8-wide functions are expanded under INIT_XMM and the 4-wide ones under
; INIT_MMX, matching the instructions each macro body uses.
%define CHROMAMC_AVG NOTHING
INIT_XMM
chroma_mc8_ssse3_func put, h264, ssse3_rnd
chroma_mc8_ssse3_func put, vc1,  ssse3_nornd
INIT_MMX
chroma_mc4_ssse3_func put, h264, ssse3

; "avg" group: CHROMAMC_AVG is mapped to DIRECT_AVG and PAVG to pavgb.
%define CHROMAMC_AVG DIRECT_AVG
%define PAVG         pavgb
INIT_XMM
chroma_mc8_ssse3_func avg, h264, ssse3_rnd
chroma_mc8_ssse3_func avg, vc1,  ssse3_nornd
INIT_MMX
chroma_mc4_ssse3_func avg, h264, ssse3