Mercurial > libavcodec.hg
annotate x86/h264_chromamc.asm @ 12483:0159a19bfff7 libavcodec
aacdec: Rework channel mapping compatibility hacks.
For a PCE based configuration map the channels solely based on tags.
For an indexed configuration map the channels solely based on position.
This works with all known exotic samples including al17, elem_id0, bad_concat,
and lfe_is_sce.
author | alexc |
---|---|
date | Fri, 10 Sep 2010 18:01:48 +0000 |
parents | b242eb86ea9a |
children |
rev | line source |
---|---|
12437
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
1 ;****************************************************************************** |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
2 ;* MMX/SSSE3-optimized functions for H264 chroma MC |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
3 ;* Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>, |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
4 ;* 2005-2008 Loren Merritt |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
5 ;* |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
6 ;* This file is part of FFmpeg. |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
7 ;* |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
8 ;* FFmpeg is free software; you can redistribute it and/or |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
9 ;* modify it under the terms of the GNU Lesser General Public |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
10 ;* License as published by the Free Software Foundation; either |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
11 ;* version 2.1 of the License, or (at your option) any later version. |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
12 ;* |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
13 ;* FFmpeg is distributed in the hope that it will be useful, |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
16 ;* Lesser General Public License for more details. |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
17 ;* |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
18 ;* You should have received a copy of the GNU Lesser General Public |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
19 ;* License along with FFmpeg; if not, write to the Free Software |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
21 ;****************************************************************************** |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
22 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
23 %include "x86inc.asm" |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
24 %include "x86util.asm" |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
25 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
26 SECTION_RODATA |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
27 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
28 rnd_rv40_2d_tbl: times 4 dw 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
29 times 4 dw 16 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
30 times 4 dw 32 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
31 times 4 dw 16 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
32 times 4 dw 32 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
33 times 4 dw 28 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
34 times 4 dw 32 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
35 times 4 dw 28 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
36 times 4 dw 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
37 times 4 dw 32 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
38 times 4 dw 16 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
39 times 4 dw 32 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
40 times 4 dw 32 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
41 times 4 dw 28 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
42 times 4 dw 32 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
43 times 4 dw 28 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
44 rnd_rv40_1d_tbl: times 4 dw 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
45 times 4 dw 2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
46 times 4 dw 4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
47 times 4 dw 2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
48 times 4 dw 4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
49 times 4 dw 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
50 times 4 dw 4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
51 times 4 dw 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
52 times 4 dw 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
53 times 4 dw 4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
54 times 4 dw 2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
55 times 4 dw 4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
56 times 4 dw 4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
57 times 4 dw 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
58 times 4 dw 4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
59 times 4 dw 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
60 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
61 cextern pw_3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
62 cextern pw_4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
63 cextern pw_8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
64 cextern pw_28 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
65 cextern pw_32 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
66 cextern pw_64 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
67 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
68 SECTION .text |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
69 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
70 %macro mv0_pixels_mc8 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
71 lea r4, [r2*2 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
72 .next4rows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
73 movq mm0, [r1 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
74 movq mm1, [r1+r2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
75 CHROMAMC_AVG mm0, [r0 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
76 CHROMAMC_AVG mm1, [r0+r2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
77 movq [r0 ], mm0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
78 movq [r0+r2], mm1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
79 add r0, r4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
80 add r1, r4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
81 movq mm0, [r1 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
82 movq mm1, [r1+r2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
83 CHROMAMC_AVG mm0, [r0 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
84 CHROMAMC_AVG mm1, [r0+r2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
85 add r1, r4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
86 movq [r0 ], mm0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
87 movq [r0+r2], mm1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
88 add r0, r4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
89 sub r3d, 4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
90 jne .next4rows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
91 %endmacro |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
92 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
93 %macro chroma_mc8_mmx_func 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
94 ; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/, |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
95 ; int stride, int h, int mx, int my) |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
96 cglobal %1_%2_chroma_mc8_%3, 6, 7, 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
97 %ifdef ARCH_X86_64 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
98 movsxd r2, r2d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
99 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
100 mov r6d, r5d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
101 or r6d, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
102 jne .at_least_one_non_zero |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
103 ; mx == 0 AND my == 0 - no filter needed |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
104 mv0_pixels_mc8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
105 REP_RET |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
106 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
107 .at_least_one_non_zero |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
108 %ifidn %2, rv40 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
109 %ifdef PIC |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
110 %define rnd_1d_rv40 r11 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
111 %define rnd_2d_rv40 r11 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
112 %else ; no-PIC |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
113 %define rnd_1d_rv40 rnd_rv40_1d_tbl |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
114 %define rnd_2d_rv40 rnd_rv40_2d_tbl |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
115 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
116 %ifdef ARCH_X86_64 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
117 mov r10, r5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
118 and r10, 6 ; &~1 for mx/my=[0,7] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
119 lea r10, [r10*4+r4] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
120 sar r10d, 1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
121 %define rnd_bias r10 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
122 %define dest_reg r0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
123 %else ; x86-32 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
124 mov r0, r5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
125 and r0, 6 ; &~1 for mx/my=[0,7] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
126 lea r0, [r0*4+r4] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
127 sar r0d, 1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
128 %define rnd_bias r0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
129 %define dest_reg r5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
130 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
131 %else ; vc1, h264 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
132 %define rnd_bias 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
133 %define dest_reg r0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
134 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
135 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
136 test r5d, r5d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
137 mov r6, 1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
138 je .my_is_zero |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
139 test r4d, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
140 mov r6, r2 ; dxy = x ? 1 : stride |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
141 jne .both_non_zero |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
142 .my_is_zero |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
143 ; mx == 0 XOR my == 0 - 1 dimensional filter only |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
144 or r4d, r5d ; x + y |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
145 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
146 %ifidn %2, rv40 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
147 %ifdef PIC |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
148 lea r11, [rnd_rv40_1d_tbl] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
149 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
150 %ifndef ARCH_X86_64 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
151 mov r5, r0m |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
152 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
153 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
154 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
155 movd m5, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
156 movq m4, [pw_8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
157 movq m6, [rnd_1d_%2+rnd_bias*8] ; mm6 = rnd >> 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
158 punpcklwd m5, m5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
159 punpckldq m5, m5 ; mm5 = B = x |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
160 pxor m7, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
161 psubw m4, m5 ; mm4 = A = 8-x |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
162 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
163 .next1drow |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
164 movq m0, [r1 ] ; mm0 = src[0..7] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
165 movq m2, [r1+r6] ; mm1 = src[1..8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
166 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
167 movq m1, m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
168 movq m3, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
169 punpcklbw m0, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
170 punpckhbw m1, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
171 punpcklbw m2, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
172 punpckhbw m3, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
173 pmullw m0, m4 ; [mm0,mm1] = A * src[0..7] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
174 pmullw m1, m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
175 pmullw m2, m5 ; [mm2,mm3] = B * src[1..8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
176 pmullw m3, m5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
177 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
178 paddw m0, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
179 paddw m1, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
180 paddw m0, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
181 paddw m1, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
182 psrlw m0, 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
183 psrlw m1, 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
184 packuswb m0, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
185 CHROMAMC_AVG m0, [dest_reg] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
186 movq [dest_reg], m0 ; dst[0..7] = (A * src[0..7] + B * src[1..8] + (rnd >> 3)) >> 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
187 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
188 add dest_reg, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
189 add r1, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
190 dec r3d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
191 jne .next1drow |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
192 REP_RET |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
193 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
194 .both_non_zero ; general case, bilinear |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
195 movd m4, r4d ; x |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
196 movd m6, r5d ; y |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
197 %ifidn %2, rv40 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
198 %ifdef PIC |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
199 lea r11, [rnd_rv40_2d_tbl] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
200 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
201 %ifndef ARCH_X86_64 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
202 mov r5, r0m |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
203 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
204 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
205 mov r6, rsp ; backup stack pointer |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
206 and rsp, ~(mmsize-1) ; align stack |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
207 sub rsp, 16 ; AA and DD |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
208 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
209 punpcklwd m4, m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
210 punpcklwd m6, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
211 punpckldq m4, m4 ; mm4 = x words |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
212 punpckldq m6, m6 ; mm6 = y words |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
213 movq m5, m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
214 pmullw m4, m6 ; mm4 = x * y |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
215 psllw m5, 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
216 psllw m6, 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
217 movq m7, m5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
218 paddw m7, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
219 movq [rsp+8], m4 ; DD = x * y |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
220 psubw m5, m4 ; mm5 = B = 8x - xy |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
221 psubw m6, m4 ; mm6 = C = 8y - xy |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
222 paddw m4, [pw_64] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
223 psubw m4, m7 ; mm4 = A = xy - (8x+8y) + 64 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
224 pxor m7, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
225 movq [rsp ], m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
226 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
227 movq m0, [r1 ] ; mm0 = src[0..7] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
228 movq m1, [r1+1] ; mm1 = src[1..8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
229 .next2drow |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
230 add r1, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
231 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
232 movq m2, m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
233 movq m3, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
234 punpckhbw m0, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
235 punpcklbw m1, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
236 punpcklbw m2, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
237 punpckhbw m3, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
238 pmullw m0, [rsp] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
239 pmullw m2, [rsp] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
240 pmullw m1, m5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
241 pmullw m3, m5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
242 paddw m2, m1 ; mm2 = A * src[0..3] + B * src[1..4] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
243 paddw m3, m0 ; mm3 = A * src[4..7] + B * src[5..8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
244 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
245 movq m0, [r1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
246 movq m1, m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
247 punpcklbw m0, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
248 punpckhbw m1, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
249 pmullw m0, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
250 pmullw m1, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
251 paddw m2, m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
252 paddw m3, m1 ; [mm2,mm3] += C * src[0..7] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
253 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
254 movq m1, [r1+1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
255 movq m0, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
256 movq m4, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
257 punpcklbw m0, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
258 punpckhbw m4, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
259 pmullw m0, [rsp+8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
260 pmullw m4, [rsp+8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
261 paddw m2, m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
262 paddw m3, m4 ; [mm2,mm3] += D * src[1..8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
263 movq m0, [r1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
264 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
265 paddw m2, [rnd_2d_%2+rnd_bias*8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
266 paddw m3, [rnd_2d_%2+rnd_bias*8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
267 psrlw m2, 6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
268 psrlw m3, 6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
269 packuswb m2, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
270 CHROMAMC_AVG m2, [dest_reg] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
271 movq [dest_reg], m2 ; dst[0..7] = ([mm2,mm3] + rnd) >> 6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
272 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
273 add dest_reg, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
274 dec r3d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
275 jne .next2drow |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
276 mov rsp, r6 ; restore stack pointer |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
277 RET |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
278 %endmacro |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
279 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
280 %macro chroma_mc4_mmx_func 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
281 cglobal %1_%2_chroma_mc4_%3, 6, 6, 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
282 %ifdef ARCH_X86_64 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
283 movsxd r2, r2d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
284 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
285 pxor m7, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
286 movd m2, r4d ; x |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
287 movd m3, r5d ; y |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
288 movq m4, [pw_8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
289 movq m5, [pw_8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
290 punpcklwd m2, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
291 punpcklwd m3, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
292 punpcklwd m2, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
293 punpcklwd m3, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
294 psubw m4, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
295 psubw m5, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
296 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
297 %ifidn %2, rv40 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
298 %ifdef PIC |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
299 lea r11, [rnd_rv40_2d_tbl] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
300 %define rnd_2d_rv40 r11 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
301 %else |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
302 %define rnd_2d_rv40 rnd_rv40_2d_tbl |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
303 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
304 and r5, 6 ; &~1 for mx/my=[0,7] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
305 lea r5, [r5*4+r4] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
306 sar r5d, 1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
307 %define rnd_bias r5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
308 %else ; vc1, h264 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
309 %define rnd_bias 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
310 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
311 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
312 movd m0, [r1 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
313 movd m6, [r1+1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
314 add r1, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
315 punpcklbw m0, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
316 punpcklbw m6, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
317 pmullw m0, m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
318 pmullw m6, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
319 paddw m6, m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
320 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
321 .next2rows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
322 movd m0, [r1 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
323 movd m1, [r1+1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
324 add r1, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
325 punpcklbw m0, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
326 punpcklbw m1, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
327 pmullw m0, m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
328 pmullw m1, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
329 paddw m1, m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
330 movq m0, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
331 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
332 pmullw m6, m5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
333 pmullw m1, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
334 paddw m6, [rnd_2d_%2+rnd_bias*8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
335 paddw m1, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
336 psrlw m1, 6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
337 packuswb m1, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
338 CHROMAMC_AVG4 m1, m6, [r0] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
339 movd [r0], m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
340 add r0, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
341 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
342 movd m6, [r1 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
343 movd m1, [r1+1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
344 add r1, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
345 punpcklbw m6, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
346 punpcklbw m1, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
347 pmullw m6, m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
348 pmullw m1, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
349 paddw m1, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
350 movq m6, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
351 pmullw m0, m5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
352 pmullw m1, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
353 paddw m0, [rnd_2d_%2+rnd_bias*8] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
354 paddw m1, m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
355 psrlw m1, 6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
356 packuswb m1, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
357 CHROMAMC_AVG4 m1, m0, [r0] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
358 movd [r0], m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
359 add r0, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
360 sub r3d, 2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
361 jnz .next2rows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
362 REP_RET |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
363 %endmacro |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
364 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
365 %macro chroma_mc2_mmx_func 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
366 cglobal %1_%2_chroma_mc2_%3, 6, 7, 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
367 %ifdef ARCH_X86_64 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
368 movsxd r2, r2d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
369 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
370 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
371 mov r6d, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
372 shl r4d, 16 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
373 sub r4d, r6d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
374 add r4d, 8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
375 imul r5d, r4d ; x*y<<16 | y*(8-x) |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
376 shl r4d, 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
377 sub r4d, r5d ; x*(8-y)<<16 | (8-x)*(8-y) |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
378 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
379 movd m5, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
380 movd m6, r5d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
381 punpckldq m5, m5 ; mm5 = {A,B,A,B} |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
382 punpckldq m6, m6 ; mm6 = {C,D,C,D} |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
383 pxor m7, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
384 movd m2, [r1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
385 punpcklbw m2, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
386 pshufw m2, m2, 0x94 ; mm2 = src[0,1,1,2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
387 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
388 .nextrow |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
389 add r1, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
390 movq m1, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
391 pmaddwd m1, m5 ; mm1 = A * src[0,1] + B * src[1,2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
392 movd m0, [r1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
393 punpcklbw m0, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
394 pshufw m0, m0, 0x94 ; mm0 = src[0,1,1,2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
395 movq m2, m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
396 pmaddwd m0, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
397 paddw m1, [rnd_2d_%2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
398 paddw m1, m0 ; mm1 += C * src[0,1] + D * src[1,2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
399 psrlw m1, 6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
400 packssdw m1, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
401 packuswb m1, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
402 CHROMAMC_AVG4 m1, m3, [r0] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
403 movd r5d, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
404 mov [r0], r5w |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
405 add r0, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
406 sub r3d, 1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
407 jnz .nextrow |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
408 REP_RET |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
409 %endmacro |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
410 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
411 %define rnd_1d_h264 pw_4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
412 %define rnd_2d_h264 pw_32 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
413 %define rnd_1d_vc1 pw_3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
414 %define rnd_2d_vc1 pw_28 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
415 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
416 %macro NOTHING 2-3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
417 %endmacro |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
418 %macro DIRECT_AVG 2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
419 PAVG %1, %2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
420 %endmacro |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
421 %macro COPY_AVG 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
422 movd %2, %3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
423 PAVG %1, %2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
424 %endmacro |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
425 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
426 INIT_MMX |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
427 %define CHROMAMC_AVG NOTHING |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
428 %define CHROMAMC_AVG4 NOTHING |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
429 chroma_mc8_mmx_func put, h264, mmx_rnd |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
430 chroma_mc8_mmx_func put, vc1, mmx_nornd |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
431 chroma_mc8_mmx_func put, rv40, mmx |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
432 chroma_mc4_mmx_func put, h264, mmx |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
433 chroma_mc4_mmx_func put, rv40, mmx |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
434 chroma_mc2_mmx_func put, h264, mmx2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
435 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
436 %define CHROMAMC_AVG DIRECT_AVG |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
437 %define CHROMAMC_AVG4 COPY_AVG |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
438 %define PAVG pavgb |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
439 chroma_mc8_mmx_func avg, h264, mmx2_rnd |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
440 chroma_mc8_mmx_func avg, vc1, mmx2_nornd |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
441 chroma_mc8_mmx_func avg, rv40, mmx2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
442 chroma_mc4_mmx_func avg, h264, mmx2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
443 chroma_mc4_mmx_func avg, rv40, mmx2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
444 chroma_mc2_mmx_func avg, h264, mmx2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
445 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
446 %define PAVG pavgusb |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
447 chroma_mc8_mmx_func avg, h264, 3dnow_rnd |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
448 chroma_mc8_mmx_func avg, vc1, 3dnow_nornd |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
449 chroma_mc8_mmx_func avg, rv40, 3dnow |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
450 chroma_mc4_mmx_func avg, h264, 3dnow |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
451 chroma_mc4_mmx_func avg, rv40, 3dnow |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
452 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
453 %macro chroma_mc8_ssse3_func 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
454 cglobal %1_%2_chroma_mc8_%3, 6, 7, 8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
455 %ifdef ARCH_X86_64 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
456 movsxd r2, r2d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
457 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
458 mov r6d, r5d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
459 or r6d, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
460 jne .at_least_one_non_zero |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
461 ; mx == 0 AND my == 0 - no filter needed |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
462 mv0_pixels_mc8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
463 REP_RET |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
464 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
465 .at_least_one_non_zero |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
466 test r5d, r5d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
467 je .my_is_zero |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
468 test r4d, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
469 je .mx_is_zero |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
470 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
471 ; general case, bilinear |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
472 mov r6d, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
473 shl r4d, 8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
474 sub r4, r6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
475 add r4, 8 ; x*255+8 = x<<8 | (8-x) |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
476 mov r6, 8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
477 sub r6d, r5d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
478 imul r6, r4 ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x) |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
479 imul r4d, r5d ; y *(x*255+8) = y *x<<8 | y *(8-x) |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
480 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
481 movd m7, r6d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
482 movd m6, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
483 movdqa m5, [rnd_2d_%2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
484 pshuflw m7, m7, 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
485 pshuflw m6, m6, 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
486 movlhps m7, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
487 movlhps m6, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
488 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
489 movq m0, [r1 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
490 movq m1, [r1 +1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
491 punpcklbw m0, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
492 add r1, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
493 .next2rows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
494 movq m1, [r1 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
495 movq m2, [r1 +1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
496 movq m3, [r1+r2 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
497 movq m4, [r1+r2+1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
498 lea r1, [r1+r2*2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
499 punpcklbw m1, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
500 punpcklbw m3, m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
501 movdqa m2, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
502 movdqa m4, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
503 pmaddubsw m0, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
504 pmaddubsw m1, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
505 pmaddubsw m2, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
506 pmaddubsw m3, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
507 paddw m0, m5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
508 paddw m2, m5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
509 paddw m1, m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
510 paddw m3, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
511 movdqa m0, m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
512 psrlw m1, 6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
513 psrlw m3, 6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
514 %ifidn %1, avg |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
515 movq m2, [r0 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
516 movhps m2, [r0+r2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
517 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
518 packuswb m1, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
519 CHROMAMC_AVG m1, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
520 movq [r0 ], m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
521 movhps [r0+r2], m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
522 sub r3d, 2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
523 lea r0, [r0+r2*2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
524 jg .next2rows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
525 REP_RET |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
526 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
527 .my_is_zero |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
528 mov r5d, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
529 shl r4d, 8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
530 add r4, 8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
531 sub r4, r5 ; 255*x+8 = x<<8 | (8-x) |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
532 movd m7, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
533 movq m6, [rnd_1d_%2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
534 pshuflw m7, m7, 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
535 movlhps m6, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
536 movlhps m7, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
537 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
538 .next2xrows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
539 movq m0, [r1 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
540 movq m1, [r1 +1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
541 movq m2, [r1+r2 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
542 movq m3, [r1+r2+1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
543 punpcklbw m0, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
544 punpcklbw m2, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
545 pmaddubsw m0, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
546 pmaddubsw m2, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
547 %ifidn %1, avg |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
548 movq m4, [r0 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
549 movhps m4, [r0+r2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
550 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
551 paddw m0, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
552 paddw m2, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
553 psrlw m0, 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
554 psrlw m2, 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
555 packuswb m0, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
556 CHROMAMC_AVG m0, m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
557 movq [r0 ], m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
558 movhps [r0+r2], m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
559 sub r3d, 2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
560 lea r0, [r0+r2*2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
561 lea r1, [r1+r2*2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
562 jg .next2xrows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
563 REP_RET |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
564 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
565 .mx_is_zero |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
566 mov r4d, r5d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
567 shl r5d, 8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
568 add r5, 8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
569 sub r5, r4 ; 255*y+8 = y<<8 | (8-y) |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
570 movd m7, r5d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
571 movq m6, [rnd_1d_%2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
572 pshuflw m7, m7, 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
573 movlhps m6, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
574 movlhps m7, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
575 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
576 .next2yrows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
577 movq m0, [r1 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
578 movq m1, [r1+r2 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
579 movdqa m2, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
580 movq m3, [r1+r2*2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
581 punpcklbw m0, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
582 punpcklbw m2, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
583 pmaddubsw m0, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
584 pmaddubsw m2, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
585 %ifidn %1, avg |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
586 movq m4, [r0 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
587 movhps m4, [r0+r2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
588 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
589 paddw m0, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
590 paddw m2, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
591 psrlw m0, 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
592 psrlw m2, 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
593 packuswb m0, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
594 CHROMAMC_AVG m0, m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
595 movq [r0 ], m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
596 movhps [r0+r2], m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
597 sub r3d, 2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
598 lea r0, [r0+r2*2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
599 lea r1, [r1+r2*2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
600 jg .next2yrows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
601 REP_RET |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
602 %endmacro |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
603 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
604 %macro chroma_mc4_ssse3_func 3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
605 cglobal %1_%2_chroma_mc4_%3, 6, 7, 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
606 %ifdef ARCH_X86_64 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
607 movsxd r2, r2d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
608 %endif |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
609 mov r6, r4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
610 shl r4d, 8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
611 sub r4d, r6d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
612 add r4d, 8 ; x*255+8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
613 mov r6, 8 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
614 sub r6d, r5d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
615 imul r6d, r4d ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x) |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
616 imul r4d, r5d ; y *(x*255+8) = y *x<<8 | y *(8-x) |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
617 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
618 movd m7, r6d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
619 movd m6, r4d |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
620 movq m5, [pw_32] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
621 pshufw m7, m7, 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
622 pshufw m6, m6, 0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
623 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
624 movd m0, [r1 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
625 punpcklbw m0, [r1 +1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
626 add r1, r2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
627 .next2rows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
628 movd m1, [r1 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
629 movd m3, [r1+r2 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
630 punpcklbw m1, [r1 +1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
631 punpcklbw m3, [r1+r2+1] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
632 lea r1, [r1+r2*2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
633 movq m2, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
634 movq m4, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
635 pmaddubsw m0, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
636 pmaddubsw m1, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
637 pmaddubsw m2, m7 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
638 pmaddubsw m3, m6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
639 paddw m0, m5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
640 paddw m2, m5 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
641 paddw m1, m0 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
642 paddw m3, m2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
643 movq m0, m4 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
644 psrlw m1, 6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
645 psrlw m3, 6 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
646 packuswb m1, m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
647 packuswb m3, m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
648 CHROMAMC_AVG m1, [r0 ] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
649 CHROMAMC_AVG m3, [r0+r2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
650 movd [r0 ], m1 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
651 movd [r0+r2], m3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
652 sub r3d, 2 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
653 lea r0, [r0+r2*2] |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
654 jg .next2rows |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
655 REP_RET |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
656 %endmacro |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
657 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
658 %define CHROMAMC_AVG NOTHING |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
659 INIT_XMM |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
660 chroma_mc8_ssse3_func put, h264, ssse3_rnd |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
661 chroma_mc8_ssse3_func put, vc1, ssse3_nornd |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
662 INIT_MMX |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
663 chroma_mc4_ssse3_func put, h264, ssse3 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
664 |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
665 %define CHROMAMC_AVG DIRECT_AVG |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
666 %define PAVG pavgb |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
667 INIT_XMM |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
668 chroma_mc8_ssse3_func avg, h264, ssse3_rnd |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
669 chroma_mc8_ssse3_func avg, vc1, ssse3_nornd |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
670 INIT_MMX |
b242eb86ea9a
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents:
diff
changeset
|
671 chroma_mc4_ssse3_func avg, h264, ssse3 |