annotate x86/vp56dsp.asm @ 12418:e17840120b80 libavcodec

Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should help in fixing the Win64 fate failures.
author rbultje
date Wed, 25 Aug 2010 13:44:16 +0000
parents
children 37bb4de77908
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12418
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
1 ;******************************************************************************
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
2 ;* MMX/SSE2-optimized functions for the VP6 decoder
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
3 ;* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
4 ;* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
5 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
6 ;* This file is part of FFmpeg.
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
7 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
8 ;* FFmpeg is free software; you can redistribute it and/or
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
9 ;* modify it under the terms of the GNU Lesser General Public
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
10 ;* License as published by the Free Software Foundation; either
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
11 ;* version 2.1 of the License, or (at your option) any later version.
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
12 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
13 ;* FFmpeg is distributed in the hope that it will be useful,
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
16 ;* Lesser General Public License for more details.
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
17 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
18 ;* You should have received a copy of the GNU Lesser General Public
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
19 ;* License along with FFmpeg; if not, write to the Free Software
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
21 ;******************************************************************************
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
22
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
23 %include "x86inc.asm"
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
24 %include "x86util.asm"
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
25
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
26 cextern pw_64
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
27
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
28 SECTION .text
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
29
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
;------------------------------------------------------------------------------
; DIAG4_MMX base, off0, off1, off2, off3, dst
;
; One 4-tap filter step producing 8 output bytes (MMX flavour).
;   %1      register holding the source base address
;   %2..%5  offsets of the four taps relative to %1
;   %6      register holding the address the 8 result bytes are written to
;
; Expects:  m7 = 0 (used to zero-extend bytes to words) and the four splatted
;           weights at [rsp+8*11] .. [rsp+8*14], as stored by SPLAT4REGS_MMX.
; Clobbers: m0-m5.
;
; Each 8-byte load is split into a low half (m0/m1/m2) and a high half
; (m3/m4/m5) of four words; both halves go through the same arithmetic.
; The rounding constant is added straight from [pw_64]: m6 cannot be used for
; it because SPLAT4REGS_MMX leaves a splatted weight in m6.
;------------------------------------------------------------------------------
%macro DIAG4_MMX 6
    movq          m0, [%1+%2]
    movq          m1, [%1+%3]
    movq          m3, m0
    movq          m4, m1
    punpcklbw     m0, m7         ; tap 0, pixels 0..3
    punpcklbw     m1, m7         ; tap 1, pixels 0..3
    punpckhbw     m3, m7         ; tap 0, pixels 4..7
    punpckhbw     m4, m7         ; tap 1, pixels 4..7
    pmullw        m0, [rsp+8*11] ; src[x-8 ] * biweight [0]
    pmullw        m1, [rsp+8*12] ; src[x   ] * biweight [1]
    pmullw        m3, [rsp+8*11] ; src[x-8 ] * biweight [0]
    pmullw        m4, [rsp+8*12] ; src[x   ] * biweight [1]
    paddw         m0, m1
    paddw         m3, m4
    movq          m1, [%1+%4]
    movq          m2, [%1+%5]
    movq          m4, m1
    movq          m5, m2
    punpcklbw     m1, m7         ; tap 2, pixels 0..3
    punpcklbw     m2, m7         ; tap 3, pixels 0..3
    punpckhbw     m4, m7         ; tap 2, pixels 4..7 (high half, not low)
    punpckhbw     m5, m7         ; tap 3, pixels 4..7 (high half, not low)
    pmullw        m1, [rsp+8*13] ; src[x+8 ] * biweight [2]
    pmullw        m2, [rsp+8*14] ; src[x+16] * biweight [3]
    pmullw        m4, [rsp+8*13] ; src[x+8 ] * biweight [2]
    pmullw        m5, [rsp+8*14] ; src[x+16] * biweight [3]
    paddw         m1, m2
    paddw         m4, m5
    paddsw        m0, m1
    paddsw        m3, m4
    paddsw        m0, [pw_64]    ; Add 64
    paddsw        m3, [pw_64]    ; Add 64
    psraw         m0, 7
    psraw         m3, 7
    packuswb      m0, m3         ; clamp to 0..255 and merge both halves
    movq        [%6], m0
%endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
68
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
;------------------------------------------------------------------------------
; DIAG4_SSE2 base, off0, off1, off2, off3, dst
;
; One 4-tap filter step producing 8 output bytes (SSE2 flavour).
;   %1      register holding the source base address
;   %2..%5  offsets of the four taps relative to %1
;   %6      register holding the address the 8 result bytes are written to
;
; Expects:  m7 = 0, and the splatted weights for taps 0..3 in m4, m5, m6, m3
;           (the layout produced by SPLAT4REGS_SSE2).
; Clobbers: m0-m2.
;------------------------------------------------------------------------------
%macro DIAG4_SSE2 6
    ; taps 0 and 1
    movq          m0, [%1+%2]
    punpcklbw     m0, m7
    pmullw        m0, m4         ; src[x-8 ] * biweight [0]
    movq          m1, [%1+%3]
    punpcklbw     m1, m7
    pmullw        m1, m5         ; src[x   ] * biweight [1]
    paddw         m0, m1

    ; taps 2 and 3
    movq          m1, [%1+%4]
    punpcklbw     m1, m7
    pmullw        m1, m6         ; src[x+8 ] * biweight [2]
    movq          m2, [%1+%5]
    punpcklbw     m2, m7
    pmullw        m2, m3         ; src[x+16] * biweight [3]
    paddw         m1, m2

    ; combine, round, scale back down and store as bytes
    paddsw        m0, m1
    paddsw        m0, [pw_64]    ; Add 64
    psraw         m0, 7
    packuswb      m0, m0
    movq        [%6], m0
%endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
90
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
;------------------------------------------------------------------------------
; SPLAT4REGS_MMX
;
; Takes four 16-bit weights w0..w3 in m3 and replicates each across a whole
; MMX register: m3 = w0, m4 = w1, m5 = w2, m6 = w3 (four copies each).
; The four vectors are also written to [rsp+8*11] .. [rsp+8*14], which is
; where DIAG4_MMX reads them from.
;
; Clobbers: m3-m6.
;------------------------------------------------------------------------------
%macro SPLAT4REGS_MMX 0
    movq         m5, m3          ; m5 = w0 w1 w2 w3
    punpcklwd    m3, m3          ; m3 = w0 w0 w1 w1
    punpckhwd    m5, m5          ; m5 = w2 w2 w3 w3
    movq         m4, m3
    movq         m6, m5
    punpckldq    m3, m3          ; m3 = w0 x4
    punpckhdq    m4, m4          ; m4 = w1 x4
    punpckldq    m5, m5          ; m5 = w2 x4
    punpckhdq    m6, m6          ; m6 = w3 x4
    movq [rsp+8*11], m3
    movq [rsp+8*12], m4
    movq [rsp+8*13], m5
    movq [rsp+8*14], m6
%endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
106
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
;------------------------------------------------------------------------------
; SPLAT4REGS_SSE2
;
; Takes four 16-bit weights w0..w3 in the low quadword of m3 and replicates
; each across all eight words of an XMM register:
;   m4 = w0, m5 = w1, m6 = w2, m3 = w3.
; m3 is rewritten last because it is the source of every shuffle.
;
; Clobbers: m3-m6.
;------------------------------------------------------------------------------
%macro SPLAT4REGS_SSE2 0
    pshuflw      m4, m3, 0x0     ; low qword = w0 x4
    punpcklqdq   m4, m4          ; copy low qword into high qword
    pshuflw      m5, m3, 0x55    ; w1
    punpcklqdq   m5, m5
    pshuflw      m6, m3, 0xAA    ; w2
    punpcklqdq   m6, m6
    pshuflw      m3, m3, 0xFF    ; w3
    punpcklqdq   m3, m3
%endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
117
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
;------------------------------------------------------------------------------
; vp6_filter_diag4 <opt>, <xmm register count>
;
; Emits vp6_filter_diag4_<opt>. DIAG4 and SPLAT4REGS must be %defined to the
; variants matching the current INIT_MMX / INIT_XMM mode before invoking this.
;
; The function runs two 4-tap passes:
;   1. 11 rows, starting one row above src, are filtered with h_weight using
;      taps at byte offsets -1, 0, 1, 2; each row's 8 result bytes go into a
;      temporary buffer at [rsp] .. [rsp+8*11).
;   2. 8 output rows are produced from that buffer with v_weights using taps
;      at -8, 0, 8, 16 (i.e. the previous, current and next two buffer rows)
;      and written to dst, advancing by stride.
;
; Register use: r0 = dst, r1 = src, r2 = stride, r3 = h_weight (then buffer
; cursor), r4 = v_weights, r5 = saved stack pointer, r6 = loop counter.
;------------------------------------------------------------------------------
%macro vp6_filter_diag4 2
; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride,
;                                const int16_t h_weight[4], const int16_t v_weights[4])
cglobal vp6_filter_diag4_%1, 5, 7, %2
    mov          r5, rsp         ; backup stack pointer
    and         rsp, ~(mmsize-1) ; align stack
    ; No rounding constant is preloaded into m6 here: SPLAT4REGS_MMX overwrites
    ; m6, so a value placed there would not survive until the filter loops.
%ifidn %1, sse2
    sub         rsp, 8*11        ; 11 temporary rows of 8 bytes
%else
    sub         rsp, 8*15        ; 11 temporary rows + 4 splatted weight vectors
%endif

    ; stride is a 32-bit int but is used in pointer arithmetic below; make
    ; sure the full-width register holds its sign-extended value (this is a
    ; no-op where r2 and r2d are the same register).
    movsxdifnidn r2, r2d

    sub          r1, r2          ; start one row above src

    pxor         m7, m7          ; zero, used for byte -> word unpacking
    movq         m3, [r3]        ; h_weight[0..3]
    SPLAT4REGS

    mov          r3, rsp         ; r3 = write cursor into the temporary buffer
    mov          r6, 11
.nextrow:
    DIAG4        r1, -1, 0, 1, 2, r3
    add          r3, 8
    add          r1, r2
    dec          r6
    jnz .nextrow

    movq         m3, [r4]        ; v_weights[0..3]
    SPLAT4REGS

    lea          r3, [rsp+8]     ; second buffer row; the -8 tap reads the first
    mov          r6, 8
.nextcol:
    DIAG4        r3, -8, 0, 8, 16, r0
    add          r3, 8
    add          r0, r2
    dec          r6
    jnz .nextcol

    mov         rsp, r5          ; restore stack pointer
    RET
%endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
161
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; MMX instance: 64-bit registers, no XMM registers declared to cglobal.
INIT_MMX
%define SPLAT4REGS SPLAT4REGS_MMX
%define DIAG4      DIAG4_MMX
vp6_filter_diag4 mmx,  0

; SSE2 instance: 128-bit registers, 8 XMM registers declared to cglobal.
INIT_XMM
%define SPLAT4REGS SPLAT4REGS_SSE2
%define DIAG4      DIAG4_SSE2
vp6_filter_diag4 sse2, 8