annotate x86/vp56dsp.asm @ 12495:fac8063ed1e7 libavcodec

Allow float values for libmp3lame quality. Patch by James Darnley, james D darnley A gmail
author cehoyos
date Wed, 15 Sep 2010 22:10:13 +0000
parents 417532548504
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12418
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
1 ;******************************************************************************
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
2 ;* MMX/SSE2-optimized functions for the VP6 decoder
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
3 ;* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
4 ;* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
5 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
6 ;* This file is part of FFmpeg.
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
7 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
8 ;* FFmpeg is free software; you can redistribute it and/or
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
9 ;* modify it under the terms of the GNU Lesser General Public
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
10 ;* License as published by the Free Software Foundation; either
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
11 ;* version 2.1 of the License, or (at your option) any later version.
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
12 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
13 ;* FFmpeg is distributed in the hope that it will be useful,
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
16 ;* Lesser General Public License for more details.
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
17 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
18 ;* You should have received a copy of the GNU Lesser General Public
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
19 ;* License along with FFmpeg; if not, write to the Free Software
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
21 ;******************************************************************************
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
22
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
23 %include "x86inc.asm"
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
24 %include "x86util.asm"
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
25
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
26 cextern pw_64
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
27
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
28 SECTION .text
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
29
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; DIAG4_MMX base, off0, off1, off2, off3, dst
; One 8-pixel, 4-tap filter step for the MMX build.
;   %1      register holding the base address of the taps
;   %2..%5  byte offsets from %1 of the four 8-byte tap loads
;   %6      register holding the address the 8 result bytes are stored to
; Reads the four word-splatted weights from [rsp+8*11] .. [rsp+8*14] (the
; slots SPLAT4REGS_MMX stores to). Expects m7 to be all zero (it supplies the
; high byte when bytes are widened to words) and m6 to hold the rounding term
; that is added before the shift (vp6_filter_diag4 loads it from pw_64).
; Overwrites m0-m5.
; The "src[x-8]"-style remarks on the multiply lines match the -8/0/8/16
; offsets of the second DIAG4 invocation in vp6_filter_diag4; the first
; invocation passes -1/0/1/2 instead.
30 %macro DIAG4_MMX 6
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
31 movq m0, [%1+%2]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
32 movq m1, [%1+%3]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; keep copies so the upper 4 bytes of each load can be widened separately
33 movq m3, m0
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
34 movq m4, m1
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; widen bytes to words: m0/m1 = low 4 pixels, m3/m4 = high 4 pixels
35 punpcklbw m0, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
36 punpcklbw m1, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
37 punpckhbw m3, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
38 punpckhbw m4, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
39 pmullw m0, [rsp+8*11] ; src[x-8 ] * biweight [0]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
40 pmullw m1, [rsp+8*12] ; src[x ] * biweight [1]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
41 pmullw m3, [rsp+8*11] ; src[x-8 ] * biweight [0]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
42 pmullw m4, [rsp+8*12] ; src[x ] * biweight [1]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; partial sums of taps 0 and 1 (wrapping word adds), low and high halves
43 paddw m0, m1
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
44 paddw m3, m4
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; taps 2 and 3: same load / copy / widen / multiply sequence, in m1/m2 (low)
; and m4/m5 (high)
45 movq m1, [%1+%4]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
46 movq m2, [%1+%5]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
47 movq m4, m1
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
48 movq m5, m2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
49 punpcklbw m1, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
50 punpcklbw m2, m7
12430
417532548504 Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents: 12420
diff changeset
51 punpckhbw m4, m7
417532548504 Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents: 12420
diff changeset
52 punpckhbw m5, m7
12418
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
53 pmullw m1, [rsp+8*13] ; src[x+8 ] * biweight [2]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
54 pmullw m2, [rsp+8*14] ; src[x+16] * biweight [3]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
55 pmullw m4, [rsp+8*13] ; src[x+8 ] * biweight [2]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
56 pmullw m5, [rsp+8*14] ; src[x+16] * biweight [3]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
57 paddw m1, m2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
58 paddw m4, m5
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; combine both partial sums, then add the rounding term, using signed
; saturating adds
59 paddsw m0, m1
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
60 paddsw m3, m4
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
61 paddsw m0, m6 ; Add 64
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
62 paddsw m3, m6 ; Add 64
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; arithmetic shift right by 7 scales the sums back down
63 psraw m0, 7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
64 psraw m3, 7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; pack the low and high word halves into 8 unsigned-saturated bytes and store
65 packuswb m0, m3
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
66 movq [%6], m0
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
67 %endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
68
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; DIAG4_SSE2 base, off0, off1, off2, off3, dst
; One 8-pixel, 4-tap filter step for the SSE2 build.
;   %1      register holding the base address of the taps
;   %2..%5  byte offsets from %1 of the four 8-byte tap loads
;   %6      register holding the address the 8 result bytes are stored to
; Expects m7 to be all zero and the splatted weights in registers, as left by
; SPLAT4REGS_SSE2: m4 = weight 0, m5 = weight 1, m6 = weight 2, m3 = weight 3.
; Because m6 is taken by a weight here, the rounding term is read straight
; from pw_64 in memory. Overwrites m0-m2.
; The "src[x-8]"-style remarks on the multiply lines match the -8/0/8/16
; offsets of the second DIAG4 invocation in vp6_filter_diag4; the first
; invocation passes -1/0/1/2 instead.
69 %macro DIAG4_SSE2 6
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; movq loads only 8 bytes; widening them fills all eight words of a register
70 movq m0, [%1+%2]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
71 movq m1, [%1+%3]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
72 punpcklbw m0, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
73 punpcklbw m1, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
74 pmullw m0, m4 ; src[x-8 ] * biweight [0]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
75 pmullw m1, m5 ; src[x ] * biweight [1]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; partial sum of taps 0 and 1 (wrapping word add)
76 paddw m0, m1
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
77 movq m1, [%1+%4]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
78 movq m2, [%1+%5]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
79 punpcklbw m1, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
80 punpcklbw m2, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
81 pmullw m1, m6 ; src[x+8 ] * biweight [2]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
82 pmullw m2, m3 ; src[x+16] * biweight [3]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
83 paddw m1, m2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; combine both partial sums, then add the rounding term, using signed
; saturating adds
84 paddsw m0, m1
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; NOTE(review): this is a full-width memory operand, so pw_64 presumably is
; 16-byte aligned where it is defined (it is only cextern'd in this file) -
; confirm.
85 paddsw m0, [pw_64] ; Add 64
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
86 psraw m0, 7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; pack the eight words to unsigned-saturated bytes; only the low 8 bytes of
; the result are stored
87 packuswb m0, m0
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
88 movq [%6], m0
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
89 %endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
90
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; SPLAT4REGS_MMX
; Input: m3 = four 16-bit weights w0..w3 (the caller loads them with movq).
; Broadcasts each weight across all four words of its own register and stores
; the results to the stack slots DIAG4_MMX multiplies by:
;   [rsp+8*11] = w0 x4   [rsp+8*12] = w1 x4
;   [rsp+8*13] = w2 x4   [rsp+8*14] = w3 x4
; Overwrites m2-m5.
91 %macro SPLAT4REGS_MMX 0
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; m5 keeps the untouched weights for the w2/w3 half
92 movq m5, m3
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; m3 = w0 w0 w1 w1
93 punpcklwd m3, m3
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
94 movq m4, m3
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; m3 = w0 x4
95 punpckldq m3, m3
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; m4 = w1 x4
96 punpckhdq m4, m4
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; m5 = w2 w2 w3 w3
97 punpckhwd m5, m5
12430
417532548504 Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents: 12420
diff changeset
98 movq m2, m5
417532548504 Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents: 12420
diff changeset
; m2 = w3 x4
99 punpckhdq m2, m2
12418
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; m5 = w2 x4
100 punpckldq m5, m5
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; spill the four splatted weights, in order w0, w1, w2, w3
101 movq [rsp+8*11], m3
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
102 movq [rsp+8*12], m4
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
103 movq [rsp+8*13], m5
12430
417532548504 Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents: 12420
diff changeset
104 movq [rsp+8*14], m2
12418
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
105 %endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
106
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; SPLAT4REGS_SSE2
; Input: low 64 bits of m3 = four 16-bit weights w0..w3.
; Output: each weight repeated in all eight words of one register, matching
; the operands DIAG4_SSE2 multiplies by:
;   m4 = w0, m5 = w1, m6 = w2, m3 = w3
; No memory is accessed. m3 is rewritten last because it is the source of
; the other three shuffles.
107 %macro SPLAT4REGS_SSE2 0
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; pshuflw with 0x00 / 0x55 / 0xAA / 0xFF copies word 0 / 1 / 2 / 3 into all
; four low words
108 pshuflw m4, m3, 0x0
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
109 pshuflw m5, m3, 0x55
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
110 pshuflw m6, m3, 0xAA
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
111 pshuflw m3, m3, 0xFF
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; duplicate the low quadword into the high quadword
112 punpcklqdq m4, m4
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
113 punpcklqdq m5, m5
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
114 punpcklqdq m6, m6
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
115 punpcklqdq m3, m3
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
116 %endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
117
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; vp6_filter_diag4 <opt>, <n>
; Emits the function vp6_filter_diag4_<opt> (C prototype in the comment
; below). %1 is the function-name suffix and also selects the stack layout
; (sse2 vs. anything else); %2 is passed through as the last cglobal argument.
; The code uses r0 = dst, r1 = src, r2 = stride, r3 / r4 = weight pointers,
; which assumes cglobal (from x86inc.asm, not shown here) maps the five
; arguments to r0..r4 in order - confirm there. r5 holds the saved stack
; pointer and r6 is the loop counter.
; Two passes over an on-stack buffer of 8-byte rows:
;   1. 11 rows, starting one row above src, are filtered with DIAG4 at byte
;      offsets -1/0/1/2 using the weights at r3, filling buffer rows 0..10.
;   2. 8 rows are filtered with DIAG4 across buffer rows (offsets -8/0/8/16)
;      using the weights at r4 and written to dst, advancing dst by stride.
; DIAG4 and SPLAT4REGS must be %define'd to the MMX or SSE2 variants before
; this macro is expanded.
118 %macro vp6_filter_diag4 2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
119 ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride,
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
120 ; const int16_t h_weight[4], const int16_t v_weights[4])
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
121 cglobal vp6_filter_diag4_%1, 5, 7, %2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
122 mov r5, rsp ; backup stack pointer
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
123 and rsp, ~(mmsize-1) ; align stack
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; Reserve the row buffer: 11 rows of 8 bytes. The non-sse2 build reserves 4
; more 8-byte slots (used by SPLAT4REGS_MMX for the splatted weights) and
; preloads the rounding term into m6.
124 %ifidn %1, sse2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
125 sub rsp, 8*11
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
126 %else
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
127 sub rsp, 8*15
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
128 movq m6, [pw_64]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
129 %endif
12420
37bb4de77908 VP6: fix vp6_filter_diag4_mmx/sse on 64-bit
mru
parents: 12418
diff changeset
; stride is a 32-bit int in the prototype: sign-extend it before it is used
; in 64-bit address arithmetic
130 %ifdef ARCH_X86_64
37bb4de77908 VP6: fix vp6_filter_diag4_mmx/sse on 64-bit
mru
parents: 12418
diff changeset
131 movsxd r2, r2d
37bb4de77908 VP6: fix vp6_filter_diag4_mmx/sse on 64-bit
mru
parents: 12418
diff changeset
132 %endif
12418
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
133
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; start reading one row above src
134 sub r1, r2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
135
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; m7 = 0, used by DIAG4 to zero-extend bytes to words
136 pxor m7, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; first pass uses the four weights at r3 (h_weight in the prototype)
137 movq m3, [r3]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
138 SPLAT4REGS
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
139
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; r3 is reused as the write pointer into the row buffer; r6 counts 11 rows
140 mov r3, rsp
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
141 mov r6, 11
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; NOTE(review): this label and .nextcol have no trailing colon; some
; assemblers warn about colon-less labels - confirm against the build's
; assembler settings.
142 .nextrow
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
143 DIAG4 r1, -1, 0, 1, 2, r3
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; next buffer row (8 bytes) and next source row (stride bytes)
144 add r3, 8
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
145 add r1, r2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
146 dec r6
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
147 jnz .nextrow
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
148
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; second pass uses the four weights at r4 (v_weights in the prototype)
149 movq m3, [r4]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
150 SPLAT4REGS
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
151
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; start at buffer row 1 so the -8 tap reads row 0; the last of the 8
; iterations reads up to row 10 with the +16 tap
152 lea r3, [rsp+8]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
153 mov r6, 8
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
154 .nextcol
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
155 DIAG4 r3, -8, 0, 8, 16, r0
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; next buffer row (8 bytes) and next destination row (stride bytes)
156 add r3, 8
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
157 add r0, r2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
158 dec r6
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
159 jnz .nextcol
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
160
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
161 mov rsp, r5 ; restore stack pointer
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
162 RET
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
163 %endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
164
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; Instantiate the filter twice. Each instantiation first selects a register
; set (INIT_MMX / INIT_XMM come from the included x86inc.asm and presumably
; bind m0-m7 and mmsize to the MMX or XMM registers - confirm there), then
; points DIAG4 and SPLAT4REGS at the matching helper macros before expanding
; vp6_filter_diag4.
165 INIT_MMX
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
166 %define DIAG4 DIAG4_MMX
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
167 %define SPLAT4REGS SPLAT4REGS_MMX
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; MMX build: function-name suffix "mmx", last cglobal argument 0
168 vp6_filter_diag4 mmx, 0
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
169
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
170 INIT_XMM
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
171 %define DIAG4 DIAG4_SSE2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
172 %define SPLAT4REGS SPLAT4REGS_SSE2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
; SSE2 build: function-name suffix "sse2", last cglobal argument 8
173 vp6_filter_diag4 sse2, 8