annotate x86/vp56dsp.asm @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 417532548504
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12418
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
1 ;******************************************************************************
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
2 ;* MMX/SSE2-optimized functions for the VP6 decoder
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
3 ;* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
4 ;* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
5 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
6 ;* This file is part of FFmpeg.
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
7 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
8 ;* FFmpeg is free software; you can redistribute it and/or
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
9 ;* modify it under the terms of the GNU Lesser General Public
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
10 ;* License as published by the Free Software Foundation; either
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
11 ;* version 2.1 of the License, or (at your option) any later version.
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
12 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
13 ;* FFmpeg is distributed in the hope that it will be useful,
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
16 ;* Lesser General Public License for more details.
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
17 ;*
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
18 ;* You should have received a copy of the GNU Lesser General Public
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
19 ;* License along with FFmpeg; if not, write to the Free Software
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
21 ;******************************************************************************
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
22
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
23 %include "x86inc.asm"
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
24 %include "x86util.asm"
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
25
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
26 cextern pw_64
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
27
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
28 SECTION .text
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
29
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
30 %macro DIAG4_MMX 6 ; MMX 4-tap filter of 8 pixels: %1=base pointer, %2-%5=byte offsets of the four taps, %6=destination pointer; overwrites m0-m5
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
31 movq m0, [%1+%2] ; 8 bytes at the first tap offset
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
32 movq m1, [%1+%3] ; 8 bytes at the second tap offset
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
33 movq m3, m0 ; copy of the first tap, used for the high 4 bytes
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
34 movq m4, m1 ; copy of the second tap, used for the high 4 bytes
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
35 punpcklbw m0, m7 ; low 4 bytes -> 4 words (m7 is zeroed by vp6_filter_diag4)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
36 punpcklbw m1, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
37 punpckhbw m3, m7 ; high 4 bytes -> 4 words
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
38 punpckhbw m4, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
39 pmullw m0, [rsp+8*11] ; src[x-8 ] * biweight [0] (low half; slot written by SPLAT4REGS_MMX)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
40 pmullw m1, [rsp+8*12] ; src[x ] * biweight [1] (low half)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
41 pmullw m3, [rsp+8*11] ; src[x-8 ] * biweight [0] (high half)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
42 pmullw m4, [rsp+8*12] ; src[x ] * biweight [1] (high half)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
43 paddw m0, m1 ; low half: sum of the first two products
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
44 paddw m3, m4 ; high half: sum of the first two products
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
45 movq m1, [%1+%4] ; 8 bytes at the third tap offset
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
46 movq m2, [%1+%5] ; 8 bytes at the fourth tap offset
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
47 movq m4, m1 ; copy of the third tap, used for the high 4 bytes
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
48 movq m5, m2 ; copy of the fourth tap, used for the high 4 bytes
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
49 punpcklbw m1, m7 ; low 4 bytes -> 4 words
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
50 punpcklbw m2, m7
12430
417532548504 Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents: 12420
diff changeset
51 punpckhbw m4, m7 ; high 4 bytes -> 4 words
417532548504 Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents: 12420
diff changeset
52 punpckhbw m5, m7
12418
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
53 pmullw m1, [rsp+8*13] ; src[x+8 ] * biweight [2] (low half)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
54 pmullw m2, [rsp+8*14] ; src[x+16] * biweight [3] (low half)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
55 pmullw m4, [rsp+8*13] ; src[x+8 ] * biweight [2] (high half)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
56 pmullw m5, [rsp+8*14] ; src[x+16] * biweight [3] (high half)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
57 paddw m1, m2 ; low half: sum of the last two products
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
58 paddw m4, m5 ; high half: sum of the last two products
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
59 paddsw m0, m1 ; low half: all four products, signed-saturating add
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
60 paddsw m3, m4 ; high half: all four products, signed-saturating add
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
61 paddsw m0, m6 ; Add 64 (m6 is loaded from pw_64 by vp6_filter_diag4)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
62 paddsw m3, m6 ; Add 64
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
63 psraw m0, 7 ; arithmetic shift right by 7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
64 psraw m3, 7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
65 packuswb m0, m3 ; pack both halves back to 8 bytes with unsigned saturation
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
66 movq [%6], m0 ; store the 8 result bytes
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
67 %endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
68
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
69 %macro DIAG4_SSE2 6 ; SSE2 4-tap filter of 8 pixels: %1=base pointer, %2-%5=byte offsets of the four taps, %6=destination pointer; weights are read from m4, m5, m6, m3; overwrites m0-m2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
70 movq m0, [%1+%2] ; 8 bytes at the first tap offset
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
71 movq m1, [%1+%3] ; 8 bytes at the second tap offset
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
72 punpcklbw m0, m7 ; 8 bytes -> 8 words (m7 is zeroed by vp6_filter_diag4)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
73 punpcklbw m1, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
74 pmullw m0, m4 ; src[x-8 ] * biweight [0] (m4 is filled by SPLAT4REGS_SSE2)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
75 pmullw m1, m5 ; src[x ] * biweight [1]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
76 paddw m0, m1 ; sum of the first two products
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
77 movq m1, [%1+%4] ; 8 bytes at the third tap offset
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
78 movq m2, [%1+%5] ; 8 bytes at the fourth tap offset
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
79 punpcklbw m1, m7 ; 8 bytes -> 8 words
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
80 punpcklbw m2, m7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
81 pmullw m1, m6 ; src[x+8 ] * biweight [2]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
82 pmullw m2, m3 ; src[x+16] * biweight [3]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
83 paddw m1, m2 ; sum of the last two products
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
84 paddsw m0, m1 ; all four products, signed-saturating add
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
85 paddsw m0, [pw_64] ; Add 64
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
86 psraw m0, 7 ; arithmetic shift right by 7
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
87 packuswb m0, m0 ; 8 words -> 8 bytes with unsigned saturation (same bytes in both halves)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
88 movq [%6], m0 ; store the low 8 bytes
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
89 %endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
90
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
91 %macro SPLAT4REGS_MMX 0 ; broadcast each of the four words w0..w3 held in m3 across a full register and store them at [rsp+8*11..8*14]; overwrites m2-m5
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
92 movq m5, m3 ; keep a copy for w2 and w3
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
93 punpcklwd m3, m3 ; m3 = w0 w0 w1 w1
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
94 movq m4, m3
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
95 punpckldq m3, m3 ; m3 = w0 in all four words
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
96 punpckhdq m4, m4 ; m4 = w1 in all four words
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
97 punpckhwd m5, m5 ; m5 = w2 w2 w3 w3
12430
417532548504 Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents: 12420
diff changeset
98 movq m2, m5
417532548504 Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents: 12420
diff changeset
99 punpckhdq m2, m2 ; m2 = w3 in all four words
12418
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
100 punpckldq m5, m5 ; m5 = w2 in all four words
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
101 movq [rsp+8*11], m3 ; w0 slot, read back by DIAG4_MMX
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
102 movq [rsp+8*12], m4 ; w1 slot
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
103 movq [rsp+8*13], m5 ; w2 slot
12430
417532548504 Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents: 12420
diff changeset
104 movq [rsp+8*14], m2 ; w3 slot
12418
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
105 %endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
106
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
107 %macro SPLAT4REGS_SSE2 0 ; broadcast the four low words w0..w3 of m3 into m4, m5, m6 and m3 respectively (all 8 words of each register)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
108 pshuflw m4, m3, 0x0 ; low 4 words of m4 = w0
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
109 pshuflw m5, m3, 0x55 ; low 4 words of m5 = w1
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
110 pshuflw m6, m3, 0xAA ; low 4 words of m6 = w2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
111 pshuflw m3, m3, 0xFF ; low 4 words of m3 = w3 (done last because m3 is the source)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
112 punpcklqdq m4, m4 ; duplicate the low qword into the high qword
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
113 punpcklqdq m5, m5
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
114 punpcklqdq m6, m6
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
115 punpcklqdq m3, m3
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
116 %endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
117
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
118 %macro vp6_filter_diag4 2 ; emits one function: %1 = name suffix (compared against sse2 below), %2 = value forwarded as the last cglobal argument
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
119 ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride,
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
120 ; const int16_t h_weights[4], const int16_t v_weights[4])
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
121 cglobal vp6_filter_diag4_%1, 5, 7, %2 ; NOTE(review): presumably 5 arguments in r0-r4, 7 GPRs used, %2 XMM registers, per the x86inc.asm cglobal convention - confirm
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
122 mov r5, rsp ; backup stack pointer
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
123 and rsp, ~(mmsize-1) ; align stack
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
124 %ifidn %1, sse2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
125 sub rsp, 8*11 ; scratch: 11 rows of 8 first-pass bytes
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
126 %else
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
127 sub rsp, 8*15 ; scratch: 11 rows of 8 bytes plus the four weight slots at [rsp+8*11..8*14]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
128 movq m6, [pw_64] ; rounding constant added by DIAG4_MMX
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
129 %endif
12420
37bb4de77908 VP6: fix vp6_filter_diag4_mmx/sse on 64-bit
mru
parents: 12418
diff changeset
130 %ifdef ARCH_X86_64
37bb4de77908 VP6: fix vp6_filter_diag4_mmx/sse on 64-bit
mru
parents: 12418
diff changeset
131 movsxd r2, r2d ; sign-extend the 32-bit value in r2 (stride) before using it in 64-bit address arithmetic
37bb4de77908 VP6: fix vp6_filter_diag4_mmx/sse on 64-bit
mru
parents: 12418
diff changeset
132 %endif
12418
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
133
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
134 sub r1, r2 ; start the first pass one row (r2 bytes) before r1
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
135
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
136 pxor m7, m7 ; m7 = 0, used by DIAG4 to widen bytes to words
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
137 movq m3, [r3] ; four 16-bit weights for the first pass (presumably h_weights, assuming r0-r4 map to the arguments in order)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
138 SPLAT4REGS ; broadcast each weight for DIAG4
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
139
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
140 mov r3, rsp ; r3 = write cursor into the scratch rows
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
141 mov r6, 11 ; 11 rows: the second pass reads 4 taps for each of its 8 output rows
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
142 .nextrow
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
143 DIAG4 r1, -1, 0, 1, 2, r3 ; first pass: taps at byte offsets -1..2 within the current row, result to scratch
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
144 add r3, 8 ; next scratch row
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
145 add r1, r2 ; next input row
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
146 dec r6
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
147 jnz .nextrow
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
148
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
149 movq m3, [r4] ; four 16-bit weights for the second pass (presumably v_weights)
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
150 SPLAT4REGS
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
151
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
152 lea r3, [rsp+8] ; start at scratch row 1 so that the -8 tap reads scratch row 0
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
153 mov r6, 8 ; 8 output rows
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
154 .nextcol
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
155 DIAG4 r3, -8, 0, 8, 16, r0 ; second pass: taps are four consecutive 8-byte scratch rows, result stored at [r0]
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
156 add r3, 8 ; next scratch row
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
157 add r0, r2 ; next output row
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
158 dec r6
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
159 jnz .nextcol
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
160
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
161 mov rsp, r5 ; restore stack pointer
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
162 RET
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
163 %endmacro
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
164
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
165 INIT_MMX ; NOTE(review): presumably maps m0-m7 to the 64-bit MMX registers and sets mmsize (x86inc.asm) - confirm
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
166 %define DIAG4 DIAG4_MMX
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
167 %define SPLAT4REGS SPLAT4REGS_MMX
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
168 vp6_filter_diag4 mmx, 0 ; instantiate vp6_filter_diag4_mmx using the MMX helper macros selected above
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
169
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
170 INIT_XMM ; NOTE(review): presumably maps m0-m7 to the 128-bit XMM registers and sets mmsize (x86inc.asm) - confirm
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
171 %define DIAG4 DIAG4_SSE2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
172 %define SPLAT4REGS SPLAT4REGS_SSE2
e17840120b80 Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff changeset
173 vp6_filter_diag4 sse2, 8 ; instantiate vp6_filter_diag4_sse2 using the SSE2 helper macros selected above