Mercurial > libavcodec.hg
annotate x86/vp56dsp.asm @ 12495:fac8063ed1e7 libavcodec
Allow float values for libmp3lame quality.
Patch by James Darnley, james D darnley A gmail
author | cehoyos |
---|---|
date | Wed, 15 Sep 2010 22:10:13 +0000 |
parents | 417532548504 |
children |
rev | line source |
---|---|
12418
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
1 ;****************************************************************************** |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
2 ;* MMX/SSE2-optimized functions for the VP6 decoder |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
3 ;* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com> |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
4 ;* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com> |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
5 ;* |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
6 ;* This file is part of FFmpeg. |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
7 ;* |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
8 ;* FFmpeg is free software; you can redistribute it and/or |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
9 ;* modify it under the terms of the GNU Lesser General Public |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
10 ;* License as published by the Free Software Foundation; either |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
11 ;* version 2.1 of the License, or (at your option) any later version. |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
12 ;* |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
13 ;* FFmpeg is distributed in the hope that it will be useful, |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
16 ;* Lesser General Public License for more details. |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
17 ;* |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
18 ;* You should have received a copy of the GNU Lesser General Public |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
19 ;* License along with FFmpeg; if not, write to the Free Software |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
21 ;****************************************************************************** |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
22 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
23 %include "x86inc.asm" |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
24 %include "x86util.asm" |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
25 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
26 cextern pw_64 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
27 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
28 SECTION .text |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
29 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; DIAG4_MMX base, off0, off1, off2, off3, dst
; Applies one 4-tap weighted filter to 8 adjacent bytes using MMX registers.
;   %1      register holding the base address of the input
;   %2..%5  displacements of the four taps relative to %1
;   %6      register holding the address that receives the 8 result bytes
; Reads: m7 (zero source for byte->word unpacking; the caller clears it),
;        m6 (rounding term; the caller loads pw_64 into it),
;        the four broadcast weights at [rsp+8*11]..[rsp+8*14]
;        (the slots written by SPLAT4REGS_MMX).
; Overwrites m0-m5.
; Note: the "src[x-8] / src[x] / src[x+8] / src[x+16]" labels below match the
; -8/0/8/16 invocation; the macro is also invoked with taps -1/0/1/2.
30 %macro DIAG4_MMX 6 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; load 8 bytes for tap 0 and tap 1
31 movq m0, [%1+%2] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
32 movq m1, [%1+%3] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; keep copies so the upper four bytes can be widened separately
33 movq m3, m0 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
34 movq m4, m1 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; widen bytes to 16-bit words: low four bytes in m0/m1, high four in m3/m4
35 punpcklbw m0, m7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
36 punpcklbw m1, m7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
37 punpckhbw m3, m7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
38 punpckhbw m4, m7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
39 pmullw m0, [rsp+8*11] ; src[x-8 ] * biweight [0] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
40 pmullw m1, [rsp+8*12] ; src[x ] * biweight [1] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
41 pmullw m3, [rsp+8*11] ; src[x-8 ] * biweight [0] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
42 pmullw m4, [rsp+8*12] ; src[x ] * biweight [1] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; partial sum of taps 0 and 1 (low half in m0, high half in m3)
43 paddw m0, m1 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
44 paddw m3, m4 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; same load / copy / widen / multiply sequence for tap 2 and tap 3
45 movq m1, [%1+%4] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
46 movq m2, [%1+%5] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
47 movq m4, m1 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
48 movq m5, m2 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
49 punpcklbw m1, m7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
50 punpcklbw m2, m7 |
12430
417532548504
Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents:
12420
diff
changeset
|
51 punpckhbw m4, m7 |
417532548504
Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents:
12420
diff
changeset
|
52 punpckhbw m5, m7 |
12418
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
53 pmullw m1, [rsp+8*13] ; src[x+8 ] * biweight [2] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
54 pmullw m2, [rsp+8*14] ; src[x+16] * biweight [3] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
55 pmullw m4, [rsp+8*13] ; src[x+8 ] * biweight [2] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
56 pmullw m5, [rsp+8*14] ; src[x+16] * biweight [3] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; partial sum of taps 2 and 3 (low half in m1, high half in m4)
57 paddw m1, m2 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
58 paddw m4, m5 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; combine the two partial sums using signed saturating adds
59 paddsw m0, m1 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
60 paddsw m3, m4 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
61 paddsw m0, m6 ; Add 64 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
62 paddsw m3, m6 ; Add 64 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; arithmetic shift right by 7 (together with the +64 above: rounded /128)
63 psraw m0, 7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
64 psraw m3, 7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; pack both halves back to 8 bytes with unsigned saturation and store them
65 packuswb m0, m3 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
66 movq [%6], m0 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
67 %endmacro |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
68 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; DIAG4_SSE2 base, off0, off1, off2, off3, dst
; Applies one 4-tap weighted filter to 8 adjacent bytes using XMM registers.
; Unlike DIAG4_MMX, all 8 pixels fit as words in a single register, so no
; separate high-half path is needed.
;   %1      register holding the base address of the input
;   %2..%5  displacements of the four taps relative to %1
;   %6      register holding the address that receives the 8 result bytes
; Reads: m7 (zero source for byte->word unpacking; the caller clears it) and
;        the broadcast weights in m4, m5, m6, m3 (tap 0..3, in that order, as
;        produced by SPLAT4REGS_SSE2).
; Overwrites m0-m2.
; Note: the "src[x-8] / src[x] / src[x+8] / src[x+16]" labels below match the
; -8/0/8/16 invocation; the macro is also invoked with taps -1/0/1/2.
69 %macro DIAG4_SSE2 6 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; load 8 bytes for tap 0 and tap 1 and widen them to words
70 movq m0, [%1+%2] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
71 movq m1, [%1+%3] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
72 punpcklbw m0, m7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
73 punpcklbw m1, m7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
74 pmullw m0, m4 ; src[x-8 ] * biweight [0] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
75 pmullw m1, m5 ; src[x ] * biweight [1] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; partial sum of taps 0 and 1
76 paddw m0, m1 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; same sequence for tap 2 and tap 3
77 movq m1, [%1+%4] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
78 movq m2, [%1+%5] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
79 punpcklbw m1, m7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
80 punpcklbw m2, m7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
81 pmullw m1, m6 ; src[x+8 ] * biweight [2] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
82 pmullw m2, m3 ; src[x+16] * biweight [3] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; partial sum of taps 2 and 3
83 paddw m1, m2 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; combine the two partial sums using a signed saturating add
84 paddsw m0, m1 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; rounding term comes from memory here because m6 holds a weight in this path
85 paddsw m0, [pw_64] ; Add 64 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; arithmetic shift right by 7 (together with the +64 above: rounded /128)
86 psraw m0, 7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; pack to bytes with unsigned saturation; only the low 8 bytes are stored
87 packuswb m0, m0 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
88 movq [%6], m0 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
89 %endmacro |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
90 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; SPLAT4REGS_MMX
; Broadcasts each of the four 16-bit words held in m3 (w0..w3, lowest word
; first) across a full 64-bit register and spills the results to the stack
; slots that DIAG4_MMX reads as multiplier operands:
;   [rsp+8*11] = w0 x4, [rsp+8*12] = w1 x4,
;   [rsp+8*13] = w2 x4, [rsp+8*14] = w3 x4
; Overwrites m2-m5.
91 %macro SPLAT4REGS_MMX 0 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; m5 = untouched copy, used below for w2/w3
92 movq m5, m3 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; m3 = w0 w0 w1 w1
93 punpcklwd m3, m3 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
94 movq m4, m3 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; m3 = w0 x4, m4 = w1 x4
95 punpckldq m3, m3 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
96 punpckhdq m4, m4 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; m5 = w2 w2 w3 w3
97 punpckhwd m5, m5 |
12430
417532548504
Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents:
12420
diff
changeset
|
98 movq m2, m5 |
417532548504
Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents:
12420
diff
changeset
|
; m2 = w3 x4, m5 = w2 x4
99 punpckhdq m2, m2 |
12418
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
100 punpckldq m5, m5 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; spill the four broadcast weights in tap order
101 movq [rsp+8*11], m3 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
102 movq [rsp+8*12], m4 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
103 movq [rsp+8*13], m5 |
12430
417532548504
Fix typos when converting inline asm to yasm, fixes MMX-only fate-ea-vp61.
rbultje
parents:
12420
diff
changeset
|
104 movq [rsp+8*14], m2 |
12418
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
105 %endmacro |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
106 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; SPLAT4REGS_SSE2
; Broadcasts each of the four 16-bit words in the low half of m3 (w0..w3,
; lowest word first) across all eight word lanes of its own register, so
; DIAG4_SSE2 can use them directly as multiplier operands:
;   m4 = w0 x8, m5 = w1 x8, m6 = w2 x8, m3 = w3 x8
; Overwrites m3-m6. m3 is written last because it is also the source.
107 %macro SPLAT4REGS_SSE2 0 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; each immediate selects one source word for all four low word lanes
108 pshuflw m4, m3, 0x0 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
109 pshuflw m5, m3, 0x55 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
110 pshuflw m6, m3, 0xAA |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
111 pshuflw m3, m3, 0xFF |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; duplicate the low quadword into the high quadword of each register
112 punpcklqdq m4, m4 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
113 punpcklqdq m5, m5 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
114 punpcklqdq m6, m6 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
115 punpcklqdq m3, m3 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
116 %endmacro |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
117 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; vp6_filter_diag4 opt, nregs
; Emits the function vp6_filter_diag4_<opt>: a separable 4-tap filter that
; produces 8 output rows of 8 bytes each.
;   %1  name suffix ("mmx" or "sse2"); also selects the stack frame size
;   %2  forwarded as the last numeric argument of cglobal
;       (presumably the number of XMM registers used -- confirm in x86inc.asm)
; Expects DIAG4 and SPLAT4REGS to be %define'd to the matching MMX or SSE2
; variants before the macro is invoked.
; Register use (r0-r4 presumably map to the five C arguments in prototype
; order, per x86inc's cglobal -- confirm):
;   r0 = dst, r1 = src, r2 = stride, r3 = h_weights, r4 = v_weights
;   r5 = saved stack pointer, r6 = loop counter
; Stack frame (relative to the aligned rsp):
;   [rsp+8*0 .. rsp+8*10]   11 intermediate rows of 8 bytes (first pass output)
;   [rsp+8*11 .. rsp+8*14]  MMX build only: broadcast weights for DIAG4_MMX
118 %macro vp6_filter_diag4 2 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
119 ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride, |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
120 ; const int16_t h_weights[4], const int16_t v_weights[4]) |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
121 cglobal vp6_filter_diag4_%1, 5, 7, %2 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
122 mov r5, rsp ; backup stack pointer |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
123 and rsp, ~(mmsize-1) ; align stack |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; SSE2 keeps the weights in registers, so it only needs the 11-row buffer;
; MMX needs four extra qword slots for the weights and keeps the rounding
; constant in m6
124 %ifidn %1, sse2 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
125 sub rsp, 8*11 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
126 %else |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
127 sub rsp, 8*15 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
128 movq m6, [pw_64] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
129 %endif |
; on x86-64, sign-extend the 32-bit stride so it can be used in pointer math
12420 | 130 %ifdef ARCH_X86_64 |
131 movsxd r2, r2d | |
132 %endif | |
12418
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
133 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; start one row above src: the second pass needs a row before the first output
134 sub r1, r2 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
135 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; m7 = 0 for the byte->word unpacks in DIAG4; then load and broadcast the
; four weights pointed to by r3
136 pxor m7, m7 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
137 movq m3, [r3] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
138 SPLAT4REGS |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
139 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; first pass: filter 11 source rows along x (taps at x-1, x, x+1, x+2) into
; the stack buffer; r3 is reused as the buffer write pointer
140 mov r3, rsp |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
141 mov r6, 11 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
142 .nextrow |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
143 DIAG4 r1, -1, 0, 1, 2, r3 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; next buffer row (8 bytes) and next source row (stride bytes)
144 add r3, 8 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
145 add r1, r2 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
146 dec r6 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
147 jnz .nextrow |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
148 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; load and broadcast the four weights pointed to by r4 for the second pass
149 movq m3, [r4] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
150 SPLAT4REGS |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
151 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; second pass: filter down the buffer (rows are 8 bytes apart, hence taps at
; -8, 0, 8, 16) starting at buffer row 1, writing 8 output rows to dst
152 lea r3, [rsp+8] |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
153 mov r6, 8 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
154 .nextcol |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
155 DIAG4 r3, -8, 0, 8, 16, r0 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; next buffer row (8 bytes) and next destination row (stride bytes)
156 add r3, 8 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
157 add r0, r2 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
158 dec r6 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
159 jnz .nextcol |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
160 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
161 mov rsp, r5 ; restore stack pointer |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
162 RET |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
163 %endmacro |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
164 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; MMX build: bind the DIAG4 / SPLAT4REGS names used inside vp6_filter_diag4 to
; their MMX variants, then expand the function with suffix "mmx" and 0 as the
; value forwarded to cglobal's last argument.
165 INIT_MMX |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
166 %define DIAG4 DIAG4_MMX |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
167 %define SPLAT4REGS SPLAT4REGS_MMX |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
168 vp6_filter_diag4 mmx, 0 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
169 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
; SSE2 build: rebind the same names to the SSE2 variants and expand the
; function again with suffix "sse2"; 8 is forwarded to cglobal (the SSE2
; macros use m0-m7).
170 INIT_XMM |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
171 %define DIAG4 DIAG4_SSE2 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
172 %define SPLAT4REGS SPLAT4REGS_SSE2 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
diff
changeset
|
173 vp6_filter_diag4 sse2, 8 |