Mercurial > libavcodec.hg
annotate x86/h264_deblock_sse2.asm @ 10893:2aafcafbe1f0 libavcodec
Replace cabac checks in inline functions from h264.h with constants.
No benchmark because it's just replacing variables with literal constants
(so no risk of a slowdown outside gcc silliness) and I need sleep.
author | michael |
---|---|
date | Sat, 16 Jan 2010 05:41:33 +0000 |
parents | c08ca946c80a |
children | 980030a3e315 |
rev | line source |
---|---|
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
1 ;***************************************************************************** |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
2 ;* MMX/SSE2-optimized H.264 deblocking code |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
3 ;***************************************************************************** |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
4 ;* Copyright (C) 2005-2008 x264 project |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
5 ;* |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
6 ;* Authors: Loren Merritt <lorenm@u.washington.edu> |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
7 ;* |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
8 ;* This program is free software; you can redistribute it and/or modify |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
9 ;* it under the terms of the GNU General Public License as published by |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
10 ;* the Free Software Foundation; either version 2 of the License, or |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
11 ;* (at your option) any later version. |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
12 ;* |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
13 ;* This program is distributed in the hope that it will be useful, |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
16 ;* GNU General Public License for more details. |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
17 ;* |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
18 ;* You should have received a copy of the GNU General Public License |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
19 ;* along with this program; if not, write to the Free Software |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
21 ;***************************************************************************** |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
22 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
23 %include "x86inc.asm" |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
24 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
SECTION_RODATA

; Constant vectors: 16 copies of a single byte value each.
pb_00:  times 16 db 0x00
pb_01:  times 16 db 0x01            ; low-bit mask, see (p0^q0)&1 in DEBLOCK_P0_Q0
pb_03:  times 16 db 0x03            ; rounding term averaged in by DEBLOCK_P0_Q0
pb_a1:  times 16 db 0xa1            ; 0xa1 = 161 = 128+33, bias removed in DEBLOCK_P0_Q0

SECTION .text
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
32 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; PASS8ROWS(base, base3, stride, stride3)
; Expands to a comma-separated list of eight memory operands addressing
; rows 0..7: [base], [base+stride], ..., [base+7*stride].
; Rows 3..7 are formed relative to base3, so for the list to be those eight
; rows the caller must pass base3 = base+3*stride and stride3 = 3*stride.
%define PASS8ROWS(base, base3, stride, stride3) \
    [base],           [base+stride],    \
    [base+stride*2],  [base3],          \
    [base3+stride],   [base3+stride*2], \
    [base3+stride3],  [base3+stride*4]
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
37 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; TRANSPOSE4x8_LOAD row0, row1, ..., row7
; in:       8 rows of 4 bytes in %1..%8 (each is read with movd)
; out:      4 rows of 8 bytes in m0..m3
; clobbers: m4-m7
%macro TRANSPOSE4x8_LOAD 8
    ; rows 0..3: interleave bytes, then words -> m0 (low half), m2 (high half)
    movd        m0, %1
    movd        m2, %2
    punpcklbw   m0, m2
    movd        m1, %3
    movd        m3, %4
    punpcklbw   m1, m3
    movq        m2, m0
    punpcklwd   m0, m1
    punpckhwd   m2, m1

    ; rows 4..7: same pattern -> m4 (low half), m6 (high half)
    movd        m4, %5
    movd        m6, %6
    punpcklbw   m4, m6
    movd        m5, %7
    movd        m7, %8
    punpcklbw   m5, m7
    movq        m6, m4
    punpcklwd   m4, m5
    punpckhwd   m6, m5

    ; merge the two halves dword-wise into the four output rows
    movq        m1, m0
    punpckldq   m0, m4
    punpckhdq   m1, m4
    movq        m3, m2
    punpckldq   m2, m6
    punpckhdq   m3, m6
%endmacro
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
68 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; TRANSPOSE8x4_STORE row0, row1, ..., row7
; in:       4 rows of 8 bytes in m0..m3
; out:      8 rows of 4 bytes in %1..%8 (each is written with movd)
; clobbers: m0-m6
%macro TRANSPOSE8x4_STORE 8
    ; keep the upper dword of m0..m2 in m4..m6 for the second group of rows
    movq        m4, m0
    punpckhdq   m4, m4
    movq        m5, m1
    punpckhdq   m5, m5
    movq        m6, m2
    punpckhdq   m6, m6

    ; lower dwords -> %1..%4
    punpcklbw   m0, m1
    punpcklbw   m2, m3
    punpckhdq   m3, m3              ; low dword of m3 is consumed; expose its upper dword
    movq        m1, m0
    punpcklwd   m0, m2
    punpckhwd   m1, m2
    movd        %1, m0
    punpckhdq   m0, m0
    movd        %2, m0
    movd        %3, m1
    punpckhdq   m1, m1
    movd        %4, m1

    ; upper dwords -> %5..%8
    punpcklbw   m4, m5
    punpcklbw   m6, m3
    movq        m5, m4
    punpcklwd   m4, m6
    punpckhwd   m5, m6
    movd        %5, m4
    punpckhdq   m4, m4
    movd        %6, m4
    movd        %7, m5
    punpckhdq   m5, m5
    movd        %8, m5
%endmacro
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
104 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; SBUTTERFLY size, a, b, tmp
; %1 = unpack element suffix appended to punpckl/punpckh (bw, wd or dq here)
; in:  %2 = a (register), %3 = b (register or memory)
; out: %2 = punpckl%1(a, b), %4 = punpckh%1(a, b)
; The three operands are expected to be distinct; the copy into %4 must come
; first because %2 is overwritten by the low-half unpack.
%macro SBUTTERFLY 4
    movq        %4, %2
    punpckl%1   %2, %3
    punpckh%1   %4, %3
%endmacro
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
110 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; TRANSPOSE6x8_MEM row0, ..., row7, dst
; in:       8 rows of 8 bytes in %1..%8 (only the middle 6 pels are used)
; out:      6 rows of 8 bytes in [%9+0*16] .. [%9+5*16]
; clobbers: m0-m7
; [%9+0x10] doubles as a spill slot part-way through, before it receives its
; final row.
%macro TRANSPOSE6x8_MEM 9
    ; byte-level butterflies, loading rows as they are needed
    movq        m0, %1
    movq        m1, %2
    SBUTTERFLY  bw, m0, m1, m7
    movq        m2, %3
    movq        m3, %4
    SBUTTERFLY  bw, m2, m3, m1
    movq        m4, %5
    movq        m5, %6
    movq        m6, %7
    SBUTTERFLY  bw, m4, m5, m3
    movq        [%9+0x10], m1       ; spill: all eight mm registers are live
    SBUTTERFLY  bw, m6, %8, m5

    ; word-level butterflies
    SBUTTERFLY  wd, m0, m2, m1
    SBUTTERFLY  wd, m4, m6, m2
    punpckhdq   m0, m4
    movq        [%9+0x00], m0
    SBUTTERFLY  wd, m7, [%9+0x10], m6   ; second operand is the spilled value
    SBUTTERFLY  wd, m3, m5, m4

    ; dword-level butterflies
    SBUTTERFLY  dq, m7, m3, m0
    SBUTTERFLY  dq, m1, m2, m5
    punpckldq   m6, m4

    ; write the remaining five rows
    movq        [%9+0x10], m1
    movq        [%9+0x20], m5
    movq        [%9+0x30], m7
    movq        [%9+0x40], m0
    movq        [%9+0x50], m6
%endmacro
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
141 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; TRANSPOSE8x8_MEM in0, ..., in7, out0, ..., out7
; in:       8 rows of 8 bytes in %1..%8
; out:      8 rows of 8 bytes in %9..%16
; clobbers: m0-m7
; %9 and %11 are also used as temporary spill slots before they receive their
; final rows. Every input row is read before the first output is written.
%macro TRANSPOSE8x8_MEM 16
    ; byte-level butterflies, loading rows as they are needed
    movq        m0, %1
    movq        m1, %2
    SBUTTERFLY  bw, m0, m1, m7
    movq        m2, %3
    movq        m3, %4
    SBUTTERFLY  bw, m2, m3, m1
    movq        m4, %5
    movq        m5, %6
    SBUTTERFLY  bw, m4, m5, m3
    movq        m6, %7
    SBUTTERFLY  bw, m6, %8, m5

    ; word-level butterflies (m3 and m2 are spilled to %9 / %11 on the way)
    movq        %9, m3
    SBUTTERFLY  wd, m0, m2, m3
    SBUTTERFLY  wd, m4, m6, m2
    SBUTTERFLY  wd, m7, m1, m6
    movq        %11, m2
    movq        m2, %9
    SBUTTERFLY  wd, m2, m5, m1

    ; dword-level butterflies, first half of the outputs
    SBUTTERFLY  dq, m0, m4, m5
    SBUTTERFLY  dq, m7, m2, m4
    movq        %9,  m0
    movq        %10, m5
    movq        %13, m7
    movq        %14, m4

    ; dword-level butterflies, second half of the outputs
    SBUTTERFLY  dq, m3, %11, m0     ; second operand is the value spilled above
    SBUTTERFLY  dq, m6, m1, m5
    movq        %11, m3
    movq        %12, m0
    movq        %15, m6
    movq        %16, m5
%endmacro
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
176 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; DIFF_GT a, b, thresh, dst, tmp
; out:      %4 = |%1-%2| - %3 with unsigned saturation, per byte; i.e. %4 is
;           nonzero exactly where |%1-%2| > %3 (it is not a 0x00/0xFF mask)
; clobbers: %5
; %4 and %5 must be distinct from each other and from %1..%3.
%macro DIFF_GT 5
    mova        %4, %1
    psubusb     %4, %2              ; max(%1-%2, 0)
    mova        %5, %2
    psubusb     %5, %1              ; max(%2-%1, 0)
    por         %4, %5              ; one side is always 0, so this is |%1-%2|
    psubusb     %4, %3
%endmacro
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
187 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; DIFF_GT2 a, b, thresh, dst, tmp
; out:      %4 = per-byte mask: 0xFF where |%1-%2| <= %3, 0x00 where
;           |%1-%2| > %3 (note the polarity: the mask is set when the
;           difference does NOT exceed the threshold)
; clobbers: %5
; %4 and %5 must be distinct from each other and from %1..%3.
%macro DIFF_GT2 5
    mova        %4, %1
    psubusb     %4, %2              ; max(%1-%2, 0)
    psubusb     %4, %3              ; ... minus threshold, saturated
    mova        %5, %2
    psubusb     %5, %1              ; max(%2-%1, 0)
    psubusb     %5, %3              ; ... minus threshold, saturated
    pcmpeqb     %4, %5              ; equal only when both are 0, i.e. |%1-%2| <= %3
%endmacro
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
199 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; SPLATW reg
; Replicates word 0 of %1 into every word of %1.
; The instruction sequence is chosen by what m0 currently expands to:
; pshufw unless m0 is xmm0, otherwise pshuflw followed by punpcklqdq.
%macro SPLATW 1
%ifnidn m0, xmm0
    pshufw      %1, %1, 0
%else
    pshuflw     %1, %1, 0           ; fill the low four words
    punpcklqdq  %1, %1              ; copy the low qword into the high qword
%endif
%endmacro
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
208 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; LOAD_MASK alpha1, beta1 [, alpha_out]
; in:       m0=p1 m1=p0 m2=q0 m3=q1 %1=alpha-1 %2=beta-1
; out:      m5 = beta-1 in every byte
;           m7 = mask: 0xFF in each byte where |p0-q0| <= alpha-1 and
;                |p1-p0| <= beta-1 and |q1-q0| <= beta-1, else 0x00
;           %3 = alpha-1 in every byte (only when a third argument is given)
; clobbers: m4, m6 (m6 is zero on exit)
%macro LOAD_MASK 2-3
    movd        m4, %1
    SPLATW      m4
    packuswb    m4, m4              ; alpha-1 in every byte
    movd        m5, %2
    SPLATW      m5
    packuswb    m5, m5              ; beta-1 in every byte
%if %0 == 3
    mova        %3, m4
%endif
    ; m7 accumulates the saturated excess of each difference over its
    ; threshold; it stays zero only where all three tests pass.
    DIFF_GT     m1, m2, m4, m7, m6  ; |p0-q0| > alpha-1
    DIFF_GT     m0, m1, m5, m4, m6  ; |p1-p0| > beta-1
    por         m7, m4
    DIFF_GT     m3, m2, m5, m4, m6  ; |q1-q0| > beta-1
    por         m7, m4
    pxor        m6, m6
    pcmpeqb     m7, m6              ; excess == 0 -> 0xFF
%endmacro
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
230 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
231 ; in: m0=p1 m1=p0 m2=q0 m3=q1 m7=(tc&mask); every operation below works per unsigned byte |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
232 ; out: m1=p0' m2=q0' (left in registers; this macro performs no stores) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
233 ; clobbers: m0,3-6 (as written below, m3-m6 are overwritten; m0 is only read) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
234 %macro DEBLOCK_P0_Q0 0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
235 mova m5, m1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
236 pxor m5, m2 ; p0^q0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
237 pand m5, [pb_01 GLOBAL] ; (p0^q0)&1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; m4 = 0xFF in every byte; XOR with it is a bytewise NOT (255-x)
238 pcmpeqb m4, m4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; m3 = 255-q1
239 pxor m3, m4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
240 pavgb m3, m0 ; (p1 - q1 + 256)>>1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
241 pavgb m3, [pb_03 GLOBAL] ; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; m4 = 255-p0
242 pxor m4, m1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
243 pavgb m4, m2 ; (q0 - p0 + 256)>>1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; fold in the (p0^q0)&1 term computed in m5: m3 = avg(m3, m5), rounding up
244 pavgb m3, m5 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
245 paddusb m3, m4 ; d+128+33 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; NOTE(review): pb_a1 is defined outside this view; presumably 0xA1 (=128+33) in
; every byte, i.e. the bias named in the comment above -- confirm.
; Under that assumption the two saturating subtracts split d by sign:
; m6 = magnitude of a negative d, m3 = magnitude of a positive d (at most one is nonzero).
246 mova m6, [pb_a1 GLOBAL] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
247 psubusb m6, m3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
248 psubusb m3, [pb_a1 GLOBAL] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; limit both magnitudes to the per-byte bound supplied in m7 (tc&mask)
249 pminub m6, m7 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
250 pminub m3, m7 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; p0' = p0 - m6 + m3 and q0' = q0 - m3 + m6, each step with unsigned saturation
251 psubusb m1, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
252 psubusb m2, m3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
253 paddusb m1, m3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
254 paddusb m2, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
255 %endmacro |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
256 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
257 ; in: m1=p0 m2=q0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
258 ; %1=q1 %2=q2 %3=[q2] %4=[q1] %5=tc0 %6=tmp (callers in this file also pass p1/p2 and their addresses to filter the p side) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
259 ; out: [q1] = clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 ), bounds saturated to 0..255 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
260 ; clobbers: %2 (q2), %6 (tmp), %5 (tc0); %1 is only read |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
261 %macro LUMA_Q1 6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; %6 = avg(p0,q0) = (p0+q0+1)>>1 (pavgb rounds up)
262 mova %6, m1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
263 pavgb %6, m2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
264 pavgb %2, %6 ; avg(q2,avg(p0,q0)), rounded up |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; %3 is the memory copy of q2, needed because %2 was just overwritten
265 pxor %6, %3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
266 pand %6, [pb_01 GLOBAL] ; (q2^avg(p0,q0))&1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
267 psubusb %2, %6 ; (q2+((p0+q0+1)>>1))>>1 -- removing the low-bit term undoes pavgb's round-up |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; clip bounds with unsigned saturation: %6 = max(%1-tc0, 0), %5 = min(%1+tc0, 255)
268 mova %6, %1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
269 psubusb %6, %5 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
270 paddusb %5, %1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; clamp the average into [%6, %5]
271 pmaxub %2, %6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
272 pminub %2, %5 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; store the result to %4; the register copy in %1 keeps the unfiltered value
273 mova %4, %2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
274 %endmacro |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
275 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
276 %ifdef ARCH_X86_64 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
277 ;----------------------------------------------------------------------------- |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
278 ; void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
279 ;----------------------------------------------------------------------------- |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; Filters the edge between rows pix-stride (p0) and pix (q0).
; Reads rows pix-3*stride (p2) .. pix+2*stride (q2); writes rows p1, p0, q0, q1.
; Every row is accessed as one full vector register with mova/movdqa
; (INIT_XMM is in effect, so presumably 16 aligned bytes per row -- confirm).
; r0=pix r1=stride r2d=alpha r3d=beta r4=tc0 on entry; r4 is reused as pix-3*stride
; once the 4 tc0 bytes have been loaded into m8.
280 INIT_XMM |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
281 cglobal x264_deblock_v_luma_sse2, 5,5,10 |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
282 movd m8, [r4] ; tc0 (4 bytes; r4 is free for reuse after this load) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
283 lea r4, [r1*3] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
284 dec r2d ; alpha-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
285 neg r4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
286 dec r3d ; beta-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
287 add r4, r0 ; pix-3*stride |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
288 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
289 mova m0, [r4+r1] ; p1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
290 mova m1, [r4+2*r1] ; p0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
291 mova m2, [r0] ; q0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
292 mova m3, [r0+r1] ; q1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; LOAD_MASK leaves the per-byte filter-enable mask in m7 (consumed below)
293 LOAD_MASK r2d, r3d |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
294 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
295 punpcklbw m8, m8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
296 punpcklbw m8, m8 ; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; m9 = all ones, then 0xFF only where tc == 0xFF (a tc0 byte of -1),
; then (NOT m9) AND m7: the LOAD_MASK mask restricted to lanes whose tc0 is not -1
297 pcmpeqb m9, m9 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
298 pcmpeqb m9, m8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
299 pandn m9, m7 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; m8 = tc & mask
300 pand m8, m9 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
301 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
302 movdqa m3, [r4] ; p2 (overwrites q1 in m3; q1 is reloaded before the q side) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; NOTE(review): DIFF_GT2 is defined outside this view; from how m6 is used below
; it presumably yields 0x00/0xFF per byte in m6 -- confirm.
303 DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
304 pand m6, m9 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; m7 = (tc & mask) - m6: the limit later consumed by DEBLOCK_P0_Q0 in m7
; (subtracting a 0xFF byte adds 1 to that lane)
305 mova m7, m8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
306 psubb m7, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; m6 = tc gated by the p-side condition; passed to LUMA_Q1 as its clip range
307 pand m6, m8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; p side: writes the p1 row at [r4+r1]; m0 keeps the unfiltered p1
308 LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
309 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
310 movdqa m4, [r0+2*r1] ; q2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
311 DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
312 pand m6, m9 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; m8 = tc gated by the q-side condition (tc is not needed in m8 after this)
313 pand m8, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
314 psubb m7, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; reload q1: m3 was overwritten by the p2 load and then handed to DIFF_GT2
315 mova m3, [r0+r1] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; q side: writes the q1 row at [r0+r1]; m3 keeps the unfiltered q1
316 LUMA_Q1 m3, m4, [r0+2*r1], [r0+r1], m8, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
317 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; inputs at this point: m0=p1 m1=p0 m2=q0 m3=q1 (all unfiltered), m7=limit
318 DEBLOCK_P0_Q0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; store filtered p0 and q0
319 mova [r4+2*r1], m1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
320 mova [r0], m2 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
321 RET |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
322 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
323 ;----------------------------------------------------------------------------- |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
324 ; void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
325 ;----------------------------------------------------------------------------- |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; Filters a vertical luma edge by reusing the horizontal-edge filter:
;   1. transpose the 6 pixel columns around the edge, for 16 rows, into a stack buffer
;   2. call x264_deblock_v_luma_sse2 on that buffer (row pitch 0x10)
;   3. transpose the 4 middle buffer rows back into the picture
; Register roles: r10 = stride (sign-extended), r11 = 3*stride,
;                 r6 = pix-4 (rows 0..3 base), r5 = pix-4+3*stride.
; TRANSPOSE6x8_MEM, TRANSPOSE8x4_STORE and PASS8ROWS are defined outside this view.
326 INIT_MMX |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
327 cglobal x264_deblock_h_luma_sse2, 5,7 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
328 movsxd r10, r1d |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
329 lea r11, [r10+r10*2] |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
330 lea r6, [r0-4] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
331 lea r5, [r0-4+r11] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
; reserve the transpose buffer; on WIN64 it sits 0x30 above rsp, leaving the
; low part of the frame for the callee's argument area
332 %ifdef WIN64 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
333 sub rsp, 0x98 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
334 %define pix_tmp rsp+0x30 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
335 %else |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
336 sub rsp, 0x68 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
337 %define pix_tmp rsp |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
338 %endif |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
339 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
340 ; transpose 6x16 -> tmp space (rows 0..7 to pix_tmp, rows 8..15 to pix_tmp+8) |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
341 TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r10, r11), pix_tmp |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
; advance both row bases by 8 rows; they stay there until after the first store-back
342 lea r6, [r6+r10*8] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
343 lea r5, [r5+r10*8] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
344 TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r10, r11), pix_tmp+8 |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
345 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
346 ; vertical filter |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
347 ; alpha, beta, tc0 are still in r2d, r3d, r4 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
348 ; don't backup r6, r5, r10, r11 because x264_deblock_v_luma_sse2 doesn't use them |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; r0 = buffer row 3 (the q0 row for the callee), r1 = buffer row pitch
349 lea r0, [pix_tmp+0x30] |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
350 mov r1d, 0x10 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
; NOTE(review): on WIN64 tc0 is also written to [rsp+0x20], presumably the stack
; slot where the callee's prologue expects its 5th argument (just above the
; 32-byte shadow space) -- confirm against the cglobal implementation.
351 %ifdef WIN64 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
352 mov [rsp+0x20], r4 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
353 %endif |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
354 call x264_deblock_v_luma_sse2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
355 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
356 ; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter) |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
; pix-4 -> pix-2: first of the 4 picture columns that are written back
357 add r6, 2 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
358 add r5, 2 |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; buffer rows 1..4, upper 8 bytes (+8) = picture rows 8..15, where r6/r5 still point
359 movq m0, [pix_tmp+0x18] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
360 movq m1, [pix_tmp+0x28] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
361 movq m2, [pix_tmp+0x38] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
362 movq m3, [pix_tmp+0x48] |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
363 TRANSPOSE8x4_STORE PASS8ROWS(r6, r5, r10, r11) |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
364 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; step r6/r5 back by 8 rows (to picture rows 0..7); r10 is scaled in place
365 shl r10, 3 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
366 sub r6, r10 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
367 sub r5, r10 |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; restore the stride with an arithmetic shift: r10 is a sign-extended int, and a
; logical shift would clear the top bits of a negative stride, corrupting the
; row addressing of the store-back below
368 sar r10, 3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; buffer rows 1..4, lower 8 bytes = picture rows 0..7
369 movq m0, [pix_tmp+0x10] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
370 movq m1, [pix_tmp+0x20] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
371 movq m2, [pix_tmp+0x30] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
372 movq m3, [pix_tmp+0x40] |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
373 TRANSPOSE8x4_STORE PASS8ROWS(r6, r5, r10, r11) |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
374 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
; release the buffer; must mirror the sub rsp at the top of the function
375 %ifdef WIN64 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
376 add rsp, 0x98 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
377 %else |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
378 add rsp, 0x68 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
379 %endif |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
380 RET |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
381 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
382 %else |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
383 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
384 %macro DEBLOCK_LUMA 3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
385 ;----------------------------------------------------------------------------- |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
386 ; void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
387 ;----------------------------------------------------------------------------- |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
388 cglobal x264_deblock_%2_luma_%1, 5,5 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
389 lea r4, [r1*3] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
390 dec r2 ; alpha-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
391 neg r4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
392 dec r3 ; beta-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
393 add r4, r0 ; pix-3*stride |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
394 %assign pad 2*%3+12-(stack_offset&15) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
395 SUB esp, pad |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
396 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
397 mova m0, [r4+r1] ; p1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
398 mova m1, [r4+2*r1] ; p0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
399 mova m2, [r0] ; q0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
400 mova m3, [r0+r1] ; q1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
401 LOAD_MASK r2, r3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
402 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
403 mov r3, r4mp |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
404 movd m4, [r3] ; tc0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
405 punpcklbw m4, m4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
406 punpcklbw m4, m4 ; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
407 mova [esp+%3], m4 ; tc |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
408 pcmpeqb m3, m3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
409 pcmpgtb m4, m3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
410 pand m4, m7 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
411 mova [esp], m4 ; mask |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
412 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
413 mova m3, [r4] ; p2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
414 DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
415 pand m6, m4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
416 pand m4, [esp+%3] ; tc |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
417 mova m7, m4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
418 psubb m7, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
419 pand m6, m4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
420 LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
421 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
422 mova m4, [r0+2*r1] ; q2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
423 DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
424 mova m5, [esp] ; mask |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
425 pand m6, m5 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
426 mova m5, [esp+%3] ; tc |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
427 pand m5, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
428 psubb m7, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
429 mova m3, [r0+r1] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
430 LUMA_Q1 m3, m4, [r0+2*r1], [r0+r1], m5, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
431 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
432 DEBLOCK_P0_Q0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
433 mova [r4+2*r1], m1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
434 mova [r0], m2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
435 ADD esp, pad |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
436 RET |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
437 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
438 ;----------------------------------------------------------------------------- |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
439 ; void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
440 ;----------------------------------------------------------------------------- |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
441 INIT_MMX |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
442 cglobal x264_deblock_h_luma_%1, 0,5 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
443 mov r0, r0mp |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
444 mov r3, r1m |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
445 lea r4, [r3*3] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
446 sub r0, 4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
447 lea r1, [r0+r4] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
448 %assign pad 0x78-(stack_offset&15) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
449 SUB esp, pad |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
450 %define pix_tmp esp+12 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
451 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
452 ; transpose 6x16 -> tmp space |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
453 TRANSPOSE6x8_MEM PASS8ROWS(r0, r1, r3, r4), pix_tmp |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
454 lea r0, [r0+r3*8] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
455 lea r1, [r1+r3*8] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
456 TRANSPOSE6x8_MEM PASS8ROWS(r0, r1, r3, r4), pix_tmp+8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
457 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
458 ; vertical filter |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
459 lea r0, [pix_tmp+0x30] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
460 PUSH dword r4m |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
461 PUSH dword r3m |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
462 PUSH dword r2m |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
463 PUSH dword 16 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
464 PUSH dword r0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
465 call x264_deblock_%2_luma_%1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
466 %ifidn %2, v8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
467 add dword [esp ], 8 ; pix_tmp+0x38 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
468 add dword [esp+16], 2 ; tc0+2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
469 call x264_deblock_%2_luma_%1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
470 %endif |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
471 ADD esp, 20 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
472 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
473 ; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter) |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
474 mov r0, r0mp |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
475 sub r0, 2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
476 lea r1, [r0+r4] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
477 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
478 movq m0, [pix_tmp+0x10] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
479 movq m1, [pix_tmp+0x20] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
480 movq m2, [pix_tmp+0x30] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
481 movq m3, [pix_tmp+0x40] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
482 TRANSPOSE8x4_STORE PASS8ROWS(r0, r1, r3, r4) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
483 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
484 lea r0, [r0+r3*8] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
485 lea r1, [r1+r3*8] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
486 movq m0, [pix_tmp+0x18] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
487 movq m1, [pix_tmp+0x28] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
488 movq m2, [pix_tmp+0x38] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
489 movq m3, [pix_tmp+0x48] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
490 TRANSPOSE8x4_STORE PASS8ROWS(r0, r1, r3, r4) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
491 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
492 ADD esp, pad |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
493 RET |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
494 %endmacro ; DEBLOCK_LUMA |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
495 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; Expand the DEBLOCK_LUMA macro (its %endmacro is just above) with
; %1 = sse2, %2 = v, %3 = 16. Inside the macro the function names are built as
; x264_deblock_%2_luma_%1, so this instantiation refers to the "v" / "sse2"
; variants.
; NOTE(review): INIT_XMM is defined outside this file chunk; presumably it
; switches the m0..mN / mova aliases to 128-bit XMM forms -- confirm in x86inc.
496 INIT_XMM |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
497 DEBLOCK_LUMA sse2, v, 16 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
498 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
499 %endif ; ARCH |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
500 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
501 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
502 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
;-----------------------------------------------------------------------------
; LUMA_INTRA_P012 dst_p0, dst_p1, dst_p2, src_p3
;
; Computes the filtered p0, p1 and p2 values for one side of an edge and
; stores them to %1, %2 and %3. %4 is only read (p3).
;
; Everything else is NOT a parameter: p0, p1, p2, q0, q1, the temporaries
; t0..t5, the constants mpb_00 / mpb_01 and the masks mask0 / mask1p are names
; that must already be %define'd by the code expanding this macro.
;
; Per byte (formulas as given by the inline comments below):
;   p0'a = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4) / 8
;   p0'b = (2*p1 + p0 + q1 + 2) / 4
;   p1'  = (p2 + p1 + p0 + q0 + 2) / 4
;   p2'  = (2*p3 + 3*p2 + p1 + p0 + q0 + 4) / 8
;
; Selection (xor/and blend; assumes every mask byte is all-ones or all-zeros,
; and the p0 result is only meaningful where mask1p is a subset of mask0):
;   %1 = mask1p ? p0'a : (mask0 ? p0'b : p0)
;   %2 = mask1p ? p1'  : p1
;   %3 = mask1p ? p2'  : p2
;
; Clobbers t0, t1, t2, t3, t4 and t5.
;-----------------------------------------------------------------------------
503 %macro LUMA_INTRA_P012 4 ; p0..p3 in memory |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; --- p1': estimate via nested byte averages, kept in t0 -----------------------
504 mova t0, p2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
505 mova t1, p0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
506 pavgb t0, p1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
507 pavgb t1, q0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
508 pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; t5 keeps (p0+q0+1)/2 for reuse in the p0'a computation further down.
509 mova t5, t1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; t2 = t3 = t4 = p2+p1+p0+q0 as wrapping byte sums (paddb); only their low
; bits are used, to correct the rounding of the pavgb-based estimates.
510 mova t2, p2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
511 mova t3, p0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
512 paddb t2, p1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
513 paddb t3, q0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
514 paddb t2, t3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
515 mova t3, t2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
516 mova t4, t2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; NOTE(review): the shift + pavgb-with-zero appears to leave bit 0 of the exact
; (sum+2)/4 in bit 0 of each byte of t2; where that differs from bit 0 of the
; estimate in t0, 1 is subtracted from t0 -- confirm against the reference C.
517 psrlw t2, 1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
518 pavgb t2, mpb_00 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
519 pxor t2, t0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
520 pand t2, mpb_01 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
521 psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4; |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
522 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; --- p0'a: strong-filter value for p0, built in t1 ----------------------------
; t3 still holds the wrapped sum p2+p1+p0+q0 from above.
523 mova t1, p2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
524 mova t2, p2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
525 pavgb t1, q1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
526 psubb t2, q1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
527 paddb t3, t3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
528 psubb t3, t2 ; p2+2*p1+2*p0+2*q0+q1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; Subtracting the low bit of (p2-q1) turns the rounded-up pavgb of p2 and q1
; into the truncating (p2+q1)/2 used in the formula on the pavgb t1, t5 line.
529 pand t2, mpb_01 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
530 psubb t1, t2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
531 pavgb t1, p1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
532 pavgb t1, t5 ; (((p2+q1)/2 + p1+1)/2 + (p0+q0+1)/2 + 1)/2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; Same low-bit rounding correction as for p1', now for a divide by 8.
533 psrlw t3, 2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
534 pavgb t3, mpb_00 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
535 pxor t3, t1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
536 pand t3, mpb_01 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
537 psubb t1, t3 ; p0'a = (p2+2*p1+2*p0+2*q0+q1+4)/8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
538 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; --- p0'b: weak-filter value for p0, built in t2 ------------------------------
; t2 = truncating (p0+q1)/2 (pavgb minus the low bit of p0^q1), then averaged
; with p1.
539 mova t3, p0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
540 mova t2, p0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
541 pxor t3, q1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
542 pavgb t2, q1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
543 pand t3, mpb_01 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
544 psubb t2, t3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
545 pavgb t2, p1 ; p0'b = (2*p1+p0+q1+2)/4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
546 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; --- blend p0 / p0'b / p0'a without branches ----------------------------------
; t1 = ((p0'a ^ p0'b) & mask1p) ^ ((p0'b ^ p0) & mask0) ^ p0
547 pxor t1, t2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
548 pxor t2, p0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
549 pand t1, mask1p |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
550 pand t2, mask0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
551 pxor t1, t2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
552 pxor t1, p0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
553 mova %1, t1 ; store p0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
554 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; --- p2': built in t1 from p3 (%4), p2, p1' (t0) and the saved sum in t4 ------
555 mova t1, %4 ; p3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
556 mova t2, t1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
557 pavgb t1, p2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
558 paddb t2, p2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
559 pavgb t1, t0 ; ((p3+p2+1)/2 + (p2+p1+p0+q0+2)/4 + 1)/2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
560 paddb t2, t2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
561 paddb t2, t4 ; 2*p3+3*p2+p1+p0+q0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
562 psrlw t2, 2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
563 pavgb t2, mpb_00 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
564 pxor t2, t1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
565 pand t2, mpb_01 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
566 psubb t1, t2 ; p2' = (2*p3+3*p2+p1+p0+q0+4)/8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
567 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; --- blend: keep the original p1 / p2 wherever mask1p is clear ----------------
; t0 = ((p1' ^ p1) & mask1p) ^ p1,  t1 = ((p2' ^ p2) & mask1p) ^ p2
568 pxor t0, p1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
569 pxor t1, p2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
570 pand t0, mask1p |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
571 pand t1, mask1p |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
572 pxor t0, p1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
573 pxor t1, p2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
574 mova %2, t0 ; store p1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
575 mova %3, t1 ; store p2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
576 %endmacro |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
577 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
;-----------------------------------------------------------------------------
; LUMA_INTRA_SWAP_PQ  (no parameters)
;
; Re-%defines the pixel-row names so that the "p" names refer to what was the
; "q" side and vice versa: q1/q0/p0/p1 become m0/m1/m2/m3, p2 becomes an alias
; for q2, and mask1p becomes an alias for mask1q. Expanding LUMA_INTRA_P012
; after this macro therefore applies the same filter code to the other side of
; the edge. No instructions are emitted and no register contents move; only
; the names change.
;-----------------------------------------------------------------------------
578 %macro LUMA_INTRA_SWAP_PQ 0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
579 %define q1 m0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
580 %define q0 m1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
581 %define p0 m2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
582 %define p1 m3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; p2 now names the q2 row; q2 itself stays defined as before.
583 %define p2 q2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; The q-side strong-filter mask takes over the role of mask1p.
584 %define mask1p mask1q |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
585 %endmacro |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
586 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
587 %macro DEBLOCK_LUMA_INTRA 2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
588 %define p1 m0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
589 %define p0 m1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
590 %define q0 m2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
591 %define q1 m3 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
592 %define t0 m4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
593 %define t1 m5 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
594 %define t2 m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
595 %define t3 m7 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
596 %ifdef ARCH_X86_64 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
597 %define p2 m8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
598 %define q2 m9 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
599 %define t4 m10 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
600 %define t5 m11 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
601 %define mask0 m12 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
602 %define mask1p m13 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
603 %define mask1q [rsp-24] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
604 %define mpb_00 m14 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
605 %define mpb_01 m15 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
606 %else |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
607 %define spill(x) [esp+16*x+((stack_offset+4)&15)] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
608 %define p2 [r4+r1] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
609 %define q2 [r0+2*r1] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
610 %define t4 spill(0) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
611 %define t5 spill(1) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
612 %define mask0 spill(2) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
613 %define mask1p spill(3) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
614 %define mask1q spill(4) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
615 %define mpb_00 [pb_00 GLOBAL] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
616 %define mpb_01 [pb_01 GLOBAL] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
617 %endif |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
618 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
619 ;----------------------------------------------------------------------------- |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
620 ; void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta ) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
621 ;----------------------------------------------------------------------------- |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
622 cglobal x264_deblock_%2_luma_intra_%1, 4,6,16 |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
623 %ifndef ARCH_X86_64 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
624 sub esp, 0x60 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
625 %endif |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
626 lea r4, [r1*4] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
627 lea r5, [r1*3] ; 3*stride |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
628 dec r2d ; alpha-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
629 jl .end |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
630 neg r4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
631 dec r3d ; beta-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
632 jl .end |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
633 add r4, r0 ; pix-4*stride |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
634 mova p1, [r4+2*r1] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
635 mova p0, [r4+r5] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
636 mova q0, [r0] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
637 mova q1, [r0+r1] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
638 %ifdef ARCH_X86_64 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
639 pxor mpb_00, mpb_00 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
640 mova mpb_01, [pb_01 GLOBAL] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
641 LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
642 SWAP 7, 12 ; m12=mask0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
643 pavgb t5, mpb_00 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
644 pavgb t5, mpb_01 ; alpha/4+1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
645 movdqa p2, [r4+r1] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
646 movdqa q2, [r0+2*r1] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
647 DIFF_GT2 p0, q0, t5, t0, t3 ; t0 = |p0-q0| > alpha/4+1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
648 DIFF_GT2 p0, p2, m5, t2, t5 ; mask1 = |p2-p0| > beta-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
649 DIFF_GT2 q0, q2, m5, t4, t5 ; t4 = |q2-q0| > beta-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
650 pand t0, mask0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
651 pand t4, t0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
652 pand t2, t0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
653 mova mask1q, t4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
654 mova mask1p, t2 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
655 %else |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
656 LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
657 mova m4, t5 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
658 mova mask0, m7 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
659 pavgb m4, [pb_00 GLOBAL] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
660 pavgb m4, [pb_01 GLOBAL] ; alpha/4+1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
661 DIFF_GT2 p0, q0, m4, m6, m7 ; m6 = |p0-q0| > alpha/4+1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
662 pand m6, mask0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
663 DIFF_GT2 p0, p2, m5, m4, m7 ; m4 = |p2-p0| > beta-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
664 pand m4, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
665 mova mask1p, m4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
666 DIFF_GT2 q0, q2, m5, m4, m7 ; m4 = |q2-q0| > beta-1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
667 pand m4, m6 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
668 mova mask1q, m4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
669 %endif |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
670 LUMA_INTRA_P012 [r4+r5], [r4+2*r1], [r4+r1], [r4] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
671 LUMA_INTRA_SWAP_PQ |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
672 LUMA_INTRA_P012 [r0], [r0+r1], [r0+2*r1], [r0+r5] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
673 .end: |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
674 %ifndef ARCH_X86_64 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
675 add esp, 0x60 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
676 %endif |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
677 RET |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
678 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
679 INIT_MMX |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
680 %ifdef ARCH_X86_64 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
681 ;----------------------------------------------------------------------------- |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
682 ; void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta ) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
683 ;----------------------------------------------------------------------------- |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
684 cglobal x264_deblock_h_luma_intra_%1, 4,7 |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
685 movsxd r10, r1d |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
686 lea r11, [r10*3] |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
687 lea r6, [r0-4] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
688 lea r5, [r0-4+r11] |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
689 sub rsp, 0x88 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
690 %define pix_tmp rsp |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
691 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
692 ; transpose 8x16 -> tmp space |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
693 TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r10, r11), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30) |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
694 lea r6, [r6+r10*8] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
695 lea r5, [r5+r10*8] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
696 TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r10, r11), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30) |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
697 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
698 lea r0, [pix_tmp+0x40] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
699 mov r1, 0x10 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
700 call x264_deblock_v_luma_intra_%1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
701 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
702 ; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8) |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
703 lea r5, [r6+r11] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
704 TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r6, r5, r10, r11) |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
705 shl r10, 3 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
706 sub r6, r10 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
707 sub r5, r10 |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
708 shr r10, 3 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
709 TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30), PASS8ROWS(r6, r5, r10, r11) |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
710 add rsp, 0x88 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
711 RET |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
712 %else |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; ---------------------------------------------------------------------------
; 32-bit build of the horizontal luma intra deblock entry point.
; Visible flow:
;   1. Reserve `pad` bytes on the stack and use them as pix_tmp.
;   2. Transpose two groups of 8 rows around the edge (starting at r0-4)
;      into pix_tmp, using a 0x10-byte step in the temp buffer.
;   3. Call the vertical-style filter named by the macro's second argument
;      on pix_tmp+0x40 with the value 16 pushed next to it (16 == the 0x10
;      temp-buffer step). When the second argument is v8 the filter is
;      called a second time with the pushed pointer advanced by 8.
;   4. Reload the original pointer/stride and transpose the temp buffer
;      back over the original pixels.
; NOTE(review): the meaning of "2,4" on cglobal, and of r0/r1 as the first
; two arguments (looks like pix and stride), comes from macros defined
; outside this chunk -- confirm against the cglobal definition.
; ---------------------------------------------------------------------------
713 cglobal x264_deblock_h_luma_intra_%1, 2,4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; r3 = 3*r1, r0 -= 4, r2 = r0 + r3: the four values handed to PASS8ROWS below.
714 lea r3, [r1*3] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
715 sub r0, 4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
716 lea r2, [r0+r3] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; pad = 0x8c minus the low 4 bits of stack_offset.
; NOTE(review): stack_offset is maintained outside this chunk; presumably this
; keeps pix_tmp 16-byte aligned -- confirm.
717 %assign pad 0x8c-(stack_offset&15) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
718 SUB rsp, pad |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
719 %define pix_tmp rsp |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
720 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
721 ; transpose 8x16 -> tmp space |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; First group of 8 source rows -> pix_tmp.
722 TRANSPOSE8x8_MEM PASS8ROWS(r0, r2, r1, r3), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; Advance both row pointers by 8*r1 for the second group of 8 rows.
723 lea r0, [r0+r1*8] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
724 lea r2, [r2+r1*8] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; Second group -> pix_tmp+8.
725 TRANSPOSE8x8_MEM PASS8ROWS(r0, r2, r1, r3), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
726 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; r0 = pix_tmp + 0x40, i.e. 4 steps of 0x10 into the temp buffer.
727 lea r0, [pix_tmp+0x40] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; Push four dwords for the callee, last-pushed first on the stack:
; r0 (temp pointer), 16, r2m, r3m.
; NOTE(review): r2m/r3m look like this function's own 3rd/4th stack arguments
; being forwarded unchanged -- confirm against the r*m definitions.
728 PUSH dword r3m |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
729 PUSH dword r2m |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
730 PUSH dword 16 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
731 PUSH r0 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
732 call x264_deblock_%2_luma_intra_%1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; v8 variant only: bump the pushed pointer (still at [rsp]) by 8 and run the
; filter a second time on the other half of the temp buffer.
733 %ifidn %2, v8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
734 add dword [rsp], 8 ; pix_tmp+8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
735 call x264_deblock_%2_luma_intra_%1 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
736 %endif |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; Drop the four dwords pushed above (4 * 4 = 16 bytes).
737 ADD esp, 16 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
738 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; r0/r1 were modified above (and the call sits in between), so reload them
; from r1m / r0mp and rebuild r3, r0-4 and r2 exactly as in the prologue.
739 mov r1, r1m |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
9006
diff
changeset
|
740 mov r0, r0mp |
9006
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
741 lea r3, [r1*3] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
742 sub r0, 4 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
743 lea r2, [r0+r3] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
744 ; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; pix_tmp -> first group of 8 destination rows.
745 TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30), PASS8ROWS(r0, r2, r1, r3) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
746 lea r0, [r0+r1*8] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
747 lea r2, [r2+r1*8] |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; pix_tmp+8 -> second group of 8 destination rows.
748 TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r0, r2, r1, r3) |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; Release the pix_tmp area reserved by "SUB rsp, pad".
749 ADD rsp, pad |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
750 RET |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
751 %endif ; ARCH_X86_64 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
752 %endmacro ; DEBLOCK_LUMA_INTRA |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
753 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; Instantiate the luma intra deblock functions.
; The first macro argument becomes the suffix of the generated function names;
; the second selects which x264_deblock_<arg>_luma_intra_<suffix> routine the
; horizontal wrapper calls.
; NOTE(review): INIT_XMM / INIT_MMX are defined outside this chunk; presumably
; they select the XMM vs. MMX register set for the following macro expansion
; -- confirm.
754 INIT_XMM |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
755 DEBLOCK_LUMA_INTRA sse2, v |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
; The mmxext/v8 variant is only assembled when ARCH_X86_64 is not defined.
; With v8 the 32-bit horizontal wrapper calls the vertical filter twice, the
; second time with its pointer advanced by 8.
756 %ifndef ARCH_X86_64 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
757 INIT_MMX |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
758 DEBLOCK_LUMA_INTRA mmxext, v8 |
37ac731fe32c
Convert x264 asm files to proper unix line breaks
darkshikari
parents:
9005
diff
changeset
|
759 %endif |