Mercurial > libavcodec.hg
annotate x86/dsputil_yasm.asm @ 10534:4cd2ef16fb63 libavcodec
Cosmetics: Fix indentation.
author | cehoyos |
---|---|
date | Mon, 16 Nov 2009 01:56:47 +0000 |
parents | 276b3a342389 |
children | 66242b8fbd32 |
rev | line source |
---|---|
8430 | 1 ;****************************************************************************** |
2 ;* MMX optimized DSP utils | |
3 ;* Copyright (c) 2008 Loren Merritt | |
4 ;* | |
5 ;* This file is part of FFmpeg. | |
6 ;* | |
7 ;* FFmpeg is free software; you can redistribute it and/or | |
8 ;* modify it under the terms of the GNU Lesser General Public | |
9 ;* License as published by the Free Software Foundation; either | |
10 ;* version 2.1 of the License, or (at your option) any later version. | |
11 ;* | |
12 ;* FFmpeg is distributed in the hope that it will be useful, | |
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 ;* Lesser General Public License for more details. | |
16 ;* | |
17 ;* You should have received a copy of the GNU Lesser General Public | |
18 ;* License along with FFmpeg; if not, write to the Free Software | |
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 ;****************************************************************************** | |
21 | |
22 %include "x86inc.asm" | |
23 | |
10430 | 24 SECTION_RODATA |
; Byte tables loaded into m3..m6 below and used as pshufb controls by ADD_HFYU_LEFT_LOOP. | |
; In pshufb a control byte with its top bit set (the -1 entries, "z" in the label names) | |
; zeroes the destination byte; any other value selects that source byte index. | |
; pb_f: every lane selects byte 15 (broadcast of the last byte of a 16-byte vector). | |
25 pb_f: times 16 db 15 | |
; Only 8 bytes are defined here; the 16-byte load of this label in the sse4 function | |
; also reads the 8 bytes of pb_7 that follow, so these two lines must stay adjacent. | |
26 pb_zzzzzzzz77777777: times 8 db -1 | |
; pb_7: every lane selects byte 7 (broadcast of the last byte of an 8-byte vector). | |
27 pb_7: times 8 db 7 | |
; Bytes 4..7 take byte 3 and bytes 12..15 take byte 11; the other lanes become zero. | |
28 pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 | |
; Bytes 2..3 take byte 1, 6..7 take byte 5, 10..11 take byte 9, 14..15 take byte 13. | |
29 pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13 | |
30 | |
8430 | 31 section .text align=16 |
32 | |
; PSWAPD_SSE dst, src: dst = src with its two 32-bit halves exchanged. | |
; Shuffle immediate 0x4e picks source words 2,3,0,1 for destination words 0,1,2,3. | |
33 %macro PSWAPD_SSE 2 | |
34 pshufw %1, %2, 0x4e | |
35 %endmacro | |
; PSWAPD_3DN1 dst, src: same dword swap as PSWAPD_SSE, built from three MMX instructions. | |
; %2 is read again after %1 has been written, so %1 and %2 must be different operands. | |
36 %macro PSWAPD_3DN1 2 | |
; %1 = copy of src | |
37 movq %1, %2 | |
; low dword of %1 = high dword of src, high dword of %1 = 0 | |
38 psrlq %1, 32 | |
; %1 = { high dword of src, low dword of src } | |
39 punpckldq %1, %2 | |
40 %endmacro | |
41 | |
; FLOAT_TO_INT16_INTERLEAVE6 suffix: emits float_to_int16_interleave6_<suffix>. | |
; The body uses the names cvtps2pi and pswapd, which the instantiation site may %define | |
; to other instructions/macros before expanding this macro. | |
42 %macro FLOAT_TO_INT16_INTERLEAVE6 1 | |
; Converts two floats from each of six source channels per iteration to saturated int16 | |
; and stores them interleaved: a0 b0 c0 d0 e0 f0 a1 b1 c1 d1 e1 f1 (24 bytes per pass). | |
43 ; void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len) | |
; NOTE(review): "2,7,0" is presumably x86inc's (args loaded, GPRs used, xmm regs) - confirm in x86inc.asm. | |
44 cglobal float_to_int16_interleave6_%1, 2,7,0, dst, src, src1, src2, src3, src4, src5 | |
; All seven named registers are taken by pointers, so len is kept elsewhere: | |
; in r10d on x86-64 (copied from r2d), or accessed through the r2m operand otherwise. | |
45 %ifdef ARCH_X86_64 | |
46 %define lend r10d | |
47 mov lend, r2d | |
48 %else | |
49 %define lend dword r2m | |
50 %endif | |
; Load channel pointers src[1]..src[5], then src[0] (overwriting the pointer-array register). | |
51 mov src1q, [srcq+1*gprsize] | |
52 mov src2q, [srcq+2*gprsize] | |
53 mov src3q, [srcq+3*gprsize] | |
54 mov src4q, [srcq+4*gprsize] | |
55 mov src5q, [srcq+5*gprsize] | |
56 mov srcq, [srcq] | |
; Turn src1..src5 into offsets from src[0] so that advancing srcq alone advances all channels. | |
57 sub src1q, srcq | |
58 sub src2q, srcq | |
59 sub src3q, srcq | |
60 sub src4q, srcq | |
61 sub src5q, srcq | |
62 .loop: | |
; mm0..mm5 = two int32 samples from channels a..f respectively | |
63 cvtps2pi mm0, [srcq] | |
64 cvtps2pi mm1, [srcq+src1q] | |
65 cvtps2pi mm2, [srcq+src2q] | |
66 cvtps2pi mm3, [srcq+src3q] | |
67 cvtps2pi mm4, [srcq+src4q] | |
68 cvtps2pi mm5, [srcq+src5q] | |
; Pack to int16 with signed saturation: mm0 = a0 a1 d0 d1, mm1 = b0 b1 e0 e1, mm2 = c0 c1 f0 f1 | |
69 packssdw mm0, mm3 | |
70 packssdw mm1, mm4 | |
71 packssdw mm2, mm5 | |
; mm3 = d0 d1 a0 a1 | |
72 pswapd mm3, mm0 | |
; mm0 = a0 b0 a1 b1 | |
73 punpcklwd mm0, mm1 | |
; mm1 = e0 f0 e1 f1 | |
74 punpckhwd mm1, mm2 | |
; mm2 = c0 d0 c1 d1 | |
75 punpcklwd mm2, mm3 | |
; mm3 = a1 b1 a0 b0 | |
76 pswapd mm3, mm0 | |
; mm0 = a0 b0 c0 d0 | |
77 punpckldq mm0, mm2 | |
; mm2 = c1 d1 e1 f1 | |
78 punpckhdq mm2, mm1 | |
; mm1 = e0 f0 a1 b1 | |
79 punpckldq mm1, mm3 | |
80 movq [dstq ], mm0 | |
81 movq [dstq+16], mm2 | |
82 movq [dstq+ 8], mm1 | |
; Advance by 2 floats per channel (8 bytes) and 12 int16 outputs (24 bytes). | |
83 add srcq, 8 | |
84 add dstq, 24 | |
; The body always runs at least once and consumes 2 samples per pass; | |
; NOTE(review): len is presumably a positive multiple of 2 - confirm with callers. | |
85 sub lend, 2 | |
86 jg .loop | |
; Leave MMX state before returning. | |
87 emms | |
88 RET | |
89 %endmacro ; FLOAT_TO_INT16_INTERLEAVE6 | |
90 | |
; Three expansions of FLOAT_TO_INT16_INTERLEAVE6; the %defines choose what the macro's | |
; cvtps2pi and pswapd names assemble to for each variant. | |
; sse: cvtps2pi is the real instruction, pswapd is the pshufw-based macro. | |
91 %define pswapd PSWAPD_SSE | |
92 FLOAT_TO_INT16_INTERLEAVE6 sse | |
; 3dnow: conversion becomes pf2id, pswapd becomes the three-instruction MMX emulation. | |
93 %define cvtps2pi pf2id | |
94 %define pswapd PSWAPD_3DN1 | |
95 FLOAT_TO_INT16_INTERLEAVE6 3dnow | |
; 3dn2: with the %define removed, pswapd is assembled as a plain instruction mnemonic; | |
; cvtps2pi is still mapped to pf2id. | |
96 %undef pswapd | |
97 FLOAT_TO_INT16_INTERLEAVE6 3dn2 | |
; Restore cvtps2pi so later code sees the real instruction name again. | |
98 %undef cvtps2pi | |
99 | |
8760 | 100 |
101 | |
10431 | 102 ; void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top) |
; For each byte: dst = median(l, t, l + t - tl) + diff, where t is the byte above, tl the byte | |
; above-left and l the previous output byte. Works in 8-byte groups; on exit *left and | |
; *left_top receive the last dst byte and the last top byte. | |
; NOTE(review): loads and stores are whole 8-byte groups, so w is presumably a multiple of 8 | |
; or the buffers are padded - confirm with callers. | |
8760 | 103 cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_top |
; First group: mm1 = t, mm4 = tl = (t << 8) | *left_top, mm3 = l, mm0 = t - tl. | |
; The 32-bit *left_top is OR'ed in, so this assumes it holds a value in 0..255 (as the | |
; movzx/store pair at the end of this function writes) - TODO confirm for the first call. | |
104 movq mm0, [topq] | |
105 movq mm2, mm0 | |
106 movd mm4, [left_topq] | |
107 psllq mm2, 8 | |
108 movq mm1, mm0 | |
109 por mm4, mm2 | |
110 movd mm3, [leftq] | |
111 psubb mm0, mm4 ; mm0 = t - tl (per byte) | |
; Point all three buffers at their ends and index with a negative w that counts up to 0. | |
112 add dstq, wq | |
113 add topq, wq | |
114 add diffq, wq | |
115 neg wq | |
; The first group already has t, tl and t-tl prepared above, so skip the reload. | |
116 jmp .skip | |
117 .loop: | |
; mm1 was shifted right 7 times in the previous group, so it now holds only the previous | |
; group's last top byte in byte 0; OR it into (t << 8) to form this group's tl. | |
118 movq mm4, [topq+wq] | |
119 movq mm0, mm4 | |
120 psllq mm4, 8 | |
121 por mm4, mm1 | |
122 movq mm1, mm0 ; mm1 = t for this group | |
123 psubb mm0, mm4 ; mm0 = t - tl (per byte) | |
124 .skip: | |
125 movq mm2, [diffq+wq] | |
; Eight unrolled steps, one output byte each; each step needs the previous result (in mm3) as l, | |
; so only byte 0 of mm0..mm5 is meaningful within a step. | |
126 %assign i 0 | |
127 %rep 8 | |
128 movq mm4, mm0 | |
129 paddb mm4, mm3 ; mm4 = t - tl + l | |
130 movq mm5, mm3 | |
; median(l, t, g) = max(min(max(l, t), g), min(l, t)) with g = t - tl + l | |
131 pmaxub mm3, mm1 | |
132 pminub mm5, mm1 | |
133 pminub mm3, mm4 | |
134 pmaxub mm3, mm5 ; mm3 = median | |
135 paddb mm3, mm2 ; + residual: byte 0 of mm3 is the output and the next l | |
; Collect outputs in mm7: each new byte enters at the top while earlier ones shift down, | |
; so after 8 steps the bytes are in memory order. | |
136 %if i==0 | |
137 movq mm7, mm3 | |
138 psllq mm7, 56 | |
139 %else | |
140 movq mm6, mm3 | |
141 psrlq mm7, 8 | |
142 psllq mm6, 56 | |
143 por mm7, mm6 | |
144 %endif | |
; Bring the next input byte of t-tl, t and diff down to byte 0 (not needed after the last step). | |
145 %if i<7 | |
146 psrlq mm0, 8 | |
147 psrlq mm1, 8 | |
148 psrlq mm2, 8 | |
149 %endif | |
150 %assign i i+1 | |
151 %endrep | |
152 movq [dstq+wq], mm7 | |
153 add wq, 8 | |
154 jl .loop | |
; dstq/topq point one past the end of the row: store the final output byte and final top byte, | |
; zero-extended to 32 bits, for the caller's next row. r2d is used as scratch here. | |
155 movzx r2d, byte [dstq-1] | |
156 mov [leftq], r2d | |
157 movzx r2d, byte [topq-1] | |
158 mov [left_topq], r2d | |
159 RET | |
10430 | 160 |
161 | |
; Loop body and epilogue shared by the add_hfyu_left_prediction functions below: | |
; dst[i] = src[i] + dst[i-1] (byte-wise running sum), mmsize bytes per iteration. | |
; Expects on entry: m0 = running "left" value in its top byte, m3/m4 (and m6 when | |
; mmsize == 16) = the pshufb tables loaded by the callers, m5 = broadcast-last-byte table. | |
; Ends with RET, so nothing placed after an expansion is reached by fall-through. | |
162 %macro ADD_HFYU_LEFT_LOOP 1 ; %1 = is_aligned (non-zero: store with mova; zero: store as two 8-byte halves) | |
; Index from the end of both buffers with a negative counter that runs up to 0. | |
163 add srcq, wq | |
164 add dstq, wq | |
165 neg wq | |
166 %%.loop: | |
167 mova m1, [srcq+wq] | |
; Prefix sum inside the vector by doubling: first add each even byte into the odd byte above it. | |
168 mova m2, m1 | |
169 psllw m1, 8 | |
170 paddb m1, m2 | |
; Add the sum of each byte pair into the following pair (running sums over groups of 4). | |
171 mova m2, m1 | |
172 pshufb m1, m3 | |
173 paddb m1, m2 | |
; Broadcast the last byte of the previous output (or the initial left) to every lane of m0. | |
174 pshufb m0, m5 | |
; Add byte 3 into bytes 4..7 and byte 11 into bytes 12..15 (running sums over groups of 8). | |
175 mova m2, m1 | |
176 pshufb m1, m4 | |
177 paddb m1, m2 | |
; 16-byte vectors need one more step: add byte 7 into bytes 8..15. | |
178 %if mmsize == 16 | |
179 mova m2, m1 | |
180 pshufb m1, m6 | |
181 paddb m1, m2 | |
182 %endif | |
; Output = broadcast left + in-vector prefix sums. | |
183 paddb m0, m1 | |
184 %if %1 | |
185 mova [dstq+wq], m0 | |
186 %else | |
187 movq [dstq+wq], m0 | |
188 movhps [dstq+wq+8], m0 | |
189 %endif | |
190 add wq, mmsize | |
191 jl %%.loop | |
; Here wq >= 0 is how far the last vector ran past the end, so byte (mmsize-1 - w) of m0 is | |
; the last valid output. Use that index as a pshufb control to move it into byte 0. | |
192 mov eax, mmsize-1 | |
193 sub eax, wd | |
194 movd m1, eax | |
195 pshufb m0, m1 | |
; NOTE(review): control bytes 1..3 are zero, so bytes 1..3 of eax are copies of m0's byte 0; | |
; only the low byte of the return value is the last output byte - confirm callers use just that. | |
196 movd eax, m0 | |
197 RET | |
198 %endmacro | |
199 | |
10431 | 200 ; int ff_add_hfyu_left_prediction(uint8_t *dst, const uint8_t *src, int w, int left) |
; 8-byte (MMX register) variant; the running-sum loop and return value come from ADD_HFYU_LEFT_LOOP. | |
10430 | 201 INIT_MMX |
202 cglobal add_hfyu_left_prediction_ssse3, 3,3,7, dst, src, w, left | |
; Secondary entry point: the sse4 version jumps here when src is not 16-byte aligned. | |
; Everything this path needs (m3, m4, m5, m0) is loaded after this label. | |
203 .skip_prologue: | |
204 mova m5, [pb_7 GLOBAL] | |
205 mova m4, [pb_zzzz3333zzzzbbbb GLOBAL] | |
206 mova m3, [pb_zz11zz55zz99zzdd GLOBAL] | |
; Place the low byte of left in byte 7 of m0, where the loop's "pshufb m0, m5" (pb_7) picks it up. | |
207 movd m0, leftm | |
208 psllq m0, 56 | |
209 ADD_HFYU_LEFT_LOOP 1 | |
210 | |
; 16-byte (XMM register) variant of add_hfyu_left_prediction; same arguments as the ssse3 version. | |
; Falls back to the 8-byte ssse3 code when src is not 16-byte aligned, and uses split 8-byte | |
; stores when only dst is unaligned. | |
211 INIT_XMM | |
212 cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left | |
213 mova m5, [pb_f GLOBAL] | |
; This 16-byte load covers the 8 bytes of -1 at the label plus the 8 bytes of pb_7 that follow it. | |
214 mova m6, [pb_zzzzzzzz77777777 GLOBAL] | |
215 mova m4, [pb_zzzz3333zzzzbbbb GLOBAL] | |
216 mova m3, [pb_zz11zz55zz99zzdd GLOBAL] | |
; Place the low byte of left in byte 15 of m0, where the loop's "pshufb m0, m5" (pb_f) picks it up. | |
217 movd m0, leftm | |
218 pslldq m0, 15 | |
; The loop loads src with mova, so an unaligned src is handed to the 8-byte implementation, | |
; which reloads its own constants and left value after .skip_prologue. | |
219 test srcq, 15 | |
10434
276b3a342389
fix linking on systems with a function name prefix (10l in r20287)
lorenm
parents:
10431
diff
changeset
|
220 jnz add_hfyu_left_prediction_ssse3.skip_prologue |
10430 | 221 test dstq, 15 |
222 jnz .unaligned | |
; Both pointers aligned: full-width aligned stores. The expansion ends in RET. | |
223 ADD_HFYU_LEFT_LOOP 1 | |
; src aligned, dst not: same loop, but each result is stored as two 8-byte halves. | |
224 .unaligned: | |
225 ADD_HFYU_LEFT_LOOP 0 | |
226 |