Mercurial > libavcodec.hg
annotate i386/motion_est_mmx.c @ 1708:dea5b2946999 libavcodec
interlaced motion estimation
interlaced mpeg2 encoding
P & B frames
rate distorted interlaced mb decision
alternate scantable support
4mv encoding fixes (that's also why the regression tests change)
passing height to most dsp functions
interlaced mpeg4 encoding (no direct mode MBs yet)
various related cleanups
disabled old motion estimation algorithms (log, full, ...); they will either be fixed or removed
author | michael |
---|---|
date | Tue, 30 Dec 2003 16:07:57 +0000 |
parents | 1f8d1e1173d8 |
children | 07a484280a82 |
rev | line source |
---|---|
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
1 /* |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
2 * MMX optimized motion estimation |
429 | 3 * Copyright (c) 2001 Fabrice Bellard. |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
4 * |
429 | 5 * This library is free software; you can redistribute it and/or |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
9 * |
429 | 10 * This library is distributed in the hope that it will be useful, |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 * Lesser General Public License for more details. | |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
14 * |
429 | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
18 * |
294 | 19 * mostly by Michael Niedermayer <michaelni@gmx.at> |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
20 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
21 #include "../dsputil.h" |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
22 |
/*
 * Rounding biases for the averaging SAD paths.  Entry k holds the value k in
 * each of the four 16-bit lanes of a 64-bit MMX register.  The sadN_x2_ and
 * sadN_y2_ wrappers load entry 1 into mm5 and the sadN_xy2_ wrappers load
 * entry 2; the plain-MMX helpers add mm5 to the widened pixel sums before
 * shifting right by 1 or 2.  Entry 0 is not referenced by the code in this
 * file section.
 */
static const uint64_t round_tab[3] __attribute__ ((aligned(8))) = {
    0x0000000000000000ULL,  /* [0]: bias 0 in every word */
    0x0001000100010001ULL,  /* [1]: bias 1 in every word (two-sample average) */
    0x0002000200020002ULL,  /* [2]: bias 2 in every word (four-sample average) */
};
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
28 |
/*
 * The byte value 1 replicated into all eight bytes of a 64-bit MMX operand.
 * sad8_4_mmx2() loads it into mm5 and subtracts it (with saturation) from one
 * of its intermediate pavgb results.
 *
 * The only reference to this object is by symbol name from inline assembly
 * (MANGLE(bone)), which the compiler cannot see.  `used` forces the object to
 * be emitted; `unused` merely silenced the warning and allowed an optimizing
 * compiler to discard the variable, leaving the asm with an undefined symbol.
 * The object is only ever read, so it is const, and the literal carries an
 * unsigned suffix to match its type.
 */
static const __attribute__ ((aligned(8), used)) uint64_t bone= 0x0101010101010101ULL;
330 | 30 |
/*
 * Add the sum of absolute differences (SAD) of two 8-pixel-wide blocks to the
 * four 16-bit lanes of mm6, using only baseline MMX instructions.
 *
 * blk1, blk2  first byte of each block; both are walked with the same stride
 * stride      byte distance between consecutive rows
 * h           number of rows; the loop advances two rows per iteration and
 *             only tests the sign of the counter afterwards, so h is expected
 *             to be even
 *
 * Register contract (not expressed in the asm constraints):
 *   in     mm7 = 0 (zero operand for the byte->word unpacks), mm6 = running sum
 *   out    mm6 += per-lane partial sums; reduce with sum_mmx()
 *   trash  mm0-mm5
 *
 * eax (len) runs from -(stride*h) up to 0 and the pointer operands are
 * pre-biased by -len, so (ptr, eax) addresses the first row on entry and the
 * loop ends when the counter is no longer negative.
 */
1708 | 31 static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
32 { |
1708 | 33 int len= -(stride*h); |
294 | 34 asm volatile( |
35 ".balign 16 \n\t" | |
36 "1: \n\t" | |
37 "movq (%1, %%eax), %%mm0 \n\t" | |
38 "movq (%2, %%eax), %%mm2 \n\t" | |
39 "movq (%2, %%eax), %%mm4 \n\t" | |
40 "addl %3, %%eax \n\t" | |
/* |a-b| per byte: OR of the two saturating differences (one of them is 0) */
41 "psubusb %%mm0, %%mm2 \n\t" | |
42 "psubusb %%mm4, %%mm0 \n\t" | |
43 "movq (%1, %%eax), %%mm1 \n\t" | |
44 "movq (%2, %%eax), %%mm3 \n\t" | |
45 "movq (%2, %%eax), %%mm5 \n\t" | |
46 "psubusb %%mm1, %%mm3 \n\t" | |
47 "psubusb %%mm5, %%mm1 \n\t" | |
48 "por %%mm2, %%mm0 \n\t" | |
49 "por %%mm1, %%mm3 \n\t" | |
/* widen both rows' byte differences to words against mm7 and add them up */
50 "movq %%mm0, %%mm1 \n\t" | |
51 "movq %%mm3, %%mm2 \n\t" | |
52 "punpcklbw %%mm7, %%mm0 \n\t" | |
53 "punpckhbw %%mm7, %%mm1 \n\t" | |
54 "punpcklbw %%mm7, %%mm3 \n\t" | |
55 "punpckhbw %%mm7, %%mm2 \n\t" | |
56 "paddw %%mm1, %%mm0 \n\t" | |
57 "paddw %%mm3, %%mm2 \n\t" | |
58 "paddw %%mm2, %%mm0 \n\t" | |
59 "paddw %%mm0, %%mm6 \n\t" | |
60 "addl %3, %%eax \n\t" | |
61 " js 1b \n\t" | |
62 : "+a" (len) | |
63 : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) | |
64 ); | |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
65 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
66 |
/*
 * MMX2 version of the 8-pixel-wide SAD accumulation: psadbw produces the sum
 * of the eight absolute byte differences of a row directly, and the result is
 * added into mm6.
 *
 * blk1, blk2  first byte of each block; both are walked with the same stride
 * stride      byte distance between consecutive rows
 * h           number of rows; two rows are consumed per loop iteration, so h
 *             is expected to be even
 *
 * Register contract (not expressed in the asm constraints):
 *   in     mm6 = running sum
 *   out    mm6 += SAD of the processed rows; read back with sum_mmx2()
 *   trash  mm0-mm3
 *
 * eax (len) counts from -(stride*h) up to 0; the pointer operands are
 * pre-biased by -len so (ptr, eax) starts at the first row.
 */
1708 | 67 static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
68 { |
1708 | 69 int len= -(stride*h); |
294 | 70 asm volatile( |
71 ".balign 16 \n\t" | |
72 "1: \n\t" | |
73 "movq (%1, %%eax), %%mm0 \n\t" | |
74 "movq (%2, %%eax), %%mm2 \n\t" | |
75 "psadbw %%mm2, %%mm0 \n\t" | |
76 "addl %3, %%eax \n\t" | |
77 "movq (%1, %%eax), %%mm1 \n\t" | |
78 "movq (%2, %%eax), %%mm3 \n\t" | |
79 "psadbw %%mm1, %%mm3 \n\t" | |
/* combine the two row SADs, then fold them into the accumulator */
80 "paddw %%mm3, %%mm0 \n\t" | |
81 "paddw %%mm0, %%mm6 \n\t" | |
82 "addl %3, %%eax \n\t" | |
83 " js 1b \n\t" | |
84 : "+a" (len) | |
85 : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) | |
86 ); | |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
87 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
88 |
/*
 * MMX2 SAD of blk2 against the byte-wise rounded average (pavgb) of two
 * source rows, 8 pixels wide; the result is added into mm6.
 *
 * blk1a, blk1b  the two sources that are averaged (the wrappers pass
 *               blk1 / blk1+1 for the x2 case and blk1 / blk1+stride for y2)
 * blk2          block the average is compared against
 * stride        byte distance between consecutive rows, shared by all three
 * h             number of rows; two rows are consumed per loop iteration, so
 *               h is expected to be even
 *
 * Register contract (not expressed in the asm constraints):
 *   in     mm6 = running sum
 *   out    mm6 += SAD of the processed rows; read back with sum_mmx2()
 *   trash  mm0-mm3
 */
1064 | 89 static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
90 { |
1708 | 91 int len= -(stride*h); |
294 | 92 asm volatile( |
93 ".balign 16 \n\t" | |
94 "1: \n\t" | |
95 "movq (%1, %%eax), %%mm0 \n\t" | |
96 "movq (%2, %%eax), %%mm2 \n\t" | |
97 "pavgb %%mm2, %%mm0 \n\t" | |
98 "movq (%3, %%eax), %%mm2 \n\t" | |
99 "psadbw %%mm2, %%mm0 \n\t" | |
100 "addl %4, %%eax \n\t" | |
101 "movq (%1, %%eax), %%mm1 \n\t" | |
102 "movq (%2, %%eax), %%mm3 \n\t" | |
103 "pavgb %%mm1, %%mm3 \n\t" | |
104 "movq (%3, %%eax), %%mm1 \n\t" | |
105 "psadbw %%mm1, %%mm3 \n\t" | |
106 "paddw %%mm3, %%mm0 \n\t" | |
107 "paddw %%mm0, %%mm6 \n\t" | |
108 "addl %4, %%eax \n\t" | |
109 " js 1b \n\t" | |
110 : "+a" (len) | |
111 : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride) | |
112 ); | |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
113 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
114 |
/*
 * MMX2 SAD of blk2 against an approximate four-neighbour average of blk1
 * (pixels x and x+1 of a row and of the row below), 8 pixels wide; the result
 * is added into mm6.
 *
 * The average is built from three pavgb steps: the two vertical pair
 * averages are formed, 1 is subtracted (saturating) from the x+1 column
 * average, and the two are averaged again.  This is not always equal to
 * (a+b+c+d+2)>>2 as computed by sad8_4_mmx(); e.g. a=c=0, b=d=1 gives 0 here
 * and 1 there.
 *
 * blk1    source block; rows blk1 and blk1+stride are read, at offsets 0 and 1
 * blk2    block the average is compared against
 * stride  byte distance between consecutive rows
 * h       number of rows; two rows are consumed per loop iteration, so h is
 *         expected to be even
 *
 * Register contract (not expressed in the asm constraints):
 *   in     mm6 = running sum
 *   out    mm6 += SAD of the processed rows; read back with sum_mmx2()
 *   trash  mm0-mm5 (mm5 is overwritten with `bone`)
 *
 * NOTE(review): the .balign directive precedes the load of `bone`, not the
 * loop label "1:", so the loop entry itself is not guaranteed to be aligned.
 */
1064 | 115 static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
294 | 116 { //FIXME reuse src |
1708 | 117 int len= -(stride*h); |
294 | 118 asm volatile( |
119 ".balign 16 \n\t" | |
330 | 120 "movq "MANGLE(bone)", %%mm5 \n\t" |
1057 | 121 "1: \n\t" |
294 | 122 "movq (%1, %%eax), %%mm0 \n\t" |
123 "movq (%2, %%eax), %%mm2 \n\t" | |
124 "movq 1(%1, %%eax), %%mm1 \n\t" | |
125 "movq 1(%2, %%eax), %%mm3 \n\t" | |
126 "pavgb %%mm2, %%mm0 \n\t" | |
127 "pavgb %%mm1, %%mm3 \n\t" | |
330 | 128 "psubusb %%mm5, %%mm3 \n\t" |
294 | 129 "pavgb %%mm3, %%mm0 \n\t" |
130 "movq (%3, %%eax), %%mm2 \n\t" | |
131 "psadbw %%mm2, %%mm0 \n\t" | |
132 "addl %4, %%eax \n\t" | |
133 "movq (%1, %%eax), %%mm1 \n\t" | |
134 "movq (%2, %%eax), %%mm3 \n\t" | |
135 "movq 1(%1, %%eax), %%mm2 \n\t" | |
136 "movq 1(%2, %%eax), %%mm4 \n\t" | |
137 "pavgb %%mm3, %%mm1 \n\t" | |
138 "pavgb %%mm4, %%mm2 \n\t" | |
330 | 139 "psubusb %%mm5, %%mm2 \n\t" |
294 | 140 "pavgb %%mm1, %%mm2 \n\t" |
141 "movq (%3, %%eax), %%mm1 \n\t" | |
142 "psadbw %%mm1, %%mm2 \n\t" | |
143 "paddw %%mm2, %%mm0 \n\t" | |
144 "paddw %%mm0, %%mm6 \n\t" | |
145 "addl %4, %%eax \n\t" | |
146 " js 1b \n\t" | |
147 : "+a" (len) | |
148 : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" (stride) | |
149 ); | |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
150 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
151 |
/*
 * Plain-MMX SAD of blk2 against the average of two source rows, 8 pixels
 * wide; the result is added to the 16-bit lanes of mm6.
 *
 * Per pixel the average is (a + b + bias) >> 1, computed in 16-bit lanes,
 * where bias is whatever the caller placed in mm5 (the PIX_SAD wrappers load
 * round_tab[1]).  The averaged row is packed back to bytes and compared
 * against blk2 with the two-saturating-subtractions-and-OR idiom.
 *
 * blk1a, blk1b  the two sources that are averaged
 * blk2          block the average is compared against
 * stride        byte distance between consecutive rows, shared by all three
 * h             number of rows; exactly one row is consumed per iteration
 *
 * Register contract (not expressed in the asm constraints):
 *   in     mm7 = 0, mm5 = rounding bias per word, mm6 = running sum
 *   out    mm6 += per-lane partial sums; reduce with sum_mmx()
 *   trash  mm0-mm4
 */
1064 | 152 static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
153 { |
1708 | 154 int len= -(stride*h); |
294 | 155 asm volatile( |
156 ".balign 16 \n\t" | |
157 "1: \n\t" | |
158 "movq (%1, %%eax), %%mm0 \n\t" | |
159 "movq (%2, %%eax), %%mm1 \n\t" | |
160 "movq (%1, %%eax), %%mm2 \n\t" | |
161 "movq (%2, %%eax), %%mm3 \n\t" | |
162 "punpcklbw %%mm7, %%mm0 \n\t" | |
163 "punpcklbw %%mm7, %%mm1 \n\t" | |
164 "punpckhbw %%mm7, %%mm2 \n\t" | |
165 "punpckhbw %%mm7, %%mm3 \n\t" | |
166 "paddw %%mm0, %%mm1 \n\t" | |
167 "paddw %%mm2, %%mm3 \n\t" | |
1057 | 168 "movq (%3, %%eax), %%mm4 \n\t" |
294 | 169 "movq (%3, %%eax), %%mm2 \n\t" |
/* add the rounding bias, halve, and pack the averaged row back to bytes */
170 "paddw %%mm5, %%mm1 \n\t" | |
171 "paddw %%mm5, %%mm3 \n\t" | |
172 "psrlw $1, %%mm1 \n\t" | |
173 "psrlw $1, %%mm3 \n\t" | |
174 "packuswb %%mm3, %%mm1 \n\t" | |
/* absolute difference against blk2, then widen and accumulate */
175 "psubusb %%mm1, %%mm4 \n\t" | |
176 "psubusb %%mm2, %%mm1 \n\t" | |
177 "por %%mm4, %%mm1 \n\t" | |
178 "movq %%mm1, %%mm0 \n\t" | |
179 "punpcklbw %%mm7, %%mm0 \n\t" | |
180 "punpckhbw %%mm7, %%mm1 \n\t" | |
181 "paddw %%mm1, %%mm0 \n\t" | |
182 "paddw %%mm0, %%mm6 \n\t" | |
183 "addl %4, %%eax \n\t" | |
184 " js 1b \n\t" | |
185 : "+a" (len) | |
186 : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride) | |
187 ); | |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
188 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
189 |
/*
 * Plain-MMX SAD of blk2 against the four-neighbour average of blk1, 8 pixels
 * wide; the result is added to the 16-bit lanes of mm6.
 *
 * Per pixel the average is (a + b + c + d + bias) >> 2 in 16-bit lanes, where
 * a/c are bytes x and x+1 of the current row, b/d are bytes x and x+1 of the
 * row below (operand %2 is blk1 + stride), and bias is whatever the caller
 * placed in mm5 (the PIX_SAD wrappers load round_tab[2]).
 *
 * blk1    source block; rows blk1 and blk1+stride are read, at offsets 0 and 1
 * blk2    block the average is compared against
 * stride  byte distance between consecutive rows
 * h       number of rows; exactly one row is consumed per iteration
 *
 * Register contract (not expressed in the asm constraints):
 *   in     mm7 = 0, mm5 = rounding bias per word, mm6 = running sum
 *   out    mm6 += per-lane partial sums; reduce with sum_mmx()
 *   trash  mm0-mm4
 */
1064 | 190 static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
191 { |
1708 | 192 int len= -(stride*h); |
294 | 193 asm volatile( |
194 ".balign 16 \n\t" | |
195 "1: \n\t" | |
196 "movq (%1, %%eax), %%mm0 \n\t" | |
197 "movq (%2, %%eax), %%mm1 \n\t" | |
198 "movq %%mm0, %%mm4 \n\t" | |
199 "movq %%mm1, %%mm2 \n\t" | |
200 "punpcklbw %%mm7, %%mm0 \n\t" | |
201 "punpcklbw %%mm7, %%mm1 \n\t" | |
202 "punpckhbw %%mm7, %%mm4 \n\t" | |
203 "punpckhbw %%mm7, %%mm2 \n\t" | |
204 "paddw %%mm1, %%mm0 \n\t" | |
205 "paddw %%mm2, %%mm4 \n\t" | |
/* same two rows shifted one pixel to the right, added onto the sums above */
206 "movq 1(%1, %%eax), %%mm2 \n\t" | |
207 "movq 1(%2, %%eax), %%mm3 \n\t" | |
208 "movq %%mm2, %%mm1 \n\t" | |
209 "punpcklbw %%mm7, %%mm2 \n\t" | |
210 "punpckhbw %%mm7, %%mm1 \n\t" | |
211 "paddw %%mm0, %%mm2 \n\t" | |
212 "paddw %%mm4, %%mm1 \n\t" | |
213 "movq %%mm3, %%mm4 \n\t" | |
214 "punpcklbw %%mm7, %%mm3 \n\t" | |
215 "punpckhbw %%mm7, %%mm4 \n\t" | |
216 "paddw %%mm3, %%mm2 \n\t" | |
217 "paddw %%mm4, %%mm1 \n\t" | |
1057 | 218 "movq (%3, %%eax), %%mm3 \n\t" |
219 "movq (%3, %%eax), %%mm4 \n\t" | |
/* add the rounding bias, divide by four, and pack back to bytes */
294 | 220 "paddw %%mm5, %%mm2 \n\t" |
221 "paddw %%mm5, %%mm1 \n\t" | |
222 "psrlw $2, %%mm2 \n\t" | |
223 "psrlw $2, %%mm1 \n\t" | |
224 "packuswb %%mm1, %%mm2 \n\t" | |
/* absolute difference against blk2, then widen and accumulate */
225 "psubusb %%mm2, %%mm3 \n\t" | |
226 "psubusb %%mm4, %%mm2 \n\t" | |
227 "por %%mm3, %%mm2 \n\t" | |
228 "movq %%mm2, %%mm0 \n\t" | |
229 "punpcklbw %%mm7, %%mm0 \n\t" | |
230 "punpckhbw %%mm7, %%mm2 \n\t" | |
231 "paddw %%mm2, %%mm0 \n\t" | |
232 "paddw %%mm0, %%mm6 \n\t" | |
233 "addl %4, %%eax \n\t" | |
234 " js 1b \n\t" | |
235 : "+a" (len) | |
236 : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" (stride) | |
237 ); | |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
238 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
239 |
/*
 * Reduce the accumulator left in mm6 by the plain-MMX SAD helpers to a single
 * integer: the four 16-bit lanes are added together (shift by 32 and add,
 * then shift by 16 and add) and the low 16 bits of the result are returned.
 *
 * Overwrites mm0 and mm6.  The masking means the returned value is the lane
 * sum modulo 65536.
 */
1057 | 240 static inline int sum_mmx(void) |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
241 { |
294 | 242 int ret; |
243 asm volatile( | |
244 "movq %%mm6, %%mm0 \n\t" | |
245 "psrlq $32, %%mm6 \n\t" | |
246 "paddw %%mm0, %%mm6 \n\t" | |
247 "movq %%mm6, %%mm0 \n\t" | |
248 "psrlq $16, %%mm6 \n\t" | |
249 "paddw %%mm0, %%mm6 \n\t" | |
250 "movd %%mm6, %0 \n\t" | |
251 : "=r" (ret) | |
252 ); | |
/* only the low word holds the complete lane sum; discard the rest */
253 return ret&0xFFFF; | |
254 } | |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
255 |
/*
 * Return the low 32 bits of mm6, where the MMX2 SAD helpers keep their
 * running total (psadbw results added with paddw).  No horizontal reduction
 * or masking is applied; mm6 is left unchanged.
 */
1057 | 256 static inline int sum_mmx2(void) |
294 | 257 { |
258 int ret; | |
259 asm volatile( | |
260 "movd %%mm6, %0 \n\t" | |
261 : "=r" (ret) | |
262 ); | |
263 return ret; | |
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
264 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
265 |
900 | 266 |
/* Clear mm7 (zero operand for byte->word unpacking) and mm6 (SAD accumulator). */
#define SAD_REGS_CLEAR()\
    asm volatile("pxor %%mm7, %%mm7 \n\t"\
                 "pxor %%mm6, %%mm6 \n\t":)

/* As SAD_REGS_CLEAR(), and additionally load round_tab[idx] into mm5. */
#define SAD_REGS_CLEAR_ROUND(idx)\
    asm volatile("pxor %%mm7, %%mm7 \n\t"\
                 "pxor %%mm6, %%mm6 \n\t"\
                 "movq %0, %%mm5 \n\t"\
                 :: "m"(round_tab[idx]) \
                 )

/*
 * PIX_SAD(suf) emits the eight SAD entry points for one instruction-set
 * flavour (suf is mmx or mmx2):
 *
 *   sad8_suf,  sad8_x2_suf,  sad8_y2_suf,  sad8_xy2_suf     8 pixels wide
 *   sad16_suf, sad16_x2_suf, sad16_y2_suf, sad16_xy2_suf   16 pixels wide
 *
 * Every entry point has the signature (void *v, uint8_t *blk2, uint8_t *blk1,
 * int stride, int h); v is not used.  Each one resets the MMX registers the
 * helpers rely on, runs the matching sad8_N_suf helper (twice, at byte
 * offsets 0 and 8, for the 16-wide variants) and returns sum_suf().
 *
 *   plain  compares blk1 with blk2 directly
 *   _x2    compares blk2 with the average of blk1 and blk1+1
 *   _y2    compares blk2 with the average of blk1 and blk1+stride
 *   _xy2   compares blk2 with the four-neighbour average of blk1
 *
 * The 8-wide variants assert h==8 and always process 8 rows; the 16-wide
 * variants pass h through to the helpers.
 */
#define PIX_SAD(suf)\
static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
    assert(h==8);\
    SAD_REGS_CLEAR();\
\
    sad8_1_ ## suf(blk1, blk2, stride, 8);\
\
    return sum_ ## suf();\
}\
\
static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
    assert(h==8);\
    SAD_REGS_CLEAR_ROUND(1);\
\
    sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 8);\
\
    return sum_ ## suf();\
}\
\
static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
    assert(h==8);\
    SAD_REGS_CLEAR_ROUND(1);\
\
    sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 8);\
\
    return sum_ ## suf();\
}\
\
static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
    assert(h==8);\
    SAD_REGS_CLEAR_ROUND(2);\
\
    sad8_4_ ## suf(blk1, blk2, stride, 8);\
\
    return sum_ ## suf();\
}\
\
static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
    SAD_REGS_CLEAR();\
\
    /* left and right 8-pixel halves share the accumulator */\
    sad8_1_ ## suf(blk1,   blk2,   stride, h);\
    sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
\
    return sum_ ## suf();\
}\
\
static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
    SAD_REGS_CLEAR_ROUND(1);\
\
    sad8_2_ ## suf(blk1,   blk1+1, blk2,   stride, h);\
    sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, h);\
\
    return sum_ ## suf();\
}\
\
static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
    SAD_REGS_CLEAR_ROUND(1);\
\
    sad8_2_ ## suf(blk1,   blk1+stride,   blk2,   stride, h);\
    sad8_2_ ## suf(blk1+8, blk1+stride+8, blk2+8, stride, h);\
\
    return sum_ ## suf();\
}\
\
static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
    SAD_REGS_CLEAR_ROUND(2);\
\
    sad8_4_ ## suf(blk1,   blk2,   stride, h);\
    sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\
\
    return sum_ ## suf();\
}\

72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
369 |
/*
 * Instantiate the SAD entry points twice: once on top of the plain-MMX
 * helpers (sad8_mmx ... sad16_xy2_mmx) and once on top of the MMX2 helpers
 * (sad8_mmx2 ... sad16_xy2_mmx2).
 */
294 | 370 PIX_SAD(mmx) |
371 PIX_SAD(mmx2) | |
1057 | 372 |
1092 | 373 void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx) |
1057 | 374 { |
375 if (mm_flags & MM_MMX) { | |
1708 | 376 c->pix_abs[0][0] = sad16_mmx; |
377 c->pix_abs[0][1] = sad16_x2_mmx; | |
378 c->pix_abs[0][2] = sad16_y2_mmx; | |
379 c->pix_abs[0][3] = sad16_xy2_mmx; | |
380 c->pix_abs[1][0] = sad8_mmx; | |
381 c->pix_abs[1][1] = sad8_x2_mmx; | |
382 c->pix_abs[1][2] = sad8_y2_mmx; | |
383 c->pix_abs[1][3] = sad8_xy2_mmx; | |
1057 | 384 |
1708 | 385 c->sad[0]= sad16_mmx; |
386 c->sad[1]= sad8_mmx; | |
1057 | 387 } |
388 if (mm_flags & MM_MMXEXT) { | |
1708 | 389 c->pix_abs[0][0] = sad16_mmx2; |
390 c->pix_abs[1][0] = sad8_mmx2; | |
1057 | 391 |
1708 | 392 c->sad[0]= sad16_mmx2; |
393 c->sad[1]= sad8_mmx2; | |
1092 | 394 |
395 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | |
1708 | 396 c->pix_abs[0][1] = sad16_x2_mmx2; |
397 c->pix_abs[0][2] = sad16_y2_mmx2; | |
398 c->pix_abs[0][3] = sad16_xy2_mmx2; | |
399 c->pix_abs[1][1] = sad8_x2_mmx2; | |
400 c->pix_abs[1][2] = sad8_y2_mmx2; | |
401 c->pix_abs[1][3] = sad8_xy2_mmx2; | |
1092 | 402 } |
1057 | 403 } |
404 } |