Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 1984:ef919e9ef73e libavcodec
Separate out put_signed_pixels_clamped() into its own function and
implement an optimized MMX version of it.
author | melanson |
---|---|
date | Tue, 27 Apr 2004 03:58:06 +0000 |
parents | 89422281f6f6 |
children | b2bc62fdecc0 |
comparison
equal
deleted
inserted
replaced
1983:1205bf58c420 | 1984:ef919e9ef73e |
---|---|
20 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | 20 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
21 */ | 21 */ |
22 | 22 |
23 #include "../dsputil.h" | 23 #include "../dsputil.h" |
24 #include "../simple_idct.h" | 24 #include "../simple_idct.h" |
25 #include "mmx.h" | |
25 | 26 |
26 //#undef NDEBUG | 27 //#undef NDEBUG |
27 //#include <assert.h> | 28 //#include <assert.h> |
28 | 29 |
29 extern const uint8_t ff_h263_loop_filter_strength[32]; | 30 extern const uint8_t ff_h263_loop_filter_strength[32]; |
289 "movq %%mm2, (%0, %1)\n\t" | 290 "movq %%mm2, (%0, %1)\n\t" |
290 "movq %%mm4, (%0, %1, 2)\n\t" | 291 "movq %%mm4, (%0, %1, 2)\n\t" |
291 "movq %%mm6, (%0, %2)\n\t" | 292 "movq %%mm6, (%0, %2)\n\t" |
292 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) | 293 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) |
293 :"memory"); | 294 :"memory"); |
295 } | |
296 | |
297 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | |
298 { | |
299 int i; | |
300 unsigned char __align8 vector128[8] = | |
301 { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; | |
302 | |
303 movq_m2r(*vector128, mm1); | |
304 for (i = 0; i < 8; i++) { | |
305 movq_m2r(*(block), mm0); | |
306 packsswb_m2r(*(block + 4), mm0); | |
307 block += 8; | |
308 paddb_r2r(mm1, mm0); | |
309 movq_r2m(mm0, *pixels); | |
310 pixels += line_size; | |
311 } | |
294 } | 312 } |
295 | 313 |
296 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | 314 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
297 { | 315 { |
298 const DCTELEM *p; | 316 const DCTELEM *p; |
2158 #ifdef CONFIG_ENCODERS | 2176 #ifdef CONFIG_ENCODERS |
2159 c->get_pixels = get_pixels_mmx; | 2177 c->get_pixels = get_pixels_mmx; |
2160 c->diff_pixels = diff_pixels_mmx; | 2178 c->diff_pixels = diff_pixels_mmx; |
2161 #endif //CONFIG_ENCODERS | 2179 #endif //CONFIG_ENCODERS |
2162 c->put_pixels_clamped = put_pixels_clamped_mmx; | 2180 c->put_pixels_clamped = put_pixels_clamped_mmx; |
2181 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; | |
2163 c->add_pixels_clamped = add_pixels_clamped_mmx; | 2182 c->add_pixels_clamped = add_pixels_clamped_mmx; |
2164 c->clear_blocks = clear_blocks_mmx; | 2183 c->clear_blocks = clear_blocks_mmx; |
2165 #ifdef CONFIG_ENCODERS | 2184 #ifdef CONFIG_ENCODERS |
2166 c->pix_sum = pix_sum16_mmx; | 2185 c->pix_sum = pix_sum16_mmx; |
2167 #endif //CONFIG_ENCODERS | 2186 #endif //CONFIG_ENCODERS |