comparison i386/dsputil_mmx.c @ 1984:ef919e9ef73e libavcodec

separate out put_signed_pixels_clamped() into its own function and implement an optimized MMX version of the function
author melanson
date Tue, 27 Apr 2004 03:58:06 +0000
parents 89422281f6f6
children b2bc62fdecc0
comparison
equal deleted inserted replaced
1983:1205bf58c420 1984:ef919e9ef73e
20 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> 20 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
21 */ 21 */
22 22
23 #include "../dsputil.h" 23 #include "../dsputil.h"
24 #include "../simple_idct.h" 24 #include "../simple_idct.h"
25 #include "mmx.h"
25 26
26 //#undef NDEBUG 27 //#undef NDEBUG
27 //#include <assert.h> 28 //#include <assert.h>
28 29
29 extern const uint8_t ff_h263_loop_filter_strength[32]; 30 extern const uint8_t ff_h263_loop_filter_strength[32];
289 "movq %%mm2, (%0, %1)\n\t" 290 "movq %%mm2, (%0, %1)\n\t"
290 "movq %%mm4, (%0, %1, 2)\n\t" 291 "movq %%mm4, (%0, %1, 2)\n\t"
291 "movq %%mm6, (%0, %2)\n\t" 292 "movq %%mm6, (%0, %2)\n\t"
292 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) 293 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p)
293 :"memory"); 294 :"memory");
295 }
296
297 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
298 {
299 int i;
300 unsigned char __align8 vector128[8] =
301 { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
302
303 movq_m2r(*vector128, mm1);
304 for (i = 0; i < 8; i++) {
305 movq_m2r(*(block), mm0);
306 packsswb_m2r(*(block + 4), mm0);
307 block += 8;
308 paddb_r2r(mm1, mm0);
309 movq_r2m(mm0, *pixels);
310 pixels += line_size;
311 }
294 } 312 }
295 313
296 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) 314 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
297 { 315 {
298 const DCTELEM *p; 316 const DCTELEM *p;
2158 #ifdef CONFIG_ENCODERS 2176 #ifdef CONFIG_ENCODERS
2159 c->get_pixels = get_pixels_mmx; 2177 c->get_pixels = get_pixels_mmx;
2160 c->diff_pixels = diff_pixels_mmx; 2178 c->diff_pixels = diff_pixels_mmx;
2161 #endif //CONFIG_ENCODERS 2179 #endif //CONFIG_ENCODERS
2162 c->put_pixels_clamped = put_pixels_clamped_mmx; 2180 c->put_pixels_clamped = put_pixels_clamped_mmx;
2181 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
2163 c->add_pixels_clamped = add_pixels_clamped_mmx; 2182 c->add_pixels_clamped = add_pixels_clamped_mmx;
2164 c->clear_blocks = clear_blocks_mmx; 2183 c->clear_blocks = clear_blocks_mmx;
2165 #ifdef CONFIG_ENCODERS 2184 #ifdef CONFIG_ENCODERS
2166 c->pix_sum = pix_sum16_mmx; 2185 c->pix_sum = pix_sum16_mmx;
2167 #endif //CONFIG_ENCODERS 2186 #endif //CONFIG_ENCODERS