Mercurial > libavcodec.hg
comparison i386/mpegvideo_mmx.c @ 2967:ef2149182f1c libavcodec
COSMETICS: Remove all trailing whitespace.
author | diego |
---|---|
date | Sat, 17 Dec 2005 18:14:38 +0000 |
parents | 15cfba1b97b5 |
children | bfabfdf9ce55 |
comparison
equal
deleted
inserted
replaced
2966:564788471dd4 | 2967:ef2149182f1c |
---|---|
38 long level, qmul, qadd, nCoeffs; | 38 long level, qmul, qadd, nCoeffs; |
39 | 39 |
40 qmul = qscale << 1; | 40 qmul = qscale << 1; |
41 | 41 |
42 assert(s->block_last_index[n]>=0 || s->h263_aic); | 42 assert(s->block_last_index[n]>=0 || s->h263_aic); |
43 | 43 |
44 if (!s->h263_aic) { | 44 if (!s->h263_aic) { |
45 if (n < 4) | 45 if (n < 4) |
46 level = block[0] * s->y_dc_scale; | 46 level = block[0] * s->y_dc_scale; |
47 else | 47 else |
48 level = block[0] * s->c_dc_scale; | 48 level = block[0] * s->c_dc_scale; |
114 | 114 |
115 qmul = qscale << 1; | 115 qmul = qscale << 1; |
116 qadd = (qscale - 1) | 1; | 116 qadd = (qscale - 1) | 1; |
117 | 117 |
118 assert(s->block_last_index[n]>=0 || s->h263_aic); | 118 assert(s->block_last_index[n]>=0 || s->h263_aic); |
119 | 119 |
120 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; | 120 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; |
121 //printf("%d %d ", qmul, qadd); | 121 //printf("%d %d ", qmul, qadd); |
122 asm volatile( | 122 asm volatile( |
123 "movd %1, %%mm6 \n\t" //qmul | 123 "movd %1, %%mm6 \n\t" //qmul |
124 "packssdw %%mm6, %%mm6 \n\t" | 124 "packssdw %%mm6, %%mm6 \n\t" |
207 | 207 |
208 assert(s->block_last_index[n]>=0); | 208 assert(s->block_last_index[n]>=0); |
209 | 209 |
210 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; | 210 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; |
211 | 211 |
212 if (n < 4) | 212 if (n < 4) |
213 block0 = block[0] * s->y_dc_scale; | 213 block0 = block[0] * s->y_dc_scale; |
214 else | 214 else |
215 block0 = block[0] * s->c_dc_scale; | 215 block0 = block[0] * s->c_dc_scale; |
216 /* XXX: only mpeg1 */ | 216 /* XXX: only mpeg1 */ |
217 quant_matrix = s->intra_matrix; | 217 quant_matrix = s->intra_matrix; |
261 | 261 |
262 "add $16, %%"REG_a" \n\t" | 262 "add $16, %%"REG_a" \n\t" |
263 "js 1b \n\t" | 263 "js 1b \n\t" |
264 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | 264 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) |
265 : "%"REG_a, "memory" | 265 : "%"REG_a, "memory" |
266 ); | 266 ); |
267 block[0]= block0; | 267 block[0]= block0; |
268 } | 268 } |
269 | 269 |
270 static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, | 270 static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, |
271 DCTELEM *block, int n, int qscale) | 271 DCTELEM *block, int n, int qscale) |
337 DCTELEM *block, int n, int qscale) | 337 DCTELEM *block, int n, int qscale) |
338 { | 338 { |
339 long nCoeffs; | 339 long nCoeffs; |
340 const uint16_t *quant_matrix; | 340 const uint16_t *quant_matrix; |
341 int block0; | 341 int block0; |
342 | 342 |
343 assert(s->block_last_index[n]>=0); | 343 assert(s->block_last_index[n]>=0); |
344 | 344 |
345 if(s->alternate_scan) nCoeffs= 63; //FIXME | 345 if(s->alternate_scan) nCoeffs= 63; //FIXME |
346 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; | 346 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; |
347 | 347 |
348 if (n < 4) | 348 if (n < 4) |
349 block0 = block[0] * s->y_dc_scale; | 349 block0 = block[0] * s->y_dc_scale; |
350 else | 350 else |
351 block0 = block[0] * s->c_dc_scale; | 351 block0 = block[0] * s->c_dc_scale; |
352 quant_matrix = s->intra_matrix; | 352 quant_matrix = s->intra_matrix; |
353 asm volatile( | 353 asm volatile( |
392 | 392 |
393 "add $16, %%"REG_a" \n\t" | 393 "add $16, %%"REG_a" \n\t" |
394 "jng 1b \n\t" | 394 "jng 1b \n\t" |
395 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | 395 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) |
396 : "%"REG_a, "memory" | 396 : "%"REG_a, "memory" |
397 ); | 397 ); |
398 block[0]= block0; | 398 block[0]= block0; |
399 //Note, we dont do mismatch control for intra as errors cannot accumulate | 399 //Note, we dont do mismatch control for intra as errors cannot accumulate |
400 } | 400 } |
401 | 401 |
402 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, | 402 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, |
403 DCTELEM *block, int n, int qscale) | 403 DCTELEM *block, int n, int qscale) |
404 { | 404 { |
405 long nCoeffs; | 405 long nCoeffs; |
406 const uint16_t *quant_matrix; | 406 const uint16_t *quant_matrix; |
407 | 407 |
408 assert(s->block_last_index[n]>=0); | 408 assert(s->block_last_index[n]>=0); |
409 | 409 |
410 if(s->alternate_scan) nCoeffs= 63; //FIXME | 410 if(s->alternate_scan) nCoeffs= 63; //FIXME |
411 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; | 411 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; |
412 | 412 |
468 "pxor %%mm6, %%mm7 \n\t" | 468 "pxor %%mm6, %%mm7 \n\t" |
469 "pslld $31, %%mm7 \n\t" | 469 "pslld $31, %%mm7 \n\t" |
470 "psrlq $15, %%mm7 \n\t" | 470 "psrlq $15, %%mm7 \n\t" |
471 "pxor %%mm7, %%mm0 \n\t" | 471 "pxor %%mm7, %%mm0 \n\t" |
472 "movd %%mm0, 124(%0, %3) \n\t" | 472 "movd %%mm0, 124(%0, %3) \n\t" |
473 | 473 |
474 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) | 474 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) |
475 : "%"REG_a, "memory" | 475 : "%"REG_a, "memory" |
476 ); | 476 ); |
477 } | 477 } |
478 | 478 |
479 /* draw the edges of width 'w' of an image of size width, height | 479 /* draw the edges of width 'w' of an image of size width, height |
480 this mmx version can only handle w==8 || w==16 */ | 480 this mmx version can only handle w==8 || w==16 */ |
481 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) | 481 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) |
482 { | 482 { |
483 uint8_t *ptr, *last_line; | 483 uint8_t *ptr, *last_line; |
484 int i; | 484 int i; |
489 if(w==8) | 489 if(w==8) |
490 { | 490 { |
491 asm volatile( | 491 asm volatile( |
492 "1: \n\t" | 492 "1: \n\t" |
493 "movd (%0), %%mm0 \n\t" | 493 "movd (%0), %%mm0 \n\t" |
494 "punpcklbw %%mm0, %%mm0 \n\t" | 494 "punpcklbw %%mm0, %%mm0 \n\t" |
495 "punpcklwd %%mm0, %%mm0 \n\t" | 495 "punpcklwd %%mm0, %%mm0 \n\t" |
496 "punpckldq %%mm0, %%mm0 \n\t" | 496 "punpckldq %%mm0, %%mm0 \n\t" |
497 "movq %%mm0, -8(%0) \n\t" | 497 "movq %%mm0, -8(%0) \n\t" |
498 "movq -8(%0, %2), %%mm1 \n\t" | 498 "movq -8(%0, %2), %%mm1 \n\t" |
499 "punpckhbw %%mm1, %%mm1 \n\t" | 499 "punpckhbw %%mm1, %%mm1 \n\t" |
510 else | 510 else |
511 { | 511 { |
512 asm volatile( | 512 asm volatile( |
513 "1: \n\t" | 513 "1: \n\t" |
514 "movd (%0), %%mm0 \n\t" | 514 "movd (%0), %%mm0 \n\t" |
515 "punpcklbw %%mm0, %%mm0 \n\t" | 515 "punpcklbw %%mm0, %%mm0 \n\t" |
516 "punpcklwd %%mm0, %%mm0 \n\t" | 516 "punpcklwd %%mm0, %%mm0 \n\t" |
517 "punpckldq %%mm0, %%mm0 \n\t" | 517 "punpckldq %%mm0, %%mm0 \n\t" |
518 "movq %%mm0, -8(%0) \n\t" | 518 "movq %%mm0, -8(%0) \n\t" |
519 "movq %%mm0, -16(%0) \n\t" | 519 "movq %%mm0, -16(%0) \n\t" |
520 "movq -8(%0, %2), %%mm1 \n\t" | 520 "movq -8(%0, %2), %%mm1 \n\t" |
523 "punpckhdq %%mm1, %%mm1 \n\t" | 523 "punpckhdq %%mm1, %%mm1 \n\t" |
524 "movq %%mm1, (%0, %2) \n\t" | 524 "movq %%mm1, (%0, %2) \n\t" |
525 "movq %%mm1, 8(%0, %2) \n\t" | 525 "movq %%mm1, 8(%0, %2) \n\t" |
526 "add %1, %0 \n\t" | 526 "add %1, %0 \n\t" |
527 "cmp %3, %0 \n\t" | 527 "cmp %3, %0 \n\t" |
528 " jb 1b \n\t" | 528 " jb 1b \n\t" |
529 : "+r" (ptr) | 529 : "+r" (ptr) |
530 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | 530 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) |
531 ); | 531 ); |
532 } | 532 } |
533 | 533 |
534 for(i=0;i<w;i+=4) { | 534 for(i=0;i<w;i+=4) { |
535 /* top and bottom (and hopefully also the corners) */ | 535 /* top and bottom (and hopefully also the corners) */ |
536 ptr= buf - (i + 1) * wrap - w; | 536 ptr= buf - (i + 1) * wrap - w; |
537 asm volatile( | 537 asm volatile( |
538 "1: \n\t" | 538 "1: \n\t" |
692 | 692 |
693 void MPV_common_init_mmx(MpegEncContext *s) | 693 void MPV_common_init_mmx(MpegEncContext *s) |
694 { | 694 { |
695 if (mm_flags & MM_MMX) { | 695 if (mm_flags & MM_MMX) { |
696 const int dct_algo = s->avctx->dct_algo; | 696 const int dct_algo = s->avctx->dct_algo; |
697 | 697 |
698 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx; | 698 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx; |
699 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx; | 699 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx; |
700 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx; | 700 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx; |
701 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx; | 701 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx; |
702 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx; | 702 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx; |
703 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx; | 703 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx; |
704 | 704 |
705 draw_edges = draw_edges_mmx; | 705 draw_edges = draw_edges_mmx; |
706 | 706 |
707 if (mm_flags & MM_SSE2) { | 707 if (mm_flags & MM_SSE2) { |
708 s->denoise_dct= denoise_dct_sse2; | 708 s->denoise_dct= denoise_dct_sse2; |
709 } else { | 709 } else { |
710 s->denoise_dct= denoise_dct_mmx; | 710 s->denoise_dct= denoise_dct_mmx; |
711 } | 711 } |