comparison i386/dsputil_mmx.c @ 853:eacc2dd8fd9d libavcodec

* using DSPContext - so each codec can use its own local (sub)set of CPU extensions
author kabi
date Mon, 11 Nov 2002 09:40:17 +0000
parents e65798d228ea
children 725ef4ea3ecc
comparison
equal deleted inserted replaced
852:c01c98206ee6 853:eacc2dd8fd9d
20 */ 20 */
21 21
22 #include "../dsputil.h" 22 #include "../dsputil.h"
23 23
24 int mm_flags; /* multimedia extension flags */ 24 int mm_flags; /* multimedia extension flags */
25 25 /* FIXME use them in static form */
26 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); 26 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
27 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); 27 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
28 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); 28 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
29 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); 29 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
30 30
240 : "r" (block+64), "r" (stride) 240 : "r" (block+64), "r" (stride)
241 : "%eax" 241 : "%eax"
242 ); 242 );
243 } 243 }
244 244
245 static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) 245 void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
246 { 246 {
247 const DCTELEM *p; 247 const DCTELEM *p;
248 UINT8 *pix; 248 UINT8 *pix;
249 249
250 /* read the pixels */ 250 /* read the pixels */
295 "movq %%mm6, (%0, %2)\n\t" 295 "movq %%mm6, (%0, %2)\n\t"
296 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) 296 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p)
297 :"memory"); 297 :"memory");
298 } 298 }
299 299
300 static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) 300 void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
301 { 301 {
302 const DCTELEM *p; 302 const DCTELEM *p;
303 UINT8 *pix; 303 UINT8 *pix;
304 int i; 304 int i;
305 305
455 455
456 #if 0 456 #if 0
457 static void just_return() { return; } 457 static void just_return() { return; }
458 #endif 458 #endif
459 459
460 void dsputil_init_mmx(void) 460 void dsputil_init_mmx(DSPContext* c, unsigned mask)
461 { 461 {
462 mm_flags = mm_support(); 462 mm_flags = mm_support();
463 #if 0 463 #if 0
464 fprintf(stderr, "libavcodec: CPU flags:"); 464 fprintf(stderr, "libavcodec: CPU flags:");
465 if (mm_flags & MM_MMX) 465 if (mm_flags & MM_MMX)
474 fprintf(stderr, " sse2"); 474 fprintf(stderr, " sse2");
475 fprintf(stderr, "\n"); 475 fprintf(stderr, "\n");
476 #endif 476 #endif
477 477
478 if (mm_flags & MM_MMX) { 478 if (mm_flags & MM_MMX) {
479 get_pixels = get_pixels_mmx; 479 c->get_pixels = get_pixels_mmx;
480 diff_pixels = diff_pixels_mmx; 480 c->diff_pixels = diff_pixels_mmx;
481 put_pixels_clamped = put_pixels_clamped_mmx; 481 c->put_pixels_clamped = put_pixels_clamped_mmx;
482 add_pixels_clamped = add_pixels_clamped_mmx; 482 c->add_pixels_clamped = add_pixels_clamped_mmx;
483 clear_blocks= clear_blocks_mmx; 483 c->clear_blocks = clear_blocks_mmx;
484 pix_sum= pix_sum16_mmx; 484 c->pix_sum = pix_sum16_mmx;
485 485
486 pix_abs16x16 = pix_abs16x16_mmx; 486 c->pix_abs16x16 = pix_abs16x16_mmx;
487 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; 487 c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
488 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; 488 c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
489 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; 489 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
490 pix_abs8x8 = pix_abs8x8_mmx; 490 c->pix_abs8x8 = pix_abs8x8_mmx;
491 pix_abs8x8_x2 = pix_abs8x8_x2_mmx; 491 c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
492 pix_abs8x8_y2 = pix_abs8x8_y2_mmx; 492 c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
493 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; 493 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx;
494 494
495 put_pixels_tab[0][0] = put_pixels16_mmx; 495 c->put_pixels_tab[0][0] = put_pixels16_mmx;
496 put_pixels_tab[0][1] = put_pixels16_x2_mmx; 496 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx;
497 put_pixels_tab[0][2] = put_pixels16_y2_mmx; 497 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx;
498 put_pixels_tab[0][3] = put_pixels16_xy2_mmx; 498 c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
499 499
500 put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx; 500 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx;
501 put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; 501 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
502 put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; 502 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
503 put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx; 503 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx;
504 504
505 avg_pixels_tab[0][0] = avg_pixels16_mmx; 505 c->avg_pixels_tab[0][0] = avg_pixels16_mmx;
506 avg_pixels_tab[0][1] = avg_pixels16_x2_mmx; 506 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx;
507 avg_pixels_tab[0][2] = avg_pixels16_y2_mmx; 507 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx;
508 avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; 508 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
509 509
510 avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx; 510 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx;
511 avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; 511 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx;
512 avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; 512 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx;
513 avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; 513 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx;
514 514
515 put_pixels_tab[1][0] = put_pixels8_mmx; 515 c->put_pixels_tab[1][0] = put_pixels8_mmx;
516 put_pixels_tab[1][1] = put_pixels8_x2_mmx; 516 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx;
517 put_pixels_tab[1][2] = put_pixels8_y2_mmx; 517 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx;
518 put_pixels_tab[1][3] = put_pixels8_xy2_mmx; 518 c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx;
519 519
520 put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx; 520 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx;
521 put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; 521 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
522 put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; 522 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
523 put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx; 523 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx;
524 524
525 avg_pixels_tab[1][0] = avg_pixels8_mmx; 525 c->avg_pixels_tab[1][0] = avg_pixels8_mmx;
526 avg_pixels_tab[1][1] = avg_pixels8_x2_mmx; 526 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx;
527 avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; 527 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
528 avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; 528 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
529 529
530 avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; 530 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx;
531 avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; 531 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
532 avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; 532 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
533 avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; 533 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
534 534
535 if (mm_flags & MM_MMXEXT) { 535 if (mm_flags & MM_MMXEXT) {
536 pix_abs16x16 = pix_abs16x16_mmx2; 536 c->pix_abs16x16 = pix_abs16x16_mmx2;
537 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; 537 c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
538 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; 538 c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
539 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; 539 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
540 540
541 pix_abs8x8 = pix_abs8x8_mmx2; 541 c->pix_abs8x8 = pix_abs8x8_mmx2;
542 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; 542 c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
543 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; 543 c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
544 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; 544 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
545 545
546 put_pixels_tab[0][1] = put_pixels16_x2_mmx2; 546 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
547 put_pixels_tab[0][2] = put_pixels16_y2_mmx2; 547 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
548 put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; 548 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
549 put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; 549 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
550 550
551 avg_pixels_tab[0][0] = avg_pixels16_mmx2; 551 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
552 avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; 552 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
553 avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; 553 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
554 avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; 554 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
555 555
556 put_pixels_tab[1][1] = put_pixels8_x2_mmx2; 556 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
557 put_pixels_tab[1][2] = put_pixels8_y2_mmx2; 557 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
558 put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; 558 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
559 put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; 559 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
560 560
561 avg_pixels_tab[1][0] = avg_pixels8_mmx2; 561 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
562 avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; 562 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
563 avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; 563 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
564 avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; 564 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
565 } else if (mm_flags & MM_3DNOW) { 565 } else if (mm_flags & MM_3DNOW) {
566 put_pixels_tab[0][1] = put_pixels16_x2_3dnow; 566 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
567 put_pixels_tab[0][2] = put_pixels16_y2_3dnow; 567 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
568 put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; 568 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
569 put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; 569 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
570 570
571 avg_pixels_tab[0][0] = avg_pixels16_3dnow; 571 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
572 avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; 572 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
573 avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; 573 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
574 avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; 574 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
575 575
576 put_pixels_tab[1][1] = put_pixels8_x2_3dnow; 576 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
577 put_pixels_tab[1][2] = put_pixels8_y2_3dnow; 577 c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
578 put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; 578 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
579 put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; 579 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
580 580
581 avg_pixels_tab[1][0] = avg_pixels8_3dnow; 581 c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
582 avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; 582 c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
583 avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; 583 c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
584 avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; 584 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
585 } 585 }
586 } 586 }
587 587
588 #if 0 588 #if 0
589 // for speed testing 589 // for speed testing
622 } 622 }
623 623
624 /* remove any non bit exact operation (testing purpose). NOTE that 624 /* remove any non bit exact operation (testing purpose). NOTE that
625 this function should be kept as small as possible because it is 625 this function should be kept as small as possible because it is
626 always difficult to test automatically non bit exact cases. */ 626 always difficult to test automatically non bit exact cases. */
627 void dsputil_set_bit_exact_mmx(void) 627 void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask)
628 { 628 {
629 if (mm_flags & MM_MMX) { 629 if (mm_flags & MM_MMX) {
630
631 /* MMX2 & 3DNOW */ 630 /* MMX2 & 3DNOW */
632 put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; 631 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
633 put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; 632 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
634 avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; 633 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
635 put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; 634 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
636 put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; 635 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
637 avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; 636 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
638 637
639 if (mm_flags & MM_MMXEXT) { 638 if (mm_flags & MM_MMXEXT) {
640 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; 639 c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
641 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; 640 c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
642 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; 641 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
643 pix_abs8x8_x2 = pix_abs8x8_x2_mmx; 642 c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
644 pix_abs8x8_y2 = pix_abs8x8_y2_mmx; 643 c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
645 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; 644 c->pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
646 } 645 }
647 } 646 }
648 } 647 }