Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 853:eacc2dd8fd9d libavcodec
* using DSPContext - so each codec could use its local (sub)set of CPU extensions
author | kabi |
---|---|
date | Mon, 11 Nov 2002 09:40:17 +0000 |
parents | e65798d228ea |
children | 725ef4ea3ecc |
comparison
equal
deleted
inserted
replaced
852:c01c98206ee6 | 853:eacc2dd8fd9d |
---|---|
20 */ | 20 */ |
21 | 21 |
22 #include "../dsputil.h" | 22 #include "../dsputil.h" |
23 | 23 |
24 int mm_flags; /* multimedia extension flags */ | 24 int mm_flags; /* multimedia extension flags */ |
25 | 25 /* FIXME use them in static form */ |
26 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | 26 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
27 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | 27 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
28 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | 28 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
29 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | 29 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
30 | 30 |
240 : "r" (block+64), "r" (stride) | 240 : "r" (block+64), "r" (stride) |
241 : "%eax" | 241 : "%eax" |
242 ); | 242 ); |
243 } | 243 } |
244 | 244 |
245 static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | 245 void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
246 { | 246 { |
247 const DCTELEM *p; | 247 const DCTELEM *p; |
248 UINT8 *pix; | 248 UINT8 *pix; |
249 | 249 |
250 /* read the pixels */ | 250 /* read the pixels */ |
295 "movq %%mm6, (%0, %2)\n\t" | 295 "movq %%mm6, (%0, %2)\n\t" |
296 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) | 296 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) |
297 :"memory"); | 297 :"memory"); |
298 } | 298 } |
299 | 299 |
300 static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | 300 void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
301 { | 301 { |
302 const DCTELEM *p; | 302 const DCTELEM *p; |
303 UINT8 *pix; | 303 UINT8 *pix; |
304 int i; | 304 int i; |
305 | 305 |
455 | 455 |
456 #if 0 | 456 #if 0 |
457 static void just_return() { return; } | 457 static void just_return() { return; } |
458 #endif | 458 #endif |
459 | 459 |
460 void dsputil_init_mmx(void) | 460 void dsputil_init_mmx(DSPContext* c, unsigned mask) |
461 { | 461 { |
462 mm_flags = mm_support(); | 462 mm_flags = mm_support(); |
463 #if 0 | 463 #if 0 |
464 fprintf(stderr, "libavcodec: CPU flags:"); | 464 fprintf(stderr, "libavcodec: CPU flags:"); |
465 if (mm_flags & MM_MMX) | 465 if (mm_flags & MM_MMX) |
474 fprintf(stderr, " sse2"); | 474 fprintf(stderr, " sse2"); |
475 fprintf(stderr, "\n"); | 475 fprintf(stderr, "\n"); |
476 #endif | 476 #endif |
477 | 477 |
478 if (mm_flags & MM_MMX) { | 478 if (mm_flags & MM_MMX) { |
479 get_pixels = get_pixels_mmx; | 479 c->get_pixels = get_pixels_mmx; |
480 diff_pixels = diff_pixels_mmx; | 480 c->diff_pixels = diff_pixels_mmx; |
481 put_pixels_clamped = put_pixels_clamped_mmx; | 481 c->put_pixels_clamped = put_pixels_clamped_mmx; |
482 add_pixels_clamped = add_pixels_clamped_mmx; | 482 c->add_pixels_clamped = add_pixels_clamped_mmx; |
483 clear_blocks= clear_blocks_mmx; | 483 c->clear_blocks = clear_blocks_mmx; |
484 pix_sum= pix_sum16_mmx; | 484 c->pix_sum = pix_sum16_mmx; |
485 | 485 |
486 pix_abs16x16 = pix_abs16x16_mmx; | 486 c->pix_abs16x16 = pix_abs16x16_mmx; |
487 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | 487 c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; |
488 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | 488 c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; |
489 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; | 489 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
490 pix_abs8x8 = pix_abs8x8_mmx; | 490 c->pix_abs8x8 = pix_abs8x8_mmx; |
491 pix_abs8x8_x2 = pix_abs8x8_x2_mmx; | 491 c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; |
492 pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | 492 c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; |
493 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; | 493 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx; |
494 | 494 |
495 put_pixels_tab[0][0] = put_pixels16_mmx; | 495 c->put_pixels_tab[0][0] = put_pixels16_mmx; |
496 put_pixels_tab[0][1] = put_pixels16_x2_mmx; | 496 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; |
497 put_pixels_tab[0][2] = put_pixels16_y2_mmx; | 497 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx; |
498 put_pixels_tab[0][3] = put_pixels16_xy2_mmx; | 498 c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx; |
499 | 499 |
500 put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx; | 500 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx; |
501 put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; | 501 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; |
502 put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; | 502 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; |
503 put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx; | 503 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx; |
504 | 504 |
505 avg_pixels_tab[0][0] = avg_pixels16_mmx; | 505 c->avg_pixels_tab[0][0] = avg_pixels16_mmx; |
506 avg_pixels_tab[0][1] = avg_pixels16_x2_mmx; | 506 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx; |
507 avg_pixels_tab[0][2] = avg_pixels16_y2_mmx; | 507 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx; |
508 avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; | 508 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; |
509 | 509 |
510 avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx; | 510 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx; |
511 avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; | 511 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; |
512 avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; | 512 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; |
513 avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; | 513 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; |
514 | 514 |
515 put_pixels_tab[1][0] = put_pixels8_mmx; | 515 c->put_pixels_tab[1][0] = put_pixels8_mmx; |
516 put_pixels_tab[1][1] = put_pixels8_x2_mmx; | 516 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx; |
517 put_pixels_tab[1][2] = put_pixels8_y2_mmx; | 517 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx; |
518 put_pixels_tab[1][3] = put_pixels8_xy2_mmx; | 518 c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx; |
519 | 519 |
520 put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx; | 520 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx; |
521 put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; | 521 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; |
522 put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; | 522 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; |
523 put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx; | 523 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx; |
524 | 524 |
525 avg_pixels_tab[1][0] = avg_pixels8_mmx; | 525 c->avg_pixels_tab[1][0] = avg_pixels8_mmx; |
526 avg_pixels_tab[1][1] = avg_pixels8_x2_mmx; | 526 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx; |
527 avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; | 527 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; |
528 avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; | 528 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; |
529 | 529 |
530 avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; | 530 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; |
531 avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; | 531 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; |
532 avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; | 532 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; |
533 avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; | 533 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; |
534 | 534 |
535 if (mm_flags & MM_MMXEXT) { | 535 if (mm_flags & MM_MMXEXT) { |
536 pix_abs16x16 = pix_abs16x16_mmx2; | 536 c->pix_abs16x16 = pix_abs16x16_mmx2; |
537 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; | 537 c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; |
538 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; | 538 c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; |
539 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; | 539 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2; |
540 | 540 |
541 pix_abs8x8 = pix_abs8x8_mmx2; | 541 c->pix_abs8x8 = pix_abs8x8_mmx2; |
542 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; | 542 c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; |
543 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; | 543 c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; |
544 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; | 544 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2; |
545 | 545 |
546 put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | 546 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
547 put_pixels_tab[0][2] = put_pixels16_y2_mmx2; | 547 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; |
548 put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; | 548 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; |
549 put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; | 549 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; |
550 | 550 |
551 avg_pixels_tab[0][0] = avg_pixels16_mmx2; | 551 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; |
552 avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; | 552 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; |
553 avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; | 553 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; |
554 avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; | 554 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; |
555 | 555 |
556 put_pixels_tab[1][1] = put_pixels8_x2_mmx2; | 556 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; |
557 put_pixels_tab[1][2] = put_pixels8_y2_mmx2; | 557 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; |
558 put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; | 558 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; |
559 put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; | 559 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; |
560 | 560 |
561 avg_pixels_tab[1][0] = avg_pixels8_mmx2; | 561 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; |
562 avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; | 562 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; |
563 avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; | 563 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; |
564 avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; | 564 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; |
565 } else if (mm_flags & MM_3DNOW) { | 565 } else if (mm_flags & MM_3DNOW) { |
566 put_pixels_tab[0][1] = put_pixels16_x2_3dnow; | 566 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; |
567 put_pixels_tab[0][2] = put_pixels16_y2_3dnow; | 567 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; |
568 put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; | 568 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; |
569 put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; | 569 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; |
570 | 570 |
571 avg_pixels_tab[0][0] = avg_pixels16_3dnow; | 571 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; |
572 avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; | 572 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; |
573 avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; | 573 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; |
574 avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; | 574 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; |
575 | 575 |
576 put_pixels_tab[1][1] = put_pixels8_x2_3dnow; | 576 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; |
577 put_pixels_tab[1][2] = put_pixels8_y2_3dnow; | 577 c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; |
578 put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; | 578 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; |
579 put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; | 579 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; |
580 | 580 |
581 avg_pixels_tab[1][0] = avg_pixels8_3dnow; | 581 c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; |
582 avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; | 582 c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; |
583 avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; | 583 c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; |
584 avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; | 584 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; |
585 } | 585 } |
586 } | 586 } |
587 | 587 |
588 #if 0 | 588 #if 0 |
589 // for speed testing | 589 // for speed testing |
622 } | 622 } |
623 | 623 |
624 /* remove any non bit exact operation (testing purpose). NOTE that | 624 /* remove any non bit exact operation (testing purpose). NOTE that |
625 this function should be kept as small as possible because it is | 625 this function should be kept as small as possible because it is |
626 always difficult to test automatically non bit exact cases. */ | 626 always difficult to test automatically non bit exact cases. */ |
627 void dsputil_set_bit_exact_mmx(void) | 627 void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask) |
628 { | 628 { |
629 if (mm_flags & MM_MMX) { | 629 if (mm_flags & MM_MMX) { |
630 | |
631 /* MMX2 & 3DNOW */ | 630 /* MMX2 & 3DNOW */ |
632 put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; | 631 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; |
633 put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; | 632 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; |
634 avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; | 633 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; |
635 put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; | 634 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; |
636 put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; | 635 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; |
637 avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; | 636 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; |
638 | 637 |
639 if (mm_flags & MM_MMXEXT) { | 638 if (mm_flags & MM_MMXEXT) { |
640 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | 639 c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; |
641 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | 640 c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; |
642 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; | 641 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
643 pix_abs8x8_x2 = pix_abs8x8_x2_mmx; | 642 c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; |
644 pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | 643 c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; |
645 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; | 644 c->pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; |
646 } | 645 } |
647 } | 646 } |
648 } | 647 } |