Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 5:4479bcab253e libavcodec
Removed emms() calls that are no longer needed
author | glantau |
---|---|
date | Tue, 24 Jul 2001 20:38:55 +0000 |
parents | 986e461dc072 |
children | 1b4461b5a7fb |
comparison
equal
deleted
inserted
replaced
4:b8374040680d | 5:4479bcab253e |
---|---|
18 * | 18 * |
19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | 19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
20 */ | 20 */ |
21 | 21 |
22 #include "../dsputil.h" | 22 #include "../dsputil.h" |
23 | |
24 int mm_flags; /* multimedia extension flags */ | |
23 | 25 |
24 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | 26 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); |
25 int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h); | 27 int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h); |
26 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | 28 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); |
27 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | 29 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); |
87 :"m"(*pix), "m"(*(pix+line_size)) | 89 :"m"(*pix), "m"(*(pix+line_size)) |
88 :"memory"); | 90 :"memory"); |
89 pix += line_size*2; | 91 pix += line_size*2; |
90 p += 16; | 92 p += 16; |
91 } | 93 } |
92 emms(); | |
93 } | 94 } |
94 | 95 |
95 static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | 96 static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
96 { | 97 { |
97 const DCTELEM *p; | 98 const DCTELEM *p; |
124 :"m"(*p) | 125 :"m"(*p) |
125 :"memory"); | 126 :"memory"); |
126 pix += line_size*4; | 127 pix += line_size*4; |
127 p += 32; | 128 p += 32; |
128 } | 129 } |
129 emms(); | |
130 } | 130 } |
131 | 131 |
132 static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | 132 static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
133 { | 133 { |
134 const DCTELEM *p; | 134 const DCTELEM *p; |
165 :"m"(*p) | 165 :"m"(*p) |
166 :"memory"); | 166 :"memory"); |
167 pix += line_size*2; | 167 pix += line_size*2; |
168 p += 16; | 168 p += 16; |
169 } | 169 } |
170 emms(); | |
171 } | 170 } |
172 | 171 |
173 static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 172 static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
174 { | 173 { |
175 int dh, hh; | 174 int dh, hh; |
203 :"m"(*pix) | 202 :"m"(*pix) |
204 :"memory"); | 203 :"memory"); |
205 pix = pix + line_size; | 204 pix = pix + line_size; |
206 p = p + line_size; | 205 p = p + line_size; |
207 } | 206 } |
208 emms(); | |
209 } | 207 } |
210 | 208 |
211 static void put_pixels_x2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 209 static void put_pixels_x2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
212 { | 210 { |
213 UINT8 *p; | 211 UINT8 *p; |
239 :"=m"(*p) | 237 :"=m"(*p) |
240 :"m"(*pix) | 238 :"m"(*pix) |
241 :"memory"); | 239 :"memory"); |
242 pix += line_size; p += line_size; | 240 pix += line_size; p += line_size; |
243 } while (--h); | 241 } while (--h); |
244 emms(); | |
245 } | 242 } |
246 | 243 |
247 static void put_pixels_y2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 244 static void put_pixels_y2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
248 { | 245 { |
249 UINT8 *p; | 246 UINT8 *p; |
277 "m"(*(pix+line_size)) | 274 "m"(*(pix+line_size)) |
278 :"memory"); | 275 :"memory"); |
279 pix += line_size; | 276 pix += line_size; |
280 p += line_size; | 277 p += line_size; |
281 } while (--h); | 278 } while (--h); |
282 emms(); | |
283 } | 279 } |
284 | 280 |
285 static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 281 static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
286 { | 282 { |
287 UINT8 *p; | 283 UINT8 *p; |
327 "m"(*(pix+line_size)) | 323 "m"(*(pix+line_size)) |
328 :"memory"); | 324 :"memory"); |
329 pix += line_size; | 325 pix += line_size; |
330 p += line_size; | 326 p += line_size; |
331 } while(--h); | 327 } while(--h); |
332 emms(); | |
333 } | 328 } |
334 | 329 |
335 static void put_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | 330 static void put_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) |
336 { | 331 { |
337 UINT8 *p; | 332 UINT8 *p; |
359 :"m"(*pix) | 354 :"m"(*pix) |
360 :"memory"); | 355 :"memory"); |
361 pix += line_size; | 356 pix += line_size; |
362 p += line_size; | 357 p += line_size; |
363 } while (--h); | 358 } while (--h); |
364 emms(); | |
365 } | 359 } |
366 | 360 |
367 static void put_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | 361 static void put_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) |
368 { | 362 { |
369 UINT8 *p; | 363 UINT8 *p; |
392 "m"(*(pix+line_size)) | 386 "m"(*(pix+line_size)) |
393 :"memory"); | 387 :"memory"); |
394 pix += line_size; | 388 pix += line_size; |
395 p += line_size; | 389 p += line_size; |
396 } while(--h); | 390 } while(--h); |
397 emms(); | |
398 } | 391 } |
399 | 392 |
400 static void put_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | 393 static void put_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) |
401 { | 394 { |
402 UINT8 *p; | 395 UINT8 *p; |
442 "m"(*(pix+line_size)) | 435 "m"(*(pix+line_size)) |
443 :"memory"); | 436 :"memory"); |
444 pix += line_size; | 437 pix += line_size; |
445 p += line_size; | 438 p += line_size; |
446 } while(--h); | 439 } while(--h); |
447 emms(); | |
448 } | 440 } |
449 | 441 |
450 static void avg_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 442 static void avg_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
451 { | 443 { |
452 UINT8 *p; | 444 UINT8 *p; |
480 :"memory"); | 472 :"memory"); |
481 pix += line_size; | 473 pix += line_size; |
482 p += line_size; | 474 p += line_size; |
483 } | 475 } |
484 while (--h); | 476 while (--h); |
485 emms(); | |
486 } | 477 } |
487 | 478 |
488 static void avg_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | 479 static void avg_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) |
489 { | 480 { |
490 UINT8 *p; | 481 UINT8 *p; |
527 :"m"(*pix) | 518 :"m"(*pix) |
528 :"memory"); | 519 :"memory"); |
529 pix += line_size; | 520 pix += line_size; |
530 p += line_size; | 521 p += line_size; |
531 } while (--h); | 522 } while (--h); |
532 emms(); | |
533 } | 523 } |
534 | 524 |
535 static void avg_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | 525 static void avg_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) |
536 { | 526 { |
537 UINT8 *p; | 527 UINT8 *p; |
574 :"m"(*pix), "m"(*(pix+line_size)) | 564 :"m"(*pix), "m"(*(pix+line_size)) |
575 :"memory"); | 565 :"memory"); |
576 pix += line_size; | 566 pix += line_size; |
577 p += line_size ; | 567 p += line_size ; |
578 } while(--h); | 568 } while(--h); |
579 emms(); | |
580 } | 569 } |
581 | 570 |
582 static void avg_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | 571 static void avg_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) |
583 { | 572 { |
584 UINT8 *p; | 573 UINT8 *p; |
635 "m"(*(pix+line_size)), "m"(mm_wone[0]) | 624 "m"(*(pix+line_size)), "m"(mm_wone[0]) |
636 :"memory"); | 625 :"memory"); |
637 pix += line_size; | 626 pix += line_size; |
638 p += line_size ; | 627 p += line_size ; |
639 } while(--h); | 628 } while(--h); |
640 emms(); | |
641 } | 629 } |
642 | 630 |
643 static void avg_no_rnd_pixels_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | 631 static void avg_no_rnd_pixels_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) |
644 { | 632 { |
645 UINT8 *p; | 633 UINT8 *p; |
667 :"m"(*pix) | 655 :"m"(*pix) |
668 :"memory"); | 656 :"memory"); |
669 pix += line_size; | 657 pix += line_size; |
670 p += line_size ; | 658 p += line_size ; |
671 } while (--h); | 659 } while (--h); |
672 emms(); | |
673 } | 660 } |
674 | 661 |
675 static void avg_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | 662 static void avg_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) |
676 { | 663 { |
677 UINT8 *p; | 664 UINT8 *p; |
708 :"m"(*pix) | 695 :"m"(*pix) |
709 :"memory"); | 696 :"memory"); |
710 pix += line_size; | 697 pix += line_size; |
711 p += line_size; | 698 p += line_size; |
712 } while (--h); | 699 } while (--h); |
713 emms(); | |
714 } | 700 } |
715 | 701 |
716 static void avg_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | 702 static void avg_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) |
717 { | 703 { |
718 UINT8 *p; | 704 UINT8 *p; |
749 :"m"(*pix), "m"(*(pix+line_size)) | 735 :"m"(*pix), "m"(*(pix+line_size)) |
750 :"memory"); | 736 :"memory"); |
751 pix += line_size; | 737 pix += line_size; |
752 p += line_size ; | 738 p += line_size ; |
753 } while(--h); | 739 } while(--h); |
754 emms(); | |
755 } | 740 } |
756 | 741 |
757 static void avg_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | 742 static void avg_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) |
758 { | 743 { |
759 UINT8 *p; | 744 UINT8 *p; |
807 "m"(*(pix+line_size)) | 792 "m"(*(pix+line_size)) |
808 :"memory"); | 793 :"memory"); |
809 pix += line_size; | 794 pix += line_size; |
810 p += line_size; | 795 p += line_size; |
811 } while(--h); | 796 } while(--h); |
812 emms(); | |
813 } | 797 } |
814 | 798 |
815 static void sub_pixels_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | 799 static void sub_pixels_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) |
816 { | 800 { |
817 DCTELEM *p; | 801 DCTELEM *p; |
835 :"m"(*pix) | 819 :"m"(*pix) |
836 :"memory"); | 820 :"memory"); |
837 pix += line_size; | 821 pix += line_size; |
838 p += 8; | 822 p += 8; |
839 } while (--h); | 823 } while (--h); |
840 emms(); | |
841 } | 824 } |
842 | 825 |
843 static void sub_pixels_x2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | 826 static void sub_pixels_x2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) |
844 { | 827 { |
845 DCTELEM *p; | 828 DCTELEM *p; |
876 :"m"(*pix) | 859 :"m"(*pix) |
877 :"memory"); | 860 :"memory"); |
878 pix += line_size; | 861 pix += line_size; |
879 p += 8; | 862 p += 8; |
880 } while (--h); | 863 } while (--h); |
881 emms(); | |
882 } | 864 } |
883 | 865 |
884 static void sub_pixels_y2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | 866 static void sub_pixels_y2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) |
885 { | 867 { |
886 DCTELEM *p; | 868 DCTELEM *p; |
917 :"m"(*pix), "m"(*(pix+line_size)) | 899 :"m"(*pix), "m"(*(pix+line_size)) |
918 :"memory"); | 900 :"memory"); |
919 pix += line_size; | 901 pix += line_size; |
920 p += 8; | 902 p += 8; |
921 } while (--h); | 903 } while (--h); |
922 emms(); | |
923 } | 904 } |
924 | 905 |
925 static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | 906 static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) |
926 { | 907 { |
927 DCTELEM *p; | 908 DCTELEM *p; |
971 "m"(*(pix+line_size)) | 952 "m"(*(pix+line_size)) |
972 :"memory"); | 953 :"memory"); |
973 pix += line_size; | 954 pix += line_size; |
974 p += 8 ; | 955 p += 8 ; |
975 } while(--h); | 956 } while(--h); |
976 emms(); | |
977 } | 957 } |
978 | 958 |
979 void dsputil_init_mmx(void) | 959 void dsputil_init_mmx(void) |
980 { | 960 { |
981 mm_flags = mm_support(); | 961 mm_flags = mm_support(); |