Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 6437:5154ab444372 libavcodec
move draw_edges() into dsputil
author | aurel |
---|---|
date | Tue, 04 Mar 2008 00:07:41 +0000 |
parents | 9a736918fd90 |
children | 33ac9c5524cc |
comparison legend: equal | deleted | inserted | replaced
6436:6491d3284f89 | 6437:5154ab444372 |
---|---|
691 :: "r" (src), | 691 :: "r" (src), |
692 "r" (src + 4*stride), | 692 "r" (src + 4*stride), |
693 "r" ((long) stride ), | 693 "r" ((long) stride ), |
694 "r" ((long)(3*stride)) | 694 "r" ((long)(3*stride)) |
695 ); | 695 ); |
696 } | |
697 } | |
698 | |
699 /* draw the edges of width 'w' of an image of size width, height | |
700 this mmx version can only handle w==8 || w==16 */ | |
701 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) | |
702 { | |
703 uint8_t *ptr, *last_line; | |
704 int i; | |
705 | |
706 last_line = buf + (height - 1) * wrap; | |
707 /* left and right */ | |
708 ptr = buf; | |
709 if(w==8) | |
710 { | |
711 asm volatile( | |
712 "1: \n\t" | |
713 "movd (%0), %%mm0 \n\t" | |
714 "punpcklbw %%mm0, %%mm0 \n\t" | |
715 "punpcklwd %%mm0, %%mm0 \n\t" | |
716 "punpckldq %%mm0, %%mm0 \n\t" | |
717 "movq %%mm0, -8(%0) \n\t" | |
718 "movq -8(%0, %2), %%mm1 \n\t" | |
719 "punpckhbw %%mm1, %%mm1 \n\t" | |
720 "punpckhwd %%mm1, %%mm1 \n\t" | |
721 "punpckhdq %%mm1, %%mm1 \n\t" | |
722 "movq %%mm1, (%0, %2) \n\t" | |
723 "add %1, %0 \n\t" | |
724 "cmp %3, %0 \n\t" | |
725 " jb 1b \n\t" | |
726 : "+r" (ptr) | |
727 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |
728 ); | |
729 } | |
730 else | |
731 { | |
732 asm volatile( | |
733 "1: \n\t" | |
734 "movd (%0), %%mm0 \n\t" | |
735 "punpcklbw %%mm0, %%mm0 \n\t" | |
736 "punpcklwd %%mm0, %%mm0 \n\t" | |
737 "punpckldq %%mm0, %%mm0 \n\t" | |
738 "movq %%mm0, -8(%0) \n\t" | |
739 "movq %%mm0, -16(%0) \n\t" | |
740 "movq -8(%0, %2), %%mm1 \n\t" | |
741 "punpckhbw %%mm1, %%mm1 \n\t" | |
742 "punpckhwd %%mm1, %%mm1 \n\t" | |
743 "punpckhdq %%mm1, %%mm1 \n\t" | |
744 "movq %%mm1, (%0, %2) \n\t" | |
745 "movq %%mm1, 8(%0, %2) \n\t" | |
746 "add %1, %0 \n\t" | |
747 "cmp %3, %0 \n\t" | |
748 " jb 1b \n\t" | |
749 : "+r" (ptr) | |
750 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |
751 ); | |
752 } | |
753 | |
754 for(i=0;i<w;i+=4) { | |
755 /* top and bottom (and hopefully also the corners) */ | |
756 ptr= buf - (i + 1) * wrap - w; | |
757 asm volatile( | |
758 "1: \n\t" | |
759 "movq (%1, %0), %%mm0 \n\t" | |
760 "movq %%mm0, (%0) \n\t" | |
761 "movq %%mm0, (%0, %2) \n\t" | |
762 "movq %%mm0, (%0, %2, 2) \n\t" | |
763 "movq %%mm0, (%0, %3) \n\t" | |
764 "add $8, %0 \n\t" | |
765 "cmp %4, %0 \n\t" | |
766 " jb 1b \n\t" | |
767 : "+r" (ptr) | |
768 : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w) | |
769 ); | |
770 ptr= last_line + (i + 1) * wrap - w; | |
771 asm volatile( | |
772 "1: \n\t" | |
773 "movq (%1, %0), %%mm0 \n\t" | |
774 "movq %%mm0, (%0) \n\t" | |
775 "movq %%mm0, (%0, %2) \n\t" | |
776 "movq %%mm0, (%0, %2, 2) \n\t" | |
777 "movq %%mm0, (%0, %3) \n\t" | |
778 "add $8, %0 \n\t" | |
779 "cmp %4, %0 \n\t" | |
780 " jb 1b \n\t" | |
781 : "+r" (ptr) | |
782 : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w) | |
783 ); | |
696 } | 784 } |
697 } | 785 } |
698 | 786 |
699 #define PAETH(cpu, abs3)\ | 787 #define PAETH(cpu, abs3)\ |
700 void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\ | 788 void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\ |
2073 c->gmc= gmc_mmx; | 2161 c->gmc= gmc_mmx; |
2074 | 2162 |
2075 c->add_bytes= add_bytes_mmx; | 2163 c->add_bytes= add_bytes_mmx; |
2076 c->add_bytes_l2= add_bytes_l2_mmx; | 2164 c->add_bytes_l2= add_bytes_l2_mmx; |
2077 | 2165 |
2166 c->draw_edges = draw_edges_mmx; | |
2167 | |
2078 if (ENABLE_ANY_H263) { | 2168 if (ENABLE_ANY_H263) { |
2079 c->h263_v_loop_filter= h263_v_loop_filter_mmx; | 2169 c->h263_v_loop_filter= h263_v_loop_filter_mmx; |
2080 c->h263_h_loop_filter= h263_h_loop_filter_mmx; | 2170 c->h263_h_loop_filter= h263_h_loop_filter_mmx; |
2081 } | 2171 } |
2082 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd; | 2172 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd; |