comparison i386/dsputil_mmx.c @ 6437:5154ab444372 libavcodec

move draw_edges() into dsputil
author aurel
date Tue, 04 Mar 2008 00:07:41 +0000
parents 9a736918fd90
children 33ac9c5524cc
comparison
equal deleted inserted replaced
6436:6491d3284f89 6437:5154ab444372
691 :: "r" (src), 691 :: "r" (src),
692 "r" (src + 4*stride), 692 "r" (src + 4*stride),
693 "r" ((long) stride ), 693 "r" ((long) stride ),
694 "r" ((long)(3*stride)) 694 "r" ((long)(3*stride))
695 ); 695 );
696 }
697 }
698
699 /* Draw the edges of width 'w' around an image of size width x height: the outermost pixels reachable from 'buf' are replicated into the bytes left and right of every row and into the w rows above and below the image ('wrap' is the byte offset between consecutive rows).
700 This MMX version can only handle w==8 || w==16; note that any w other than 8 takes the 16-byte path below. NOTE(review): the asm statements declare no clobbers (neither "memory" nor the mm registers) and no emms is issued in this function -- presumably the caller is expected to handle that; confirm. */
701 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
702 {
703 uint8_t *ptr, *last_line;
704 int i;
705
706 last_line = buf + (height - 1) * wrap; /* start of the last image row */
707 /* left and right: one image row per loop iteration, from buf up to (not including) buf + wrap*height */
708 ptr = buf;
709 if(w==8)
710 {
711 asm volatile(
712 "1: \n\t"
713 "movd (%0), %%mm0 \n\t" /* mm0 = first 4 bytes of the current row */
714 "punpcklbw %%mm0, %%mm0 \n\t" /* these three unpacks broadcast the lowest byte (leftmost pixel) to all 8 bytes of mm0 */
715 "punpcklwd %%mm0, %%mm0 \n\t"
716 "punpckldq %%mm0, %%mm0 \n\t"
717 "movq %%mm0, -8(%0) \n\t" /* fill the 8 bytes left of the row */
718 "movq -8(%0, %2), %%mm1 \n\t" /* mm1 = last 8 bytes of the row (%2 = width) */
719 "punpckhbw %%mm1, %%mm1 \n\t" /* these three unpacks broadcast the highest byte (rightmost pixel) to all 8 bytes of mm1 */
720 "punpckhwd %%mm1, %%mm1 \n\t"
721 "punpckhdq %%mm1, %%mm1 \n\t"
722 "movq %%mm1, (%0, %2) \n\t" /* fill the 8 bytes right of the row */
723 "add %1, %0 \n\t" /* advance to the next row (%1 = wrap) */
724 "cmp %3, %0 \n\t" /* %3 = buf + wrap*height, evaluated from ptr before the asm modifies it */
725 " jb 1b \n\t" /* unsigned compare; the loop body always runs at least once */
726 : "+r" (ptr)
727 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) /* NOTE(review): (long) would truncate on targets where long is narrower than a pointer -- confirm supported targets */
728 );
729 }
730 else
731 {
732 asm volatile(
733 "1: \n\t"
734 "movd (%0), %%mm0 \n\t" /* mm0 = first 4 bytes of the current row */
735 "punpcklbw %%mm0, %%mm0 \n\t" /* broadcast the leftmost pixel to all 8 bytes of mm0 (three unpacks) */
736 "punpcklwd %%mm0, %%mm0 \n\t"
737 "punpckldq %%mm0, %%mm0 \n\t"
738 "movq %%mm0, -8(%0) \n\t" /* fill the 16 bytes left of the row with two 8-byte stores */
739 "movq %%mm0, -16(%0) \n\t"
740 "movq -8(%0, %2), %%mm1 \n\t" /* mm1 = last 8 bytes of the row (%2 = width) */
741 "punpckhbw %%mm1, %%mm1 \n\t" /* broadcast the rightmost pixel to all 8 bytes of mm1 (three unpacks) */
742 "punpckhwd %%mm1, %%mm1 \n\t"
743 "punpckhdq %%mm1, %%mm1 \n\t"
744 "movq %%mm1, (%0, %2) \n\t" /* fill the 16 bytes right of the row with two 8-byte stores */
745 "movq %%mm1, 8(%0, %2) \n\t"
746 "add %1, %0 \n\t" /* advance to the next row (%1 = wrap) */
747 "cmp %3, %0 \n\t" /* %3 = buf + wrap*height, evaluated from ptr before the asm modifies it */
748 " jb 1b \n\t" /* unsigned compare; the loop body always runs at least once */
749 : "+r" (ptr)
750 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
751 );
752 }
753
754 for(i=0;i<w;i+=4) { /* each iteration fills 4 rows above and 4 rows below the image */
755 /* top and bottom (and hopefully also the corners): each copy starts w bytes left of column 0 and proceeds in 8-byte steps until it reaches column width+w, so it also replicates the left/right borders written above */
756 ptr= buf - (i + 1) * wrap - w; /* row i+1 above the image, w bytes left of column 0 */
757 asm volatile(
758 "1: \n\t"
759 "movq (%1, %0), %%mm0 \n\t" /* load 8 bytes from the first image row at the same column (%1 = offset from the destination row to that row) */
760 "movq %%mm0, (%0) \n\t" /* store to row i+1 above the image */
761 "movq %%mm0, (%0, %2) \n\t" /* row i+2 above (%2 = -wrap) */
762 "movq %%mm0, (%0, %2, 2) \n\t" /* row i+3 above */
763 "movq %%mm0, (%0, %3) \n\t" /* row i+4 above (%3 = -3*wrap) */
764 "add $8, %0 \n\t" /* next 8 columns */
765 "cmp %4, %0 \n\t" /* %4 = initial ptr + width + 2*w */
766 " jb 1b \n\t"
767 : "+r" (ptr)
768 : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
769 );
770 ptr= last_line + (i + 1) * wrap - w; /* row i+1 below the image, w bytes left of column 0 */
771 asm volatile(
772 "1: \n\t"
773 "movq (%1, %0), %%mm0 \n\t" /* load 8 bytes from the last image row at the same column (%1 = offset from the destination row to that row) */
774 "movq %%mm0, (%0) \n\t" /* store to row i+1 below the image */
775 "movq %%mm0, (%0, %2) \n\t" /* row i+2 below (%2 = wrap) */
776 "movq %%mm0, (%0, %2, 2) \n\t" /* row i+3 below */
777 "movq %%mm0, (%0, %3) \n\t" /* row i+4 below (%3 = 3*wrap) */
778 "add $8, %0 \n\t" /* next 8 columns */
779 "cmp %4, %0 \n\t" /* %4 = initial ptr + width + 2*w */
780 " jb 1b \n\t"
781 : "+r" (ptr)
782 : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
783 );
696 } 784 }
697 } 785 }
698 786
699 #define PAETH(cpu, abs3)\ 787 #define PAETH(cpu, abs3)\
700 void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\ 788 void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
2073 c->gmc= gmc_mmx; 2161 c->gmc= gmc_mmx;
2074 2162
2075 c->add_bytes= add_bytes_mmx; 2163 c->add_bytes= add_bytes_mmx;
2076 c->add_bytes_l2= add_bytes_l2_mmx; 2164 c->add_bytes_l2= add_bytes_l2_mmx;
2077 2165
2166 c->draw_edges = draw_edges_mmx;
2167
2078 if (ENABLE_ANY_H263) { 2168 if (ENABLE_ANY_H263) {
2079 c->h263_v_loop_filter= h263_v_loop_filter_mmx; 2169 c->h263_v_loop_filter= h263_v_loop_filter_mmx;
2080 c->h263_h_loop_filter= h263_h_loop_filter_mmx; 2170 c->h263_h_loop_filter= h263_h_loop_filter_mmx;
2081 } 2171 }
2082 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd; 2172 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd;