comparison libswscale/rgb2rgb_template.c @ 28994:a03804d10dbf

Average chroma of 2 lines in packed 422 -> planar 420.
author michael
date Sat, 21 Mar 2009 23:45:20 +0000
parents e67354af521e
children d50adcfcf99c
comparison
equal deleted inserted replaced
28993:56d2b2886a28 28994:a03804d10dbf
2776 dst1[count]= src[4*count+2]; 2776 dst1[count]= src[4*count+2];
2777 count++; 2777 count++;
2778 } 2778 }
2779 } 2779 }
2780 2780
2781 static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
2782 {
2783 dst0 += count;
2784 dst1 += count;
2785 src0 += 4*count;
2786 src1 += 4*count;
2787 count= - count;
2788 #ifdef PAVGB
2789 if(count <= -8){
2790 count += 7;
2791 __asm__ volatile(
2792 "pcmpeqw %%mm7, %%mm7 \n\t"
2793 "psrlw $8, %%mm7 \n\t"
2794 "1: \n\t"
2795 "movq -28(%1, %0, 4), %%mm0 \n\t"
2796 "movq -20(%1, %0, 4), %%mm1 \n\t"
2797 "movq -12(%1, %0, 4), %%mm2 \n\t"
2798 "movq -4(%1, %0, 4), %%mm3 \n\t"
2799 PAVGB" -28(%2, %0, 4), %%mm0 \n\t"
2800 PAVGB" -20(%2, %0, 4), %%mm1 \n\t"
2801 PAVGB" -12(%2, %0, 4), %%mm2 \n\t"
2802 PAVGB" - 4(%2, %0, 4), %%mm3 \n\t"
2803 "pand %%mm7, %%mm0 \n\t"
2804 "pand %%mm7, %%mm1 \n\t"
2805 "pand %%mm7, %%mm2 \n\t"
2806 "pand %%mm7, %%mm3 \n\t"
2807 "packuswb %%mm1, %%mm0 \n\t"
2808 "packuswb %%mm3, %%mm2 \n\t"
2809 "movq %%mm0, %%mm1 \n\t"
2810 "movq %%mm2, %%mm3 \n\t"
2811 "psrlw $8, %%mm0 \n\t"
2812 "psrlw $8, %%mm2 \n\t"
2813 "pand %%mm7, %%mm1 \n\t"
2814 "pand %%mm7, %%mm3 \n\t"
2815 "packuswb %%mm2, %%mm0 \n\t"
2816 "packuswb %%mm3, %%mm1 \n\t"
2817 MOVNTQ" %%mm0,- 7(%4, %0) \n\t"
2818 MOVNTQ" %%mm1,- 7(%3, %0) \n\t"
2819 "add $8, %0 \n\t"
2820 " js 1b \n\t"
2821 : "+r"(count)
2822 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
2823 );
2824 count -= 7;
2825 }
2826 #endif
2827 while(count<0){
2828 dst0[count]= src0[4*count+0]+src1[4*count+0];
2829 dst1[count]= src0[4*count+2]+src1[4*count+2];
2830 count++;
2831 }
2832 }
2833
2781 static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) 2834 static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
2782 { 2835 {
2783 dst0+= count; 2836 dst0+= count;
2784 dst1+= count; 2837 dst1+= count;
2785 src += 4*count; 2838 src += 4*count;
2824 dst1[count]= src[4*count+2]; 2877 dst1[count]= src[4*count+2];
2825 count++; 2878 count++;
2826 } 2879 }
2827 } 2880 }
2828 2881
2882 static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
2883 {
2884 dst0 += count;
2885 dst1 += count;
2886 src0 += 4*count;
2887 src1 += 4*count;
2888 count= - count;
2889 #ifdef PAVGB
2890 if(count <= -8){
2891 count += 7;
2892 __asm__ volatile(
2893 "pcmpeqw %%mm7, %%mm7 \n\t"
2894 "psrlw $8, %%mm7 \n\t"
2895 "1: \n\t"
2896 "movq -28(%1, %0, 4), %%mm0 \n\t"
2897 "movq -20(%1, %0, 4), %%mm1 \n\t"
2898 "movq -12(%1, %0, 4), %%mm2 \n\t"
2899 "movq -4(%1, %0, 4), %%mm3 \n\t"
2900 PAVGB" -28(%2, %0, 4), %%mm0 \n\t"
2901 PAVGB" -20(%2, %0, 4), %%mm1 \n\t"
2902 PAVGB" -12(%2, %0, 4), %%mm2 \n\t"
2903 PAVGB" - 4(%2, %0, 4), %%mm3 \n\t"
2904 "psrlw $8, %%mm0 \n\t"
2905 "psrlw $8, %%mm1 \n\t"
2906 "psrlw $8, %%mm2 \n\t"
2907 "psrlw $8, %%mm3 \n\t"
2908 "packuswb %%mm1, %%mm0 \n\t"
2909 "packuswb %%mm3, %%mm2 \n\t"
2910 "movq %%mm0, %%mm1 \n\t"
2911 "movq %%mm2, %%mm3 \n\t"
2912 "psrlw $8, %%mm0 \n\t"
2913 "psrlw $8, %%mm2 \n\t"
2914 "pand %%mm7, %%mm1 \n\t"
2915 "pand %%mm7, %%mm3 \n\t"
2916 "packuswb %%mm2, %%mm0 \n\t"
2917 "packuswb %%mm3, %%mm1 \n\t"
2918 MOVNTQ" %%mm0,- 7(%4, %0) \n\t"
2919 MOVNTQ" %%mm1,- 7(%3, %0) \n\t"
2920 "add $8, %0 \n\t"
2921 " js 1b \n\t"
2922 : "+r"(count)
2923 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
2924 );
2925 count -= 7;
2926 }
2927 #endif
2928 while(count<0){
2929 dst0[count]= src0[4*count+0]+src1[4*count+0];
2930 dst1[count]= src0[4*count+2]+src1[4*count+2];
2931 count++;
2932 }
2933 }
2934
2829 static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, 2935 static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
2830 long width, long height, 2936 long width, long height,
2831 long lumStride, long chromStride, long srcStride) 2937 long lumStride, long chromStride, long srcStride)
2832 { 2938 {
2833 long y; 2939 long y;
2834 const long chromWidth= -((-width)>>1); 2940 const long chromWidth= -((-width)>>1);
2835 2941
2836 for (y=0; y<height; y++){ 2942 for (y=0; y<height; y++){
2837 RENAME(extract_even)(src, ydst, width); 2943 RENAME(extract_even)(src, ydst, width);
2838 if(!(y&1)){ 2944 if(y&1){
2839 RENAME(extract_odd2)(src, udst, vdst, chromWidth); 2945 RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth);
2840 udst+= chromStride; 2946 udst+= chromStride;
2841 vdst+= chromStride; 2947 vdst+= chromStride;
2842 } 2948 }
2843 2949
2844 src += srcStride; 2950 src += srcStride;
2885 long y; 2991 long y;
2886 const long chromWidth= -((-width)>>1); 2992 const long chromWidth= -((-width)>>1);
2887 2993
2888 for (y=0; y<height; y++){ 2994 for (y=0; y<height; y++){
2889 RENAME(extract_even)(src+1, ydst, width); 2995 RENAME(extract_even)(src+1, ydst, width);
2890 if(!(y&1)){ 2996 if(y&1){
2891 RENAME(extract_even2)(src, udst, vdst, chromWidth); 2997 RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
2892 udst+= chromStride; 2998 udst+= chromStride;
2893 vdst+= chromStride; 2999 vdst+= chromStride;
2894 } 3000 }
2895 3001
2896 src += srcStride; 3002 src += srcStride;