comparison postprocess_altivec_template.c @ 98:e565483b1193 libpostproc

typo fixes: tempBlured --> tempBlurred
author diego
date Sat, 22 Mar 2008 17:36:31 +0000
parents c24dab9bca80
children fa0ecbc87f51
comparison
equal deleted inserted replaced
97:2653d9f33b8a 98:e565483b1193
798 #define doHorizLowPass_altivec(a...) doHorizLowPass_C(a) 798 #define doHorizLowPass_altivec(a...) doHorizLowPass_C(a)
799 #define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a) 799 #define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a)
800 #define do_a_deblock_altivec(a...) do_a_deblock_C(a) 800 #define do_a_deblock_altivec(a...) do_a_deblock_C(a)
801 801
802 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, 802 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
803 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) 803 uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
804 { 804 {
805 const vector signed int zero = vec_splat_s32(0); 805 const vector signed int zero = vec_splat_s32(0);
806 const vector signed short vsint16_1 = vec_splat_s16(1); 806 const vector signed short vsint16_1 = vec_splat_s16(1);
807 vector signed int v_dp = zero; 807 vector signed int v_dp = zero;
808 vector signed int v_sysdp = zero; 808 vector signed int v_sysdp = zero;
809 int d, sysd, i; 809 int d, sysd, i;
810 810
811 tempBluredPast[127]= maxNoise[0]; 811 tempBlurredPast[127]= maxNoise[0];
812 tempBluredPast[128]= maxNoise[1]; 812 tempBlurredPast[128]= maxNoise[1];
813 tempBluredPast[129]= maxNoise[2]; 813 tempBlurredPast[129]= maxNoise[2];
814 814
815 #define LOAD_LINE(src, i) \ 815 #define LOAD_LINE(src, i) \
816 register int j##src##i = i * stride; \ 816 register int j##src##i = i * stride; \
817 vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \ 817 vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \
818 const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \ 818 const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \
830 LOAD_LINE(src, 4); 830 LOAD_LINE(src, 4);
831 LOAD_LINE(src, 5); 831 LOAD_LINE(src, 5);
832 LOAD_LINE(src, 6); 832 LOAD_LINE(src, 6);
833 LOAD_LINE(src, 7); 833 LOAD_LINE(src, 7);
834 834
835 LOAD_LINE(tempBlured, 0); 835 LOAD_LINE(tempBlurred, 0);
836 LOAD_LINE(tempBlured, 1); 836 LOAD_LINE(tempBlurred, 1);
837 LOAD_LINE(tempBlured, 2); 837 LOAD_LINE(tempBlurred, 2);
838 LOAD_LINE(tempBlured, 3); 838 LOAD_LINE(tempBlurred, 3);
839 LOAD_LINE(tempBlured, 4); 839 LOAD_LINE(tempBlurred, 4);
840 LOAD_LINE(tempBlured, 5); 840 LOAD_LINE(tempBlurred, 5);
841 LOAD_LINE(tempBlured, 6); 841 LOAD_LINE(tempBlurred, 6);
842 LOAD_LINE(tempBlured, 7); 842 LOAD_LINE(tempBlurred, 7);
843 #undef LOAD_LINE 843 #undef LOAD_LINE
844 844
845 #define ACCUMULATE_DIFFS(i) \ 845 #define ACCUMULATE_DIFFS(i) \
846 vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \ 846 vector signed short v_d##i = vec_sub(v_tempBlurredAss##i, \
847 v_srcAss##i); \ 847 v_srcAss##i); \
848 v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ 848 v_dp = vec_msums(v_d##i, v_d##i, v_dp); \
849 v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) 849 v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp)
850 850
851 ACCUMULATE_DIFFS(0); 851 ACCUMULATE_DIFFS(0);
867 vec_ste(v_dp, 0, &d); 867 vec_ste(v_dp, 0, &d);
868 vec_ste(v_sysdp, 0, &sysd); 868 vec_ste(v_sysdp, 0, &sysd);
869 869
870 i = d; 870 i = d;
871 d = (4*d 871 d = (4*d
872 +(*(tempBluredPast-256)) 872 +(*(tempBlurredPast-256))
873 +(*(tempBluredPast-1))+ (*(tempBluredPast+1)) 873 +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
874 +(*(tempBluredPast+256)) 874 +(*(tempBlurredPast+256))
875 +4)>>3; 875 +4)>>3;
876 876
877 *tempBluredPast=i; 877 *tempBlurredPast=i;
878 878
879 if (d > maxNoise[1]) { 879 if (d > maxNoise[1]) {
880 if (d < maxNoise[2]) { 880 if (d < maxNoise[2]) {
881 #define OP(i) v_tempBluredAss##i = vec_avg(v_tempBluredAss##i, v_srcAss##i); 881 #define OP(i) v_tempBlurredAss##i = vec_avg(v_tempBlurredAss##i, v_srcAss##i);
882 882
883 OP(0); 883 OP(0);
884 OP(1); 884 OP(1);
885 OP(2); 885 OP(2);
886 OP(3); 886 OP(3);
888 OP(5); 888 OP(5);
889 OP(6); 889 OP(6);
890 OP(7); 890 OP(7);
891 #undef OP 891 #undef OP
892 } else { 892 } else {
893 #define OP(i) v_tempBluredAss##i = v_srcAss##i; 893 #define OP(i) v_tempBlurredAss##i = v_srcAss##i;
894 894
895 OP(0); 895 OP(0);
896 OP(1); 896 OP(1);
897 OP(2); 897 OP(2);
898 OP(3); 898 OP(3);
908 const vector signed short vsint16_4 = vec_splat_s16(4); 908 const vector signed short vsint16_4 = vec_splat_s16(4);
909 const vector unsigned short vuint16_3 = vec_splat_u16(3); 909 const vector unsigned short vuint16_3 = vec_splat_u16(3);
910 910
911 #define OP(i) \ 911 #define OP(i) \
912 const vector signed short v_temp##i = \ 912 const vector signed short v_temp##i = \
913 vec_mladd(v_tempBluredAss##i, \ 913 vec_mladd(v_tempBlurredAss##i, \
914 vsint16_7, v_srcAss##i); \ 914 vsint16_7, v_srcAss##i); \
915 const vector signed short v_temp2##i = \ 915 const vector signed short v_temp2##i = \
916 vec_add(v_temp##i, vsint16_4); \ 916 vec_add(v_temp##i, vsint16_4); \
917 v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3) 917 v_tempBlurredAss##i = vec_sr(v_temp2##i, vuint16_3)
918 918
919 OP(0); 919 OP(0);
920 OP(1); 920 OP(1);
921 OP(2); 921 OP(2);
922 OP(3); 922 OP(3);
929 const vector signed short vsint16_3 = vec_splat_s16(3); 929 const vector signed short vsint16_3 = vec_splat_s16(3);
930 const vector signed short vsint16_2 = vec_splat_s16(2); 930 const vector signed short vsint16_2 = vec_splat_s16(2);
931 931
932 #define OP(i) \ 932 #define OP(i) \
933 const vector signed short v_temp##i = \ 933 const vector signed short v_temp##i = \
934 vec_mladd(v_tempBluredAss##i, \ 934 vec_mladd(v_tempBlurredAss##i, \
935 vsint16_3, v_srcAss##i); \ 935 vsint16_3, v_srcAss##i); \
936 const vector signed short v_temp2##i = \ 936 const vector signed short v_temp2##i = \
937 vec_add(v_temp##i, vsint16_2); \ 937 vec_add(v_temp##i, vsint16_2); \
938 v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) 938 v_tempBlurredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2)
939 939
940 OP(0); 940 OP(0);
941 OP(1); 941 OP(1);
942 OP(2); 942 OP(2);
943 OP(3); 943 OP(3);
955 955
956 #define PACK_AND_STORE(src, i) \ 956 #define PACK_AND_STORE(src, i) \
957 const vector unsigned char perms##src##i = \ 957 const vector unsigned char perms##src##i = \
958 vec_lvsr(i * stride, src); \ 958 vec_lvsr(i * stride, src); \
959 const vector unsigned char vf##src##i = \ 959 const vector unsigned char vf##src##i = \
960 vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \ 960 vec_packsu(v_tempBlurredAss##i, (vector signed short)zero); \
961 const vector unsigned char vg##src##i = \ 961 const vector unsigned char vg##src##i = \
962 vec_perm(vf##src##i, v_##src##A##i, permHH); \ 962 vec_perm(vf##src##i, v_##src##A##i, permHH); \
963 const vector unsigned char mask##src##i = \ 963 const vector unsigned char mask##src##i = \
964 vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \ 964 vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \
965 const vector unsigned char vg2##src##i = \ 965 const vector unsigned char vg2##src##i = \
977 PACK_AND_STORE(src, 3); 977 PACK_AND_STORE(src, 3);
978 PACK_AND_STORE(src, 4); 978 PACK_AND_STORE(src, 4);
979 PACK_AND_STORE(src, 5); 979 PACK_AND_STORE(src, 5);
980 PACK_AND_STORE(src, 6); 980 PACK_AND_STORE(src, 6);
981 PACK_AND_STORE(src, 7); 981 PACK_AND_STORE(src, 7);
982 PACK_AND_STORE(tempBlured, 0); 982 PACK_AND_STORE(tempBlurred, 0);
983 PACK_AND_STORE(tempBlured, 1); 983 PACK_AND_STORE(tempBlurred, 1);
984 PACK_AND_STORE(tempBlured, 2); 984 PACK_AND_STORE(tempBlurred, 2);
985 PACK_AND_STORE(tempBlured, 3); 985 PACK_AND_STORE(tempBlurred, 3);
986 PACK_AND_STORE(tempBlured, 4); 986 PACK_AND_STORE(tempBlurred, 4);
987 PACK_AND_STORE(tempBlured, 5); 987 PACK_AND_STORE(tempBlurred, 5);
988 PACK_AND_STORE(tempBlured, 6); 988 PACK_AND_STORE(tempBlurred, 6);
989 PACK_AND_STORE(tempBlured, 7); 989 PACK_AND_STORE(tempBlurred, 7);
990 #undef PACK_AND_STORE 990 #undef PACK_AND_STORE
991 } 991 }
992 992
993 static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) { 993 static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {
994 const vector unsigned char zero = vec_splat_u8(0); 994 const vector unsigned char zero = vec_splat_u8(0);