Mercurial > libpostproc.hg
comparison postprocess_altivec_template.c @ 98:e565483b1193 libpostproc
typo fixes: tempBlured --> tempBlurred
author | diego |
---|---|
date | Sat, 22 Mar 2008 17:36:31 +0000 |
parents | c24dab9bca80 |
children | fa0ecbc87f51 |
Comparison legend: equal | deleted | inserted | replaced
97:2653d9f33b8a | 98:e565483b1193 |
---|---|
798 #define doHorizLowPass_altivec(a...) doHorizLowPass_C(a) | 798 #define doHorizLowPass_altivec(a...) doHorizLowPass_C(a) |
799 #define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a) | 799 #define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a) |
800 #define do_a_deblock_altivec(a...) do_a_deblock_C(a) | 800 #define do_a_deblock_altivec(a...) do_a_deblock_C(a) |
801 | 801 |
802 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | 802 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, |
803 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) | 803 uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise) |
804 { | 804 { |
805 const vector signed int zero = vec_splat_s32(0); | 805 const vector signed int zero = vec_splat_s32(0); |
806 const vector signed short vsint16_1 = vec_splat_s16(1); | 806 const vector signed short vsint16_1 = vec_splat_s16(1); |
807 vector signed int v_dp = zero; | 807 vector signed int v_dp = zero; |
808 vector signed int v_sysdp = zero; | 808 vector signed int v_sysdp = zero; |
809 int d, sysd, i; | 809 int d, sysd, i; |
810 | 810 |
811 tempBluredPast[127]= maxNoise[0]; | 811 tempBlurredPast[127]= maxNoise[0]; |
812 tempBluredPast[128]= maxNoise[1]; | 812 tempBlurredPast[128]= maxNoise[1]; |
813 tempBluredPast[129]= maxNoise[2]; | 813 tempBlurredPast[129]= maxNoise[2]; |
814 | 814 |
815 #define LOAD_LINE(src, i) \ | 815 #define LOAD_LINE(src, i) \ |
816 register int j##src##i = i * stride; \ | 816 register int j##src##i = i * stride; \ |
817 vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \ | 817 vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \ |
818 const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \ | 818 const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \ |
830 LOAD_LINE(src, 4); | 830 LOAD_LINE(src, 4); |
831 LOAD_LINE(src, 5); | 831 LOAD_LINE(src, 5); |
832 LOAD_LINE(src, 6); | 832 LOAD_LINE(src, 6); |
833 LOAD_LINE(src, 7); | 833 LOAD_LINE(src, 7); |
834 | 834 |
835 LOAD_LINE(tempBlured, 0); | 835 LOAD_LINE(tempBlurred, 0); |
836 LOAD_LINE(tempBlured, 1); | 836 LOAD_LINE(tempBlurred, 1); |
837 LOAD_LINE(tempBlured, 2); | 837 LOAD_LINE(tempBlurred, 2); |
838 LOAD_LINE(tempBlured, 3); | 838 LOAD_LINE(tempBlurred, 3); |
839 LOAD_LINE(tempBlured, 4); | 839 LOAD_LINE(tempBlurred, 4); |
840 LOAD_LINE(tempBlured, 5); | 840 LOAD_LINE(tempBlurred, 5); |
841 LOAD_LINE(tempBlured, 6); | 841 LOAD_LINE(tempBlurred, 6); |
842 LOAD_LINE(tempBlured, 7); | 842 LOAD_LINE(tempBlurred, 7); |
843 #undef LOAD_LINE | 843 #undef LOAD_LINE |
844 | 844 |
845 #define ACCUMULATE_DIFFS(i) \ | 845 #define ACCUMULATE_DIFFS(i) \ |
846 vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \ | 846 vector signed short v_d##i = vec_sub(v_tempBlurredAss##i, \ |
847 v_srcAss##i); \ | 847 v_srcAss##i); \ |
848 v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ | 848 v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ |
849 v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) | 849 v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) |
850 | 850 |
851 ACCUMULATE_DIFFS(0); | 851 ACCUMULATE_DIFFS(0); |
867 vec_ste(v_dp, 0, &d); | 867 vec_ste(v_dp, 0, &d); |
868 vec_ste(v_sysdp, 0, &sysd); | 868 vec_ste(v_sysdp, 0, &sysd); |
869 | 869 |
870 i = d; | 870 i = d; |
871 d = (4*d | 871 d = (4*d |
872 +(*(tempBluredPast-256)) | 872 +(*(tempBlurredPast-256)) |
873 +(*(tempBluredPast-1))+ (*(tempBluredPast+1)) | 873 +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1)) |
874 +(*(tempBluredPast+256)) | 874 +(*(tempBlurredPast+256)) |
875 +4)>>3; | 875 +4)>>3; |
876 | 876 |
877 *tempBluredPast=i; | 877 *tempBlurredPast=i; |
878 | 878 |
879 if (d > maxNoise[1]) { | 879 if (d > maxNoise[1]) { |
880 if (d < maxNoise[2]) { | 880 if (d < maxNoise[2]) { |
881 #define OP(i) v_tempBluredAss##i = vec_avg(v_tempBluredAss##i, v_srcAss##i); | 881 #define OP(i) v_tempBlurredAss##i = vec_avg(v_tempBlurredAss##i, v_srcAss##i); |
882 | 882 |
883 OP(0); | 883 OP(0); |
884 OP(1); | 884 OP(1); |
885 OP(2); | 885 OP(2); |
886 OP(3); | 886 OP(3); |
888 OP(5); | 888 OP(5); |
889 OP(6); | 889 OP(6); |
890 OP(7); | 890 OP(7); |
891 #undef OP | 891 #undef OP |
892 } else { | 892 } else { |
893 #define OP(i) v_tempBluredAss##i = v_srcAss##i; | 893 #define OP(i) v_tempBlurredAss##i = v_srcAss##i; |
894 | 894 |
895 OP(0); | 895 OP(0); |
896 OP(1); | 896 OP(1); |
897 OP(2); | 897 OP(2); |
898 OP(3); | 898 OP(3); |
908 const vector signed short vsint16_4 = vec_splat_s16(4); | 908 const vector signed short vsint16_4 = vec_splat_s16(4); |
909 const vector unsigned short vuint16_3 = vec_splat_u16(3); | 909 const vector unsigned short vuint16_3 = vec_splat_u16(3); |
910 | 910 |
911 #define OP(i) \ | 911 #define OP(i) \ |
912 const vector signed short v_temp##i = \ | 912 const vector signed short v_temp##i = \ |
913 vec_mladd(v_tempBluredAss##i, \ | 913 vec_mladd(v_tempBlurredAss##i, \ |
914 vsint16_7, v_srcAss##i); \ | 914 vsint16_7, v_srcAss##i); \ |
915 const vector signed short v_temp2##i = \ | 915 const vector signed short v_temp2##i = \ |
916 vec_add(v_temp##i, vsint16_4); \ | 916 vec_add(v_temp##i, vsint16_4); \ |
917 v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3) | 917 v_tempBlurredAss##i = vec_sr(v_temp2##i, vuint16_3) |
918 | 918 |
919 OP(0); | 919 OP(0); |
920 OP(1); | 920 OP(1); |
921 OP(2); | 921 OP(2); |
922 OP(3); | 922 OP(3); |
929 const vector signed short vsint16_3 = vec_splat_s16(3); | 929 const vector signed short vsint16_3 = vec_splat_s16(3); |
930 const vector signed short vsint16_2 = vec_splat_s16(2); | 930 const vector signed short vsint16_2 = vec_splat_s16(2); |
931 | 931 |
932 #define OP(i) \ | 932 #define OP(i) \ |
933 const vector signed short v_temp##i = \ | 933 const vector signed short v_temp##i = \ |
934 vec_mladd(v_tempBluredAss##i, \ | 934 vec_mladd(v_tempBlurredAss##i, \ |
935 vsint16_3, v_srcAss##i); \ | 935 vsint16_3, v_srcAss##i); \ |
936 const vector signed short v_temp2##i = \ | 936 const vector signed short v_temp2##i = \ |
937 vec_add(v_temp##i, vsint16_2); \ | 937 vec_add(v_temp##i, vsint16_2); \ |
938 v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) | 938 v_tempBlurredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) |
939 | 939 |
940 OP(0); | 940 OP(0); |
941 OP(1); | 941 OP(1); |
942 OP(2); | 942 OP(2); |
943 OP(3); | 943 OP(3); |
955 | 955 |
956 #define PACK_AND_STORE(src, i) \ | 956 #define PACK_AND_STORE(src, i) \ |
957 const vector unsigned char perms##src##i = \ | 957 const vector unsigned char perms##src##i = \ |
958 vec_lvsr(i * stride, src); \ | 958 vec_lvsr(i * stride, src); \ |
959 const vector unsigned char vf##src##i = \ | 959 const vector unsigned char vf##src##i = \ |
960 vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \ | 960 vec_packsu(v_tempBlurredAss##i, (vector signed short)zero); \ |
961 const vector unsigned char vg##src##i = \ | 961 const vector unsigned char vg##src##i = \ |
962 vec_perm(vf##src##i, v_##src##A##i, permHH); \ | 962 vec_perm(vf##src##i, v_##src##A##i, permHH); \ |
963 const vector unsigned char mask##src##i = \ | 963 const vector unsigned char mask##src##i = \ |
964 vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \ | 964 vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \ |
965 const vector unsigned char vg2##src##i = \ | 965 const vector unsigned char vg2##src##i = \ |
977 PACK_AND_STORE(src, 3); | 977 PACK_AND_STORE(src, 3); |
978 PACK_AND_STORE(src, 4); | 978 PACK_AND_STORE(src, 4); |
979 PACK_AND_STORE(src, 5); | 979 PACK_AND_STORE(src, 5); |
980 PACK_AND_STORE(src, 6); | 980 PACK_AND_STORE(src, 6); |
981 PACK_AND_STORE(src, 7); | 981 PACK_AND_STORE(src, 7); |
982 PACK_AND_STORE(tempBlured, 0); | 982 PACK_AND_STORE(tempBlurred, 0); |
983 PACK_AND_STORE(tempBlured, 1); | 983 PACK_AND_STORE(tempBlurred, 1); |
984 PACK_AND_STORE(tempBlured, 2); | 984 PACK_AND_STORE(tempBlurred, 2); |
985 PACK_AND_STORE(tempBlured, 3); | 985 PACK_AND_STORE(tempBlurred, 3); |
986 PACK_AND_STORE(tempBlured, 4); | 986 PACK_AND_STORE(tempBlurred, 4); |
987 PACK_AND_STORE(tempBlured, 5); | 987 PACK_AND_STORE(tempBlurred, 5); |
988 PACK_AND_STORE(tempBlured, 6); | 988 PACK_AND_STORE(tempBlurred, 6); |
989 PACK_AND_STORE(tempBlured, 7); | 989 PACK_AND_STORE(tempBlurred, 7); |
990 #undef PACK_AND_STORE | 990 #undef PACK_AND_STORE |
991 } | 991 } |
992 | 992 |
993 static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) { | 993 static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) { |
994 const vector unsigned char zero = vec_splat_u8(0); | 994 const vector unsigned char zero = vec_splat_u8(0); |