comparison ppc/h264_altivec.c @ 8535:8f3e20061aff libavcodec

Offset and weights are signed; this fixes some non-bitexact issues. Patch by David Conrad %lessen42 A gmail P com%
author gpoirier
date Tue, 06 Jan 2009 10:35:06 +0000
parents 961e40a13102
children 87450160a913
comparison
equal deleted inserted replaced
8534:b80cf351176e 8535:8f3e20061aff
940 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, 940 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
941 int weightd, int weights, int offset, int w, int h) 941 int weightd, int weights, int offset, int w, int h)
942 { 942 {
943 int y, dst_aligned, src_aligned; 943 int y, dst_aligned, src_aligned;
944 vec_u8 vsrc, vdst; 944 vec_u8 vsrc, vdst;
945 vec_u16 vtemp, vlog2_denom, vweights, vweightd, voffset, v0, v1, v2, v3; 945 vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;
946 vec_u16 vlog2_denom;
946 DECLARE_ALIGNED_16(int32_t, temp[4]); 947 DECLARE_ALIGNED_16(int32_t, temp[4]);
947 LOAD_ZERO; 948 LOAD_ZERO;
948 949
949 offset = ((offset + 1) | 1) << log2_denom; 950 offset = ((offset + 1) | 1) << log2_denom;
950 temp[0] = log2_denom+1; 951 temp[0] = log2_denom+1;
975 else 976 else
976 v2 = v3; 977 v2 = v3;
977 } 978 }
978 979
979 if (w == 16 || dst_aligned) { 980 if (w == 16 || dst_aligned) {
980 v0 = vec_mladd(v0, vweightd, zero_u16v); 981 v0 = vec_mladd(v0, vweightd, zero_s16v);
981 v2 = vec_mladd(v2, vweights, zero_u16v); 982 v2 = vec_mladd(v2, vweights, zero_s16v);
982 983
983 v0 = vec_adds(v0, voffset); 984 v0 = vec_adds(v0, voffset);
984 v0 = vec_adds(v0, v2); 985 v0 = vec_adds(v0, v2);
985 v0 = vec_sra(v0, vlog2_denom); 986 v0 = vec_sra(v0, vlog2_denom);
986 } 987 }
987 if (w == 16 || !dst_aligned) { 988 if (w == 16 || !dst_aligned) {
988 v1 = vec_mladd(v1, vweightd, zero_u16v); 989 v1 = vec_mladd(v1, vweightd, zero_s16v);
989 v3 = vec_mladd(v3, vweights, zero_u16v); 990 v3 = vec_mladd(v3, vweights, zero_s16v);
990 991
991 v1 = vec_adds(v1, voffset); 992 v1 = vec_adds(v1, voffset);
992 v1 = vec_adds(v1, v3); 993 v1 = vec_adds(v1, v3);
993 v1 = vec_sra(v1, vlog2_denom); 994 v1 = vec_sra(v1, vlog2_denom);
994 } 995 }