Mercurial > libavcodec.hg
comparison ppc/h264_altivec.c @ 8535:8f3e20061aff libavcodec
Offset and weights are signed; this fixes some non-bitexact issues.
Patch by David Conrad %lessen42 A gmail P com%
author | gpoirier |
---|---|
date | Tue, 06 Jan 2009 10:35:06 +0000 |
parents | 961e40a13102 |
children | 87450160a913 |
comparison
equal
deleted
inserted
replaced
8534:b80cf351176e | 8535:8f3e20061aff |
---|---|
940 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, | 940 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, |
941 int weightd, int weights, int offset, int w, int h) | 941 int weightd, int weights, int offset, int w, int h) |
942 { | 942 { |
943 int y, dst_aligned, src_aligned; | 943 int y, dst_aligned, src_aligned; |
944 vec_u8 vsrc, vdst; | 944 vec_u8 vsrc, vdst; |
945 vec_u16 vtemp, vlog2_denom, vweights, vweightd, voffset, v0, v1, v2, v3; | 945 vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3; |
 | 946 vec_u16 vlog2_denom; |
946 DECLARE_ALIGNED_16(int32_t, temp[4]); | 947 DECLARE_ALIGNED_16(int32_t, temp[4]); |
947 LOAD_ZERO; | 948 LOAD_ZERO; |
948 | 949 |
949 offset = ((offset + 1) | 1) << log2_denom; | 950 offset = ((offset + 1) | 1) << log2_denom; |
950 temp[0] = log2_denom+1; | 951 temp[0] = log2_denom+1; |
975 else | 976 else |
976 v2 = v3; | 977 v2 = v3; |
977 } | 978 } |
978 | 979 |
979 if (w == 16 || dst_aligned) { | 980 if (w == 16 || dst_aligned) { |
980 v0 = vec_mladd(v0, vweightd, zero_u16v); | 981 v0 = vec_mladd(v0, vweightd, zero_s16v); |
981 v2 = vec_mladd(v2, vweights, zero_u16v); | 982 v2 = vec_mladd(v2, vweights, zero_s16v); |
982 | 983 |
983 v0 = vec_adds(v0, voffset); | 984 v0 = vec_adds(v0, voffset); |
984 v0 = vec_adds(v0, v2); | 985 v0 = vec_adds(v0, v2); |
985 v0 = vec_sra(v0, vlog2_denom); | 986 v0 = vec_sra(v0, vlog2_denom); |
986 } | 987 } |
987 if (w == 16 || !dst_aligned) { | 988 if (w == 16 || !dst_aligned) { |
988 v1 = vec_mladd(v1, vweightd, zero_u16v); | 989 v1 = vec_mladd(v1, vweightd, zero_s16v); |
989 v3 = vec_mladd(v3, vweights, zero_u16v); | 990 v3 = vec_mladd(v3, vweights, zero_s16v); |
990 | 991 |
991 v1 = vec_adds(v1, voffset); | 992 v1 = vec_adds(v1, voffset); |
992 v1 = vec_adds(v1, v3); | 993 v1 = vec_adds(v1, v3); |
993 v1 = vec_sra(v1, vlog2_denom); | 994 v1 = vec_sra(v1, vlog2_denom); |
994 } | 995 } |