comparison ppc/h264_template_altivec.c @ 6059:8c1a381bddb6 libavcodec

Factorize common code (almost cosmetic)
author lu_zero
date Sat, 22 Dec 2007 02:46:32 +0000
parents 93089aed00cb
children b1e3368c0d5e
comparison
equal deleted inserted replaced
6058:93089aed00cb 6059:8c1a381bddb6
24 #else 24 #else
25 #define ASSERT_ALIGNED(ptr) ; 25 #define ASSERT_ALIGNED(ptr) ;
26 #endif 26 #endif
27 27
28 /* this code assumes that stride % 16 == 0 */ 28 /* this code assumes that stride % 16 == 0 */
29
30 #define CHROMA_MC8_ALTIVEC_CORE \
31 vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);\
32 vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc3uc);\
33 \
34 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));\
35 psum = vec_mladd(vB, vsrc1ssH, psum);\
36 psum = vec_mladd(vC, vsrc2ssH, psum);\
37 psum = vec_mladd(vD, vsrc3ssH, psum);\
38 psum = vec_add(v32ss, psum);\
39 psum = vec_sr(psum, v6us);\
40 \
41 vdst = vec_ld(0, dst);\
42 ppsum = (vec_u8_t)vec_pack(psum, psum);\
43 vfdst = vec_perm(vdst, ppsum, fperm);\
44 \
45 OP_U8_ALTIVEC(fsum, vfdst, vdst);\
46 \
47 vec_st(fsum, 0, dst);\
48 \
49 vsrc0ssH = vsrc2ssH;\
50 vsrc1ssH = vsrc3ssH;\
51 \
52 dst += stride;\
53 src += stride;
54
55
29 void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { 56 void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
30 POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); 57 POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
31 DECLARE_ALIGNED_16(signed int, ABCD[4]) = 58 DECLARE_ALIGNED_16(signed int, ABCD[4]) =
32 {((8 - x) * (8 - y)), 59 {((8 - x) * (8 - y)),
33 (( x) * (8 - y)), 60 (( x) * (8 - y)),
90 vsrcCuc = vec_ld(stride + 0, src); 117 vsrcCuc = vec_ld(stride + 0, src);
91 118
92 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); 119 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
93 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); 120 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
94 121
95 vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc); 122 CHROMA_MC8_ALTIVEC_CORE
96 vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc3uc);
97
98 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
99 psum = vec_mladd(vB, vsrc1ssH, psum);
100 psum = vec_mladd(vC, vsrc2ssH, psum);
101 psum = vec_mladd(vD, vsrc3ssH, psum);
102 psum = vec_add(v32ss, psum);
103 psum = vec_sra(psum, v6us);
104
105 vdst = vec_ld(0, dst);
106 ppsum = (vec_u8_t)vec_packsu(psum, psum);
107 vfdst = vec_perm(vdst, ppsum, fperm);
108
109 OP_U8_ALTIVEC(fsum, vfdst, vdst);
110
111 vec_st(fsum, 0, dst);
112
113 vsrc0ssH = vsrc2ssH;
114 vsrc1ssH = vsrc3ssH;
115
116 dst += stride;
117 src += stride;
118 } 123 }
119 } else { 124 } else {
120 vec_u8_t vsrcDuc; 125 vec_u8_t vsrcDuc;
121 for (i = 0 ; i < h ; i++) { 126 for (i = 0 ; i < h ; i++) {
122 vsrcCuc = vec_ld(stride + 0, src); 127 vsrcCuc = vec_ld(stride + 0, src);
126 if (reallyBadAlign) 131 if (reallyBadAlign)
127 vsrc3uc = vsrcDuc; 132 vsrc3uc = vsrcDuc;
128 else 133 else
129 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); 134 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
130 135
131 vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc); 136 CHROMA_MC8_ALTIVEC_CORE
132 vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc3uc);
133
134 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
135 psum = vec_mladd(vB, vsrc1ssH, psum);
136 psum = vec_mladd(vC, vsrc2ssH, psum);
137 psum = vec_mladd(vD, vsrc3ssH, psum);
138 psum = vec_add(v32ss, psum);
139 psum = vec_sr(psum, v6us);
140
141 vdst = vec_ld(0, dst);
142 ppsum = (vec_u8_t)vec_pack(psum, psum);
143 vfdst = vec_perm(vdst, ppsum, fperm);
144
145 OP_U8_ALTIVEC(fsum, vfdst, vdst);
146
147 vec_st(fsum, 0, dst);
148
149 vsrc0ssH = vsrc2ssH;
150 vsrc1ssH = vsrc3ssH;
151
152 dst += stride;
153 src += stride;
154 } 137 }
155 } 138 }
156 POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); 139 POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
157 } 140 }
141
142 #undef CHROMA_MC8_ALTIVEC_CORE
158 143
159 /* this code assumes stride % 16 == 0 */ 144 /* this code assumes stride % 16 == 0 */
160 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { 145 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
161 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1); 146 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
162 register int i; 147 register int i;