comparison: ppc/idct_altivec.c @ 8494:1615d6b75ada (libavcodec)

Cleanup _t types in libavcodec/ppc

author   | lu_zero
date     | Sat, 27 Dec 2008 11:21:28 +0000
parents  | 266d4949aa15
children | 7a463923ecd1
comparing 8493:469f3e5bcf13 (before) with 8494:1615d6b75ada (after):

--- a/ppc/idct_altivec.c
+++ b/ppc/idct_altivec.c
@@ -38,20 +38,12 @@
 #include <stdlib.h> /* malloc(), free() */
 #include <string.h>
 #include "libavcodec/dsputil.h"
 
 #include "gcc_fixes.h"
-
+#include "types_altivec.h"
 #include "dsputil_ppc.h"
-
-#define vector_s16_t vector signed short
-#define const_vector_s16_t const vector signed short
-#define vector_u16_t vector unsigned short
-#define vector_s8_t vector signed char
-#define vector_u8_t vector unsigned char
-#define vector_s32_t vector signed int
-#define vector_u32_t vector unsigned int
 
 #define IDCT_HALF \
     /* 1st stage */ \
     t1 = vec_mradds (a1, vx7, vx1 ); \
     t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \
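The hunk above is the core of the cleanup: seven per-file vector_*_t helper macros are dropped in favor of one shared header. The _t suffix is reserved by POSIX, which is presumably what motivated the rename. For reference, types_altivec.h is expected to supply the shorter names along these lines (reconstructed for illustration, not part of this diff):

    /* Assumed contents of libavcodec/ppc/types_altivec.h at this revision. */
    #define vec_u8  vector unsigned char
    #define vec_s8  vector signed char
    #define vec_u16 vector unsigned short
    #define vec_s16 vector signed short
    #define vec_u32 vector unsigned int
    #define vec_s32 vector signed int

Note there is no direct replacement for const_vector_s16_t; the constants table further down simply spells out const vec_s16.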
@@ -86,23 +78,23 @@
     vy3 = vec_adds (t2, t6); \
     vy4 = vec_subs (t2, t6);
 
 
 #define IDCT \
-    vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
-    vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
-    vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \
-    vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \
-    vector_u16_t shift; \
+    vec_s16 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
+    vec_s16 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
+    vec_s16 a0, a1, a2, ma2, c4, mc4, zero, bias; \
+    vec_s16 t0, t1, t2, t3, t4, t5, t6, t7, t8; \
+    vec_u16 shift; \
 \
     c4 = vec_splat (constants[0], 0); \
     a0 = vec_splat (constants[0], 1); \
     a1 = vec_splat (constants[0], 2); \
     a2 = vec_splat (constants[0], 3); \
     mc4 = vec_splat (constants[0], 4); \
     ma2 = vec_splat (constants[0], 5); \
-    bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \
+    bias = (vec_s16)vec_splat ((vec_s32)constants[0], 3); \
 \
     zero = vec_splat_s16 (0); \
     shift = vec_splat_u16 (4); \
 \
     vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \
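A note on the intrinsic doing the heavy lifting in IDCT_HALF and IDCT: vec_mradds(a, b, c) multiplies two signed 16-bit vectors, rounds, keeps the high 16 bits of each product, and adds the third operand with saturation — a fixed-point fractional multiply, which is why the constants below are trig values scaled by 2^15. A scalar model of one lane (my sketch, not from the file):

    #include <stdint.h>

    /* One 16-bit lane of vec_mradds(a, b, c): widen, multiply,
     * round to nearest, keep the high half, then add with saturation. */
    static int16_t mradds_lane(int16_t a, int16_t b, int16_t c)
    {
        int32_t p = ((int32_t)a * b + 0x4000) >> 15; /* round, drop 15 fraction bits */
        int32_t s = p + c;
        if (s >  32767) s =  32767;                  /* signed saturation */
        if (s < -32768) s = -32768;
        return (int16_t)s;
    }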
@@ -154,32 +146,32 @@
     vx5 = vec_sra (vy5, shift); \
     vx6 = vec_sra (vy6, shift); \
     vx7 = vec_sra (vy7, shift);
 
 
-static const_vector_s16_t constants[5] = {
+static const vec_s16 constants[5] = {
     {23170, 13573, 6518, 21895, -23170, -21895, 32, 31},
     {16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725},
     {22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521},
     {21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692},
     {19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722}
 };
 
-void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block)
+void idct_put_altivec(uint8_t* dest, int stride, vec_s16* block)
 {
     POWERPC_PERF_DECLARE(altivec_idct_put_num, 1);
-    vector_u8_t tmp;
+    vec_u8 tmp;
 
 #ifdef CONFIG_POWERPC_PERF
     POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
 #endif
     IDCT
 
 #define COPY(dest,src) \
     tmp = vec_packsu (src, src); \
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest); \
+    vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);
 
     COPY (dest, vx0) dest += stride;
     COPY (dest, vx1) dest += stride;
     COPY (dest, vx2) dest += stride;
     COPY (dest, vx3) dest += stride;
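The constants table is otherwise opaque, so here is my annotation of it (an assumption based on the mpeg2dec-style AltiVec IDCT this file appears to descend from, not documentation present in the file itself):

    /* constants[0]: trig factors scaled by 2^15, plus a packed bias:
     *   23170 ~ cos(pi/4)   * 32768   (c4)
     *   13573 ~ tan(pi/8)   * 32768   (a0)
     *    6518 ~ tan(pi/16)  * 32768   (a1)
     *   21895 ~ tan(3pi/16) * 32768   (a2)
     *  -23170, -21895                 (mc4, ma2)
     *   {32, 31} pair into the 32-bit rounding bias that
     *   (vec_s16)vec_splat((vec_s32)constants[0], 3) extracts above.
     * constants[1..4] look like per-row prescale factors folded into
     * the vx loads, e.g. 16384 = 2^14 and
     * 22725 ~ 2^14 * sqrt(2) * cos(pi/16). */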
@@ -189,18 +181,18 @@
     COPY (dest, vx7)
 
     POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
 }
 
-void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block)
+void idct_add_altivec(uint8_t* dest, int stride, vec_s16* block)
 {
     POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);
-    vector_u8_t tmp;
-    vector_s16_t tmp2, tmp3;
-    vector_u8_t perm0;
-    vector_u8_t perm1;
-    vector_u8_t p0, p1, p;
+    vec_u8 tmp;
+    vec_s16 tmp2, tmp3;
+    vec_u8 perm0;
+    vec_u8 perm1;
+    vec_u8 p0, p1, p;
 
 #ifdef CONFIG_POWERPC_PERF
     POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
 #endif
 
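For context, these entry points are not called directly; the PPC dsputil init code installs them when AltiVec is available. A hypothetical wiring sketch (from memory of dsputil_ppc.c in this era; everything here is outside this diff and should be treated as an assumption):

    /* Sketch: install the AltiVec IDCT into the DSPContext during init. */
    if (avctx->idct_algo == FF_IDCT_AUTO ||
        avctx->idct_algo == FF_IDCT_ALTIVEC) {
        c->idct_put              = idct_put_altivec;
        c->idct_add              = idct_add_altivec;
        c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
    }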
@@ -213,15 +205,15 @@
     perm1 = vec_mergeh (p, p1);
 
 #define ADD(dest,src,perm) \
     /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
     tmp = vec_ld (0, dest); \
-    tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \
+    tmp2 = (vec_s16)vec_perm (tmp, (vec_u8)zero, perm); \
     tmp3 = vec_adds (tmp2, src); \
     tmp = vec_packsu (tmp3, tmp3); \
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
-    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest); \
+    vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);
 
     ADD (dest, vx0, perm0) dest += stride;
     ADD (dest, vx1, perm1) dest += stride;
     ADD (dest, vx2, perm0) dest += stride;
     ADD (dest, vx3, perm1) dest += stride;
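Unlike COPY, the ADD path merges the IDCT output into pixels already in dest: the precomputed perm0/perm1 vectors zero-extend eight destination bytes to 16 bits (permuting them against the zero vector), vec_adds adds the residual with signed saturation, and vec_packsu clamps the sums back to 0..255 before two 32-bit vec_ste stores write the row. A scalar model of one row (my sketch; the helper name is hypothetical):

    #include <stdint.h>

    /* One row of ADD(dest, src, perm), modeled in scalar code:
     * widen the existing pixels, add the residual, clamp to 0..255. */
    static void add_row_scalar(uint8_t *dest, const int16_t *src)
    {
        for (int i = 0; i < 8; i++) {
            int v = dest[i] + src[i];
            if (v < 0)   v = 0;     /* vec_packsu clamps low */
            if (v > 255) v = 255;   /* and high */
            dest[i] = (uint8_t)v;
        }
    }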