comparison ppc/idct_altivec.c @ 8494:1615d6b75ada libavcodec

Cleanup _t types in libavcodec/ppc
author lu_zero
date Sat, 27 Dec 2008 11:21:28 +0000
parents 266d4949aa15
children 7a463923ecd1
comparison
legend: equal | deleted | inserted | replaced
8493:469f3e5bcf13 (left column) 8494:1615d6b75ada (right column)
38 #include <stdlib.h> /* malloc(), free() */ 38 #include <stdlib.h> /* malloc(), free() */
39 #include <string.h> 39 #include <string.h>
40 #include "libavcodec/dsputil.h" 40 #include "libavcodec/dsputil.h"
41 41
42 #include "gcc_fixes.h" 42 #include "gcc_fixes.h"
43 43 #include "types_altivec.h"
44 #include "dsputil_ppc.h" 44 #include "dsputil_ppc.h"
45
46 #define vector_s16_t vector signed short
47 #define const_vector_s16_t const vector signed short
48 #define vector_u16_t vector unsigned short
49 #define vector_s8_t vector signed char
50 #define vector_u8_t vector unsigned char
51 #define vector_s32_t vector signed int
52 #define vector_u32_t vector unsigned int
53 45
54 #define IDCT_HALF \ 46 #define IDCT_HALF \
55 /* 1st stage */ \ 47 /* 1st stage */ \
56 t1 = vec_mradds (a1, vx7, vx1 ); \ 48 t1 = vec_mradds (a1, vx7, vx1 ); \
57 t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ 49 t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \
86 vy3 = vec_adds (t2, t6); \ 78 vy3 = vec_adds (t2, t6); \
87 vy4 = vec_subs (t2, t6); 79 vy4 = vec_subs (t2, t6);
88 80
89 81
90 #define IDCT \ 82 #define IDCT \
91 vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ 83 vec_s16 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
92 vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \ 84 vec_s16 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
93 vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \ 85 vec_s16 a0, a1, a2, ma2, c4, mc4, zero, bias; \
94 vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \ 86 vec_s16 t0, t1, t2, t3, t4, t5, t6, t7, t8; \
95 vector_u16_t shift; \ 87 vec_u16 shift; \
96 \ 88 \
97 c4 = vec_splat (constants[0], 0); \ 89 c4 = vec_splat (constants[0], 0); \
98 a0 = vec_splat (constants[0], 1); \ 90 a0 = vec_splat (constants[0], 1); \
99 a1 = vec_splat (constants[0], 2); \ 91 a1 = vec_splat (constants[0], 2); \
100 a2 = vec_splat (constants[0], 3); \ 92 a2 = vec_splat (constants[0], 3); \
101 mc4 = vec_splat (constants[0], 4); \ 93 mc4 = vec_splat (constants[0], 4); \
102 ma2 = vec_splat (constants[0], 5); \ 94 ma2 = vec_splat (constants[0], 5); \
103 bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \ 95 bias = (vec_s16)vec_splat ((vec_s32)constants[0], 3); \
104 \ 96 \
105 zero = vec_splat_s16 (0); \ 97 zero = vec_splat_s16 (0); \
106 shift = vec_splat_u16 (4); \ 98 shift = vec_splat_u16 (4); \
107 \ 99 \
108 vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \ 100 vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \
154 vx5 = vec_sra (vy5, shift); \ 146 vx5 = vec_sra (vy5, shift); \
155 vx6 = vec_sra (vy6, shift); \ 147 vx6 = vec_sra (vy6, shift); \
156 vx7 = vec_sra (vy7, shift); 148 vx7 = vec_sra (vy7, shift);
157 149
158 150
159 static const_vector_s16_t constants[5] = { 151 static const vec_s16 constants[5] = {
160 {23170, 13573, 6518, 21895, -23170, -21895, 32, 31}, 152 {23170, 13573, 6518, 21895, -23170, -21895, 32, 31},
161 {16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725}, 153 {16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725},
162 {22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521}, 154 {22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521},
163 {21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692}, 155 {21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692},
164 {19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722} 156 {19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722}
165 }; 157 };
166 158
167 void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block) 159 void idct_put_altivec(uint8_t* dest, int stride, vec_s16* block)
168 { 160 {
169 POWERPC_PERF_DECLARE(altivec_idct_put_num, 1); 161 POWERPC_PERF_DECLARE(altivec_idct_put_num, 1);
170 vector_u8_t tmp; 162 vec_u8 tmp;
171 163
172 #ifdef CONFIG_POWERPC_PERF 164 #ifdef CONFIG_POWERPC_PERF
173 POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1); 165 POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
174 #endif 166 #endif
175 IDCT 167 IDCT
176 168
177 #define COPY(dest,src) \ 169 #define COPY(dest,src) \
178 tmp = vec_packsu (src, src); \ 170 tmp = vec_packsu (src, src); \
179 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ 171 vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest); \
180 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); 172 vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);
181 173
182 COPY (dest, vx0) dest += stride; 174 COPY (dest, vx0) dest += stride;
183 COPY (dest, vx1) dest += stride; 175 COPY (dest, vx1) dest += stride;
184 COPY (dest, vx2) dest += stride; 176 COPY (dest, vx2) dest += stride;
185 COPY (dest, vx3) dest += stride; 177 COPY (dest, vx3) dest += stride;
189 COPY (dest, vx7) 181 COPY (dest, vx7)
190 182
191 POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1); 183 POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
192 } 184 }
193 185
194 void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block) 186 void idct_add_altivec(uint8_t* dest, int stride, vec_s16* block)
195 { 187 {
196 POWERPC_PERF_DECLARE(altivec_idct_add_num, 1); 188 POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);
197 vector_u8_t tmp; 189 vec_u8 tmp;
198 vector_s16_t tmp2, tmp3; 190 vec_s16 tmp2, tmp3;
199 vector_u8_t perm0; 191 vec_u8 perm0;
200 vector_u8_t perm1; 192 vec_u8 perm1;
201 vector_u8_t p0, p1, p; 193 vec_u8 p0, p1, p;
202 194
203 #ifdef CONFIG_POWERPC_PERF 195 #ifdef CONFIG_POWERPC_PERF
204 POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1); 196 POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
205 #endif 197 #endif
206 198
213 perm1 = vec_mergeh (p, p1); 205 perm1 = vec_mergeh (p, p1);
214 206
215 #define ADD(dest,src,perm) \ 207 #define ADD(dest,src,perm) \
216 /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ 208 /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
217 tmp = vec_ld (0, dest); \ 209 tmp = vec_ld (0, dest); \
218 tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \ 210 tmp2 = (vec_s16)vec_perm (tmp, (vec_u8)zero, perm); \
219 tmp3 = vec_adds (tmp2, src); \ 211 tmp3 = vec_adds (tmp2, src); \
220 tmp = vec_packsu (tmp3, tmp3); \ 212 tmp = vec_packsu (tmp3, tmp3); \
221 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ 213 vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest); \
222 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); 214 vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);
223 215
224 ADD (dest, vx0, perm0) dest += stride; 216 ADD (dest, vx0, perm0) dest += stride;
225 ADD (dest, vx1, perm1) dest += stride; 217 ADD (dest, vx1, perm1) dest += stride;
226 ADD (dest, vx2, perm0) dest += stride; 218 ADD (dest, vx2, perm0) dest += stride;
227 ADD (dest, vx3, perm1) dest += stride; 219 ADD (dest, vx3, perm1) dest += stride;