comparison postprocess_altivec_template.c @ 101:db57626d7d76 libpostproc
Remove declarations after statements from doVertLowPass_altivec
author | lu_zero |
date | Sun, 23 Mar 2008 15:35:19 +0000 |
parents | b944f0b99b23 |
children | eeb4558c950a |
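
The rule behind the change: C90, and GCC's `-Wdeclaration-after-statement` warning that projects of this era commonly kept enabled for portability, reject any declaration that follows a statement inside a block. The diff below complies in two ways: the `qp[0] = c->QP;` assignment is folded into the declaration's initializer (`DECLARE_ALIGNED(16, short, qp[8]) = {c->QP};`), and the `perml` vectors plus the `j0`..`j9` stride offsets are hoisted out of the macros to the top of the function. A minimal sketch of the pattern, with illustrative names rather than the real ones:

    /* Before: a declaration follows a statement, which C90 rejects
     * and -Wdeclaration-after-statement flags. */
    short before(short qp_in, int stride)
    {
        short qp[8];
        qp[0] = qp_in;            /* statement */
        register int j1 = stride; /* declaration after a statement */
        return qp[0] + (short)j1;
    }

    /* After: the assignment becomes an initializer, so every declaration
     * precedes the first statement. (A non-constant aggregate initializer
     * needs C99 or GNU C, which is what the real
     * DECLARE_ALIGNED(...) = {c->QP} relies on.) */
    short after(short qp_in, int stride)
    {
        short qp[8] = { qp_in };
        register int j1 = stride;
        return qp[0] + (short)j1;
    }
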
100:b944f0b99b23 | 101:db57626d7d76 |
---|---|
221 */ | 221 */ |
222 uint8_t *src2 = src; | 222 uint8_t *src2 = src; |
223 const vector signed int zero = vec_splat_s32(0); | 223 const vector signed int zero = vec_splat_s32(0); |
224 const int properStride = (stride % 16); | 224 const int properStride = (stride % 16); |
225 const int srcAlign = ((unsigned long)src2 % 16); | 225 const int srcAlign = ((unsigned long)src2 % 16); |
226 DECLARE_ALIGNED(16, short, qp[8]); | 226 DECLARE_ALIGNED(16, short, qp[8]) = {c->QP}; |
227 qp[0] = c->QP; | |
228 vector signed short vqp = vec_ld(0, qp); | 227 vector signed short vqp = vec_ld(0, qp); |
229 vqp = vec_splat(vqp, 0); | |
230 | |
231 src2 += stride*3; | |
232 | |
233 vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9; | 228 vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9; |
234 vector unsigned char vbA0, vbA1, vbA2, vbA3, vbA4, vbA5, vbA6, vbA7, vbA8, vbA9; | 229 vector unsigned char vbA0, vbA1, vbA2, vbA3, vbA4, vbA5, vbA6, vbA7, vbA8, vbA9; |
235 vector unsigned char vbB0, vbB1, vbB2, vbB3, vbB4, vbB5, vbB6, vbB7, vbB8, vbB9; | 230 vector unsigned char vbB0, vbB1, vbB2, vbB3, vbB4, vbB5, vbB6, vbB7, vbB8, vbB9; |
236 vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9; | 231 vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9; |
| 232 vector unsigned char perml0, perml1, perml2, perml3, perml4, |
| 233 perml5, perml6, perml7, perml8, perml9; |
| 234 register int j0 = 0, |
| 235 j1 = stride, |
| 236 j2 = 2 * stride, |
| 237 j3 = 3 * stride, |
| 238 j4 = 4 * stride, |
| 239 j5 = 5 * stride, |
| 240 j6 = 6 * stride, |
| 241 j7 = 7 * stride, |
| 242 j8 = 8 * stride, |
| 243 j9 = 9 * stride; |
| 244 |
| 245 vqp = vec_splat(vqp, 0); |
| 246 |
| 247 src2 += stride*3; |
237 | 248 |
238 #define LOAD_LINE(i) \ | 249 #define LOAD_LINE(i) \ |
239 const vector unsigned char perml##i = \ | 250 perml##i = vec_lvsl(i * stride, src2); \ |
240 vec_lvsl(i * stride, src2); \ | |
241 vbA##i = vec_ld(i * stride, src2); \ | 251 vbA##i = vec_ld(i * stride, src2); \ |
242 vbB##i = vec_ld(i * stride + 16, src2); \ | 252 vbB##i = vec_ld(i * stride + 16, src2); \ |
243 vbT##i = vec_perm(vbA##i, vbB##i, perml##i); \ | 253 vbT##i = vec_perm(vbA##i, vbB##i, perml##i); \ |
244 vb##i = \ | 254 vb##i = \ |
245 (vector signed short)vec_mergeh((vector unsigned char)zero, \ | 255 (vector signed short)vec_mergeh((vector unsigned char)zero, \ |
246 (vector unsigned char)vbT##i) | 256 (vector unsigned char)vbT##i) |
247 | 257 |
248 #define LOAD_LINE_ALIGNED(i) \ | 258 #define LOAD_LINE_ALIGNED(i) \ |
249 register int j##i = i * stride; \ | |
250 vbT##i = vec_ld(j##i, src2); \ | 259 vbT##i = vec_ld(j##i, src2); \ |
251 vb##i = \ | 260 vb##i = \ |
252 (vector signed short)vec_mergeh((vector signed char)zero, \ | 261 (vector signed short)vec_mergeh((vector signed char)zero, \ |
253 (vector signed char)vbT##i) | 262 (vector signed char)vbT##i) |
254 | 263 |
255 /* Special-casing the aligned case is worthwhile, as all calls from | 264 /* Special-casing the aligned case is worthwhile, as all calls from |
256 * the (transposed) horizontal deblocks will be aligned, in addition | 265 * the (transposed) horizontal deblocks will be aligned, in addition |
257 * to the naturally aligned vertical deblocks. */ | 266 * to the naturally aligned vertical deblocks. */ |
258 if (properStride && srcAlign) { | 267 if (properStride && srcAlign) { |
259 LOAD_LINE_ALIGNED(0); | 268 LOAD_LINE_ALIGNED(0); |
260 LOAD_LINE_ALIGNED(1); | 269 LOAD_LINE_ALIGNED(1); |
261 LOAD_LINE_ALIGNED(2); | 270 LOAD_LINE_ALIGNED(2); |
262 LOAD_LINE_ALIGNED(3); | 271 LOAD_LINE_ALIGNED(3); |
263 LOAD_LINE_ALIGNED(4); | 272 LOAD_LINE_ALIGNED(4); |
264 LOAD_LINE_ALIGNED(5); | 273 LOAD_LINE_ALIGNED(5); |
265 LOAD_LINE_ALIGNED(6); | 274 LOAD_LINE_ALIGNED(6); |
266 LOAD_LINE_ALIGNED(7); | 275 LOAD_LINE_ALIGNED(7); |
267 LOAD_LINE_ALIGNED(8); | 276 LOAD_LINE_ALIGNED(8); |
268 LOAD_LINE_ALIGNED(9); | 277 LOAD_LINE_ALIGNED(9); |
269 } else { | 278 } else { |
270 LOAD_LINE(0); | 279 LOAD_LINE(0); |
271 LOAD_LINE(1); | 280 LOAD_LINE(1); |
272 LOAD_LINE(2); | 281 LOAD_LINE(2); |
273 LOAD_LINE(3); | 282 LOAD_LINE(3); |
274 LOAD_LINE(4); | 283 LOAD_LINE(4); |
278 LOAD_LINE(8); | 287 LOAD_LINE(8); |
279 LOAD_LINE(9); | 288 LOAD_LINE(9); |
280 } | 289 } |
281 #undef LOAD_LINE | 290 #undef LOAD_LINE |
282 #undef LOAD_LINE_ALIGNED | 291 #undef LOAD_LINE_ALIGNED |
283 | 292 { |
284 const vector unsigned short v_2 = vec_splat_u16(2); | 293 const vector unsigned short v_2 = vec_splat_u16(2); |
285 const vector unsigned short v_4 = vec_splat_u16(4); | 294 const vector unsigned short v_4 = vec_splat_u16(4); |
286 | 295 |
287 const vector signed short v_diff01 = vec_sub(vb0, vb1); | 296 const vector signed short v_diff01 = vec_sub(vb0, vb1); |
288 const vector unsigned short v_cmp01 = | 297 const vector unsigned short v_cmp01 = |
344 const vector signed char neg1 = vec_splat_s8(-1); | 353 const vector signed char neg1 = vec_splat_s8(-1); |
345 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | 354 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
346 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | 355 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); |
347 | 356 |
348 #define PACK_AND_STORE(i) \ | 357 #define PACK_AND_STORE(i) \ |
349 const vector unsigned char perms##i = \ | 358 { const vector unsigned char perms##i = \ |
350 vec_lvsr(i * stride, src2); \ | 359 vec_lvsr(i * stride, src2); \ |
351 const vector unsigned char vf##i = \ | 360 const vector unsigned char vf##i = \ |
352 vec_packsu(vr##i, (vector signed short)zero); \ | 361 vec_packsu(vr##i, (vector signed short)zero); \ |
353 const vector unsigned char vg##i = \ | 362 const vector unsigned char vg##i = \ |
354 vec_perm(vf##i, vbT##i, permHH); \ | 363 vec_perm(vf##i, vbT##i, permHH); \ |
359 const vector unsigned char svA##i = \ | 368 const vector unsigned char svA##i = \ |
360 vec_sel(vbA##i, vg2##i, mask##i); \ | 369 vec_sel(vbA##i, vg2##i, mask##i); \ |
361 const vector unsigned char svB##i = \ | 370 const vector unsigned char svB##i = \ |
362 vec_sel(vg2##i, vbB##i, mask##i); \ | 371 vec_sel(vg2##i, vbB##i, mask##i); \ |
363 vec_st(svA##i, i * stride, src2); \ | 372 vec_st(svA##i, i * stride, src2); \ |
364 vec_st(svB##i, i * stride + 16, src2) | 373 vec_st(svB##i, i * stride + 16, src2);} |
365 | 374 |
366 #define PACK_AND_STORE_ALIGNED(i) \ | 375 #define PACK_AND_STORE_ALIGNED(i) \ |
367 const vector unsigned char vf##i = \ | 376 { const vector unsigned char vf##i = \ |
368 vec_packsu(vr##i, (vector signed short)zero); \ | 377 vec_packsu(vr##i, (vector signed short)zero); \ |
369 const vector unsigned char vg##i = \ | 378 const vector unsigned char vg##i = \ |
370 vec_perm(vf##i, vbT##i, permHH); \ | 379 vec_perm(vf##i, vbT##i, permHH); \ |
371 vec_st(vg##i, i * stride, src2) | 380 vec_st(vg##i, i * stride, src2);} |
372 | 381 |
373 /* Special-casing the aligned case is worthwhile, as all calls from | 382 /* Special-casing the aligned case is worthwhile, as all calls from |
374 * the (transposed) horizontal deblocks will be aligned, in addition | 383 * the (transposed) horizontal deblocks will be aligned, in addition |
375 * to the naturally aligned vertical deblocks. */ | 384 * to the naturally aligned vertical deblocks. */ |
376 if (properStride && srcAlign) { | 385 if (properStride && srcAlign) { |
377 PACK_AND_STORE_ALIGNED(1); | 386 PACK_AND_STORE_ALIGNED(1) |
378 PACK_AND_STORE_ALIGNED(2); | 387 PACK_AND_STORE_ALIGNED(2) |
379 PACK_AND_STORE_ALIGNED(3); | 388 PACK_AND_STORE_ALIGNED(3) |
380 PACK_AND_STORE_ALIGNED(4); | 389 PACK_AND_STORE_ALIGNED(4) |
381 PACK_AND_STORE_ALIGNED(5); | 390 PACK_AND_STORE_ALIGNED(5) |
382 PACK_AND_STORE_ALIGNED(6); | 391 PACK_AND_STORE_ALIGNED(6) |
383 PACK_AND_STORE_ALIGNED(7); | 392 PACK_AND_STORE_ALIGNED(7) |
384 PACK_AND_STORE_ALIGNED(8); | 393 PACK_AND_STORE_ALIGNED(8) |
385 } else { | 394 } else { |
386 PACK_AND_STORE(1); | 395 PACK_AND_STORE(1) |
387 PACK_AND_STORE(2); | 396 PACK_AND_STORE(2) |
388 PACK_AND_STORE(3); | 397 PACK_AND_STORE(3) |
389 PACK_AND_STORE(4); | 398 PACK_AND_STORE(4) |
390 PACK_AND_STORE(5); | 399 PACK_AND_STORE(5) |
391 PACK_AND_STORE(6); | 400 PACK_AND_STORE(6) |
392 PACK_AND_STORE(7); | 401 PACK_AND_STORE(7) |
393 PACK_AND_STORE(8); | 402 PACK_AND_STORE(8) |
394 } | 403 } |
395 #undef PACK_AND_STORE | 404 #undef PACK_AND_STORE |
396 #undef PACK_AND_STORE_ALIGNED | 405 #undef PACK_AND_STORE_ALIGNED |
| 406 } |
397 } | 407 } |
398 | 408 |
399 | 409 |
400 | 410 |
401 static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c) { | 411 static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c) { |
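
The other half of the change handles macros that declare temporaries in their bodies. `PACK_AND_STORE` and `PACK_AND_STORE_ALIGNED` are wrapped in `{ ... }`, so their `const vector` locals open a fresh scope at every expansion site instead of becoming declarations after statements; the trailing semicolon moves inside the braces, which is why the call sites above lose theirs. The bare block opened at new line 292 and closed at 406 does the same for the `v_2`/`v_4` constants declared after the load statements. A portable sketch of the brace-wrapping idiom, with made-up names:

    #include <stdio.h>

    /* Wrapping the body in braces gives each expansion's locals their own
     * scope, so the macro can follow arbitrary statements without tripping
     * declaration-after-statement rules. The semicolon lives inside the
     * braces, as in the diff's PACK_AND_STORE. */
    #define STORE_DOUBLED(dst, src, i) \
        { const int doubled = (src)[i] * 2; \
          (dst)[i] = doubled; }

    int main(void)
    {
        int src[3] = { 1, 2, 3 };
        int dst[3] = { 0, 0, 0 };

        dst[0] = src[0];           /* a preceding statement is now harmless */
        STORE_DOUBLED(dst, src, 1) /* note: no trailing semicolon */
        STORE_DOUBLED(dst, src, 2)

        printf("%d %d %d\n", dst[0], dst[1], dst[2]); /* prints: 1 4 6 */
        return 0;
    }

A `do { ... } while (0)` wrapper is the more defensive spelling of the same idea, since it also behaves as a single statement under an unbraced `if`/`else`; bare braces suffice here because every expansion stands alone on its own line.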