postprocess_altivec_template.c @ 101:db57626d7d76 (libpostproc)

Remove declarations after statements from doVertLowPass_altivec
author lu_zero
date Sun, 23 Mar 2008 15:35:19 +0000
parents b944f0b99b23
children eeb4558c950a
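
C90 requires every declaration in a block to come before the first statement; mixing declarations and statements is a C99 feature that several compilers targeted by FFmpeg at the time warned about or rejected. This patch therefore hoists all of doVertLowPass_altivec's declarations above the first statement and turns the old qp[0] store into an initializer. A minimal sketch of the initializer half of the change, assuming a GCC-style compiler with AltiVec enabled (broadcast_qp is a hypothetical name, not a function in the file):

    #include <altivec.h>

    /* Hypothetical illustration of the qp change, not code from the file. */
    static vector signed short broadcast_qp(short QP)
    {
        /* The initializer replaces the old "qp[0] = QP;" statement, so the
         * vqp declaration below no longer follows a statement.  Elements
         * 1..7 are zero-filled by C aggregate initialization, which is
         * harmless here: vec_splat() broadcasts element 0 to every lane. */
        short qp[8] __attribute__((aligned(16))) = {QP};
        vector signed short vqp = vec_ld(0, qp);   /* aligned 16-byte load */
        return vec_splat(vqp, 0);
    }

In the file itself the array is declared through FFmpeg's DECLARE_ALIGNED macro, which expands to the compiler-specific 16-byte-alignment attribute used above.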
diff -r b944f0b99b23 -r db57626d7d76 postprocess_altivec_template.c
--- a/postprocess_altivec_template.c
+++ b/postprocess_altivec_template.c
@@ -221,68 +221,77 @@
     */
     uint8_t *src2 = src;
     const vector signed int zero = vec_splat_s32(0);
     const int properStride = (stride % 16);
     const int srcAlign = ((unsigned long)src2 % 16);
-    DECLARE_ALIGNED(16, short, qp[8]);
-    qp[0] = c->QP;
+    DECLARE_ALIGNED(16, short, qp[8]) = {c->QP};
     vector signed short vqp = vec_ld(0, qp);
-    vqp = vec_splat(vqp, 0);
-
-    src2 += stride*3;
-
     vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9;
     vector unsigned char vbA0, vbA1, vbA2, vbA3, vbA4, vbA5, vbA6, vbA7, vbA8, vbA9;
     vector unsigned char vbB0, vbB1, vbB2, vbB3, vbB4, vbB5, vbB6, vbB7, vbB8, vbB9;
     vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9;
+    vector unsigned char perml0, perml1, perml2, perml3, perml4,
+                         perml5, perml6, perml7, perml8, perml9;
+    register int j0 = 0,
+                 j1 = stride,
+                 j2 = 2 * stride,
+                 j3 = 3 * stride,
+                 j4 = 4 * stride,
+                 j5 = 5 * stride,
+                 j6 = 6 * stride,
+                 j7 = 7 * stride,
+                 j8 = 8 * stride,
+                 j9 = 9 * stride;
+
+    vqp = vec_splat(vqp, 0);
+
+    src2 += stride*3;

 #define LOAD_LINE(i)                                                \
-    const vector unsigned char perml##i =                           \
-        vec_lvsl(i * stride, src2);                                 \
+    perml##i = vec_lvsl(i * stride, src2);                          \
     vbA##i = vec_ld(i * stride, src2);                              \
     vbB##i = vec_ld(i * stride + 16, src2);                         \
     vbT##i = vec_perm(vbA##i, vbB##i, perml##i);                    \
     vb##i =                                                         \
         (vector signed short)vec_mergeh((vector unsigned char)zero, \
                                         (vector unsigned char)vbT##i)

 #define LOAD_LINE_ALIGNED(i)                                        \
-    register int j##i = i * stride;                                 \
     vbT##i = vec_ld(j##i, src2);                                    \
     vb##i =                                                         \
         (vector signed short)vec_mergeh((vector signed char)zero,   \
                                         (vector signed char)vbT##i)

     /* Special-casing the aligned case is worthwhile, as all calls from
      * the (transposed) horizontable deblocks will be aligned, in addition
      * to the naturally aligned vertical deblocks. */
     if (properStride && srcAlign) {
         LOAD_LINE_ALIGNED(0);
         LOAD_LINE_ALIGNED(1);
         LOAD_LINE_ALIGNED(2);
         LOAD_LINE_ALIGNED(3);
         LOAD_LINE_ALIGNED(4);
         LOAD_LINE_ALIGNED(5);
         LOAD_LINE_ALIGNED(6);
         LOAD_LINE_ALIGNED(7);
         LOAD_LINE_ALIGNED(8);
         LOAD_LINE_ALIGNED(9);
     } else {
         LOAD_LINE(0);
         LOAD_LINE(1);
         LOAD_LINE(2);
         LOAD_LINE(3);
         LOAD_LINE(4);
         LOAD_LINE(5);
         LOAD_LINE(6);
         LOAD_LINE(7);
         LOAD_LINE(8);
         LOAD_LINE(9);
     }
 #undef LOAD_LINE
 #undef LOAD_LINE_ALIGNED
-
+    {
     const vector unsigned short v_2 = vec_splat_u16(2);
     const vector unsigned short v_4 = vec_splat_u16(4);

     const vector signed short v_diff01 = vec_sub(vb0, vb1);
     const vector unsigned short v_cmp01 =
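
An aside on what LOAD_LINE computes, since the commit only hoists its perml##i temporary: vec_ld ignores the low four address bits, so a potentially misaligned 16-byte line is fetched by loading the two aligned blocks that straddle it and merging them with a permute mask derived from the address. A standalone sketch of the idiom (the helper names are ours, not the file's):

    #include <altivec.h>

    /* Hypothetical helper mirroring LOAD_LINE's misaligned fetch. */
    static vector unsigned char load_16_unaligned(const unsigned char *p)
    {
        vector unsigned char perm = vec_lvsl(0, p); /* mask from p's low 4 bits */
        vector unsigned char blkA = vec_ld(0, p);   /* block holding first byte */
        vector unsigned char blkB = vec_ld(16, p);  /* the following block      */
        return vec_perm(blkA, blkB, perm);          /* 16 bytes starting at p   */
    }

    /* LOAD_LINE then widens the first 8 pixels to 16-bit lanes by
     * interleaving a zero byte above each pixel byte (u8 -> s16). */
    static vector signed short widen_lo8(vector unsigned char pixels)
    {
        const vector unsigned char zero = vec_splat_u8(0);
        return (vector signed short)vec_mergeh(zero, pixels);
    }

LOAD_LINE_ALIGNED can skip the permute entirely, which is why the aligned path is worth special-casing, as the source comment notes.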
@@ -344,58 +353,59 @@
     const vector signed char neg1 = vec_splat_s8(-1);
     const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                                                                         0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);

 #define PACK_AND_STORE(i)                                           \
-    const vector unsigned char perms##i =                           \
+    { const vector unsigned char perms##i =                         \
         vec_lvsr(i * stride, src2);                                 \
     const vector unsigned char vf##i =                              \
         vec_packsu(vr##i, (vector signed short)zero);               \
     const vector unsigned char vg##i =                              \
         vec_perm(vf##i, vbT##i, permHH);                            \
     const vector unsigned char mask##i =                            \
         vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
     const vector unsigned char vg2##i =                             \
         vec_perm(vg##i, vg##i, perms##i);                           \
     const vector unsigned char svA##i =                             \
         vec_sel(vbA##i, vg2##i, mask##i);                           \
     const vector unsigned char svB##i =                             \
         vec_sel(vg2##i, vbB##i, mask##i);                           \
     vec_st(svA##i, i * stride, src2);                               \
-    vec_st(svB##i, i * stride + 16, src2)
+    vec_st(svB##i, i * stride + 16, src2);}

 #define PACK_AND_STORE_ALIGNED(i)                                   \
-    const vector unsigned char vf##i =                              \
+    { const vector unsigned char vf##i =                            \
         vec_packsu(vr##i, (vector signed short)zero);               \
     const vector unsigned char vg##i =                              \
         vec_perm(vf##i, vbT##i, permHH);                            \
-    vec_st(vg##i, i * stride, src2)
+    vec_st(vg##i, i * stride, src2);}

     /* Special-casing the aligned case is worthwhile, as all calls from
      * the (transposed) horizontable deblocks will be aligned, in addition
      * to the naturally aligned vertical deblocks. */
     if (properStride && srcAlign) {
-        PACK_AND_STORE_ALIGNED(1);
-        PACK_AND_STORE_ALIGNED(2);
-        PACK_AND_STORE_ALIGNED(3);
-        PACK_AND_STORE_ALIGNED(4);
-        PACK_AND_STORE_ALIGNED(5);
-        PACK_AND_STORE_ALIGNED(6);
-        PACK_AND_STORE_ALIGNED(7);
-        PACK_AND_STORE_ALIGNED(8);
+        PACK_AND_STORE_ALIGNED(1)
+        PACK_AND_STORE_ALIGNED(2)
+        PACK_AND_STORE_ALIGNED(3)
+        PACK_AND_STORE_ALIGNED(4)
+        PACK_AND_STORE_ALIGNED(5)
+        PACK_AND_STORE_ALIGNED(6)
+        PACK_AND_STORE_ALIGNED(7)
+        PACK_AND_STORE_ALIGNED(8)
     } else {
-        PACK_AND_STORE(1);
-        PACK_AND_STORE(2);
-        PACK_AND_STORE(3);
-        PACK_AND_STORE(4);
-        PACK_AND_STORE(5);
-        PACK_AND_STORE(6);
-        PACK_AND_STORE(7);
-        PACK_AND_STORE(8);
+        PACK_AND_STORE(1)
+        PACK_AND_STORE(2)
+        PACK_AND_STORE(3)
+        PACK_AND_STORE(4)
+        PACK_AND_STORE(5)
+        PACK_AND_STORE(6)
+        PACK_AND_STORE(7)
+        PACK_AND_STORE(8)
     }
 #undef PACK_AND_STORE
 #undef PACK_AND_STORE_ALIGNED
+    }
 }



 static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c) {
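
Two notes on this hunk. First, PACK_AND_STORE and PACK_AND_STORE_ALIGNED are now wrapped in braces, so each expansion opens its own block and its const vector temporaries are once again declarations at the top of a block, even when the macro is used after other statements; that is also why the invocations lose their trailing semicolons, since the closing brace already ends the statement. The new { after #undef LOAD_LINE_ALIGNED, closed by the extra } at the end, does the same for v_2, v_4 and the other filter temporaries. Second, the misaligned store that PACK_AND_STORE performs is the usual AltiVec read-modify-write sequence; a standalone sketch under the same assumptions as the load sketch above (helper name ours):

    #include <altivec.h>

    /* Hypothetical helper mirroring PACK_AND_STORE's misaligned store:
     * rotate the data to the destination's alignment, build a byte mask
     * with the same rotation, and merge into the two straddled blocks. */
    static void store_16_unaligned(vector unsigned char v, unsigned char *p)
    {
        vector unsigned char perm = vec_lvsr(0, p);       /* shift-right permute  */
        vector unsigned char zero = vec_splat_u8(0);
        vector unsigned char ones = (vector unsigned char)vec_splat_s8(-1);
        vector unsigned char mask = vec_perm(zero, ones, perm); /* 0 before p     */
        vector unsigned char blkA = vec_ld(0, p);         /* existing low block   */
        vector unsigned char blkB = vec_ld(16, p);        /* existing high block  */
        vector unsigned char vrot = vec_perm(v, v, perm); /* data rotated to p    */
        vec_st(vec_sel(blkA, vrot, mask), 0, p);          /* keep bytes before p  */
        vec_st(vec_sel(vrot, blkB, mask), 16, p);         /* keep bytes past end  */
    }

Only bytes covered by the mask are replaced, so the two stores leave the pixels on either side of the 16-byte span untouched, which is exactly what the vec_sel pair in the macro achieves with vbA##i and vbB##i.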