comparison: postprocess_altivec_template.c @ 103:53295ae2d45e (libpostproc)

Cosmetics: reindent and whitespaces

author:   lu_zero
date:     Sun, 23 Mar 2008 15:51:02 +0000
parents:  eeb4558c950a
children: 883d75fc0712
comparing 102:eeb4558c950a with 103:53295ae2d45e

The affected hunks, as the code reads after the reindent:
    v2QP = vec_splat(v_data, 2);
    v4QP = (vector unsigned short)vec_splat(v_data, 3);

    src2 += stride * 4;

#define LOAD_LINE(i)                                                    \
    {                                                                   \
    vector unsigned char perm##i = vec_lvsl(j##i, src2);                \
    vector unsigned char v_srcA2##i;                                    \
    vector unsigned char v_srcA1##i = vec_ld(j##i, src2);               \
    [...]
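
LOAD_LINE is the standard AltiVec misaligned-load idiom: vec_ld() can only load from 16-byte-aligned addresses, so a misaligned line is assembled from the two aligned vectors that straddle it (v_srcA1##i above and, presumably, v_srcA2##i in the elided lines), shuffled into place with a permute from vec_lvsl(). A minimal standalone sketch of the idiom, with illustrative names:

    #include <altivec.h>

    /* Load 16 bytes from a possibly misaligned address: fetch the two
     * aligned vectors that straddle it, then select the wanted bytes
     * with the permute vector vec_lvsl() derives from the address. */
    static vector unsigned char load_unaligned(const unsigned char *p)
    {
        vector unsigned char perm = vec_lvsl(0, p);  /* encodes p's misalignment */
        vector unsigned char lo   = vec_ld(0, p);    /* aligned vector at/before p */
        vector unsigned char hi   = vec_ld(16, p);   /* the following aligned vector */
        return vec_perm(lo, hi, perm);
    }
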
        const vector signed short v_comp##i =                               \
            (vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \
                                           v_dcThreshold);                  \
        const vector signed short v_part##i = vec_and(mask, v_comp##i);

    {
        ITER(0, 1)
        ITER(1, 2)
        ITER(2, 3)
        ITER(3, 4)
        ITER(4, 5)
        ITER(5, 6)
        ITER(6, 7)

        v_numEq = vec_sum4s(v_part0, v_numEq);
        v_numEq = vec_sum4s(v_part1, v_numEq);
        v_numEq = vec_sum4s(v_part2, v_numEq);
        v_numEq = vec_sum4s(v_part3, v_numEq);
        v_numEq = vec_sum4s(v_part4, v_numEq);
        v_numEq = vec_sum4s(v_part5, v_numEq);
        v_numEq = vec_sum4s(v_part6, v_numEq);
    }

#undef ITER

    v_numEq = vec_sums(v_numEq, zero);
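
What this hunk computes: each ITER compares a pair of vertically adjacent lines against a DC threshold, using the classic offset trick (add dcOffset, then do an unsigned compare) to test whether the difference lies in a symmetric band; vec_sum4s()/vec_sums() then reduce the per-lane match flags, one per matching pixel assuming mask is a splat of 1, to the single count that feeds the flat/detail block decision. A rough scalar model (names hypothetical):

    #include <stdint.h>

    /* Scalar model of the ITER/vec_sum4s reduction: count adjacent-line
     * pixel pairs whose difference falls inside the DC band. */
    static int count_flat_pairs(const uint8_t *src, int stride,
                                int dcOffset, unsigned dcThreshold)
    {
        int numEq = 0;
        for (int y = 0; y < 7; y++)      /* seven line pairs: ITER(0,1)..ITER(6,7) */
            for (int x = 0; x < 8; x++)  /* eight pixels per line of the block */
                numEq += (unsigned)(src[y*stride + x] - src[(y+1)*stride + x]
                                    + dcOffset) < dcThreshold;
        return numEq;
    }
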
[...]

        LOAD_LINE(5);
        LOAD_LINE(6);
        LOAD_LINE(7);
        LOAD_LINE(8);
        LOAD_LINE(9);
    }
#undef LOAD_LINE
#undef LOAD_LINE_ALIGNED
    {
        const vector unsigned short v_2 = vec_splat_u16(2);
        const vector unsigned short v_4 = vec_splat_u16(4);

        const vector signed short v_diff01 = vec_sub(vb0, vb1);
        const vector unsigned short v_cmp01 =
            (const vector unsigned short) vec_cmplt(vec_abs(v_diff01), vqp);
        const vector signed short v_first = vec_sel(vb1, vb0, v_cmp01);
        const vector signed short v_diff89 = vec_sub(vb8, vb9);
        const vector unsigned short v_cmp89 =
            (const vector unsigned short) vec_cmplt(vec_abs(v_diff89), vqp);
        const vector signed short v_last = vec_sel(vb8, vb9, v_cmp89);

        const vector signed short temp01 = vec_mladd(v_first, (vector signed short)v_4, vb1);
        const vector signed short temp02 = vec_add(vb2, vb3);
        const vector signed short temp03 = vec_add(temp01, (vector signed short)v_4);
        const vector signed short v_sumsB0 = vec_add(temp02, temp03);

        const vector signed short temp11 = vec_sub(v_sumsB0, v_first);
        const vector signed short v_sumsB1 = vec_add(temp11, vb4);

        const vector signed short temp21 = vec_sub(v_sumsB1, v_first);
        const vector signed short v_sumsB2 = vec_add(temp21, vb5);

        const vector signed short temp31 = vec_sub(v_sumsB2, v_first);
        const vector signed short v_sumsB3 = vec_add(temp31, vb6);

        const vector signed short temp41 = vec_sub(v_sumsB3, v_first);
        const vector signed short v_sumsB4 = vec_add(temp41, vb7);

        const vector signed short temp51 = vec_sub(v_sumsB4, vb1);
        const vector signed short v_sumsB5 = vec_add(temp51, vb8);

        const vector signed short temp61 = vec_sub(v_sumsB5, vb2);
        const vector signed short v_sumsB6 = vec_add(temp61, v_last);

        const vector signed short temp71 = vec_sub(v_sumsB6, vb3);
        const vector signed short v_sumsB7 = vec_add(temp71, v_last);

        const vector signed short temp81 = vec_sub(v_sumsB7, vb4);
        const vector signed short v_sumsB8 = vec_add(temp81, v_last);

        const vector signed short temp91 = vec_sub(v_sumsB8, vb5);
        const vector signed short v_sumsB9 = vec_add(temp91, v_last);
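
In scalar terms the v_sumsB chain is a sliding-window sum: rather than recomputing a fresh multi-tap sum for every output line, each sum reuses the previous one, dropping the sample that leaves the window and adding the one that enters. A sketch mirroring the structure above, with b[0..9] standing in for vb0..vb9 and first/last for v_first/v_last:

    /* Scalar model of the v_sumsB0..v_sumsB9 chain. sums[0] is primed
     * with 'first' at weight 4 plus a +4 rounding bias; every later sum
     * is the previous one minus the departing sample plus the new one. */
    static void sliding_sums(const int b[10], int first, int last, int sums[10])
    {
        sums[0] = 4*first  + b[1] + b[2] + b[3] + 4;
        sums[1] = sums[0] - first + b[4];
        sums[2] = sums[1] - first + b[5];
        sums[3] = sums[2] - first + b[6];
        sums[4] = sums[3] - first + b[7];
        sums[5] = sums[4] - b[1]  + b[8];
        sums[6] = sums[5] - b[2]  + last;
        sums[7] = sums[6] - b[3]  + last;
        sums[8] = sums[7] - b[4]  + last;
        sums[9] = sums[8] - b[5]  + last;
    }
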
#define COMPUTE_VR(i, j, k)                                             \
        const vector signed short temps1##i =                           \
            vec_add(v_sumsB##i, v_sumsB##k);                            \
        const vector signed short temps2##i =                           \
            vec_mladd(vb##j, (vector signed short)v_2, temps1##i);      \
        const vector signed short vr##j = vec_sra(temps2##i, v_4)

        COMPUTE_VR(0, 1, 2);
        COMPUTE_VR(1, 2, 3);
        COMPUTE_VR(2, 3, 4);
        COMPUTE_VR(3, 4, 5);
        COMPUTE_VR(4, 5, 6);
        COMPUTE_VR(5, 6, 7);
        COMPUTE_VR(6, 7, 8);
        COMPUTE_VR(7, 8, 9);
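
COMPUTE_VR then finishes the filter: output line j is (sums[j-1] + sums[j+1] + 2*b[j]) >> 4. The tap weights total 16, and the two +4 biases carried by the sums add up to 8, exactly half the divisor, so the arithmetic shift rounds to nearest. As a scalar function:

    /* Scalar model of COMPUTE_VR(i, j, k) with i = j-1 and k = j+1,
     * matching the call pattern above. */
    static int compute_vr(const int sums[10], const int b[10], int j)
    {
        /* sums[] each carry a +4 bias, so the >>4 rounds to nearest. */
        return (sums[j-1] + sums[j+1] + 2*b[j]) >> 4;
    }
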
        const vector signed char neg1 = vec_splat_s8(-1);
        const vector unsigned char permHH =
            (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                                            0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);

#define PACK_AND_STORE(i)                                               \
        {   const vector unsigned char perms##i =                      \
                vec_lvsr(i * stride, src2);                             \
            const vector unsigned char vf##i =                         \
            [...]
                vec_packsu(vr##i, (vector signed short)zero);           \
            const vector unsigned char vg##i =                         \
                vec_perm(vf##i, vbT##i, permHH);                        \
            vec_st(vg##i, i * stride, src2);}
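
The store path narrows each vector of filtered shorts back to unsigned bytes and merges it with the untouched bytes of the destination before storing: permHH selects bytes 0-7 from the packed result and bytes 8-15 from the second vec_perm() operand (indices 0x18-0x1F address that operand), presumably the original line held in vbT##i. A scalar sketch of the per-line effect, with illustrative names:

    #include <stdint.h>

    /* What one pack-and-store amounts to per line: write the 8 filtered
     * pixels, saturated to 0..255 as vec_packsu() does, and leave the
     * remaining bytes of the 16-byte destination untouched. */
    static void store_filtered_line(uint8_t *dst, const int16_t vr[8])
    {
        for (int x = 0; x < 8; x++) {
            int v = vr[x];
            dst[x] = v < 0 ? 0 : v > 255 ? 255 : v;
        }
        /* dst[8..15] keep their original values (the vbT half of permHH) */
    }
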
        /* Special-casing the aligned case is worthwhile, as all calls from
         * the (transposed) horizontal deblocks will be aligned, in addition
         * to the naturally aligned vertical deblocks. */
        if (properStride && srcAlign) {
            PACK_AND_STORE_ALIGNED(1)
            PACK_AND_STORE_ALIGNED(2)
            PACK_AND_STORE_ALIGNED(3)
            PACK_AND_STORE_ALIGNED(4)
            PACK_AND_STORE_ALIGNED(5)
            PACK_AND_STORE_ALIGNED(6)
            PACK_AND_STORE_ALIGNED(7)
            PACK_AND_STORE_ALIGNED(8)
        } else {
            PACK_AND_STORE(1)
            PACK_AND_STORE(2)
            PACK_AND_STORE(3)
            PACK_AND_STORE(4)
            PACK_AND_STORE(5)
            PACK_AND_STORE(6)
            PACK_AND_STORE(7)
            PACK_AND_STORE(8)
        }
#undef PACK_AND_STORE
#undef PACK_AND_STORE_ALIGNED
    }
}
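
On the aligned/unaligned split: vec_st() can only store to 16-byte-aligned addresses (the low four address bits are ignored), so the direct-store path is valid only when every src2 + i * stride lands on a 16-byte boundary. A hypothetical check of that shape; the real function derives its properStride/srcAlign flags elsewhere in the file:

    #include <stdint.h>

    /* Illustrative only: when do all stores at i * stride from base hit
     * 16-byte boundaries, as the PACK_AND_STORE_ALIGNED path requires? */
    static int can_use_aligned_stores(const uint8_t *base, int stride)
    {
        return ((uintptr_t)base % 16 == 0) && (stride % 16 == 0);
    }
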


static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c) {