Mercurial > libpostproc.hg
comparison: postprocess_altivec_template.c @ 103:53295ae2d45e (libpostproc)
Cosmetics: reindent and whitespaces
author:   lu_zero
date:     Sun, 23 Mar 2008 15:51:02 +0000
parents:  eeb4558c950a
children: 883d75fc0712
comparison legend: equal | deleted | inserted | replaced
comparing revisions: 102:eeb4558c950a → 103:53295ae2d45e
101 v2QP = vec_splat(v_data, 2); | 101 v2QP = vec_splat(v_data, 2); |
102 v4QP = (vector unsigned short)vec_splat(v_data, 3); | 102 v4QP = (vector unsigned short)vec_splat(v_data, 3); |
103 | 103 |
104 src2 += stride * 4; | 104 src2 += stride * 4; |
105 | 105 |
106 | |
107 #define LOAD_LINE(i) \ | 106 #define LOAD_LINE(i) \ |
108 { \ | 107 { \ |
109 vector unsigned char perm##i = vec_lvsl(j##i, src2); \ | 108 vector unsigned char perm##i = vec_lvsl(j##i, src2); \ |
110 vector unsigned char v_srcA2##i; \ | 109 vector unsigned char v_srcA2##i; \ |
111 vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \ | 110 vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \ |
156 const vector signed short v_comp##i = \ | 155 const vector signed short v_comp##i = \ |
157 (vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \ | 156 (vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \ |
158 v_dcThreshold); \ | 157 v_dcThreshold); \ |
159 const vector signed short v_part##i = vec_and(mask, v_comp##i); | 158 const vector signed short v_part##i = vec_and(mask, v_comp##i); |
160 | 159 |
161 { | 160 { |
162 ITER(0, 1) | 161 ITER(0, 1) |
163 ITER(1, 2) | 162 ITER(1, 2) |
164 ITER(2, 3) | 163 ITER(2, 3) |
165 ITER(3, 4) | 164 ITER(3, 4) |
166 ITER(4, 5) | 165 ITER(4, 5) |
167 ITER(5, 6) | 166 ITER(5, 6) |
168 ITER(6, 7) | 167 ITER(6, 7) |
169 | 168 |
170 v_numEq = vec_sum4s(v_part0, v_numEq); | 169 v_numEq = vec_sum4s(v_part0, v_numEq); |
171 v_numEq = vec_sum4s(v_part1, v_numEq); | 170 v_numEq = vec_sum4s(v_part1, v_numEq); |
172 v_numEq = vec_sum4s(v_part2, v_numEq); | 171 v_numEq = vec_sum4s(v_part2, v_numEq); |
173 v_numEq = vec_sum4s(v_part3, v_numEq); | 172 v_numEq = vec_sum4s(v_part3, v_numEq); |
174 v_numEq = vec_sum4s(v_part4, v_numEq); | 173 v_numEq = vec_sum4s(v_part4, v_numEq); |
175 v_numEq = vec_sum4s(v_part5, v_numEq); | 174 v_numEq = vec_sum4s(v_part5, v_numEq); |
176 v_numEq = vec_sum4s(v_part6, v_numEq); | 175 v_numEq = vec_sum4s(v_part6, v_numEq); |
177 } | 176 } |
178 | 177 |
179 #undef ITER | 178 #undef ITER |
180 | 179 |
181 v_numEq = vec_sums(v_numEq, zero); | 180 v_numEq = vec_sums(v_numEq, zero); |
182 | 181 |
284 LOAD_LINE(5); | 283 LOAD_LINE(5); |
285 LOAD_LINE(6); | 284 LOAD_LINE(6); |
286 LOAD_LINE(7); | 285 LOAD_LINE(7); |
287 LOAD_LINE(8); | 286 LOAD_LINE(8); |
288 LOAD_LINE(9); | 287 LOAD_LINE(9); |
289 } | 288 } |
290 #undef LOAD_LINE | 289 #undef LOAD_LINE |
291 #undef LOAD_LINE_ALIGNED | 290 #undef LOAD_LINE_ALIGNED |
292 { | 291 { |
293 const vector unsigned short v_2 = vec_splat_u16(2); | 292 const vector unsigned short v_2 = vec_splat_u16(2); |
294 const vector unsigned short v_4 = vec_splat_u16(4); | 293 const vector unsigned short v_4 = vec_splat_u16(4); |
295 | 294 |
296 const vector signed short v_diff01 = vec_sub(vb0, vb1); | 295 const vector signed short v_diff01 = vec_sub(vb0, vb1); |
297 const vector unsigned short v_cmp01 = | 296 const vector unsigned short v_cmp01 = |
298 (const vector unsigned short) vec_cmplt(vec_abs(v_diff01), vqp); | 297 (const vector unsigned short) vec_cmplt(vec_abs(v_diff01), vqp); |
299 const vector signed short v_first = vec_sel(vb1, vb0, v_cmp01); | 298 const vector signed short v_first = vec_sel(vb1, vb0, v_cmp01); |
300 const vector signed short v_diff89 = vec_sub(vb8, vb9); | 299 const vector signed short v_diff89 = vec_sub(vb8, vb9); |
301 const vector unsigned short v_cmp89 = | 300 const vector unsigned short v_cmp89 = |
302 (const vector unsigned short) vec_cmplt(vec_abs(v_diff89), vqp); | 301 (const vector unsigned short) vec_cmplt(vec_abs(v_diff89), vqp); |
303 const vector signed short v_last = vec_sel(vb8, vb9, v_cmp89); | 302 const vector signed short v_last = vec_sel(vb8, vb9, v_cmp89); |
304 | 303 |
305 const vector signed short temp01 = vec_mladd(v_first, (vector signed short)v_4, vb1); | 304 const vector signed short temp01 = vec_mladd(v_first, (vector signed short)v_4, vb1); |
306 const vector signed short temp02 = vec_add(vb2, vb3); | 305 const vector signed short temp02 = vec_add(vb2, vb3); |
307 const vector signed short temp03 = vec_add(temp01, (vector signed short)v_4); | 306 const vector signed short temp03 = vec_add(temp01, (vector signed short)v_4); |
308 const vector signed short v_sumsB0 = vec_add(temp02, temp03); | 307 const vector signed short v_sumsB0 = vec_add(temp02, temp03); |
309 | 308 |
310 const vector signed short temp11 = vec_sub(v_sumsB0, v_first); | 309 const vector signed short temp11 = vec_sub(v_sumsB0, v_first); |
311 const vector signed short v_sumsB1 = vec_add(temp11, vb4); | 310 const vector signed short v_sumsB1 = vec_add(temp11, vb4); |
312 | 311 |
313 const vector signed short temp21 = vec_sub(v_sumsB1, v_first); | 312 const vector signed short temp21 = vec_sub(v_sumsB1, v_first); |
314 const vector signed short v_sumsB2 = vec_add(temp21, vb5); | 313 const vector signed short v_sumsB2 = vec_add(temp21, vb5); |
315 | 314 |
316 const vector signed short temp31 = vec_sub(v_sumsB2, v_first); | 315 const vector signed short temp31 = vec_sub(v_sumsB2, v_first); |
317 const vector signed short v_sumsB3 = vec_add(temp31, vb6); | 316 const vector signed short v_sumsB3 = vec_add(temp31, vb6); |
318 | 317 |
319 const vector signed short temp41 = vec_sub(v_sumsB3, v_first); | 318 const vector signed short temp41 = vec_sub(v_sumsB3, v_first); |
320 const vector signed short v_sumsB4 = vec_add(temp41, vb7); | 319 const vector signed short v_sumsB4 = vec_add(temp41, vb7); |
321 | 320 |
322 const vector signed short temp51 = vec_sub(v_sumsB4, vb1); | 321 const vector signed short temp51 = vec_sub(v_sumsB4, vb1); |
323 const vector signed short v_sumsB5 = vec_add(temp51, vb8); | 322 const vector signed short v_sumsB5 = vec_add(temp51, vb8); |
324 | 323 |
325 const vector signed short temp61 = vec_sub(v_sumsB5, vb2); | 324 const vector signed short temp61 = vec_sub(v_sumsB5, vb2); |
326 const vector signed short v_sumsB6 = vec_add(temp61, v_last); | 325 const vector signed short v_sumsB6 = vec_add(temp61, v_last); |
327 | 326 |
328 const vector signed short temp71 = vec_sub(v_sumsB6, vb3); | 327 const vector signed short temp71 = vec_sub(v_sumsB6, vb3); |
329 const vector signed short v_sumsB7 = vec_add(temp71, v_last); | 328 const vector signed short v_sumsB7 = vec_add(temp71, v_last); |
330 | 329 |
331 const vector signed short temp81 = vec_sub(v_sumsB7, vb4); | 330 const vector signed short temp81 = vec_sub(v_sumsB7, vb4); |
332 const vector signed short v_sumsB8 = vec_add(temp81, v_last); | 331 const vector signed short v_sumsB8 = vec_add(temp81, v_last); |
333 | 332 |
334 const vector signed short temp91 = vec_sub(v_sumsB8, vb5); | 333 const vector signed short temp91 = vec_sub(v_sumsB8, vb5); |
335 const vector signed short v_sumsB9 = vec_add(temp91, v_last); | 334 const vector signed short v_sumsB9 = vec_add(temp91, v_last); |
336 | 335 |
337 #define COMPUTE_VR(i, j, k) \ | 336 #define COMPUTE_VR(i, j, k) \ |
338 const vector signed short temps1##i = \ | 337 const vector signed short temps1##i = \ |
339 vec_add(v_sumsB##i, v_sumsB##k); \ | 338 vec_add(v_sumsB##i, v_sumsB##k); \ |
340 const vector signed short temps2##i = \ | 339 const vector signed short temps2##i = \ |
341 vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \ | 340 vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \ |
342 const vector signed short vr##j = vec_sra(temps2##i, v_4) | 341 const vector signed short vr##j = vec_sra(temps2##i, v_4) |
343 | 342 |
344 COMPUTE_VR(0, 1, 2); | 343 COMPUTE_VR(0, 1, 2); |
345 COMPUTE_VR(1, 2, 3); | 344 COMPUTE_VR(1, 2, 3); |
346 COMPUTE_VR(2, 3, 4); | 345 COMPUTE_VR(2, 3, 4); |
347 COMPUTE_VR(3, 4, 5); | 346 COMPUTE_VR(3, 4, 5); |
348 COMPUTE_VR(4, 5, 6); | 347 COMPUTE_VR(4, 5, 6); |
349 COMPUTE_VR(5, 6, 7); | 348 COMPUTE_VR(5, 6, 7); |
350 COMPUTE_VR(6, 7, 8); | 349 COMPUTE_VR(6, 7, 8); |
351 COMPUTE_VR(7, 8, 9); | 350 COMPUTE_VR(7, 8, 9); |
352 | 351 |
353 const vector signed char neg1 = vec_splat_s8(-1); | 352 const vector signed char neg1 = vec_splat_s8(-1); |
354 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | 353 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
355 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | 354 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); |
356 | 355 |
357 #define PACK_AND_STORE(i) \ | 356 #define PACK_AND_STORE(i) \ |
358 { const vector unsigned char perms##i = \ | 357 { const vector unsigned char perms##i = \ |
359 vec_lvsr(i * stride, src2); \ | 358 vec_lvsr(i * stride, src2); \ |
360 const vector unsigned char vf##i = \ | 359 const vector unsigned char vf##i = \ |
377 vec_packsu(vr##i, (vector signed short)zero); \ | 376 vec_packsu(vr##i, (vector signed short)zero); \ |
378 const vector unsigned char vg##i = \ | 377 const vector unsigned char vg##i = \ |
379 vec_perm(vf##i, vbT##i, permHH); \ | 378 vec_perm(vf##i, vbT##i, permHH); \ |
380 vec_st(vg##i, i * stride, src2);} | 379 vec_st(vg##i, i * stride, src2);} |
381 | 380 |
382 /* Special-casing the aligned case is worthwhile, as all calls from | 381 /* Special-casing the aligned case is worthwhile, as all calls from |
383 * the (transposed) horizontable deblocks will be aligned, in addition | 382 * the (transposed) horizontable deblocks will be aligned, in addition |
384 * to the naturally aligned vertical deblocks. */ | 383 * to the naturally aligned vertical deblocks. */ |
385 if (properStride && srcAlign) { | 384 if (properStride && srcAlign) { |
386 PACK_AND_STORE_ALIGNED(1) | 385 PACK_AND_STORE_ALIGNED(1) |
387 PACK_AND_STORE_ALIGNED(2) | 386 PACK_AND_STORE_ALIGNED(2) |
388 PACK_AND_STORE_ALIGNED(3) | 387 PACK_AND_STORE_ALIGNED(3) |
389 PACK_AND_STORE_ALIGNED(4) | 388 PACK_AND_STORE_ALIGNED(4) |
390 PACK_AND_STORE_ALIGNED(5) | 389 PACK_AND_STORE_ALIGNED(5) |
391 PACK_AND_STORE_ALIGNED(6) | 390 PACK_AND_STORE_ALIGNED(6) |
392 PACK_AND_STORE_ALIGNED(7) | 391 PACK_AND_STORE_ALIGNED(7) |
393 PACK_AND_STORE_ALIGNED(8) | 392 PACK_AND_STORE_ALIGNED(8) |
394 } else { | 393 } else { |
395 PACK_AND_STORE(1) | 394 PACK_AND_STORE(1) |
396 PACK_AND_STORE(2) | 395 PACK_AND_STORE(2) |
397 PACK_AND_STORE(3) | 396 PACK_AND_STORE(3) |
398 PACK_AND_STORE(4) | 397 PACK_AND_STORE(4) |
399 PACK_AND_STORE(5) | 398 PACK_AND_STORE(5) |
400 PACK_AND_STORE(6) | 399 PACK_AND_STORE(6) |
401 PACK_AND_STORE(7) | 400 PACK_AND_STORE(7) |
402 PACK_AND_STORE(8) | 401 PACK_AND_STORE(8) |
402 } | |
403 #undef PACK_AND_STORE | |
404 #undef PACK_AND_STORE_ALIGNED | |
403 } | 405 } |
404 #undef PACK_AND_STORE | |
405 #undef PACK_AND_STORE_ALIGNED | |
406 } | |
407 } | 406 } |
408 | 407 |
409 | 408 |
410 | 409 |
411 static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c) { | 410 static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c) { |