Mercurial > libpostproc.hg
comparison postprocess_altivec_template.c @ 63:650554bacd12 libpostproc
spelling
author | vitor |
---|---|
date | Sat, 01 Dec 2007 22:21:04 +0000 |
parents | f02d76ebf1f0 |
children | 8181b013dafa |
comparison
equal
deleted
inserted
replaced
62:aac097949f15 | 63:650554bacd12 |
---|---|
192 /* | 192 /* |
193 this code makes no assumption on src or stride. | 193 this code makes no assumption on src or stride. |
194 One could remove the recomputation of the perm | 194 One could remove the recomputation of the perm |
195 vector by assuming (stride % 16) == 0, unfortunately | 195 vector by assuming (stride % 16) == 0, unfortunately |
196 this is not always true. Quite a lot of load/stores | 196 this is not always true. Quite a lot of load/stores |
197 can be removed by assuming proper alignement of | 197 can be removed by assuming proper alignment of |
198 src & stride :-( | 198 src & stride :-( |
199 */ | 199 */ |
200 uint8_t *src2 = src; | 200 uint8_t *src2 = src; |
201 const vector signed int zero = vec_splat_s32(0); | 201 const vector signed int zero = vec_splat_s32(0); |
202 const int properStride = (stride % 16); | 202 const int properStride = (stride % 16); |
380 /* | 380 /* |
381 this code makes no assumption on src or stride. | 381 this code makes no assumption on src or stride. |
382 One could remove the recomputation of the perm | 382 One could remove the recomputation of the perm |
383 vector by assuming (stride % 16) == 0, unfortunately | 383 vector by assuming (stride % 16) == 0, unfortunately |
384 this is not always true. Quite a lot of load/stores | 384 this is not always true. Quite a lot of load/stores |
385 can be removed by assuming proper alignement of | 385 can be removed by assuming proper alignment of |
386 src & stride :-( | 386 src & stride :-( |
387 */ | 387 */ |
388 uint8_t *src2 = src; | 388 uint8_t *src2 = src; |
389 const vector signed int zero = vec_splat_s32(0); | 389 const vector signed int zero = vec_splat_s32(0); |
390 DECLARE_ALIGNED(16, short, qp[8]); | 390 DECLARE_ALIGNED(16, short, qp[8]); |
467 dclamp_P, | 467 dclamp_P, |
468 vec_cmpgt(q, (vector signed short)zero)); | 468 vec_cmpgt(q, (vector signed short)zero)); |
469 const vector signed short dornotd = vec_sel((vector signed short)zero, | 469 const vector signed short dornotd = vec_sel((vector signed short)zero, |
470 dclampedfinal, | 470 dclampedfinal, |
471 vec_cmplt(absmE, vqp)); | 471 vec_cmplt(absmE, vqp)); |
472 /* add/substract to l4 and l5 */ | 472 /* add/subtract to l4 and l5 */ |
473 const vector signed short vb4minusd = vec_sub(vb4, dornotd); | 473 const vector signed short vb4minusd = vec_sub(vb4, dornotd); |
474 const vector signed short vb5plusd = vec_add(vb5, dornotd); | 474 const vector signed short vb5plusd = vec_add(vb5, dornotd); |
475 /* finally, stores */ | 475 /* finally, stores */ |
476 const vector unsigned char st4 = vec_packsu(vb4minusd, (vector signed short)zero); | 476 const vector unsigned char st4 = vec_packsu(vb4minusd, (vector signed short)zero); |
477 const vector unsigned char st5 = vec_packsu(vb5plusd, (vector signed short)zero); | 477 const vector unsigned char st5 = vec_packsu(vb5plusd, (vector signed short)zero); |
504 /* | 504 /* |
505 this code makes no assumption on src or stride. | 505 this code makes no assumption on src or stride. |
506 One could remove the recomputation of the perm | 506 One could remove the recomputation of the perm |
507 vector by assuming (stride % 16) == 0, unfortunately | 507 vector by assuming (stride % 16) == 0, unfortunately |
508 this is not always true. Quite a lot of load/stores | 508 this is not always true. Quite a lot of load/stores |
509 can be removed by assuming proper alignement of | 509 can be removed by assuming proper alignment of |
510 src & stride :-( | 510 src & stride :-( |
511 */ | 511 */ |
512 uint8_t *srcCopy = src; | 512 uint8_t *srcCopy = src; |
513 DECLARE_ALIGNED(16, uint8_t, dt[16]); | 513 DECLARE_ALIGNED(16, uint8_t, dt[16]); |
514 const vector signed int zero = vec_splat_s32(0); | 514 const vector signed int zero = vec_splat_s32(0); |