comparison ppc/dsputil_altivec.c @ 884:2cef5c4c0ca6 libavcodec

* altivec and pix_norm patch by Brian Foley
author kabi
date Fri, 22 Nov 2002 07:53:06 +0000
parents 6ea69518e5f7
children fd31916942ef
comparison
equal deleted inserted replaced
883:b0d29bf1cecd 884:2cef5c4c0ca6
135 uint8_t *pix3 = pix2 + line_size; 135 uint8_t *pix3 = pix2 + line_size;
136 vector unsigned char *tv, avgv, t5, zero; 136 vector unsigned char *tv, avgv, t5, zero;
137 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; 137 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
138 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; 138 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
139 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; 139 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
140 vector unsigned short avghv, avglv, two, shift_mask; 140 vector unsigned short avghv, avglv, two;
141 vector unsigned short t1, t2, t3, t4; 141 vector unsigned short t1, t2, t3, t4;
142 vector unsigned int sad; 142 vector unsigned int sad;
143 vector signed int sumdiffs; 143 vector signed int sumdiffs;
144 144
145 shift_mask = (vector unsigned short) (0x3fff, 0x3fff, 0x3fff, 0x3fff,
146 0x3fff, 0x3fff, 0x3fff, 0x3fff);
147 zero = vec_splat_u8(0); 145 zero = vec_splat_u8(0);
148 two = vec_splat_u16(2); 146 two = vec_splat_u16(2);
149 sad = vec_splat_u32(0); 147 sad = vec_splat_u32(0);
150 148
151 s = 0; 149 s = 0;
203 201
204 /* Do the averaging on them */ 202 /* Do the averaging on them */
205 t3 = vec_add(pix3hv, pix3ihv); 203 t3 = vec_add(pix3hv, pix3ihv);
206 t4 = vec_add(pix3lv, pix3ilv); 204 t4 = vec_add(pix3lv, pix3ilv);
207 205
208 avghv = vec_add(vec_add(t1, t3), two); 206 avghv = vec_sr(vec_add(vec_add(t1, t3), two), two);
209 avghv= vec_and(vec_srl(avghv, two), shift_mask); 207 avglv = vec_sr(vec_add(vec_add(t2, t4), two), two);
210
211 avglv = vec_add(vec_add(t2, t4), two);
212 avglv = vec_and(vec_srl(avglv, two), shift_mask);
213 208
214 /* Pack the shorts back into a result */ 209 /* Pack the shorts back into a result */
215 avgv = vec_pack(avghv, avglv); 210 avgv = vec_pack(avghv, avglv);
216 211
217 /* Calculate a sum of abs differences vector */ 212 /* Calculate a sum of abs differences vector */
321 int pix_norm1_altivec(uint8_t *pix, int line_size) 316 int pix_norm1_altivec(uint8_t *pix, int line_size)
322 { 317 {
323 int s, i; 318 int s, i;
324 vector unsigned char *tv, zero; 319 vector unsigned char *tv, zero;
325 vector unsigned char pixv; 320 vector unsigned char pixv;
326 vector unsigned short pixlv, pixhv, zeros;
327 vector unsigned int sv; 321 vector unsigned int sv;
328 vector signed int sum; 322 vector signed int sum;
329 vector unsigned char perm_stoint_h = (vector unsigned char)
330 (16, 16, 0, 1, 16, 16, 2, 3, 16, 16, 4, 5, 16, 16, 6, 7);
331
332 vector unsigned char perm_stoint_l = (vector unsigned char)
333 (16, 16, 8, 9, 16, 16, 10, 11, 16, 16, 12, 13, 16, 16, 14, 15);
334 323
335 zero = vec_splat_u8(0); 324 zero = vec_splat_u8(0);
336 zeros = vec_splat_u16(0);
337 sv = vec_splat_u32(0); 325 sv = vec_splat_u32(0);
338 326
339 s = 0; 327 s = 0;
340 for (i = 0; i < 16; i++) { 328 for (i = 0; i < 16; i++) {
341 /* Read in the potentially unaligned pixels */ 329 /* Read in the potentially unaligned pixels */
342 tv = (vector unsigned char *) pix; 330 tv = (vector unsigned char *) pix;
343 pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix)); 331 pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix));
344 332
345 /* Split them into two vectors of shorts */ 333 /* Square the values, and add them to our sum */
346 pixhv = (vector unsigned short) vec_mergeh(zero, pixv); 334 sv = vec_msum(pixv, pixv, sv);
347 pixlv = (vector unsigned short) vec_mergel(zero, pixv);
348
349
350 /* Square the values and add them to our sum */
351 sv = vec_msum(pixhv, pixhv, sv);
352 sv = vec_msum(pixlv, pixlv, sv);
353 335
354 pix += line_size; 336 pix += line_size;
355 } 337 }
356 /* Sum up the four partial sums, and put the result into s */ 338 /* Sum up the four partial sums, and put the result into s */
357 sum = vec_sums((vector signed int) sv, (vector signed int) zero); 339 sum = vec_sums((vector signed int) sv, (vector signed int) zero);
358 sum = vec_splat(sum, 3); 340 sum = vec_splat(sum, 3);
359 vec_ste(sum, 0, &s); 341 vec_ste(sum, 0, &s);
360 342
361 return s; 343 return s;
362 } 344 }
345
346
347 int pix_norm_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
348 {
349 int s, i;
350 vector unsigned char *tv, zero;
351 vector unsigned char pix1v, pix2v, t5;
352 vector unsigned int sv;
353 vector signed int sum;
354
355 zero = vec_splat_u8(0);
356 sv = vec_splat_u32(0);
357 s = 0;
358 for (i = 0; i < 16; i++) {
359 /* Read in the potentially unaligned pixels */
360 tv = (vector unsigned char *) pix1;
361 pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
362
363 tv = (vector unsigned char *) pix2;
364 pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix2));
365
366 /*
367 Since the pixels are unsigned chars, a-b could wrap around, so we
368 compute abs(a-b) as max(a,b) - min(a,b); abs(a-b)^2 = (a-b)^2.
369 */
370
371 /* Calculate a vector of absolute differences */
372 t5 = vec_sub(vec_max(pix1v, pix2v), vec_min(pix1v, pix2v));
373
374 /* Square the values and add them to our sum */
375 sv = vec_msum(t5, t5, sv);
376
377 pix1 += line_size;
378 pix2 += line_size;
379 }
380 /* Sum up the four partial sums, and put the result into s */
381 sum = vec_sums((vector signed int) sv, (vector signed int) zero);
382 sum = vec_splat(sum, 3);
383 vec_ste(sum, 0, &s);
384 return s;
385 }
386
363 387
364 int pix_sum_altivec(UINT8 * pix, int line_size) 388 int pix_sum_altivec(UINT8 * pix, int line_size)
365 { 389 {
366 390
367 vector unsigned char perm, *pixv; 391 vector unsigned char perm, *pixv;