Mercurial > libavcodec.hg
comparison ppc/dsputil_altivec.c @ 884:2cef5c4c0ca6 libavcodec
* altivec and pix_norm patch by Brian Foley
author | kabi |
---|---|
date | Fri, 22 Nov 2002 07:53:06 +0000 |
parents | 6ea69518e5f7 |
children | fd31916942ef |
comparison legend: equal | deleted | inserted | replaced
883:b0d29bf1cecd | 884:2cef5c4c0ca6 |
---|---|
135 uint8_t *pix3 = pix2 + line_size; | 135 uint8_t *pix3 = pix2 + line_size; |
136 vector unsigned char *tv, avgv, t5, zero; | 136 vector unsigned char *tv, avgv, t5, zero; |
137 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; | 137 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; |
138 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; | 138 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; |
139 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; | 139 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; |
140 vector unsigned short avghv, avglv, two, shift_mask; | 140 vector unsigned short avghv, avglv, two; |
141 vector unsigned short t1, t2, t3, t4; | 141 vector unsigned short t1, t2, t3, t4; |
142 vector unsigned int sad; | 142 vector unsigned int sad; |
143 vector signed int sumdiffs; | 143 vector signed int sumdiffs; |
144 | 144 |
145 shift_mask = (vector unsigned short) (0x3fff, 0x3fff, 0x3fff, 0x3fff, | |
146 0x3fff, 0x3fff, 0x3fff, 0x3fff); | |
147 zero = vec_splat_u8(0); | 145 zero = vec_splat_u8(0); |
148 two = vec_splat_u16(2); | 146 two = vec_splat_u16(2); |
149 sad = vec_splat_u32(0); | 147 sad = vec_splat_u32(0); |
150 | 148 |
151 s = 0; | 149 s = 0; |
203 | 201 |
204 /* Do the averaging on them */ | 202 /* Do the averaging on them */ |
205 t3 = vec_add(pix3hv, pix3ihv); | 203 t3 = vec_add(pix3hv, pix3ihv); |
206 t4 = vec_add(pix3lv, pix3ilv); | 204 t4 = vec_add(pix3lv, pix3ilv); |
207 | 205 |
208 avghv = vec_add(vec_add(t1, t3), two); | 206 avghv = vec_sr(vec_add(vec_add(t1, t3), two), two); |
209 avghv= vec_and(vec_srl(avghv, two), shift_mask); | 207 avglv = vec_sr(vec_add(vec_add(t2, t4), two), two); |
210 | |
211 avglv = vec_add(vec_add(t2, t4), two); | |
212 avglv = vec_and(vec_srl(avglv, two), shift_mask); | |
213 | 208 |
214 /* Pack the shorts back into a result */ | 209 /* Pack the shorts back into a result */ |
215 avgv = vec_pack(avghv, avglv); | 210 avgv = vec_pack(avghv, avglv); |
216 | 211 |
217 /* Calculate a sum of abs differences vector */ | 212 /* Calculate a sum of abs differences vector */ |
321 int pix_norm1_altivec(uint8_t *pix, int line_size) | 316 int pix_norm1_altivec(uint8_t *pix, int line_size) |
322 { | 317 { |
323 int s, i; | 318 int s, i; |
324 vector unsigned char *tv, zero; | 319 vector unsigned char *tv, zero; |
325 vector unsigned char pixv; | 320 vector unsigned char pixv; |
326 vector unsigned short pixlv, pixhv, zeros; | |
327 vector unsigned int sv; | 321 vector unsigned int sv; |
328 vector signed int sum; | 322 vector signed int sum; |
329 vector unsigned char perm_stoint_h = (vector unsigned char) | |
330 (16, 16, 0, 1, 16, 16, 2, 3, 16, 16, 4, 5, 16, 16, 6, 7); | |
331 | |
332 vector unsigned char perm_stoint_l = (vector unsigned char) | |
333 (16, 16, 8, 9, 16, 16, 10, 11, 16, 16, 12, 13, 16, 16, 14, 15); | |
334 | 323 |
335 zero = vec_splat_u8(0); | 324 zero = vec_splat_u8(0); |
336 zeros = vec_splat_u16(0); | |
337 sv = vec_splat_u32(0); | 325 sv = vec_splat_u32(0); |
338 | 326 |
339 s = 0; | 327 s = 0; |
340 for (i = 0; i < 16; i++) { | 328 for (i = 0; i < 16; i++) { |
341 /* Read in the potentially unaligned pixels */ | 329 /* Read in the potentially unaligned pixels */ |
342 tv = (vector unsigned char *) pix; | 330 tv = (vector unsigned char *) pix; |
343 pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix)); | 331 pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix)); |
344 | 332 |
345 /* Split them into two vectors of shorts */ | 333 /* Square the values, and add them to our sum */ |
346 pixhv = (vector unsigned short) vec_mergeh(zero, pixv); | 334 sv = vec_msum(pixv, pixv, sv); |
347 pixlv = (vector unsigned short) vec_mergel(zero, pixv); | |
348 | |
349 | |
350 /* Square the values and add them to our sum */ | |
351 sv = vec_msum(pixhv, pixhv, sv); | |
352 sv = vec_msum(pixlv, pixlv, sv); | |
353 | 335 |
354 pix += line_size; | 336 pix += line_size; |
355 } | 337 } |
356 /* Sum up the four partial sums, and put the result into s */ | 338 /* Sum up the four partial sums, and put the result into s */ |
357 sum = vec_sums((vector signed int) sv, (vector signed int) zero); | 339 sum = vec_sums((vector signed int) sv, (vector signed int) zero); |
358 sum = vec_splat(sum, 3); | 340 sum = vec_splat(sum, 3); |
359 vec_ste(sum, 0, &s); | 341 vec_ste(sum, 0, &s); |
360 | 342 |
361 return s; | 343 return s; |
362 } | 344 } |
345 | |
346 | |
347 int pix_norm_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) | |
348 { | |
349 int s, i; | |
350 vector unsigned char *tv, zero; | |
351 vector unsigned char pix1v, pix2v, t5; | |
352 vector unsigned int sv; | |
353 vector signed int sum; | |
354 | |
355 zero = vec_splat_u8(0); | |
356 sv = vec_splat_u32(0); | |
357 s = 0; | |
358 for (i = 0; i < 16; i++) { | |
359 /* Read in the potentially unaligned pixels */ | |
360 tv = (vector unsigned char *) pix1; | |
361 pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); | |
362 | |
363 tv = (vector unsigned char *) pix2; | |
364 pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix2)); | |
365 | |
366 /* | |
367 Since we want to use unsigned chars, we can take advantage | |
368 of the fact that abs(a-b)^2 = (a-b)^2. | |
369 */ | |
370 | |
371 /* Calculate a sum of abs differences vector */ | |
372 t5 = vec_sub(vec_max(pix1v, pix2v), vec_min(pix1v, pix2v)); | |
373 | |
374 /* Square the values and add them to our sum */ | |
375 sv = vec_msum(t5, t5, sv); | |
376 | |
377 pix1 += line_size; | |
378 pix2 += line_size; | |
379 } | |
380 /* Sum up the four partial sums, and put the result into s */ | |
381 sum = vec_sums((vector signed int) sv, (vector signed int) zero); | |
382 sum = vec_splat(sum, 3); | |
383 vec_ste(sum, 0, &s); | |
384 return s; | |
385 } | |
386 | |
363 | 387 |
364 int pix_sum_altivec(UINT8 * pix, int line_size) | 388 int pix_sum_altivec(UINT8 * pix, int line_size) |
365 { | 389 { |
366 | 390 |
367 vector unsigned char perm, *pixv; | 391 vector unsigned char perm, *pixv; |