comparison: ppc/dsputil_altivec.c @ 2979:bfabfdf9ce55 (libavcodec)

COSMETICS: tabs --> spaces, some prettyprinting

author:   diego
date:     Thu, 22 Dec 2005 01:10:11 +0000
parents:  ef2149182f1c
children: 0b546eab515d
comparing 2978:403183bbb505 with 2979:bfabfdf9ce55
@@ lines 65-75 @@
    sad = (vector unsigned int)vec_splat_u32(0);
    for(i=0;i<h;i++) {
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]-pix1[15]
           pix2v: pix2[0]-pix2[15]    pix2iv: pix2[1]-pix2[16]
        */
        tv = (vector unsigned char *) pix1;
        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));

        tv = (vector unsigned char *) &pix2[0];
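The tv / vec_lvsl / vec_perm sequence above is the classic AltiVec idiom for reading 16 bytes from an arbitrary address: vec_ld ignores the low four address bits, so two aligned loads cover the data and a permute shifts the wanted bytes into place. A minimal self-contained sketch of the idiom, with a helper name (load_unaligned) that is ours rather than the file's:

    #include <altivec.h>

    /* Load 16 bytes from a possibly unaligned pointer. vec_ld(0, p) and
       vec_ld(15, p) fetch the aligned quadword(s) covering p..p+15, and
       vec_lvsl(0, p) builds the permute that shifts the data into place.
       When p is already 16-byte aligned, both loads hit the same quadword
       and the permute is the identity. */
    static vector unsigned char load_unaligned(const unsigned char *p)
    {
        vector unsigned char hi = vec_ld(0, p);
        vector unsigned char lo = vec_ld(15, p);
        return vec_perm(hi, lo, vec_lvsl(0, p));
    }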
@@ lines 182-192 @@
       Due to the fact that pix3 = pix2 + line_size, the pix3 of one
       iteration becomes pix2 in the next iteration. We can use this
       fact to avoid a potentially expensive unaligned read, as well
       as some splitting and vector addition each time around the loop.
       Read unaligned pixels into our vectors. The vectors are as follows:
       pix2v: pix2[0]-pix2[15]    pix2iv: pix2[1]-pix2[16]
       Split the pixel vectors into shorts
    */
    tv = (vector unsigned char *) &pix2[0];
    pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));

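Rotating pix3's freshly loaded (and split) vectors into pix2's role at the bottom of the loop is a small software-pipelining trick: every row is loaded and widened exactly once, even though two consecutive rows enter each iteration's average. A hedged skeleton of the pattern, reusing the hypothetical load_unaligned helper from above:

    /* Skeleton only: row i+1 is loaded once, used as the lower row now,
       then recycled as the upper row of the next iteration. */
    vector unsigned char cur = load_unaligned(pix2);          /* row 0   */
    for (i = 0; i < h; i++) {
        vector unsigned char next = load_unaligned(pix3);     /* row i+1 */
        /* ... average cur and next, accumulate the SAD ... */
        cur = next;          /* next iteration's pix2v is this pix3v */
        pix3 += line_size;
    }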
@@ lines 202-212 @@

    for(i=0;i<h;i++) {
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]-pix1[15]
           pix3v: pix3[0]-pix3[15]    pix3iv: pix3[1]-pix3[16]
        */
        tv = (vector unsigned char *) pix1;
        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));

        tv = (vector unsigned char *) &pix3[0];
@@ lines 271-294 @@

    sad = (vector unsigned int)vec_splat_u32(0);


    for(i=0;i<h;i++) {
        /* Read potentially unaligned pixels into t1 and t2 */
        perm1 = vec_lvsl(0, pix1);
        pix1v = (vector unsigned char *) pix1;
        perm2 = vec_lvsl(0, pix2);
        pix2v = (vector unsigned char *) pix2;
        t1 = vec_perm(pix1v[0], pix1v[1], perm1);
        t2 = vec_perm(pix2v[0], pix2v[1], perm2);

        /* Calculate a sum of abs differences vector */
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        pix2 += line_size;
    }
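For unsigned bytes, max(a,b) - min(a,b) equals |a - b| and can never underflow, which is why the code needs no widening before the subtract; vec_sum4s then folds each group of four byte differences into one of four 32-bit accumulator lanes. A scalar reference for what one iteration computes (our illustration, not part of the file):

    /* Scalar equivalent of one row of the 16-wide vector SAD loop. */
    static unsigned int row_sad16(const unsigned char *pix1,
                                  const unsigned char *pix2)
    {
        unsigned int s = 0;
        for (int j = 0; j < 16; j++) {
            unsigned char a = pix1[j], b = pix2[j];
            s += (a > b) ? (a - b) : (b - a);   /* max - min == |a - b| */
        }
        return s;
    }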
@@ lines 314-339 @@
    sad = (vector unsigned int)vec_splat_u32(0);

    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);

    for(i=0;i<h;i++) {
        /* Read potentially unaligned pixels into t1 and t2
           Since we're reading 16 pixels, and actually only want 8,
           mask out the last 8 pixels. The 0s don't change the sum. */
        perm1 = vec_lvsl(0, pix1);
        pix1v = (vector unsigned char *) pix1;
        perm2 = vec_lvsl(0, pix2);
        pix2v = (vector unsigned char *) pix2;
        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);

        /* Calculate a sum of abs differences vector */
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        pix2 += line_size;
    }
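Masking both inputs with permclear zeroes bytes 8-15, so those lanes contribute |0 - 0| = 0 to the accumulator and the full 16-lane SAD machinery is reused unchanged for 8-pixel-wide blocks. The AVV macro papers over the two vector-literal syntaxes of the period; in ffmpeg's gcc_fixes.h it expands roughly like this (paraphrased from memory, not quoted from this diff):

    #ifdef CONFIG_DARWIN
    #define AVV(x...) (x)    /* Apple/Motorola syntax: (vector type)(a, b, ...) */
    #else
    #define AVV(x...) {x}    /* GCC syntax: (vector type){a, b, ...} */
    #endif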
@@ lines 396-408 @@

    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);


    for(i=0;i<h;i++) {
        /* Read potentially unaligned pixels into t1 and t2
           Since we're reading 16 pixels, and actually only want 8,
           mask out the last 8 pixels. The 0s don't change the sum. */
        perm1 = vec_lvsl(0, pix1);
        pix1v = (vector unsigned char *) pix1;
        perm2 = vec_lvsl(0, pix2);
        pix2v = (vector unsigned char *) pix2;
        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
@@ lines 411-421 @@
        /*
           Since we want to use unsigned chars, we can take advantage
           of the fact that abs(a-b)^2 = (a-b)^2.
        */

        /* Calculate abs differences vector */
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);

        /* Square the values and add them to our sum */
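The hunk stops just short of the accumulation, but the standard AltiVec way to square 16 byte differences and accumulate them is a single vec_msum, which multiplies corresponding unsigned bytes and adds each group of four products into a 32-bit lane. A hedged sketch of how such a kernel is typically finished (the actual line is not shown in this diff):

    /* |a-b| fits in a byte, and since |a-b|^2 == (a-b)^2 the absolute
       difference can simply be multiplied by itself:
       sum[k] += t5[4k]^2 + t5[4k+1]^2 + t5[4k+2]^2 + t5[4k+3]^2 */
    sum = vec_msum(t5, t5, sum);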
@@ lines 449-459 @@
    vector signed int sumsqr;

    sum = (vector unsigned int)vec_splat_u32(0);

    for(i=0;i<h;i++) {
        /* Read potentially unaligned pixels into t1 and t2 */
        perm1 = vec_lvsl(0, pix1);
        pix1v = (vector unsigned char *) pix1;
        perm2 = vec_lvsl(0, pix2);
        pix2v = (vector unsigned char *) pix2;
        t1 = vec_perm(pix1v[0], pix1v[1], perm1);
@@ lines 462-472 @@
        /*
           Since we want to use unsigned chars, we can take advantage
           of the fact that abs(a-b)^2 = (a-b)^2.
        */

        /* Calculate abs differences vector */
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);

        /* Square the values and add them to our sum */
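The same trick drives the 16-pixel sum-of-squared-errors: once the difference is squared, its sign is irrelevant, so the unsigned max/min/sub form is safe. A scalar reference for one row (illustrative only):

    /* Scalar equivalent of one row of the 16-wide SSE loop. */
    static unsigned int row_sse16(const unsigned char *pix1,
                                  const unsigned char *pix2)
    {
        unsigned int s = 0;
        for (int j = 0; j < 16; j++) {
            int d = pix1[j] - pix2[j];
            s += d * d;    /* (a-b)^2 == |a-b|^2 */
        }
        return s;
    }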
@@ lines 496-511 @@
    int s __attribute__((aligned(16)));

    sad = (vector unsigned int)vec_splat_u32(0);

    for (i = 0; i < 16; i++) {
        /* Read the potentially unaligned 16 pixels into t1 */
        perm = vec_lvsl(0, pix);
        pixv = (vector unsigned char *) pix;
        t1 = vec_perm(pixv[0], pixv[1], perm);

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t1, sad);

        pix += line_size;
    }

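After the loop, sad still holds four 32-bit partial sums that must be folded into the aligned scalar s; the usual AltiVec epilogue is vec_sums (horizontal add into the last element), a vec_splat, and a one-element store. A hedged sketch of that epilogue (the diff cuts off before the file's own version):

    /* Fold the four partial sums into element 3, broadcast it, and
       store a single word into the 16-byte-aligned scalar. */
    vector signed int zero     = vec_splat_s32(0);
    vector signed int sumdiffs = vec_sums((vector signed int) sad, zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);
    return s;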
@@ lines 1333-1368 @@
        AVV(0x08, 0x09, 0x0A, 0x0B,
            0x0C, 0x0D, 0x0E, 0x0F,
            0x00, 0x01, 0x02, 0x03,
            0x04, 0x05, 0x06, 0x07);

#define ONEITERBUTTERFLY(i, res) \
    { \
        register vector unsigned char src1, src2, srcO; \
        register vector unsigned char dst1, dst2, dstO; \
        src1 = vec_ld(stride * i, src); \
        if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \
            src2 = vec_ld((stride * i) + 16, src); \
        srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
        dst1 = vec_ld(stride * i, dst); \
        if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \
            dst2 = vec_ld((stride * i) + 16, dst); \
        dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
        /* promote the unsigned chars to signed shorts */ \
        /* we're in the 8x8 function, we only care about the first 8 */ \
        register vector signed short srcV = \
            (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
        register vector signed short dstV = \
            (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
        /* subtractions inside the first butterfly */ \
        register vector signed short but0 = vec_sub(srcV, dstV); \
        register vector signed short op1 = vec_perm(but0, but0, perm1); \
        register vector signed short but1 = vec_mladd(but0, vprod1, op1); \
        register vector signed short op2 = vec_perm(but1, but1, perm2); \
        register vector signed short but2 = vec_mladd(but1, vprod2, op2); \
        register vector signed short op3 = vec_perm(but2, but2, perm3); \
        res = vec_mladd(but2, vprod3, op3); \
    }
    ONEITERBUTTERFLY(0, temp0);
    ONEITERBUTTERFLY(1, temp1);
    ONEITERBUTTERFLY(2, temp2);
    ONEITERBUTTERFLY(3, temp3);
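Each vec_mladd(butN, vprodN, opN) line is one butterfly stage of an 8-point Hadamard transform: the perm pairs every element with its partner at distance 1, 2, or 4, and vprod alternates +1/-1 so each output lane becomes partner + self or partner - self. A scalar sketch of the first stage (names ours), which the vector code performs on all eight lanes at once:

    /* First butterfly stage on 8 shorts, as vec_mladd computes it:
       out[j] = in[j]*prod[j] + in[partner(j)], with prod = {+1,-1,...}
       and partner(j) swapping adjacent pairs. */
    static void butterfly_stage1(short out[8], const short in[8])
    {
        for (int j = 0; j < 8; j += 2) {
            out[j]     = in[j] + in[j + 1];   /* (+1)*in[j]   + in[j+1] */
            out[j + 1] = in[j] - in[j + 1];   /* (-1)*in[j+1] + in[j]   */
        }
    }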
@@ lines 1478-1507 @@
        AVV(0x08, 0x09, 0x0A, 0x0B,
            0x0C, 0x0D, 0x0E, 0x0F,
            0x00, 0x01, 0x02, 0x03,
            0x04, 0x05, 0x06, 0x07);

#define ONEITERBUTTERFLY(i, res1, res2) \
    { \
        register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \
        register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \
        src1 = vec_ld(stride * i, src); \
        src2 = vec_ld((stride * i) + 16, src); \
        register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
        dst1 = vec_ld(stride * i, dst); \
        dst2 = vec_ld((stride * i) + 16, dst); \
        register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
        /* promote the unsigned chars to signed shorts */ \
        register vector signed short srcV asm ("v24") = \
            (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
        register vector signed short dstV asm ("v25") = \
            (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
        register vector signed short srcW asm ("v26") = \
            (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \
        register vector signed short dstW asm ("v27") = \
            (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \
        /* subtractions inside the first butterfly */ \
        register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \
        register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \
        register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \
        register vector signed short but1 asm ("v22") = vec_mladd(but0, vprod1, op1); \
        register vector signed short op1S asm ("v23") = vec_perm(but0S, but0S, perm1); \
@@ lines 1509-1521 @@
        register vector signed short op2 asm ("v25") = vec_perm(but1, but1, perm2); \
        register vector signed short but2 asm ("v26") = vec_mladd(but1, vprod2, op2); \
        register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \
        register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \
        register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \
        res1 = vec_mladd(but2, vprod3, op3); \
        register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \
        res2 = vec_mladd(but2S, vprod3, op3S); \
    }
    ONEITERBUTTERFLY(0, temp0, temp0S);
    ONEITERBUTTERFLY(1, temp1, temp1S);
    ONEITERBUTTERFLY(2, temp2, temp2S);
    ONEITERBUTTERFLY(3, temp3, temp3S);
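The asm ("v22")-style annotations pin each temporary to a specific AltiVec register. This 16-pixel variant keeps on the order of thirty vector values live inside one macro expansion, and the hand assignment, with v22-v30 deliberately recycled as earlier values die, acts as manual register allocation; the likely intent is to keep the GCC of that era from spilling to the stack, though that rationale is our reading rather than something this hunk states.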
@@ lines 1621-1636 @@
#endif //CONFIG_DARWIN

int has_altivec(void)
{
#ifdef __AMIGAOS4__
    ULONG result = 0;
    extern struct ExecIFace *IExec;

    IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
    if (result == VECTORTYPE_ALTIVEC) return 1;
    return 0;
#else /* __AMIGAOS4__ */

#ifdef CONFIG_DARWIN
    int sels[2] = {CTL_HW, HW_VECTORUNIT};
    int has_vu = 0;
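The Darwin branch is cut off here, but CTL_HW / HW_VECTORUNIT is the standard Mac OS X selector pair for asking the kernel whether a vector unit is present, and the probe completes with a single sysctl call. A hedged sketch of how the function plausibly continues (not shown in this diff; requires <sys/sysctl.h>):

    /* Query the Darwin kernel for the vector-unit flag. */
    size_t len = sizeof(has_vu);
    if (sysctl(sels, 2, &has_vu, &len, NULL, 0) == 0 && has_vu)
        return 1;
    return 0;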