comparison ppc/dsputil_altivec.c @ 3252:0b482ccd7f0e libavcodec

hadamard8_diff* enabled on linux/ppc
author lu_zero
date Fri, 07 Apr 2006 12:40:28 +0000
parents 0b546eab515d
children 052765f11f1c
comparison
equal deleted inserted replaced
3251:ef3ba591d0cc 3252:0b482ccd7f0e
1306 1306
1307 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); 1307 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1308 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 1308 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1309 } 1309 }
1310 1310
1311 #ifdef CONFIG_DARWIN
1312 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ 1311 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1313 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); 1312 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
1314 int sum; 1313 int sum;
1315 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); 1314 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
1316 register const_vector unsigned char vzero = (const_vector unsigned char)vec_splat_u8(0); 1315 register const_vector unsigned char vzero = (const_vector unsigned char)vec_splat_u8(0);
1440 */ 1439 */
1441 1440
1442 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { 1441 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) {
1443 int sum; 1442 int sum;
1444 register vector signed short 1443 register vector signed short
1445 temp0 asm ("v0"), 1444 temp0 REG_v(v0),
1446 temp1 asm ("v1"), 1445 temp1 REG_v(v1),
1447 temp2 asm ("v2"), 1446 temp2 REG_v(v2),
1448 temp3 asm ("v3"), 1447 temp3 REG_v(v3),
1449 temp4 asm ("v4"), 1448 temp4 REG_v(v4),
1450 temp5 asm ("v5"), 1449 temp5 REG_v(v5),
1451 temp6 asm ("v6"), 1450 temp6 REG_v(v6),
1452 temp7 asm ("v7"); 1451 temp7 REG_v(v7);
1453 register vector signed short 1452 register vector signed short
1454 temp0S asm ("v8"), 1453 temp0S REG_v(v8),
1455 temp1S asm ("v9"), 1454 temp1S REG_v(v9),
1456 temp2S asm ("v10"), 1455 temp2S REG_v(v10),
1457 temp3S asm ("v11"), 1456 temp3S REG_v(v11),
1458 temp4S asm ("v12"), 1457 temp4S REG_v(v12),
1459 temp5S asm ("v13"), 1458 temp5S REG_v(v13),
1460 temp6S asm ("v14"), 1459 temp6S REG_v(v14),
1461 temp7S asm ("v15"); 1460 temp7S REG_v(v15);
1462 register const_vector unsigned char vzero asm ("v31")= (const_vector unsigned char)vec_splat_u8(0); 1461 register const_vector unsigned char vzero REG_v(v31)= (const_vector unsigned char)vec_splat_u8(0);
1463 { 1462 {
1464 register const_vector signed short vprod1 asm ("v16")= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); 1463 register const_vector signed short vprod1 REG_v(v16)= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
1465 register const_vector signed short vprod2 asm ("v17")= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); 1464 register const_vector signed short vprod2 REG_v(v17)= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
1466 register const_vector signed short vprod3 asm ("v18")= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); 1465 register const_vector signed short vprod3 REG_v(v18)= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
1467 register const_vector unsigned char perm1 asm ("v19")= (const_vector unsigned char) 1466 register const_vector unsigned char perm1 REG_v(v19)= (const_vector unsigned char)
1468 AVV(0x02, 0x03, 0x00, 0x01, 1467 AVV(0x02, 0x03, 0x00, 0x01,
1469 0x06, 0x07, 0x04, 0x05, 1468 0x06, 0x07, 0x04, 0x05,
1470 0x0A, 0x0B, 0x08, 0x09, 1469 0x0A, 0x0B, 0x08, 0x09,
1471 0x0E, 0x0F, 0x0C, 0x0D); 1470 0x0E, 0x0F, 0x0C, 0x0D);
1472 register const_vector unsigned char perm2 asm ("v20")= (const_vector unsigned char) 1471 register const_vector unsigned char perm2 REG_v(v20)= (const_vector unsigned char)
1473 AVV(0x04, 0x05, 0x06, 0x07, 1472 AVV(0x04, 0x05, 0x06, 0x07,
1474 0x00, 0x01, 0x02, 0x03, 1473 0x00, 0x01, 0x02, 0x03,
1475 0x0C, 0x0D, 0x0E, 0x0F, 1474 0x0C, 0x0D, 0x0E, 0x0F,
1476 0x08, 0x09, 0x0A, 0x0B); 1475 0x08, 0x09, 0x0A, 0x0B);
1477 register const_vector unsigned char perm3 asm ("v21")= (const_vector unsigned char) 1476 register const_vector unsigned char perm3 REG_v(v21)= (const_vector unsigned char)
1478 AVV(0x08, 0x09, 0x0A, 0x0B, 1477 AVV(0x08, 0x09, 0x0A, 0x0B,
1479 0x0C, 0x0D, 0x0E, 0x0F, 1478 0x0C, 0x0D, 0x0E, 0x0F,
1480 0x00, 0x01, 0x02, 0x03, 1479 0x00, 0x01, 0x02, 0x03,
1481 0x04, 0x05, 0x06, 0x07); 1480 0x04, 0x05, 0x06, 0x07);
1482 1481
1483 #define ONEITERBUTTERFLY(i, res1, res2) \ 1482 #define ONEITERBUTTERFLY(i, res1, res2) \
1484 { \ 1483 { \
1485 register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \ 1484 register vector unsigned char src1 REG_v(v22), src2 REG_v(v23); \
1486 register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \ 1485 register vector unsigned char dst1 REG_v(v24), dst2 REG_v(v25); \
1487 src1 = vec_ld(stride * i, src); \ 1486 src1 = vec_ld(stride * i, src); \
1488 src2 = vec_ld((stride * i) + 16, src); \ 1487 src2 = vec_ld((stride * i) + 16, src); \
1489 register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ 1488 register vector unsigned char srcO REG_v(v22) = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
1490 dst1 = vec_ld(stride * i, dst); \ 1489 dst1 = vec_ld(stride * i, dst); \
1491 dst2 = vec_ld((stride * i) + 16, dst); \ 1490 dst2 = vec_ld((stride * i) + 16, dst); \
1492 register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ 1491 register vector unsigned char dstO REG_v(v23) = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
1493 /* promote the unsigned chars to signed shorts */ \ 1492 /* promote the unsigned chars to signed shorts */ \
1494 register vector signed short srcV asm ("v24") = \ 1493 register vector signed short srcV REG_v(v24) = \
1495 (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ 1494 (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
1496 register vector signed short dstV asm ("v25") = \ 1495 register vector signed short dstV REG_v(v25) = \
1497 (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ 1496 (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
1498 register vector signed short srcW asm ("v26") = \ 1497 register vector signed short srcW REG_v(v26) = \
1499 (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \ 1498 (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \
1500 register vector signed short dstW asm ("v27") = \ 1499 register vector signed short dstW REG_v(v27) = \
1501 (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \ 1500 (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \
1502 /* subtractions inside the first butterfly */ \ 1501 /* subtractions inside the first butterfly */ \
1503 register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \ 1502 register vector signed short but0 REG_v(v28) = vec_sub(srcV, dstV); \
1504 register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \ 1503 register vector signed short but0S REG_v(v29) = vec_sub(srcW, dstW); \
1505 register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \ 1504 register vector signed short op1 REG_v(v30) = vec_perm(but0, but0, perm1); \
1506 register vector signed short but1 asm ("v22") = vec_mladd(but0, vprod1, op1); \ 1505 register vector signed short but1 REG_v(v22) = vec_mladd(but0, vprod1, op1); \
1507 register vector signed short op1S asm ("v23") = vec_perm(but0S, but0S, perm1); \ 1506 register vector signed short op1S REG_v(v23) = vec_perm(but0S, but0S, perm1); \
1508 register vector signed short but1S asm ("v24") = vec_mladd(but0S, vprod1, op1S); \ 1507 register vector signed short but1S REG_v(v24) = vec_mladd(but0S, vprod1, op1S); \
1509 register vector signed short op2 asm ("v25") = vec_perm(but1, but1, perm2); \ 1508 register vector signed short op2 REG_v(v25) = vec_perm(but1, but1, perm2); \
1510 register vector signed short but2 asm ("v26") = vec_mladd(but1, vprod2, op2); \ 1509 register vector signed short but2 REG_v(v26) = vec_mladd(but1, vprod2, op2); \
1511 register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \ 1510 register vector signed short op2S REG_v(v27) = vec_perm(but1S, but1S, perm2); \
1512 register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \ 1511 register vector signed short but2S REG_v(v28) = vec_mladd(but1S, vprod2, op2S); \
1513 register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \ 1512 register vector signed short op3 REG_v(v29) = vec_perm(but2, but2, perm3); \
1514 res1 = vec_mladd(but2, vprod3, op3); \ 1513 res1 = vec_mladd(but2, vprod3, op3); \
1515 register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \ 1514 register vector signed short op3S REG_v(v30) = vec_perm(but2S, but2S, perm3); \
1516 res2 = vec_mladd(but2S, vprod3, op3S); \ 1515 res2 = vec_mladd(but2S, vprod3, op3S); \
1517 } 1516 }
1518 ONEITERBUTTERFLY(0, temp0, temp0S); 1517 ONEITERBUTTERFLY(0, temp0, temp0S);
1519 ONEITERBUTTERFLY(1, temp1, temp1S); 1518 ONEITERBUTTERFLY(1, temp1, temp1S);
1520 ONEITERBUTTERFLY(2, temp2, temp2S); 1519 ONEITERBUTTERFLY(2, temp2, temp2S);
1616 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); 1615 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
1617 } 1616 }
1618 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); 1617 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
1619 return score; 1618 return score;
1620 } 1619 }
1621 #endif //CONFIG_DARWIN
1622 1620
1623 int has_altivec(void) 1621 int has_altivec(void)
1624 { 1622 {
1625 #ifdef __AMIGAOS4__ 1623 #ifdef __AMIGAOS4__
1626 ULONG result = 0; 1624 ULONG result = 0;