Mercurial > libavcodec.hg
comparison ppc/dsputil_altivec.c @ 3252:0b482ccd7f0e libavcodec
hadamard8_diff* enabled on linux/ppc
author | lu_zero |
---|---|
date | Fri, 07 Apr 2006 12:40:28 +0000 |
parents | 0b546eab515d |
children | 052765f11f1c |
comparison legend: equal | deleted | inserted | replaced
3251:ef3ba591d0cc | 3252:0b482ccd7f0e |
---|---|
1306 | 1306 |
1307 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | 1307 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); |
1308 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | 1308 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
1309 } | 1309 } |
1310 | 1310 |
1311 #ifdef CONFIG_DARWIN | |
1312 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ | 1311 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ |
1313 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); | 1312 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); |
1314 int sum; | 1313 int sum; |
1315 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); | 1314 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); |
1316 register const_vector unsigned char vzero = (const_vector unsigned char)vec_splat_u8(0); | 1315 register const_vector unsigned char vzero = (const_vector unsigned char)vec_splat_u8(0); |
1440 */ | 1439 */ |
1441 | 1440 |
1442 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { | 1441 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { |
1443 int sum; | 1442 int sum; |
1444 register vector signed short | 1443 register vector signed short |
1445 temp0 asm ("v0"), | 1444 temp0 REG_v(v0), |
1446 temp1 asm ("v1"), | 1445 temp1 REG_v(v1), |
1447 temp2 asm ("v2"), | 1446 temp2 REG_v(v2), |
1448 temp3 asm ("v3"), | 1447 temp3 REG_v(v3), |
1449 temp4 asm ("v4"), | 1448 temp4 REG_v(v4), |
1450 temp5 asm ("v5"), | 1449 temp5 REG_v(v5), |
1451 temp6 asm ("v6"), | 1450 temp6 REG_v(v6), |
1452 temp7 asm ("v7"); | 1451 temp7 REG_v(v7); |
1453 register vector signed short | 1452 register vector signed short |
1454 temp0S asm ("v8"), | 1453 temp0S REG_v(v8), |
1455 temp1S asm ("v9"), | 1454 temp1S REG_v(v9), |
1456 temp2S asm ("v10"), | 1455 temp2S REG_v(v10), |
1457 temp3S asm ("v11"), | 1456 temp3S REG_v(v11), |
1458 temp4S asm ("v12"), | 1457 temp4S REG_v(v12), |
1459 temp5S asm ("v13"), | 1458 temp5S REG_v(v13), |
1460 temp6S asm ("v14"), | 1459 temp6S REG_v(v14), |
1461 temp7S asm ("v15"); | 1460 temp7S REG_v(v15); |
1462 register const_vector unsigned char vzero asm ("v31")= (const_vector unsigned char)vec_splat_u8(0); | 1461 register const_vector unsigned char vzero REG_v(v31)= (const_vector unsigned char)vec_splat_u8(0); |
1463 { | 1462 { |
1464 register const_vector signed short vprod1 asm ("v16")= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); | 1463 register const_vector signed short vprod1 REG_v(v16)= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); |
1465 register const_vector signed short vprod2 asm ("v17")= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); | 1464 register const_vector signed short vprod2 REG_v(v17)= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); |
1466 register const_vector signed short vprod3 asm ("v18")= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); | 1465 register const_vector signed short vprod3 REG_v(v18)= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); |
1467 register const_vector unsigned char perm1 asm ("v19")= (const_vector unsigned char) | 1466 register const_vector unsigned char perm1 REG_v(v19)= (const_vector unsigned char) |
1468 AVV(0x02, 0x03, 0x00, 0x01, | 1467 AVV(0x02, 0x03, 0x00, 0x01, |
1469 0x06, 0x07, 0x04, 0x05, | 1468 0x06, 0x07, 0x04, 0x05, |
1470 0x0A, 0x0B, 0x08, 0x09, | 1469 0x0A, 0x0B, 0x08, 0x09, |
1471 0x0E, 0x0F, 0x0C, 0x0D); | 1470 0x0E, 0x0F, 0x0C, 0x0D); |
1472 register const_vector unsigned char perm2 asm ("v20")= (const_vector unsigned char) | 1471 register const_vector unsigned char perm2 REG_v(v20)= (const_vector unsigned char) |
1473 AVV(0x04, 0x05, 0x06, 0x07, | 1472 AVV(0x04, 0x05, 0x06, 0x07, |
1474 0x00, 0x01, 0x02, 0x03, | 1473 0x00, 0x01, 0x02, 0x03, |
1475 0x0C, 0x0D, 0x0E, 0x0F, | 1474 0x0C, 0x0D, 0x0E, 0x0F, |
1476 0x08, 0x09, 0x0A, 0x0B); | 1475 0x08, 0x09, 0x0A, 0x0B); |
1477 register const_vector unsigned char perm3 asm ("v21")= (const_vector unsigned char) | 1476 register const_vector unsigned char perm3 REG_v(v21)= (const_vector unsigned char) |
1478 AVV(0x08, 0x09, 0x0A, 0x0B, | 1477 AVV(0x08, 0x09, 0x0A, 0x0B, |
1479 0x0C, 0x0D, 0x0E, 0x0F, | 1478 0x0C, 0x0D, 0x0E, 0x0F, |
1480 0x00, 0x01, 0x02, 0x03, | 1479 0x00, 0x01, 0x02, 0x03, |
1481 0x04, 0x05, 0x06, 0x07); | 1480 0x04, 0x05, 0x06, 0x07); |
1482 | 1481 |
1483 #define ONEITERBUTTERFLY(i, res1, res2) \ | 1482 #define ONEITERBUTTERFLY(i, res1, res2) \ |
1484 { \ | 1483 { \ |
1485 register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \ | 1484 register vector unsigned char src1 REG_v(v22), src2 REG_v(v23); \ |
1486 register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \ | 1485 register vector unsigned char dst1 REG_v(v24), dst2 REG_v(v25); \ |
1487 src1 = vec_ld(stride * i, src); \ | 1486 src1 = vec_ld(stride * i, src); \ |
1488 src2 = vec_ld((stride * i) + 16, src); \ | 1487 src2 = vec_ld((stride * i) + 16, src); \ |
1489 register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ | 1488 register vector unsigned char srcO REG_v(v22) = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ |
1490 dst1 = vec_ld(stride * i, dst); \ | 1489 dst1 = vec_ld(stride * i, dst); \ |
1491 dst2 = vec_ld((stride * i) + 16, dst); \ | 1490 dst2 = vec_ld((stride * i) + 16, dst); \ |
1492 register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ | 1491 register vector unsigned char dstO REG_v(v23) = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ |
1493 /* promote the unsigned chars to signed shorts */ \ | 1492 /* promote the unsigned chars to signed shorts */ \ |
1494 register vector signed short srcV asm ("v24") = \ | 1493 register vector signed short srcV REG_v(v24) = \ |
1495 (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ | 1494 (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ |
1496 register vector signed short dstV asm ("v25") = \ | 1495 register vector signed short dstV REG_v(v25) = \ |
1497 (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ | 1496 (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ |
1498 register vector signed short srcW asm ("v26") = \ | 1497 register vector signed short srcW REG_v(v26) = \ |
1499 (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \ | 1498 (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \ |
1500 register vector signed short dstW asm ("v27") = \ | 1499 register vector signed short dstW REG_v(v27) = \ |
1501 (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \ | 1500 (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \ |
1502 /* subtractions inside the first butterfly */ \ | 1501 /* subtractions inside the first butterfly */ \ |
1503 register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \ | 1502 register vector signed short but0 REG_v(v28) = vec_sub(srcV, dstV); \ |
1504 register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \ | 1503 register vector signed short but0S REG_v(v29) = vec_sub(srcW, dstW); \ |
1505 register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \ | 1504 register vector signed short op1 REG_v(v30) = vec_perm(but0, but0, perm1); \ |
1506 register vector signed short but1 asm ("v22") = vec_mladd(but0, vprod1, op1); \ | 1505 register vector signed short but1 REG_v(v22) = vec_mladd(but0, vprod1, op1); \ |
1507 register vector signed short op1S asm ("v23") = vec_perm(but0S, but0S, perm1); \ | 1506 register vector signed short op1S REG_v(v23) = vec_perm(but0S, but0S, perm1); \ |
1508 register vector signed short but1S asm ("v24") = vec_mladd(but0S, vprod1, op1S); \ | 1507 register vector signed short but1S REG_v(v24) = vec_mladd(but0S, vprod1, op1S); \ |
1509 register vector signed short op2 asm ("v25") = vec_perm(but1, but1, perm2); \ | 1508 register vector signed short op2 REG_v(v25) = vec_perm(but1, but1, perm2); \ |
1510 register vector signed short but2 asm ("v26") = vec_mladd(but1, vprod2, op2); \ | 1509 register vector signed short but2 REG_v(v26) = vec_mladd(but1, vprod2, op2); \ |
1511 register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \ | 1510 register vector signed short op2S REG_v(v27) = vec_perm(but1S, but1S, perm2); \ |
1512 register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \ | 1511 register vector signed short but2S REG_v(v28) = vec_mladd(but1S, vprod2, op2S); \ |
1513 register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \ | 1512 register vector signed short op3 REG_v(v29) = vec_perm(but2, but2, perm3); \ |
1514 res1 = vec_mladd(but2, vprod3, op3); \ | 1513 res1 = vec_mladd(but2, vprod3, op3); \ |
1515 register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \ | 1514 register vector signed short op3S REG_v(v30) = vec_perm(but2S, but2S, perm3); \ |
1516 res2 = vec_mladd(but2S, vprod3, op3S); \ | 1515 res2 = vec_mladd(but2S, vprod3, op3S); \ |
1517 } | 1516 } |
1518 ONEITERBUTTERFLY(0, temp0, temp0S); | 1517 ONEITERBUTTERFLY(0, temp0, temp0S); |
1519 ONEITERBUTTERFLY(1, temp1, temp1S); | 1518 ONEITERBUTTERFLY(1, temp1, temp1S); |
1520 ONEITERBUTTERFLY(2, temp2, temp2S); | 1519 ONEITERBUTTERFLY(2, temp2, temp2S); |
1616 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); | 1615 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); |
1617 } | 1616 } |
1618 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); | 1617 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); |
1619 return score; | 1618 return score; |
1620 } | 1619 } |
1621 #endif //CONFIG_DARWIN | |
1622 | 1620 |
1623 int has_altivec(void) | 1621 int has_altivec(void) |
1624 { | 1622 { |
1625 #ifdef __AMIGAOS4__ | 1623 #ifdef __AMIGAOS4__ |
1626 ULONG result = 0; | 1624 ULONG result = 0; |