comparison ppc/dsputil_altivec.c @ 1949:66215baae7b9 libavcodec

hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
author michael
date Tue, 20 Apr 2004 17:05:12 +0000
parents b370288f004d
children 2599b8444831
comparison
equal deleted inserted replaced
1948:d10578f7fd40 1949:66215baae7b9
1 /* 1 /*
2 * Copyright (c) 2002 Brian Foley 2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley 3 * Copyright (c) 2002 Dieter Shirley
4 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> 4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5 * 5 *
6 * This library is free software; you can redistribute it and/or 6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public 7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either 8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version. 9 * version 2 of the License, or (at your option) any later version.
1300 1300
1301 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); 1301 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1302 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 1302 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1303 } 1303 }
1304 1304
/*
 * Sum of the absolute values of the 8x8 Hadamard transform of (src - dst),
 * computed with AltiVec on signed 16-bit elements.
 *
 *  s      - not read by this implementation.
 *  dst    - first 8x8 block of bytes; rows are `stride` bytes apart and
 *           may start at any alignment.
 *  src    - second 8x8 block of bytes, same layout as dst.
 *  stride - byte distance between consecutive rows of both blocks.
 *  h      - not read; exactly eight rows are always processed.
 *
 * Returns the total of |coefficient| over the 64 transformed values.
 */
1305 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1306 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
1307 int sum;
1308 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
1309 {
1310 const_vector unsigned char vzero = (const_vector unsigned char)vec_splat_u8(0);
/*
 * Constants for the three horizontal butterfly stages.
 *  vprodN: +1 in the lanes that receive a sum, -1 in the lanes that
 *          receive a difference.
 *  perm1 swaps neighbouring 16-bit elements, perm2 swaps neighbouring
 *  pairs of elements, perm3 swaps the two 4-element halves.
 * The two preprocessor branches hold the same values; only the vector
 * literal syntax differs (parentheses vs. braces).
 */
1311 #ifdef CONFIG_DARWIN
1312 const_vector signed short vprod1 = (const_vector signed short)( 1,-1, 1,-1, 1,-1, 1,-1);
1313 const_vector signed short vprod2 = (const_vector signed short)( 1, 1,-1,-1, 1, 1,-1,-1);
1314 const_vector signed short vprod3 = (const_vector signed short)( 1, 1, 1, 1,-1,-1,-1,-1);
1315 const_vector unsigned char perm1 = (const_vector unsigned char)
1316 (0x02, 0x03, 0x00, 0x01,
1317 0x06, 0x07, 0x04, 0x05,
1318 0x0A, 0x0B, 0x08, 0x09,
1319 0x0E, 0x0F, 0x0C, 0x0D);
1320 const_vector unsigned char perm2 = (const_vector unsigned char)
1321 (0x04, 0x05, 0x06, 0x07,
1322 0x00, 0x01, 0x02, 0x03,
1323 0x0C, 0x0D, 0x0E, 0x0F,
1324 0x08, 0x09, 0x0A, 0x0B);
1325 const_vector unsigned char perm3 = (const_vector unsigned char)
1326 (0x08, 0x09, 0x0A, 0x0B,
1327 0x0C, 0x0D, 0x0E, 0x0F,
1328 0x00, 0x01, 0x02, 0x03,
1329 0x04, 0x05, 0x06, 0x07);
1330 #else
1331 const_vector signed short vprod1 = (const_vector signed short){ 1,-1, 1,-1, 1,-1, 1,-1};
1332 const_vector signed short vprod2 = (const_vector signed short){ 1, 1,-1,-1, 1, 1,-1,-1};
1333 const_vector signed short vprod3 = (const_vector signed short){ 1, 1, 1, 1,-1,-1,-1,-1};
1334 const_vector unsigned char perm1 = (const_vector unsigned char)
1335 {0x02, 0x03, 0x00, 0x01,
1336 0x06, 0x07, 0x04, 0x05,
1337 0x0A, 0x0B, 0x08, 0x09,
1338 0x0E, 0x0F, 0x0C, 0x0D};
1339 const_vector unsigned char perm2 = (const_vector unsigned char)
1340 {0x04, 0x05, 0x06, 0x07,
1341 0x00, 0x01, 0x02, 0x03,
1342 0x0C, 0x0D, 0x0E, 0x0F,
1343 0x08, 0x09, 0x0A, 0x0B};
1344 const_vector unsigned char perm3 = (const_vector unsigned char)
1345 {0x08, 0x09, 0x0A, 0x0B,
1346 0x0C, 0x0D, 0x0E, 0x0F,
1347 0x00, 0x01, 0x02, 0x03,
1348 0x04, 0x05, 0x06, 0x07};
1349 #endif
1350
/*
 * ONEITERBUTTERFLY(i, res): load row i of src and dst, widen the first
 * eight bytes of each to 16 bits, subtract, and run the three horizontal
 * butterfly stages; the transformed row is stored in res.
 *
 * Each stage is vec_mladd(x, vprodN, permN(x)): every lane is multiplied
 * by +1 or -1 and added to its partner lane, which yields the sum in one
 * lane of the pair and the difference in the other.
 *
 * The second quadword of a row is only fetched when the row starts more
 * than 8 bytes past a 16-byte boundary, i.e. when the eight wanted bytes
 * straddle two quadwords.
 */
1351 #define ONEITERBUTTERFLY(i, res) \
1352 { \
1353 vector unsigned char src1, src2, srcO; \
1354 vector unsigned char dst1, dst2, dstO; \
/* src2 starts as a copy of src1 so that vec_perm never reads an */ \
/* indeterminate vector when the second load below is skipped; in */ \
/* that case only lanes beyond the first eight come from src2. */ \
1355 src1 = src2 = vec_ld(stride * i, src); \
1356 if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \
1357 src2 = vec_ld((stride * i) + 16, src); \
1358 srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
/* same treatment for the dst row */ \
1359 dst1 = dst2 = vec_ld(stride * i, dst); \
1360 if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \
1361 dst2 = vec_ld((stride * i) + 16, dst); \
1362 dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
1363 /* promote the unsigned chars to signed shorts */ \
1364 /* we're in the 8x8 function, we only care for the first 8 */ \
1365 vector signed short srcV = \
1366 (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
1367 vector signed short dstV = \
1368 (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
1369 /* subtractions inside the first butterfly */ \
1370 vector signed short but0 = vec_sub(srcV, dstV); \
1371 vector signed short op1 = vec_perm(but0, but0, perm1); \
1372 vector signed short but1 = vec_mladd(but0, vprod1, op1); \
1373 vector signed short op2 = vec_perm(but1, but1, perm2); \
1374 vector signed short but2 = vec_mladd(but1, vprod2, op2); \
1375 vector signed short op3 = vec_perm(but2, but2, perm3); \
1376 res = vec_mladd(but2, vprod3, op3); \
1377 }
/* horizontal pass: one transformed row per temp vector */
1378 vector signed short temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
1379 ONEITERBUTTERFLY(0, temp0);
1380 ONEITERBUTTERFLY(1, temp1);
1381 ONEITERBUTTERFLY(2, temp2);
1382 ONEITERBUTTERFLY(3, temp3);
1383 ONEITERBUTTERFLY(4, temp4);
1384 ONEITERBUTTERFLY(5, temp5);
1385 ONEITERBUTTERFLY(6, temp6);
1386 ONEITERBUTTERFLY(7, temp7);
1387 #undef ONEITERBUTTERFLY
1388 {
1389 vector signed int vsum;
/* vertical pass, stage 1: butterflies between rows 1 apart */
1390 vector signed short line0 = vec_add(temp0, temp1);
1391 vector signed short line1 = vec_sub(temp0, temp1);
1392 vector signed short line2 = vec_add(temp2, temp3);
1393 vector signed short line3 = vec_sub(temp2, temp3);
1394 vector signed short line4 = vec_add(temp4, temp5);
1395 vector signed short line5 = vec_sub(temp4, temp5);
1396 vector signed short line6 = vec_add(temp6, temp7);
1397 vector signed short line7 = vec_sub(temp6, temp7);
1398
/* stage 2: butterflies between rows 2 apart */
1399 vector signed short line0B = vec_add(line0, line2);
1400 vector signed short line2B = vec_sub(line0, line2);
1401 vector signed short line1B = vec_add(line1, line3);
1402 vector signed short line3B = vec_sub(line1, line3);
1403 vector signed short line4B = vec_add(line4, line6);
1404 vector signed short line6B = vec_sub(line4, line6);
1405 vector signed short line5B = vec_add(line5, line7);
1406 vector signed short line7B = vec_sub(line5, line7);
1407
/* stage 3: butterflies between rows 4 apart */
1408 vector signed short line0C = vec_add(line0B, line4B);
1409 vector signed short line4C = vec_sub(line0B, line4B);
1410 vector signed short line1C = vec_add(line1B, line5B);
1411 vector signed short line5C = vec_sub(line1B, line5B);
1412 vector signed short line2C = vec_add(line2B, line6B);
1413 vector signed short line6C = vec_sub(line2B, line6B);
1414 vector signed short line3C = vec_add(line3B, line7B);
1415 vector signed short line7C = vec_sub(line3B, line7B);
1416
/* accumulate |coefficient| of every row into four 32-bit partial sums */
1417 vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
1418 vsum = vec_sum4s(vec_abs(line1C), vsum);
1419 vsum = vec_sum4s(vec_abs(line2C), vsum);
1420 vsum = vec_sum4s(vec_abs(line3C), vsum);
1421 vsum = vec_sum4s(vec_abs(line4C), vsum);
1422 vsum = vec_sum4s(vec_abs(line5C), vsum);
1423 vsum = vec_sum4s(vec_abs(line6C), vsum);
1424 vsum = vec_sum4s(vec_abs(line7C), vsum);
/* fold the four partial sums into element 3, then replicate it to */
/* every element so vec_ste stores the total whichever element the */
/* address of sum selects */
1425 vsum = vec_sums(vsum, (vector signed int)vzero);
1426 vsum = vec_splat(vsum, 3);
1427 vec_ste(vsum, 0, &sum);
1428 }
1429 }
1430 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1);
1431 return sum;
1432 }
1433
1305 int has_altivec(void) 1434 int has_altivec(void)
1306 { 1435 {
1307 #ifdef CONFIG_DARWIN 1436 #ifdef CONFIG_DARWIN
1308 int sels[2] = {CTL_HW, HW_VECTORUNIT}; 1437 int sels[2] = {CTL_HW, HW_VECTORUNIT};
1309 int has_vu = 0; 1438 int has_vu = 0;