comparison dsputil.c @ 6052:c90798ac28ee libavcodec

~15% faster h264_chroma_mc2/4_c(); these changes also prevent some possible out-of-array reads.
author michael
date Fri, 21 Dec 2007 10:16:22 +0000
parents 1e3b5597505a
children f4607985f888
comparison
equal deleted inserted replaced
6051:1e3b5597505a 6052:c90798ac28ee
1438 const int D=( x)*( y);\ 1438 const int D=( x)*( y);\
1439 int i;\ 1439 int i;\
1440 \ 1440 \
1441 assert(x<8 && y<8 && x>=0 && y>=0);\ 1441 assert(x<8 && y<8 && x>=0 && y>=0);\
1442 \ 1442 \
1443 if(D){\
1443 for(i=0; i<h; i++)\ 1444 for(i=0; i<h; i++)\
1444 {\ 1445 {\
1445 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ 1446 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1446 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ 1447 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1447 dst+= stride;\ 1448 dst+= stride;\
1448 src+= stride;\ 1449 src+= stride;\
1450 }\
1451 }else{\
1452 const int E= B+C;\
1453 const int step= C ? stride : 1;\
1454 for(i=0; i<h; i++)\
1455 {\
1456 OP(dst[0], (A*src[0] + E*src[step+0]));\
1457 OP(dst[1], (A*src[1] + E*src[step+1]));\
1458 dst+= stride;\
1459 src+= stride;\
1460 }\
1449 }\ 1461 }\
1450 }\ 1462 }\
1451 \ 1463 \
1452 static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ 1464 static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1453 const int A=(8-x)*(8-y);\ 1465 const int A=(8-x)*(8-y);\
1456 const int D=( x)*( y);\ 1468 const int D=( x)*( y);\
1457 int i;\ 1469 int i;\
1458 \ 1470 \
1459 assert(x<8 && y<8 && x>=0 && y>=0);\ 1471 assert(x<8 && y<8 && x>=0 && y>=0);\
1460 \ 1472 \
1473 if(D){\
1461 for(i=0; i<h; i++)\ 1474 for(i=0; i<h; i++)\
1462 {\ 1475 {\
1463 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ 1476 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1464 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ 1477 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1465 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ 1478 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
1466 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ 1479 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
1467 dst+= stride;\ 1480 dst+= stride;\
1468 src+= stride;\ 1481 src+= stride;\
1482 }\
1483 }else{\
1484 const int E= B+C;\
1485 const int step= C ? stride : 1;\
1486 for(i=0; i<h; i++)\
1487 {\
1488 OP(dst[0], (A*src[0] + E*src[step+0]));\
1489 OP(dst[1], (A*src[1] + E*src[step+1]));\
1490 OP(dst[2], (A*src[2] + E*src[step+2]));\
1491 OP(dst[3], (A*src[3] + E*src[step+3]));\
1492 dst+= stride;\
1493 src+= stride;\
1494 }\
1469 }\ 1495 }\
1470 }\ 1496 }\
1471 \ 1497 \
1472 static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ 1498 static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1473 const int A=(8-x)*(8-y);\ 1499 const int A=(8-x)*(8-y);\