comparison dsputil.c @ 1168:5af9aeadbdc3 libavcodec

H264 decoder & demuxer
author michaelni
date Fri, 04 Apr 2003 14:42:28 +0000
parents 1e39f273ecd6
children e0fc95a6eb4e
comparison
equal deleted inserted replaced
1167:35b80080b2db 1168:5af9aeadbdc3
464 464
465 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) 465 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
466 #else // 64 bit variant 466 #else // 64 bit variant
467 467
468 #define PIXOP2(OPNAME, OP) \ 468 #define PIXOP2(OPNAME, OP) \
469 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){ /* applies OP to one 4-byte word per row, for h rows */\
470 int i;\
471 for(i=0; i<h; i++){\
472 OP(*((uint32_t*)(block )), LD32(pixels )); /* destination is accessed directly as a uint32_t; the source word is read through LD32 */\
473 pixels+=line_size; /* source and destination share the same row pitch */\
474 block +=line_size;\
475 }\
476 }\
469 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 477 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
470 int i;\ 478 int i;\
471 for(i=0; i<h; i++){\ 479 for(i=0; i<h; i++){\
472 OP(*((uint32_t*)(block )), LD32(pixels ));\ 480 OP(*((uint32_t*)(block )), LD32(pixels ));\
473 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ 481 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
502 b= LD32(&src2[i*src_stride2 ]);\ 510 b= LD32(&src2[i*src_stride2 ]);\
503 OP(*((uint32_t*)&dst[i*dst_stride ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ 511 OP(*((uint32_t*)&dst[i*dst_stride ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
504 a= LD32(&src1[i*src_stride1+4]);\ 512 a= LD32(&src1[i*src_stride1+4]);\
505 b= LD32(&src2[i*src_stride2+4]);\ 513 b= LD32(&src2[i*src_stride2+4]);\
506 OP(*((uint32_t*)&dst[i*dst_stride+4]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ 514 OP(*((uint32_t*)&dst[i*dst_stride+4]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
515 }\
516 }\
517 \
518 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
519 int src_stride1, int src_stride2, int h){ /* blends two 4-byte-wide sources row by row; each of the three buffers has its own stride */\
520 int i;\
521 for(i=0; i<h; i++){\
522 uint32_t a,b;\
523 a= LD32(&src1[i*src_stride1 ]);\
524 b= LD32(&src2[i*src_stride2 ]);\
525 OP(*((uint32_t*)&dst[i*dst_stride ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1)); /* per-byte (a+b+1)>>1 on four packed bytes; the 0xFE mask keeps the shift from leaking bits across byte lanes */\
507 }\ 526 }\
508 }\ 527 }\
509 \ 528 \
510 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 529 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
511 int src_stride1, int src_stride2, int h){\ 530 int src_stride1, int src_stride2, int h){\
796 vx+= dxx; 815 vx+= dxx;
797 vy+= dyx; 816 vy+= dyx;
798 } 817 }
799 ox += dxy; 818 ox += dxy;
800 oy += dyy; 819 oy += dyy;
820 }
821 }
/*
 * H264_CHROMA_MC(OPNAME, OP) expands to three functions:
 *   OPNAME##h264_chroma_mc2_c, OPNAME##h264_chroma_mc4_c, OPNAME##h264_chroma_mc8_c
 * which produce rows of 2, 4 and 8 output bytes respectively, for h rows.
 *
 * Each output byte k is a bilinear blend of the four source bytes
 *   src[k], src[k+1], src[stride+k], src[stride+k+1]
 * with weights A, B, C, D derived from the fractional offsets x and y
 * (both asserted to be in 0..7). The weights always sum to 8*8 = 64.
 * The weighted sum is passed UNSCALED to OP(dst[k], sum); dividing by 64
 * and storing is the job of the OP macro supplied at instantiation.
 *
 * dst and src both advance by the same `stride` per row.
 * Note that every row reads one byte more than it writes (src[N]) and also
 * the corresponding bytes of the next row, even when the matching weight
 * is zero.
 */
822 #define H264_CHROMA_MC(OPNAME, OP)\
823 static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
824 const int A=(8-x)*(8-y);\
825 const int B=( x)*(8-y);\
826 const int C=(8-x)*( y);\
827 const int D=( x)*( y);\
828 int i;\
829 \
830 assert(x<8 && y<8 && x>=0 && y>=0);\
831 \
832 for(i=0; i<h; i++)\
833 {\
834 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
835 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
836 dst+= stride;\
837 src+= stride;\
838 }\
839 }\
840 \
841 static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
842 const int A=(8-x)*(8-y);\
843 const int B=( x)*(8-y);\
844 const int C=(8-x)*( y);\
845 const int D=( x)*( y);\
846 int i;\
847 \
848 assert(x<8 && y<8 && x>=0 && y>=0);\
849 \
850 for(i=0; i<h; i++)\
851 {\
852 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
853 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
854 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
855 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
856 dst+= stride;\
857 src+= stride;\
858 }\
859 }\
860 \
861 static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
862 const int A=(8-x)*(8-y);\
863 const int B=( x)*(8-y);\
864 const int C=(8-x)*( y);\
865 const int D=( x)*( y);\
866 int i;\
867 \
868 assert(x<8 && y<8 && x>=0 && y>=0);\
869 \
870 for(i=0; i<h; i++)\
871 {\
872 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
873 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
874 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
875 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
876 OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
877 OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
878 OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
879 OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
880 dst+= stride;\
881 src+= stride;\
882 }\
883 }
884
/*
 * Store operators handed to H264_CHROMA_MC. `b` is the weighted sum whose
 * weights total 64, so ((b) + 32)>>6 is a division by 64 with rounding.
 * op_put overwrites `a` with that value; op_avg averages it with the value
 * already in `a`, rounding up. Both macros are temporary and are #undef'd
 * right after the two instantiations.
 */
885 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
886 #define op_put(a, b) a = (((b) + 32)>>6)
887
888 H264_CHROMA_MC(put_ , op_put)
889 H264_CHROMA_MC(avg_ , op_avg)
890 #undef op_avg
891 #undef op_put
892
/*
 * Copies a block that is 4 bytes wide and h rows high from src to dst.
 * Each row is moved as a single word via LD32/ST32 (helpers defined
 * elsewhere; presumably 32-bit load/store - confirm at their definition).
 * dst advances by dstStride and src by srcStride after every row.
 */
893 static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
894 {
895 int i;
896 for(i=0; i<h; i++)
897 {
898 ST32(dst , LD32(src )); /* the whole 4-byte row in one transfer */
899 dst+=dstStride;
900 src+=srcStride;
901 }
902 }
903
/*
 * Copies a block that is 8 bytes wide and h rows high from src to dst.
 * Each row is moved as two words via LD32/ST32 (helpers defined elsewhere;
 * presumably 32-bit load/store - confirm at their definition).
 * dst advances by dstStride and src by srcStride after every row.
 */
904 static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
905 {
906 int i;
907 for(i=0; i<h; i++)
908 {
909 ST32(dst , LD32(src )); /* bytes 0..3 of the row */
910 ST32(dst+4 , LD32(src+4 )); /* bytes 4..7 of the row */
911 dst+=dstStride;
912 src+=srcStride;
913 }
914 }
915
/*
 * Copies a block that is 16 bytes wide and h rows high from src to dst.
 * Each row is moved as four words via LD32/ST32 (helpers defined elsewhere;
 * presumably 32-bit load/store - confirm at their definition).
 * dst advances by dstStride and src by srcStride after every row.
 */
916 static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
917 {
918 int i;
919 for(i=0; i<h; i++)
920 {
921 ST32(dst , LD32(src ));
922 ST32(dst+4 , LD32(src+4 ));
923 ST32(dst+8 , LD32(src+8 ));
924 ST32(dst+12, LD32(src+12));
925 dst+=dstStride;
926 src+=srcStride;
801 } 927 }
802 } 928 }
803 929
804 static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) 930 static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
805 { 931 {
1325 #undef op_avg 1451 #undef op_avg
1326 #undef op_avg_no_rnd 1452 #undef op_avg_no_rnd
1327 #undef op_put 1453 #undef op_put
1328 #undef op_put_no_rnd 1454 #undef op_put_no_rnd
1329 1455
1456 #if 1
/*
 * H264_LOWPASS(OPNAME, OP, OP2) expands to the nine functions
 *   OPNAME##h264_qpel{4,8,16}_{h,v,hv}_lowpass.
 *
 * All of them apply the same 6-tap filter with taps (1, -5, 20, 20, -5, 1),
 * written in the code as
 *   (s0+s1)*20 - (sA+s2)*5 + (sB+s3)
 * The taps sum to 32, so one pass has a gain of 32 and two passes 1024.
 * The functions never scale or clip the sums themselves: single-pass sums
 * go to OP(dst, sum), two-pass sums go to OP2(dst, sum).
 *
 *  _h_lowpass : filters along a row. For an N-wide block each of the N rows
 *               reads src[-2] .. src[N+2].
 *  _v_lowpass : filters along a column. For each of the N columns it reads
 *               rows -2 .. N+2 (relative to src) and writes N output rows.
 *  _hv_lowpass: pass 1 filters N+5 rows horizontally, starting two rows
 *               above src, and stores the unscaled sums in the int16_t
 *               buffer tmp (row pitch tmpStride). tmp is then rewound to
 *               the row matching the original src, and pass 2 filters tmp
 *               vertically, storing through OP2.
 *
 * The 16-wide variants just run the 8-wide variant on the four 8x8
 * quadrants. For qpel16_hv the second pair of calls starts at
 * tmp + 8*tmpStride, so tmp must provide 16+5 rows of tmpStride entries.
 *
 * `cm` (cropTbl + MAX_NEG_CROP) is not referenced directly in these bodies;
 * it is declared for the OP/OP2 macros that are substituted in.
 */
1457 #define H264_LOWPASS(OPNAME, OP, OP2) \
1458 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1459 const int h=4;\
1460 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1461 int i;\
1462 for(i=0; i<h; i++)\
1463 {\
1464 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
1465 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
1466 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
1467 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
1468 dst+=dstStride;\
1469 src+=srcStride;\
1470 }\
1471 }\
1472 \
1473 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1474 const int w=4;\
1475 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1476 int i;\
1477 for(i=0; i<w; i++)\
1478 {\
1479 const int srcB= src[-2*srcStride];\
1480 const int srcA= src[-1*srcStride];\
1481 const int src0= src[0 *srcStride];\
1482 const int src1= src[1 *srcStride];\
1483 const int src2= src[2 *srcStride];\
1484 const int src3= src[3 *srcStride];\
1485 const int src4= src[4 *srcStride];\
1486 const int src5= src[5 *srcStride];\
1487 const int src6= src[6 *srcStride];\
1488 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
1489 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
1490 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
1491 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
1492 dst++;\
1493 src++;\
1494 }\
1495 }\
1496 \
1497 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1498 const int h=4;\
1499 const int w=4;\
1500 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1501 int i;\
1502 src -= 2*srcStride;\
1503 for(i=0; i<h+5; i++)\
1504 {\
1505 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
1506 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
1507 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
1508 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
1509 tmp+=tmpStride;\
1510 src+=srcStride;\
1511 }\
1512 tmp -= tmpStride*(h+5-2);\
1513 for(i=0; i<w; i++)\
1514 {\
1515 const int tmpB= tmp[-2*tmpStride];\
1516 const int tmpA= tmp[-1*tmpStride];\
1517 const int tmp0= tmp[0 *tmpStride];\
1518 const int tmp1= tmp[1 *tmpStride];\
1519 const int tmp2= tmp[2 *tmpStride];\
1520 const int tmp3= tmp[3 *tmpStride];\
1521 const int tmp4= tmp[4 *tmpStride];\
1522 const int tmp5= tmp[5 *tmpStride];\
1523 const int tmp6= tmp[6 *tmpStride];\
1524 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
1525 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
1526 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
1527 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
1528 dst++;\
1529 tmp++;\
1530 }\
1531 }\
1532 \
1533 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1534 const int h=8;\
1535 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1536 int i;\
1537 for(i=0; i<h; i++)\
1538 {\
1539 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
1540 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
1541 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
1542 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
1543 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
1544 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
1545 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
1546 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
1547 dst+=dstStride;\
1548 src+=srcStride;\
1549 }\
1550 }\
1551 \
1552 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1553 const int w=8;\
1554 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1555 int i;\
1556 for(i=0; i<w; i++)\
1557 {\
1558 const int srcB= src[-2*srcStride];\
1559 const int srcA= src[-1*srcStride];\
1560 const int src0= src[0 *srcStride];\
1561 const int src1= src[1 *srcStride];\
1562 const int src2= src[2 *srcStride];\
1563 const int src3= src[3 *srcStride];\
1564 const int src4= src[4 *srcStride];\
1565 const int src5= src[5 *srcStride];\
1566 const int src6= src[6 *srcStride];\
1567 const int src7= src[7 *srcStride];\
1568 const int src8= src[8 *srcStride];\
1569 const int src9= src[9 *srcStride];\
1570 const int src10=src[10*srcStride];\
1571 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
1572 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
1573 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
1574 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
1575 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
1576 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
1577 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
1578 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
1579 dst++;\
1580 src++;\
1581 }\
1582 }\
1583 \
1584 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1585 const int h=8;\
1586 const int w=8;\
1587 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1588 int i;\
1589 src -= 2*srcStride;\
1590 for(i=0; i<h+5; i++)\
1591 {\
1592 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
1593 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
1594 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
1595 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
1596 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
1597 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
1598 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
1599 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
1600 tmp+=tmpStride;\
1601 src+=srcStride;\
1602 }\
1603 tmp -= tmpStride*(h+5-2);\
1604 for(i=0; i<w; i++)\
1605 {\
1606 const int tmpB= tmp[-2*tmpStride];\
1607 const int tmpA= tmp[-1*tmpStride];\
1608 const int tmp0= tmp[0 *tmpStride];\
1609 const int tmp1= tmp[1 *tmpStride];\
1610 const int tmp2= tmp[2 *tmpStride];\
1611 const int tmp3= tmp[3 *tmpStride];\
1612 const int tmp4= tmp[4 *tmpStride];\
1613 const int tmp5= tmp[5 *tmpStride];\
1614 const int tmp6= tmp[6 *tmpStride];\
1615 const int tmp7= tmp[7 *tmpStride];\
1616 const int tmp8= tmp[8 *tmpStride];\
1617 const int tmp9= tmp[9 *tmpStride];\
1618 const int tmp10=tmp[10*tmpStride];\
1619 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
1620 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
1621 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
1622 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
1623 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
1624 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
1625 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
1626 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
1627 dst++;\
1628 tmp++;\
1629 }\
1630 }\
1631 \
1632 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1633 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
1634 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
1635 src += 8*srcStride;\
1636 dst += 8*dstStride;\
1637 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
1638 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
1639 }\
1640 \
1641 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1642 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
1643 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
1644 src += 8*srcStride;\
1645 dst += 8*dstStride;\
1646 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
1647 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
1648 }\
1649 \
1650 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1651 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
1652 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
1653 src += 8*srcStride;\
1654 tmp += 8*tmpStride;\
1655 dst += 8*dstStride;\
1656 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
1657 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
1658 }\
1659
/*
 * H264_MC(OPNAME, SIZE) expands to the sixteen functions
 *   OPNAME##h264_qpel##SIZE##_mcXY_c   (X, Y in 0..3)
 * for a SIZE x SIZE block; dst and src share the single `stride` argument.
 * Judging from the bodies, X selects the horizontal and Y the vertical
 * quarter-sample position:
 *
 *   mc00            plain OPNAME##pixels##SIZE##_c on the source block
 *   mc20 / mc02     horizontal / vertical lowpass written straight to dst
 *   mc22            two-pass (hv) lowpass written straight to dst
 *   mc10 / mc30     blend of the horizontal lowpass with src / src+1
 *   mc01 / mc03     blend of the vertical lowpass with the source row block
 *                   / the block one row further down (full_mid+SIZE)
 *   mc11,31,13,33   blend of a horizontal lowpass (taken at src or
 *                   src+stride) with a vertical lowpass (taken at src or
 *                   src+1)
 *   mc21 / mc23     blend of a horizontal lowpass (src / src+stride) with
 *                   the hv lowpass
 *   mc12 / mc32     blend of a vertical lowpass (src / src+1) with the hv
 *                   lowpass
 *
 * Intermediate planes are always produced with the put_ variants into
 * local SIZE-pitch buffers; OPNAME is only applied by the final
 * pixels##SIZE##_l2 (or direct lowpass) store into dst.
 *
 * `full` is a SIZE x (SIZE+5) copy of the source starting two rows above
 * src (made with copy_block##SIZE), so the vertical filter can run with a
 * row pitch of SIZE; full_mid points at the row that corresponds to src.
 * `tmp` is the SIZE x (SIZE+5) int16_t scratch buffer required by the
 * hv lowpass.
 */
1660 #define H264_MC(OPNAME, SIZE) \
1661 static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
1662 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
1663 }\
1664 \
1665 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1666 uint8_t half[SIZE*SIZE];\
1667 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
1668 OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
1669 }\
1670 \
1671 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1672 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
1673 }\
1674 \
1675 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1676 uint8_t half[SIZE*SIZE];\
1677 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
1678 OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
1679 }\
1680 \
1681 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1682 uint8_t full[SIZE*(SIZE+5)];\
1683 uint8_t * const full_mid= full + SIZE*2;\
1684 uint8_t half[SIZE*SIZE];\
1685 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1686 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
1687 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
1688 }\
1689 \
1690 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1691 uint8_t full[SIZE*(SIZE+5)];\
1692 uint8_t * const full_mid= full + SIZE*2;\
1693 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1694 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
1695 }\
1696 \
1697 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1698 uint8_t full[SIZE*(SIZE+5)];\
1699 uint8_t * const full_mid= full + SIZE*2;\
1700 uint8_t half[SIZE*SIZE];\
1701 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1702 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
1703 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
1704 }\
1705 \
1706 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1707 uint8_t full[SIZE*(SIZE+5)];\
1708 uint8_t * const full_mid= full + SIZE*2;\
1709 uint8_t halfH[SIZE*SIZE];\
1710 uint8_t halfV[SIZE*SIZE];\
1711 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
1712 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1713 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1714 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
1715 }\
1716 \
1717 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1718 uint8_t full[SIZE*(SIZE+5)];\
1719 uint8_t * const full_mid= full + SIZE*2;\
1720 uint8_t halfH[SIZE*SIZE];\
1721 uint8_t halfV[SIZE*SIZE];\
1722 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
1723 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
1724 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1725 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
1726 }\
1727 \
1728 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1729 uint8_t full[SIZE*(SIZE+5)];\
1730 uint8_t * const full_mid= full + SIZE*2;\
1731 uint8_t halfH[SIZE*SIZE];\
1732 uint8_t halfV[SIZE*SIZE];\
1733 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
1734 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1735 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1736 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
1737 }\
1738 \
1739 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1740 uint8_t full[SIZE*(SIZE+5)];\
1741 uint8_t * const full_mid= full + SIZE*2;\
1742 uint8_t halfH[SIZE*SIZE];\
1743 uint8_t halfV[SIZE*SIZE];\
1744 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
1745 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
1746 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1747 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
1748 }\
1749 \
1750 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1751 int16_t tmp[SIZE*(SIZE+5)];\
1752 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
1753 }\
1754 \
1755 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1756 int16_t tmp[SIZE*(SIZE+5)];\
1757 uint8_t halfH[SIZE*SIZE];\
1758 uint8_t halfHV[SIZE*SIZE];\
1759 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
1760 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1761 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
1762 }\
1763 \
1764 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1765 int16_t tmp[SIZE*(SIZE+5)];\
1766 uint8_t halfH[SIZE*SIZE];\
1767 uint8_t halfHV[SIZE*SIZE];\
1768 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
1769 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1770 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
1771 }\
1772 \
1773 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1774 uint8_t full[SIZE*(SIZE+5)];\
1775 uint8_t * const full_mid= full + SIZE*2;\
1776 int16_t tmp[SIZE*(SIZE+5)];\
1777 uint8_t halfV[SIZE*SIZE];\
1778 uint8_t halfHV[SIZE*SIZE];\
1779 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1780 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1781 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1782 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
1783 }\
1784 \
1785 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1786 uint8_t full[SIZE*(SIZE+5)];\
1787 uint8_t * const full_mid= full + SIZE*2;\
1788 int16_t tmp[SIZE*(SIZE+5)];\
1789 uint8_t halfV[SIZE*SIZE];\
1790 uint8_t halfHV[SIZE*SIZE];\
1791 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
1792 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1793 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1794 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
1795 }\
1796
/*
 * Store operators for the H264_LOWPASS / H264_MC instantiations below.
 * `b` is an unscaled 6-tap filter sum:
 *   op_put / op_avg   : ((b) + 16)>>5   - rounded division by 32 (one pass)
 *   op2_put / op2_avg : ((b) + 512)>>10 - rounded division by 1024 (two passes)
 * The scaled value is looked up in cm[] before being stored; cm is the
 * local `cropTbl + MAX_NEG_CROP` pointer declared in each lowpass function
 * (presumably a clip-to-byte-range table - confirm at cropTbl's definition).
 * The *_avg forms then average the result with the value already in `a`,
 * rounding up. All four macros are #undef'd after the instantiations.
 */
1797 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1798 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
1799 #define op_put(a, b) a = cm[((b) + 16)>>5]
1800 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
1801 #define op2_put(a, b) a = cm[((b) + 512)>>10]
1802
/* Generate the put_ and avg_ lowpass filters, then the mcXY entry points for block sizes 4, 8 and 16. */
1803 H264_LOWPASS(put_ , op_put, op2_put)
1804 H264_LOWPASS(avg_ , op_avg, op2_avg)
1805 H264_MC(put_, 4)
1806 H264_MC(put_, 8)
1807 H264_MC(put_, 16)
1808 H264_MC(avg_, 4)
1809 H264_MC(avg_, 8)
1810 H264_MC(avg_, 16)
1811
1812 #undef op_avg
1813 #undef op_put
1814 #undef op2_avg
1815 #undef op2_put
1816 #endif
1817
1330 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ 1818 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1331 uint8_t *cm = cropTbl + MAX_NEG_CROP; 1819 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1332 int i; 1820 int i;
1333 1821
1334 for(i=0; i<h; i++){ 1822 for(i=0; i<h; i++){
2105 dspfunc(put_qpel, 1, 8); 2593 dspfunc(put_qpel, 1, 8);
2106 dspfunc(put_no_rnd_qpel, 1, 8); 2594 dspfunc(put_no_rnd_qpel, 1, 8);
2107 2595
2108 dspfunc(avg_qpel, 1, 8); 2596 dspfunc(avg_qpel, 1, 8);
2109 /* dspfunc(avg_no_rnd_qpel, 1, 8); */ 2597 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2598
2599 dspfunc(put_h264_qpel, 0, 16);
2600 dspfunc(put_h264_qpel, 1, 8);
2601 dspfunc(put_h264_qpel, 2, 4);
2602 dspfunc(avg_h264_qpel, 0, 16);
2603 dspfunc(avg_h264_qpel, 1, 8);
2604 dspfunc(avg_h264_qpel, 2, 4);
2605
2110 #undef dspfunc 2606 #undef dspfunc
2607 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
2608 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
2609 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
2610 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
2611 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
2612 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
2111 2613
2112 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; 2614 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
2113 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; 2615 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2114 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; 2616 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2115 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; 2617 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;