Mercurial > libavcodec.hg
comparison dsputil.c @ 1168:5af9aeadbdc3 libavcodec
H264 decoder & demuxer
author | michaelni |
---|---|
date | Fri, 04 Apr 2003 14:42:28 +0000 |
parents | 1e39f273ecd6 |
children | e0fc95a6eb4e |
comparison
equal
deleted
inserted
replaced
1167:35b80080b2db | 1168:5af9aeadbdc3 |
---|---|
464 | 464 |
465 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) | 465 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) |
466 #else // 64 bit variant | 466 #else // 64 bit variant |
467 | 467 |
468 #define PIXOP2(OPNAME, OP) \ | 468 #define PIXOP2(OPNAME, OP) \ |
469 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
470 int i;\ | |
471 for(i=0; i<h; i++){\ | |
472 OP(*((uint32_t*)(block )), LD32(pixels ));\ | |
473 pixels+=line_size;\ | |
474 block +=line_size;\ | |
475 }\ | |
476 }\ | |
469 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | 477 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
470 int i;\ | 478 int i;\ |
471 for(i=0; i<h; i++){\ | 479 for(i=0; i<h; i++){\ |
472 OP(*((uint32_t*)(block )), LD32(pixels ));\ | 480 OP(*((uint32_t*)(block )), LD32(pixels ));\ |
473 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ | 481 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ |
502 b= LD32(&src2[i*src_stride2 ]);\ | 510 b= LD32(&src2[i*src_stride2 ]);\ |
503 OP(*((uint32_t*)&dst[i*dst_stride ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ | 511 OP(*((uint32_t*)&dst[i*dst_stride ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ |
504 a= LD32(&src1[i*src_stride1+4]);\ | 512 a= LD32(&src1[i*src_stride1+4]);\ |
505 b= LD32(&src2[i*src_stride2+4]);\ | 513 b= LD32(&src2[i*src_stride2+4]);\ |
506 OP(*((uint32_t*)&dst[i*dst_stride+4]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ | 514 OP(*((uint32_t*)&dst[i*dst_stride+4]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ |
515 }\ | |
516 }\ | |
517 \ | |
518 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
519 int src_stride1, int src_stride2, int h){\ | |
520 int i;\ | |
521 for(i=0; i<h; i++){\ | |
522 uint32_t a,b;\ | |
523 a= LD32(&src1[i*src_stride1 ]);\ | |
524 b= LD32(&src2[i*src_stride2 ]);\ | |
525 OP(*((uint32_t*)&dst[i*dst_stride ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ | |
507 }\ | 526 }\ |
508 }\ | 527 }\ |
509 \ | 528 \ |
510 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | 529 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
511 int src_stride1, int src_stride2, int h){\ | 530 int src_stride1, int src_stride2, int h){\ |
796 vx+= dxx; | 815 vx+= dxx; |
797 vy+= dyx; | 816 vy+= dyx; |
798 } | 817 } |
799 ox += dxy; | 818 ox += dxy; |
800 oy += dyy; | 819 oy += dyy; |
820 } | |
821 } | |
822 #define H264_CHROMA_MC(OPNAME, OP)\ | |
823 static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ | |
824 const int A=(8-x)*(8-y);\ | |
825 const int B=( x)*(8-y);\ | |
826 const int C=(8-x)*( y);\ | |
827 const int D=( x)*( y);\ | |
828 int i;\ | |
829 \ | |
830 assert(x<8 && y<8 && x>=0 && y>=0);\ | |
831 \ | |
832 for(i=0; i<h; i++)\ | |
833 {\ | |
834 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ | |
835 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ | |
836 dst+= stride;\ | |
837 src+= stride;\ | |
838 }\ | |
839 }\ | |
840 \ | |
841 static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ | |
842 const int A=(8-x)*(8-y);\ | |
843 const int B=( x)*(8-y);\ | |
844 const int C=(8-x)*( y);\ | |
845 const int D=( x)*( y);\ | |
846 int i;\ | |
847 \ | |
848 assert(x<8 && y<8 && x>=0 && y>=0);\ | |
849 \ | |
850 for(i=0; i<h; i++)\ | |
851 {\ | |
852 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ | |
853 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ | |
854 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ | |
855 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ | |
856 dst+= stride;\ | |
857 src+= stride;\ | |
858 }\ | |
859 }\ | |
860 \ | |
861 static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ | |
862 const int A=(8-x)*(8-y);\ | |
863 const int B=( x)*(8-y);\ | |
864 const int C=(8-x)*( y);\ | |
865 const int D=( x)*( y);\ | |
866 int i;\ | |
867 \ | |
868 assert(x<8 && y<8 && x>=0 && y>=0);\ | |
869 \ | |
870 for(i=0; i<h; i++)\ | |
871 {\ | |
872 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ | |
873 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ | |
874 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ | |
875 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ | |
876 OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\ | |
877 OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\ | |
878 OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\ | |
879 OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\ | |
880 dst+= stride;\ | |
881 src+= stride;\ | |
882 }\ | |
883 } | |
884 | |
885 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) | |
886 #define op_put(a, b) a = (((b) + 32)>>6) | |
887 | |
888 H264_CHROMA_MC(put_ , op_put) | |
889 H264_CHROMA_MC(avg_ , op_avg) | |
890 #undef op_avg | |
891 #undef op_put | |
892 | |
893 static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | |
894 { | |
895 int i; | |
896 for(i=0; i<h; i++) | |
897 { | |
898 ST32(dst , LD32(src )); | |
899 dst+=dstStride; | |
900 src+=srcStride; | |
901 } | |
902 } | |
903 | |
904 static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | |
905 { | |
906 int i; | |
907 for(i=0; i<h; i++) | |
908 { | |
909 ST32(dst , LD32(src )); | |
910 ST32(dst+4 , LD32(src+4 )); | |
911 dst+=dstStride; | |
912 src+=srcStride; | |
913 } | |
914 } | |
915 | |
916 static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | |
917 { | |
918 int i; | |
919 for(i=0; i<h; i++) | |
920 { | |
921 ST32(dst , LD32(src )); | |
922 ST32(dst+4 , LD32(src+4 )); | |
923 ST32(dst+8 , LD32(src+8 )); | |
924 ST32(dst+12, LD32(src+12)); | |
925 dst+=dstStride; | |
926 src+=srcStride; | |
801 } | 927 } |
802 } | 928 } |
803 | 929 |
804 static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | 930 static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) |
805 { | 931 { |
1325 #undef op_avg | 1451 #undef op_avg |
1326 #undef op_avg_no_rnd | 1452 #undef op_avg_no_rnd |
1327 #undef op_put | 1453 #undef op_put |
1328 #undef op_put_no_rnd | 1454 #undef op_put_no_rnd |
1329 | 1455 |
1456 #if 1 | |
1457 #define H264_LOWPASS(OPNAME, OP, OP2) \ | |
1458 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
1459 const int h=4;\ | |
1460 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
1461 int i;\ | |
1462 for(i=0; i<h; i++)\ | |
1463 {\ | |
1464 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ | |
1465 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ | |
1466 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ | |
1467 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ | |
1468 dst+=dstStride;\ | |
1469 src+=srcStride;\ | |
1470 }\ | |
1471 }\ | |
1472 \ | |
1473 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
1474 const int w=4;\ | |
1475 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
1476 int i;\ | |
1477 for(i=0; i<w; i++)\ | |
1478 {\ | |
1479 const int srcB= src[-2*srcStride];\ | |
1480 const int srcA= src[-1*srcStride];\ | |
1481 const int src0= src[0 *srcStride];\ | |
1482 const int src1= src[1 *srcStride];\ | |
1483 const int src2= src[2 *srcStride];\ | |
1484 const int src3= src[3 *srcStride];\ | |
1485 const int src4= src[4 *srcStride];\ | |
1486 const int src5= src[5 *srcStride];\ | |
1487 const int src6= src[6 *srcStride];\ | |
1488 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
1489 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
1490 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
1491 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
1492 dst++;\ | |
1493 src++;\ | |
1494 }\ | |
1495 }\ | |
1496 \ | |
1497 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
1498 const int h=4;\ | |
1499 const int w=4;\ | |
1500 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
1501 int i;\ | |
1502 src -= 2*srcStride;\ | |
1503 for(i=0; i<h+5; i++)\ | |
1504 {\ | |
1505 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ | |
1506 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | |
1507 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ | |
1508 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ | |
1509 tmp+=tmpStride;\ | |
1510 src+=srcStride;\ | |
1511 }\ | |
1512 tmp -= tmpStride*(h+5-2);\ | |
1513 for(i=0; i<w; i++)\ | |
1514 {\ | |
1515 const int tmpB= tmp[-2*tmpStride];\ | |
1516 const int tmpA= tmp[-1*tmpStride];\ | |
1517 const int tmp0= tmp[0 *tmpStride];\ | |
1518 const int tmp1= tmp[1 *tmpStride];\ | |
1519 const int tmp2= tmp[2 *tmpStride];\ | |
1520 const int tmp3= tmp[3 *tmpStride];\ | |
1521 const int tmp4= tmp[4 *tmpStride];\ | |
1522 const int tmp5= tmp[5 *tmpStride];\ | |
1523 const int tmp6= tmp[6 *tmpStride];\ | |
1524 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
1525 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
1526 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
1527 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
1528 dst++;\ | |
1529 tmp++;\ | |
1530 }\ | |
1531 }\ | |
1532 \ | |
1533 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
1534 const int h=8;\ | |
1535 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
1536 int i;\ | |
1537 for(i=0; i<h; i++)\ | |
1538 {\ | |
1539 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ | |
1540 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ | |
1541 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ | |
1542 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ | |
1543 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ | |
1544 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ | |
1545 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ | |
1546 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ | |
1547 dst+=dstStride;\ | |
1548 src+=srcStride;\ | |
1549 }\ | |
1550 }\ | |
1551 \ | |
1552 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
1553 const int w=8;\ | |
1554 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
1555 int i;\ | |
1556 for(i=0; i<w; i++)\ | |
1557 {\ | |
1558 const int srcB= src[-2*srcStride];\ | |
1559 const int srcA= src[-1*srcStride];\ | |
1560 const int src0= src[0 *srcStride];\ | |
1561 const int src1= src[1 *srcStride];\ | |
1562 const int src2= src[2 *srcStride];\ | |
1563 const int src3= src[3 *srcStride];\ | |
1564 const int src4= src[4 *srcStride];\ | |
1565 const int src5= src[5 *srcStride];\ | |
1566 const int src6= src[6 *srcStride];\ | |
1567 const int src7= src[7 *srcStride];\ | |
1568 const int src8= src[8 *srcStride];\ | |
1569 const int src9= src[9 *srcStride];\ | |
1570 const int src10=src[10*srcStride];\ | |
1571 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
1572 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
1573 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
1574 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
1575 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ | |
1576 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ | |
1577 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ | |
1578 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ | |
1579 dst++;\ | |
1580 src++;\ | |
1581 }\ | |
1582 }\ | |
1583 \ | |
1584 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
1585 const int h=8;\ | |
1586 const int w=8;\ | |
1587 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
1588 int i;\ | |
1589 src -= 2*srcStride;\ | |
1590 for(i=0; i<h+5; i++)\ | |
1591 {\ | |
1592 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ | |
1593 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ | |
1594 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ | |
1595 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ | |
1596 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ | |
1597 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ | |
1598 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ | |
1599 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ | |
1600 tmp+=tmpStride;\ | |
1601 src+=srcStride;\ | |
1602 }\ | |
1603 tmp -= tmpStride*(h+5-2);\ | |
1604 for(i=0; i<w; i++)\ | |
1605 {\ | |
1606 const int tmpB= tmp[-2*tmpStride];\ | |
1607 const int tmpA= tmp[-1*tmpStride];\ | |
1608 const int tmp0= tmp[0 *tmpStride];\ | |
1609 const int tmp1= tmp[1 *tmpStride];\ | |
1610 const int tmp2= tmp[2 *tmpStride];\ | |
1611 const int tmp3= tmp[3 *tmpStride];\ | |
1612 const int tmp4= tmp[4 *tmpStride];\ | |
1613 const int tmp5= tmp[5 *tmpStride];\ | |
1614 const int tmp6= tmp[6 *tmpStride];\ | |
1615 const int tmp7= tmp[7 *tmpStride];\ | |
1616 const int tmp8= tmp[8 *tmpStride];\ | |
1617 const int tmp9= tmp[9 *tmpStride];\ | |
1618 const int tmp10=tmp[10*tmpStride];\ | |
1619 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
1620 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
1621 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
1622 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
1623 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ | |
1624 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ | |
1625 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ | |
1626 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ | |
1627 dst++;\ | |
1628 tmp++;\ | |
1629 }\ | |
1630 }\ | |
1631 \ | |
1632 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
1633 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
1634 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
1635 src += 8*srcStride;\ | |
1636 dst += 8*dstStride;\ | |
1637 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
1638 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
1639 }\ | |
1640 \ | |
1641 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
1642 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
1643 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
1644 src += 8*srcStride;\ | |
1645 dst += 8*dstStride;\ | |
1646 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
1647 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
1648 }\ | |
1649 \ | |
1650 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
1651 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
1652 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
1653 src += 8*srcStride;\ | |
1654 tmp += 8*tmpStride;\ | |
1655 dst += 8*dstStride;\ | |
1656 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
1657 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
1658 }\ | |
1659 | |
1660 #define H264_MC(OPNAME, SIZE) \ | |
1661 static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\ | |
1662 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\ | |
1663 }\ | |
1664 \ | |
1665 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1666 uint8_t half[SIZE*SIZE];\ | |
1667 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | |
1668 OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\ | |
1669 }\ | |
1670 \ | |
1671 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1672 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\ | |
1673 }\ | |
1674 \ | |
1675 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1676 uint8_t half[SIZE*SIZE];\ | |
1677 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | |
1678 OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\ | |
1679 }\ | |
1680 \ | |
1681 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1682 uint8_t full[SIZE*(SIZE+5)];\ | |
1683 uint8_t * const full_mid= full + SIZE*2;\ | |
1684 uint8_t half[SIZE*SIZE];\ | |
1685 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
1686 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | |
1687 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\ | |
1688 }\ | |
1689 \ | |
1690 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1691 uint8_t full[SIZE*(SIZE+5)];\ | |
1692 uint8_t * const full_mid= full + SIZE*2;\ | |
1693 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
1694 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\ | |
1695 }\ | |
1696 \ | |
1697 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1698 uint8_t full[SIZE*(SIZE+5)];\ | |
1699 uint8_t * const full_mid= full + SIZE*2;\ | |
1700 uint8_t half[SIZE*SIZE];\ | |
1701 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
1702 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | |
1703 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\ | |
1704 }\ | |
1705 \ | |
1706 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1707 uint8_t full[SIZE*(SIZE+5)];\ | |
1708 uint8_t * const full_mid= full + SIZE*2;\ | |
1709 uint8_t halfH[SIZE*SIZE];\ | |
1710 uint8_t halfV[SIZE*SIZE];\ | |
1711 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
1712 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
1713 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
1714 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
1715 }\ | |
1716 \ | |
1717 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1718 uint8_t full[SIZE*(SIZE+5)];\ | |
1719 uint8_t * const full_mid= full + SIZE*2;\ | |
1720 uint8_t halfH[SIZE*SIZE];\ | |
1721 uint8_t halfV[SIZE*SIZE];\ | |
1722 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
1723 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
1724 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
1725 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
1726 }\ | |
1727 \ | |
1728 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1729 uint8_t full[SIZE*(SIZE+5)];\ | |
1730 uint8_t * const full_mid= full + SIZE*2;\ | |
1731 uint8_t halfH[SIZE*SIZE];\ | |
1732 uint8_t halfV[SIZE*SIZE];\ | |
1733 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
1734 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
1735 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
1736 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
1737 }\ | |
1738 \ | |
1739 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1740 uint8_t full[SIZE*(SIZE+5)];\ | |
1741 uint8_t * const full_mid= full + SIZE*2;\ | |
1742 uint8_t halfH[SIZE*SIZE];\ | |
1743 uint8_t halfV[SIZE*SIZE];\ | |
1744 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
1745 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
1746 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
1747 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
1748 }\ | |
1749 \ | |
1750 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1751 int16_t tmp[SIZE*(SIZE+5)];\ | |
1752 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\ | |
1753 }\ | |
1754 \ | |
1755 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1756 int16_t tmp[SIZE*(SIZE+5)];\ | |
1757 uint8_t halfH[SIZE*SIZE];\ | |
1758 uint8_t halfHV[SIZE*SIZE];\ | |
1759 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
1760 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
1761 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | |
1762 }\ | |
1763 \ | |
1764 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1765 int16_t tmp[SIZE*(SIZE+5)];\ | |
1766 uint8_t halfH[SIZE*SIZE];\ | |
1767 uint8_t halfHV[SIZE*SIZE];\ | |
1768 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
1769 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
1770 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | |
1771 }\ | |
1772 \ | |
1773 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1774 uint8_t full[SIZE*(SIZE+5)];\ | |
1775 uint8_t * const full_mid= full + SIZE*2;\ | |
1776 int16_t tmp[SIZE*(SIZE+5)];\ | |
1777 uint8_t halfV[SIZE*SIZE];\ | |
1778 uint8_t halfHV[SIZE*SIZE];\ | |
1779 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
1780 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
1781 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
1782 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | |
1783 }\ | |
1784 \ | |
1785 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1786 uint8_t full[SIZE*(SIZE+5)];\ | |
1787 uint8_t * const full_mid= full + SIZE*2;\ | |
1788 int16_t tmp[SIZE*(SIZE+5)];\ | |
1789 uint8_t halfV[SIZE*SIZE];\ | |
1790 uint8_t halfHV[SIZE*SIZE];\ | |
1791 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
1792 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
1793 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
1794 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | |
1795 }\ | |
1796 | |
1797 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) | |
1798 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) | |
1799 #define op_put(a, b) a = cm[((b) + 16)>>5] | |
1800 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) | |
1801 #define op2_put(a, b) a = cm[((b) + 512)>>10] | |
1802 | |
1803 H264_LOWPASS(put_ , op_put, op2_put) | |
1804 H264_LOWPASS(avg_ , op_avg, op2_avg) | |
1805 H264_MC(put_, 4) | |
1806 H264_MC(put_, 8) | |
1807 H264_MC(put_, 16) | |
1808 H264_MC(avg_, 4) | |
1809 H264_MC(avg_, 8) | |
1810 H264_MC(avg_, 16) | |
1811 | |
1812 #undef op_avg | |
1813 #undef op_put | |
1814 #undef op2_avg | |
1815 #undef op2_put | |
1816 #endif | |
1817 | |
1330 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ | 1818 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ |
1331 uint8_t *cm = cropTbl + MAX_NEG_CROP; | 1819 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
1332 int i; | 1820 int i; |
1333 | 1821 |
1334 for(i=0; i<h; i++){ | 1822 for(i=0; i<h; i++){ |
2105 dspfunc(put_qpel, 1, 8); | 2593 dspfunc(put_qpel, 1, 8); |
2106 dspfunc(put_no_rnd_qpel, 1, 8); | 2594 dspfunc(put_no_rnd_qpel, 1, 8); |
2107 | 2595 |
2108 dspfunc(avg_qpel, 1, 8); | 2596 dspfunc(avg_qpel, 1, 8); |
2109 /* dspfunc(avg_no_rnd_qpel, 1, 8); */ | 2597 /* dspfunc(avg_no_rnd_qpel, 1, 8); */ |
2598 | |
2599 dspfunc(put_h264_qpel, 0, 16); | |
2600 dspfunc(put_h264_qpel, 1, 8); | |
2601 dspfunc(put_h264_qpel, 2, 4); | |
2602 dspfunc(avg_h264_qpel, 0, 16); | |
2603 dspfunc(avg_h264_qpel, 1, 8); | |
2604 dspfunc(avg_h264_qpel, 2, 4); | |
2605 | |
2110 #undef dspfunc | 2606 #undef dspfunc |
2607 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c; | |
2608 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c; | |
2609 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c; | |
2610 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; | |
2611 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; | |
2612 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; | |
2111 | 2613 |
2112 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; | 2614 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; |
2113 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; | 2615 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; |
2114 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; | 2616 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; |
2115 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; | 2617 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; |