Mercurial > libavcodec.hg
comparison h264.c @ 1184:05a2ac8978ad libavcodec
faster 8x8 & 16x16 plane prediction by skal (massimin at planet-d dot net)
author | michaelni |
---|---|
date | Sat, 12 Apr 2003 22:49:54 +0000 |
parents | fea03d2c4946 |
children | f3c659bfdb8e |
comparison legend (left column: revision 1183, right column: revision 1184)
equal
deleted
inserted
replaced
1183:03b97d87dcdd | 1184:05a2ac8978ad |
---|---|
1679 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U; | 1679 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U; |
1680 } | 1680 } |
1681 } | 1681 } |
1682 | 1682 |
1683 static void pred16x16_plane_c(uint8_t *src, int stride){ | 1683 static void pred16x16_plane_c(uint8_t *src, int stride){ |
1684 uint8_t *cm = cropTbl + MAX_NEG_CROP; | 1684 int i, j, k; |
1685 int i, dx, dy, dc; | 1685 int a; |
1686 int temp[16]; | 1686 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
1687 | 1687 const uint8_t * const src0 = src+7-stride; |
1688 dc= 16*(src[15-stride] + src[-1+15*stride]); | 1688 const uint8_t *src1 = src+8*stride-1; |
1689 | 1689 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1; |
1690 dx=dy=0; | 1690 int H = src0[1] - src0[-1]; |
1691 for(i=1; i<9; i++){ | 1691 int V = src1[0] - src2[ 0]; |
1692 dx += i*(src[7+i-stride] - src[7-i-stride]); | 1692 for(k=2; k<=8; ++k) { |
1693 dy += i*(src[-1+(7+i)*stride] - src[-1+(7-i)*stride]); | 1693 src1 += stride; src2 -= stride; |
1694 } | 1694 H += k*(src0[k] - src0[-k]); |
1695 dx= (5*dx+32)>>6; | 1695 V += k*(src1[0] - src2[ 0]); |
1696 dy= (5*dy+32)>>6; | 1696 } |
1697 | 1697 H = ( 5*H+32 ) >> 6; |
1698 dc += 16; | 1698 V = ( 5*V+32 ) >> 6; |
1699 | 1699 |
1700 //FIXME modifiy dc,dx,dy to avoid -7 | 1700 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); |
1701 | 1701 for(j=16; j>0; --j) { |
1702 for(i=0; i<16; i++) | 1702 int b = a; |
1703 temp[i]= dx*(i-7) + dc; | 1703 a += V; |
1704 | 1704 for(i=-16; i<0; i+=4) { |
1705 if( (dc - ABS(dx)*8 - ABS(dy)*8)>>5 < 0 | 1705 src[16+i] = cm[ (b ) >> 5 ]; |
1706 || (dc + ABS(dx)*8 + ABS(dy)*8)>>5 > 255){ | 1706 src[17+i] = cm[ (b+ H) >> 5 ]; |
1707 | 1707 src[18+i] = cm[ (b+2*H) >> 5 ]; |
1708 for(i=0; i<16; i++){ | 1708 src[19+i] = cm[ (b+3*H) >> 5 ]; |
1709 int j; | 1709 b += 4*H; |
1710 for(j=0; j<16; j++) | 1710 } |
1711 src[j + i*stride]= cm[ (temp[j] + dy*(i-7))>>5 ]; | 1711 src += stride; |
1712 } | 1712 } |
1713 }else{ | |
1714 for(i=0; i<16; i++){ | |
1715 int j; | |
1716 for(j=0; j<16; j++) | |
1717 src[j + i*stride]= (temp[j] + dy*(i-7))>>5; | |
1718 } | |
1719 } | |
1720 } | 1713 } |
1721 | 1714 |
1722 static void pred8x8_vertical_c(uint8_t *src, int stride){ | 1715 static void pred8x8_vertical_c(uint8_t *src, int stride){ |
1723 int i; | 1716 int i; |
1724 const uint32_t a= ((uint32_t*)(src-stride))[0]; | 1717 const uint32_t a= ((uint32_t*)(src-stride))[0]; |
1821 ((uint32_t*)(src+i*stride))[1]= dc3; | 1814 ((uint32_t*)(src+i*stride))[1]= dc3; |
1822 } | 1815 } |
1823 } | 1816 } |
1824 | 1817 |
1825 static void pred8x8_plane_c(uint8_t *src, int stride){ | 1818 static void pred8x8_plane_c(uint8_t *src, int stride){ |
1826 uint8_t *cm = cropTbl + MAX_NEG_CROP; | 1819 int j, k; |
1827 int i, dx, dy, dc; | 1820 int a; |
1828 int temp[8]; | 1821 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
1829 | 1822 const uint8_t * const src0 = src+3-stride; |
1830 dc= 16*(src[7-stride] + src[-1+7*stride]); | 1823 const uint8_t *src1 = src+4*stride-1; |
1831 | 1824 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1; |
1832 dx=dy=0; | 1825 int H = src0[1] - src0[-1]; |
1833 for(i=1; i<5; i++){ | 1826 int V = src1[0] - src2[ 0]; |
1834 dx += i*(src[3+i-stride] - src[3-i-stride]); | 1827 for(k=2; k<=4; ++k) { |
1835 dy += i*(src[-1+(3+i)*stride] - src[-1+(3-i)*stride]); | 1828 src1 += stride; src2 -= stride; |
1836 } | 1829 H += k*(src0[k] - src0[-k]); |
1837 dx= (17*dx+16)>>5; | 1830 V += k*(src1[0] - src2[ 0]); |
1838 dy= (17*dy+16)>>5; | 1831 } |
1839 | 1832 H = ( 17*H+16 ) >> 5; |
1840 dc += 16; | 1833 V = ( 17*V+16 ) >> 5; |
1841 | 1834 |
1842 //FIXME modifiy dc,dx,dy to avoid -3 | 1835 a = 16*(src1[0] + src2[8]+1) - 3*(V+H); |
1843 | 1836 for(j=8; j>0; --j) { |
1844 for(i=0; i<8; i++) | 1837 int b = a; |
1845 temp[i]= dx*(i-3) + dc; | 1838 a += V; |
1846 | 1839 src[0] = cm[ (b ) >> 5 ]; |
1847 if( (dc - ABS(dx)*4 - ABS(dy)*4)>>5 < 0 | 1840 src[1] = cm[ (b+ H) >> 5 ]; |
1848 || (dc + ABS(dx)*4 + ABS(dy)*4)>>5 > 255){ | 1841 src[2] = cm[ (b+2*H) >> 5 ]; |
1849 | 1842 src[3] = cm[ (b+3*H) >> 5 ]; |
1850 for(i=0; i<8; i++){ | 1843 src[4] = cm[ (b+4*H) >> 5 ]; |
1851 int j; | 1844 src[5] = cm[ (b+5*H) >> 5 ]; |
1852 for(j=0; j<8; j++) | 1845 src[6] = cm[ (b+6*H) >> 5 ]; |
1853 src[j + i*stride]= cm[ (temp[j] + dy*(i-3))>>5 ]; | 1846 src[7] = cm[ (b+7*H) >> 5 ]; |
1854 } | 1847 src += stride; |
1855 }else{ | 1848 } |
1856 for(i=0; i<8; i++){ | |
1857 int j; | |
1858 for(j=0; j<8; j++) | |
1859 src[j + i*stride]= (temp[j] + dy*(i-3))>>5; | |
1860 } | |
1861 } | |
1862 } | 1849 } |
1863 | 1850 |
1864 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, | 1851 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, |
1865 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | 1852 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, |
1866 int src_x_offset, int src_y_offset, | 1853 int src_x_offset, int src_y_offset, |