comparison h264.c @ 1184:05a2ac8978ad libavcodec

faster 8x8 & 16x16 plane prediction by skal (massimin at planet-d dot net)
author michaelni
date Sat, 12 Apr 2003 22:49:54 +0000
parents fea03d2c4946
children f3c659bfdb8e
comparison
equal deleted inserted replaced
1183:03b97d87dcdd 1184:05a2ac8978ad
1679 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U; 1679 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
1680 } 1680 }
1681 } 1681 }
1682 1682
1683 static void pred16x16_plane_c(uint8_t *src, int stride){ 1683 static void pred16x16_plane_c(uint8_t *src, int stride){
1684 uint8_t *cm = cropTbl + MAX_NEG_CROP; 1684 int i, j, k;
1685 int i, dx, dy, dc; 1685 int a;
1686 int temp[16]; 1686 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1687 1687 const uint8_t * const src0 = src+7-stride;
1688 dc= 16*(src[15-stride] + src[-1+15*stride]); 1688 const uint8_t *src1 = src+8*stride-1;
1689 1689 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
1690 dx=dy=0; 1690 int H = src0[1] - src0[-1];
1691 for(i=1; i<9; i++){ 1691 int V = src1[0] - src2[ 0];
1692 dx += i*(src[7+i-stride] - src[7-i-stride]); 1692 for(k=2; k<=8; ++k) {
1693 dy += i*(src[-1+(7+i)*stride] - src[-1+(7-i)*stride]); 1693 src1 += stride; src2 -= stride;
1694 } 1694 H += k*(src0[k] - src0[-k]);
1695 dx= (5*dx+32)>>6; 1695 V += k*(src1[0] - src2[ 0]);
1696 dy= (5*dy+32)>>6; 1696 }
1697 1697 H = ( 5*H+32 ) >> 6;
1698 dc += 16; 1698 V = ( 5*V+32 ) >> 6;
1699 1699
1700 //FIXME modify dc,dx,dy to avoid -7 1700 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
1701 1701 for(j=16; j>0; --j) {
1702 for(i=0; i<16; i++) 1702 int b = a;
1703 temp[i]= dx*(i-7) + dc; 1703 a += V;
1704 1704 for(i=-16; i<0; i+=4) {
1705 if( (dc - ABS(dx)*8 - ABS(dy)*8)>>5 < 0 1705 src[16+i] = cm[ (b ) >> 5 ];
1706 || (dc + ABS(dx)*8 + ABS(dy)*8)>>5 > 255){ 1706 src[17+i] = cm[ (b+ H) >> 5 ];
1707 1707 src[18+i] = cm[ (b+2*H) >> 5 ];
1708 for(i=0; i<16; i++){ 1708 src[19+i] = cm[ (b+3*H) >> 5 ];
1709 int j; 1709 b += 4*H;
1710 for(j=0; j<16; j++) 1710 }
1711 src[j + i*stride]= cm[ (temp[j] + dy*(i-7))>>5 ]; 1711 src += stride;
1712 } 1712 }
1713 }else{
1714 for(i=0; i<16; i++){
1715 int j;
1716 for(j=0; j<16; j++)
1717 src[j + i*stride]= (temp[j] + dy*(i-7))>>5;
1718 }
1719 }
1720 } 1713 }
1721 1714
1722 static void pred8x8_vertical_c(uint8_t *src, int stride){ 1715 static void pred8x8_vertical_c(uint8_t *src, int stride){
1723 int i; 1716 int i;
1724 const uint32_t a= ((uint32_t*)(src-stride))[0]; 1717 const uint32_t a= ((uint32_t*)(src-stride))[0];
1821 ((uint32_t*)(src+i*stride))[1]= dc3; 1814 ((uint32_t*)(src+i*stride))[1]= dc3;
1822 } 1815 }
1823 } 1816 }
1824 1817
1825 static void pred8x8_plane_c(uint8_t *src, int stride){ 1818 static void pred8x8_plane_c(uint8_t *src, int stride){
1826 uint8_t *cm = cropTbl + MAX_NEG_CROP; 1819 int j, k;
1827 int i, dx, dy, dc; 1820 int a;
1828 int temp[8]; 1821 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1829 1822 const uint8_t * const src0 = src+3-stride;
1830 dc= 16*(src[7-stride] + src[-1+7*stride]); 1823 const uint8_t *src1 = src+4*stride-1;
1831 1824 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
1832 dx=dy=0; 1825 int H = src0[1] - src0[-1];
1833 for(i=1; i<5; i++){ 1826 int V = src1[0] - src2[ 0];
1834 dx += i*(src[3+i-stride] - src[3-i-stride]); 1827 for(k=2; k<=4; ++k) {
1835 dy += i*(src[-1+(3+i)*stride] - src[-1+(3-i)*stride]); 1828 src1 += stride; src2 -= stride;
1836 } 1829 H += k*(src0[k] - src0[-k]);
1837 dx= (17*dx+16)>>5; 1830 V += k*(src1[0] - src2[ 0]);
1838 dy= (17*dy+16)>>5; 1831 }
1839 1832 H = ( 17*H+16 ) >> 5;
1840 dc += 16; 1833 V = ( 17*V+16 ) >> 5;
1841 1834
1842 //FIXME modify dc,dx,dy to avoid -3 1835 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
1843 1836 for(j=8; j>0; --j) {
1844 for(i=0; i<8; i++) 1837 int b = a;
1845 temp[i]= dx*(i-3) + dc; 1838 a += V;
1846 1839 src[0] = cm[ (b ) >> 5 ];
1847 if( (dc - ABS(dx)*4 - ABS(dy)*4)>>5 < 0 1840 src[1] = cm[ (b+ H) >> 5 ];
1848 || (dc + ABS(dx)*4 + ABS(dy)*4)>>5 > 255){ 1841 src[2] = cm[ (b+2*H) >> 5 ];
1849 1842 src[3] = cm[ (b+3*H) >> 5 ];
1850 for(i=0; i<8; i++){ 1843 src[4] = cm[ (b+4*H) >> 5 ];
1851 int j; 1844 src[5] = cm[ (b+5*H) >> 5 ];
1852 for(j=0; j<8; j++) 1845 src[6] = cm[ (b+6*H) >> 5 ];
1853 src[j + i*stride]= cm[ (temp[j] + dy*(i-3))>>5 ]; 1846 src[7] = cm[ (b+7*H) >> 5 ];
1854 } 1847 src += stride;
1855 }else{ 1848 }
1856 for(i=0; i<8; i++){
1857 int j;
1858 for(j=0; j<8; j++)
1859 src[j + i*stride]= (temp[j] + dy*(i-3))>>5;
1860 }
1861 }
1862 } 1849 }
1863 1850
1864 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, 1851 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1865 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 1852 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1866 int src_x_offset, int src_y_offset, 1853 int src_x_offset, int src_y_offset,