libavcodec.hg: snow.c comparison

comparison snow.c @ 5648:cd26ab6e3953 libavcodec

Clean up mc_block(): perform the interpolation steps in such an order that halfpel interpolation could be done per picture. This also makes mc_block() match H.264 for the 1/4-pel cases, so that the use of the h264 functions for some cases does not introduce a fantastic mess.

author	michael
date	Sat, 08 Sep 2007 03:14:20 +0000
parents	473cada682a1
children	9fe214a99139

comparison

equal deleted inserted replaced

-:7c139ea9065e
+:cd26ab6e3953
 }
 }
 }
 static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
-int x, y;
+const static uint8_t weight[64]={
+8,7,6,5,4,3,2,1,
+7,7,0,0,0,0,0,1,
+6,0,6,0,0,0,2,0,
+5,0,0,5,0,3,0,0,
+4,0,0,0,4,0,0,0,
+3,0,0,5,0,3,0,0,
+2,0,6,0,0,0,2,0,
+1,7,0,0,0,0,0,1,
+};
+const static uint8_t brane[256]={
+0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
+0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
+0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
+0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
+0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
+0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
+0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
+0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
+0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
+0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
+0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
+0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
+0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
+0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
+0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
+};
+const static uint8_t needs[16]={
+0,1,0,0,
+2,4,2,0,
+0,1,0,0,
+15
+};
+int x, y, b, r, l;
+int16_t tmpIt   [64*(32+HTAPS)];
+uint8_t tmp2t[3][stride*(32+HTAPS)];
+int16_t *tmpI= tmpIt;
+uint8_t *tmp2= tmp2t[0];
+uint8_t *hpel[11];
 START_TIMER
+assert(dx<16 && dy<16);
+r= brane[dx + 16*dy]&15;
+l= brane[dx + 16*dy]>>4;
+b= needs[l] | needs[r];
+if(b&5){
 for(y=0; y < b_h+HTAPS-1; y++){
 for(x=0; x < b_w; x++){
 int a_2=src[x + HTAPS/2-5];
 int a_1=src[x + HTAPS/2-4];
 int a0= src[x + HTAPS/2-3];
 //             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
 //             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
 //            if(b_w==16) am= 8*(a1+a2);
-if(dx<8) am = (32*a2*( 8-dx) +    am* dx    + 128)>>8;
+tmpI[x]= am;
-else     am = (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
+am= (am+16)>>5;
-/* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
 if(am&(~255)) am= ~(am>>31);
+tmp2[x]= am;
-tmp[x] = am;
+}
+tmpI+= 64;
-/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
+tmp2+= stride;
-else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
-else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
-else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
-}
-tmp += stride;
 src += stride;
 }
-tmp -= (b_h+HTAPS-1)*stride;
+src -= stride*y;
+}
+src += HTAPS/2 - 1;
+tmp2= tmp2t[1];
+if(b&2){
 for(y=0; y < b_h; y++){
-for(x=0; x < b_w; x++){
+for(x=0; x < b_w+1; x++){
-int a_2=tmp[x + (HTAPS/2-5)*stride];
+int a_2=src[x + (HTAPS/2-5)*stride];
-int a_1=tmp[x + (HTAPS/2-4)*stride];
+int a_1=src[x + (HTAPS/2-4)*stride];
-int a0= tmp[x + (HTAPS/2-3)*stride];
+int a0= src[x + (HTAPS/2-3)*stride];
-int a1= tmp[x + (HTAPS/2-2)*stride];
+int a1= src[x + (HTAPS/2-2)*stride];
-int a2= tmp[x + (HTAPS/2-1)*stride];
+int a2= src[x + (HTAPS/2-1)*stride];
-int a3= tmp[x + (HTAPS/2+0)*stride];
+int a3= src[x + (HTAPS/2+0)*stride];
-int a4= tmp[x + (HTAPS/2+1)*stride];
+int a4= src[x + (HTAPS/2+1)*stride];
-int a5= tmp[x + (HTAPS/2+2)*stride];
+int a5= src[x + (HTAPS/2+2)*stride];
-int a6= tmp[x + (HTAPS/2+3)*stride];
+int a6= src[x + (HTAPS/2+3)*stride];
-int a7= tmp[x + (HTAPS/2+4)*stride];
+int a7= src[x + (HTAPS/2+4)*stride];
 #if HTAPS==6
 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
 #else
 int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
 #endif
 /*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
 //            if(b_w==16) am= 8*(a1+a2);
-if(dy<8) am =  (32*a2*( 8-dy) +    am* dy    + 128)>>8;
+am= (am + 16)>>5;
-else     am = (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
 if(am&(~255)) am= ~(am>>31);
+tmp2[x]= am;
-dst[x] = am;
+}
-/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
+src += stride;
-else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
+tmp2+= stride;
-else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
+}
-else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
+src -= stride*y;
 }
-dst += stride;
+src += stride*(HTAPS/2 - 1);
-tmp += stride;
+tmp2= tmp2t[2];
+tmpI= tmpIt;
+if(b&4){
+for(y=0; y < b_h; y++){
+for(x=0; x < b_w; x++){
+int a_2=tmpI[x + (HTAPS/2-5)*64];
+int a_1=tmpI[x + (HTAPS/2-4)*64];
+int a0= tmpI[x + (HTAPS/2-3)*64];
+int a1= tmpI[x + (HTAPS/2-2)*64];
+int a2= tmpI[x + (HTAPS/2-1)*64];
+int a3= tmpI[x + (HTAPS/2+0)*64];
+int a4= tmpI[x + (HTAPS/2+1)*64];
+int a5= tmpI[x + (HTAPS/2+2)*64];
+int a6= tmpI[x + (HTAPS/2+3)*64];
+int a7= tmpI[x + (HTAPS/2+4)*64];
+#if HTAPS==6
+int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
+#else
+int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
+#endif
+am= (am + 512)>>10;
+if(am&(~255)) am= ~(am>>31);
+tmp2[x]= am;
+}
+tmpI+= 64;
+tmp2+= stride;
+}
+}
+hpel[ 0]= src;
+hpel[ 1]= tmp2t[0] + stride*(HTAPS/2-1);
+hpel[ 2]= src + 1;
+hpel[ 4]= tmp2t[1];
+hpel[ 5]= tmp2t[2];
+hpel[ 6]= tmp2t[1] + 1;
+hpel[ 8]= src + stride;
+hpel[ 9]= hpel[1] + stride;
+hpel[10]= hpel[8] + 1;
+if(b==15){
+uint8_t *src1= hpel[dx/8 + dy/8*4  ];
+uint8_t *src2= hpel[dx/8 + dy/8*4+1];
+uint8_t *src3= hpel[dx/8 + dy/8*4+4];
+uint8_t *src4= hpel[dx/8 + dy/8*4+5];
+dx&=7;
+dy&=7;
+for(y=0; y < b_h; y++){
+for(x=0; x < b_w; x++){
+dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
+(8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
+}
+src1+=stride;
+src2+=stride;
+src3+=stride;
+src4+=stride;
+dst +=stride;
+}
+}else{
+uint8_t *src1= hpel[l];
+uint8_t *src2= hpel[r];
+int a= weight[((dx&7) + (8*(dy&7)))];
+int b= 8-a;
+for(y=0; y < b_h; y++){
+for(x=0; x < b_w; x++){
+dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
+}
+src1+=stride;
+src2+=stride;
+dst +=stride;
+}
 }
 STOP_TIMER("mc_block")
 }
 #define mca(dx,dy,b_w)\

Mercurial > libavcodec.hg

comparison snow.c @ 5648:cd26ab6e3953 libavcodec