# HG changeset patch # User michaelni # Date 1052833089 0 # Node ID 67ee8bab0f28745d024ea4a2011e6afca0d341d8 # Parent afdd177080c910884124459694fa04712fae6517 optimizations diff -r afdd177080c9 -r 67ee8bab0f28 h264.c --- a/h264.c Tue May 13 08:21:35 2003 +0000 +++ b/h264.c Tue May 13 13:38:09 2003 +0000 @@ -319,6 +319,8 @@ *(uint16_t*)(p + 1*stride)= *(uint16_t*)(p + 2*stride)= *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101; + }else if(w==4 && h==1){ + *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101; }else if(w==4 && h==2){ *(uint32_t*)(p + 0*stride)= *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101; @@ -3197,7 +3199,7 @@ const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; int mb_type, partition_count, cbp; - memset(h->mb, 0, sizeof(int16_t)*24*16); //FIXME avoid if allready clear (move after skip handlong? + s->dsp.clear_blocks(h->mb); //FIXME avoid if allready clear (move after skip handlong? tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y); diff -r afdd177080c9 -r 67ee8bab0f28 svq3.c --- a/svq3.c Tue May 13 08:21:35 2003 +0000 +++ b/svq3.c Tue May 13 13:38:09 2003 +0000 @@ -43,6 +43,15 @@ * svq3 decoder. */ +/* dual scan (from some older h264 draft) + o-->o-->o o + | /| + o o o / o + | / | |/ | + o o o o + / + o-->o-->o-->o +*/ static const uint8_t svq3_scan[16]={ 0+0*4, 1+0*4, 2+0*4, 2+1*4, 2+2*4, 3+0*4, 3+1*4, 3+2*4, @@ -446,17 +455,22 @@ } /* fill caches */ - memset (h->ref_cache[0], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); - + /* note ref_cache[0] should contain here: + ???????? + ???11111 + N??11111 + N??11111 + N??11111 + N + */ + if (s->mb_x > 0) { for (i=0; i < 4; i++) { *(uint32_t *) h->mv_cache[0][scan8[0] - 1 + i*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - 1 + i*h->b_stride]; - h->ref_cache[0][scan8[0] - 1 + i*8] = 1; } } else { for (i=0; i < 4; i++) { *(uint32_t *) h->mv_cache[0][scan8[0] - 1 + i*8] = 0; - h->ref_cache[0][scan8[0] - 1 + i*8] = 1; } } if (s->mb_y > 0) { @@ -466,12 +480,15 @@ if (s->mb_x < (s->mb_width - 1)) { *(uint32_t *) h->mv_cache[0][scan8[0] + 4 - 1*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - h->b_stride + 4]; h->ref_cache[0][scan8[0] + 4 - 1*8] = 1; - } + }else + h->ref_cache[0][scan8[0] + 4 - 1*8] = PART_NOT_AVAILABLE; if (s->mb_x > 0) { *(uint32_t *) h->mv_cache[0][scan8[0] - 1 - 1*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - h->b_stride - 1]; h->ref_cache[0][scan8[0] - 1 - 1*8] = 1; - } - } + }else + h->ref_cache[0][scan8[0] - 1 - 1*8] = PART_NOT_AVAILABLE; + }else + memset (&h->ref_cache[0][scan8[0] - 1*8 - 1], PART_NOT_AVAILABLE, 8); /* decode motion vector(s) and form prediction(s) */ part_width = ((mb_type & 5) == 5) ? 4 : 8 << (mb_type & 1); @@ -510,14 +527,7 @@ } /* update mv_cache */ - for (l=0; l < part_height; l+=4) { - for (m=0; m < part_width; m+=4) { - k = scan8[0] + ((m + j) >> 2) + ((l + i) << 1); - h->mv_cache [0][k][0] = mx; - h->mv_cache [0][k][1] = my; - h->ref_cache[0][k] = 1; - } - } + fill_rectangle(h->mv_cache[0][scan8[k]], part_width>>2, part_height>>2, 8, (mx&0xFFFF)+(my<<16), 4); svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my); } @@ -592,8 +602,8 @@ memset (h->intra4x4_pred_mode[mb_xy], DC_PRED, 8); } if (!IS_SKIP(mb_type)) { - memset (h->mb, 0, 24*16*sizeof(DCTELEM)); - memset (h->non_zero_count_cache, 0, 8*6*sizeof(uint8_t)); + memset (h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t)); + s->dsp.clear_blocks(h->mb); } if (IS_INTRA16x16(mb_type) || (s->pict_type != I_TYPE && s->adaptive_quant && cbp)) { @@ -743,6 +753,13 @@ frame_start (h); + for(i=0; i<4; i++){ + int j; + for(j=-1; j<4; j++) + h->ref_cache[0][scan8[0] + 8*i + j]= 1; + h->ref_cache[0][scan8[0] + 8*i + j]= PART_NOT_AVAILABLE; + } + for (s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) { for (s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) { int mb_type = svq3_get_ue_golomb (&s->gb);