comparison snow.c @ 6405:7f615bfc21a2 libavcodec

Remove START_TIMER and STOP_TIMER macros.
author diego
date Tue, 26 Feb 2008 09:35:21 +0000
parents 3164768539be
children 1f70d64c00bc
comparison
equal deleted inserted replaced
6404:a0a1db4738dd 6405:7f615bfc21a2
921 921
922 for(y=-2; y<height; y+=2){ 922 for(y=-2; y<height; y+=2){
923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; 923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; 924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
925 925
926 {START_TIMER
927 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); 926 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
928 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); 927 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
929 STOP_TIMER("horizontal_decompose53i")} 928
930
931 {START_TIMER
932 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); 929 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
933 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); 930 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
934 STOP_TIMER("vertical_decompose53i*")}
935 931
936 b0=b2; 932 b0=b2;
937 b1=b3; 933 b1=b3;
938 } 934 }
939 } 935 }
994 990
995 for(y=-4; y<height; y+=2){ 991 for(y=-4; y<height; y+=2){
996 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; 992 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
997 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; 993 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
998 994
999 {START_TIMER
1000 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); 995 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1001 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); 996 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1002 if(width>400){ 997
1003 STOP_TIMER("horizontal_decompose97i")
1004 }}
1005
1006 {START_TIMER
1007 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); 998 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1008 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); 999 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1009 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); 1000 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1010 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); 1001 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1011
1012 if(width>400){
1013 STOP_TIMER("vertical_decompose97i")
1014 }}
1015 1002
1016 b0=b2; 1003 b0=b2;
1017 b1=b3; 1004 b1=b3;
1018 b2=b4; 1005 b2=b4;
1019 b3=b5; 1006 b3=b5;
1112 IDWTELEM *b0= cs->b0; 1099 IDWTELEM *b0= cs->b0;
1113 IDWTELEM *b1= cs->b1; 1100 IDWTELEM *b1= cs->b1;
1114 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); 1101 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1115 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); 1102 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1116 1103
1117 {START_TIMER
1118 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); 1104 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1119 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); 1105 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1120 STOP_TIMER("vertical_compose53i*")} 1106
1121
1122 {START_TIMER
1123 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); 1107 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1124 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); 1108 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1125 STOP_TIMER("horizontal_compose53i")}
1126 1109
1127 cs->b0 = b2; 1110 cs->b0 = b2;
1128 cs->b1 = b3; 1111 cs->b1 = b3;
1129 cs->y += 2; 1112 cs->y += 2;
1130 } 1113 }
1134 IDWTELEM *b0= cs->b0; 1117 IDWTELEM *b0= cs->b0;
1135 IDWTELEM *b1= cs->b1; 1118 IDWTELEM *b1= cs->b1;
1136 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; 1119 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1137 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; 1120 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1138 1121
1139 {START_TIMER
1140 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); 1122 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1141 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); 1123 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1142 STOP_TIMER("vertical_compose53i*")} 1124
1143
1144 {START_TIMER
1145 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); 1125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1146 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); 1126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1147 STOP_TIMER("horizontal_compose53i")}
1148 1127
1149 cs->b0 = b2; 1128 cs->b0 = b2;
1150 cs->b1 = b3; 1129 cs->b1 = b3;
1151 cs->y += 2; 1130 cs->y += 2;
1152 } 1131 }
1244 IDWTELEM *b2= cs->b2; 1223 IDWTELEM *b2= cs->b2;
1245 IDWTELEM *b3= cs->b3; 1224 IDWTELEM *b3= cs->b3;
1246 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); 1225 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1247 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); 1226 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1248 1227
1249 {START_TIMER
1250 if(y>0 && y+4<height){ 1228 if(y>0 && y+4<height){
1251 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); 1229 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1252 }else{ 1230 }else{
1253 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); 1231 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1254 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); 1232 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1255 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); 1233 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1256 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); 1234 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1257 } 1235 }
1258 if(width>400){ 1236
1259 STOP_TIMER("vertical_compose97i")}}
1260
1261 {START_TIMER
1262 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); 1237 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1263 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); 1238 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1264 if(width>400 && y+0<(unsigned)height){
1265 STOP_TIMER("horizontal_compose97i")}}
1266 1239
1267 cs->b0=b2; 1240 cs->b0=b2;
1268 cs->b1=b3; 1241 cs->b1=b3;
1269 cs->b2=b4; 1242 cs->b2=b4;
1270 cs->b3=b5; 1243 cs->b3=b5;
1278 IDWTELEM *b2= cs->b2; 1251 IDWTELEM *b2= cs->b2;
1279 IDWTELEM *b3= cs->b3; 1252 IDWTELEM *b3= cs->b3;
1280 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; 1253 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1281 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; 1254 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1282 1255
1283 {START_TIMER
1284 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); 1256 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1285 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); 1257 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1286 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); 1258 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1287 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); 1259 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1288 if(width>400){ 1260
1289 STOP_TIMER("vertical_compose97i")}}
1290
1291 {START_TIMER
1292 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); 1261 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1293 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); 1262 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1294 if(width>400 && b0 <= b2){
1295 STOP_TIMER("horizontal_compose97i")}}
1296 1263
1297 cs->b0=b2; 1264 cs->b0=b2;
1298 cs->b1=b3; 1265 cs->b1=b3;
1299 cs->b2=b4; 1266 cs->b2=b4;
1300 cs->b3=b5; 1267 cs->b3=b5;
1603 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 1570 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1604 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 1571 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1605 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; 1572 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1606 int new_index = 0; 1573 int new_index = 0;
1607 1574
1608 START_TIMER
1609
1610 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ 1575 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1611 qadd= 0; 1576 qadd= 0;
1612 qmul= 1<<QEXPSHIFT; 1577 qmul= 1<<QEXPSHIFT;
1613 } 1578 }
1614 1579
1631 line[x] = (t^u) - u; 1596 line[x] = (t^u) - u;
1632 1597
1633 v = b->x_coeff[new_index].coeff; 1598 v = b->x_coeff[new_index].coeff;
1634 x = b->x_coeff[new_index++].x; 1599 x = b->x_coeff[new_index++].x;
1635 } 1600 }
1636 }
1637 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1638 STOP_TIMER("decode_subband")
1639 } 1601 }
1640 1602
1641 /* Save our variables for the next slice. */ 1603 /* Save our variables for the next slice. */
1642 save_state[0] = new_index; 1604 save_state[0] = new_index;
1643 1605
2183 int16_t tmpIt [64*(32+HTAPS_MAX)]; 2145 int16_t tmpIt [64*(32+HTAPS_MAX)];
2184 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; 2146 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2185 int16_t *tmpI= tmpIt; 2147 int16_t *tmpI= tmpIt;
2186 uint8_t *tmp2= tmp2t[0]; 2148 uint8_t *tmp2= tmp2t[0];
2187 const uint8_t *hpel[11]; 2149 const uint8_t *hpel[11];
2188 START_TIMER
2189 assert(dx<16 && dy<16); 2150 assert(dx<16 && dy<16);
2190 r= brane[dx + 16*dy]&15; 2151 r= brane[dx + 16*dy]&15;
2191 l= brane[dx + 16*dy]>>4; 2152 l= brane[dx + 16*dy]>>4;
2192 2153
2193 b= needs[l] | needs[r]; 2154 b= needs[l] | needs[r];
2322 src1+=stride; 2283 src1+=stride;
2323 src2+=stride; 2284 src2+=stride;
2324 dst +=stride; 2285 dst +=stride;
2325 } 2286 }
2326 } 2287 }
2327 STOP_TIMER("mc_block")
2328 } 2288 }
2329 2289
2330 #define mca(dx,dy,b_w)\ 2290 #define mca(dx,dy,b_w)\
2331 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ 2291 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2332 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ 2292 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2579 else dst[x + y*dst_stride] -= v; 2539 else dst[x + y*dst_stride] -= v;
2580 } 2540 }
2581 } 2541 }
2582 #else 2542 #else
2583 if(sliced){ 2543 if(sliced){
2584 START_TIMER
2585
2586 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); 2544 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2587 STOP_TIMER("inner_add_yblock")
2588 }else 2545 }else
2589 for(y=0; y<b_h; y++){ 2546 for(y=0; y<b_h; y++){
2590 //FIXME ugly misuse of obmc_stride 2547 //FIXME ugly misuse of obmc_stride
2591 const uint8_t *obmc1= obmc + y*obmc_stride; 2548 const uint8_t *obmc1= obmc + y*obmc_stride;
2592 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); 2549 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2626 int obmc_stride= plane_index ? block_size : 2*block_size; 2583 int obmc_stride= plane_index ? block_size : 2*block_size;
2627 int ref_stride= s->current_picture.linesize[plane_index]; 2584 int ref_stride= s->current_picture.linesize[plane_index];
2628 uint8_t *dst8= s->current_picture.data[plane_index]; 2585 uint8_t *dst8= s->current_picture.data[plane_index];
2629 int w= p->width; 2586 int w= p->width;
2630 int h= p->height; 2587 int h= p->height;
2631 START_TIMER
2632 2588
2633 if(s->keyframe || (s->avctx->debug&512)){ 2589 if(s->keyframe || (s->avctx->debug&512)){
2634 if(mb_y==mb_h) 2590 if(mb_y==mb_h)
2635 return; 2591 return;
2636 2592
2663 2619
2664 return; 2620 return;
2665 } 2621 }
2666 2622
2667 for(mb_x=0; mb_x<=mb_w; mb_x++){ 2623 for(mb_x=0; mb_x<=mb_w; mb_x++){
2668 START_TIMER
2669
2670 add_yblock(s, 1, sb, old_buffer, dst8, obmc, 2624 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2671 block_w*mb_x - block_w/2, 2625 block_w*mb_x - block_w/2,
2672 block_w*mb_y - block_w/2, 2626 block_w*mb_y - block_w/2,
2673 block_w, block_w, 2627 block_w, block_w,
2674 w, h, 2628 w, h,
2675 w, ref_stride, obmc_stride, 2629 w, ref_stride, obmc_stride,
2676 mb_x - 1, mb_y - 1, 2630 mb_x - 1, mb_y - 1,
2677 add, 0, plane_index); 2631 add, 0, plane_index);
2678 2632 }
2679 STOP_TIMER("add_yblock")
2680 }
2681
2682 STOP_TIMER("predict_slice")
2683 } 2633 }
2684 2634
2685 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ 2635 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2686 Plane *p= &s->plane[plane_index]; 2636 Plane *p= &s->plane[plane_index];
2687 const int mb_w= s->b_width << s->block_max_depth; 2637 const int mb_w= s->b_width << s->block_max_depth;
2693 const int obmc_stride= plane_index ? block_size : 2*block_size; 2643 const int obmc_stride= plane_index ? block_size : 2*block_size;
2694 int ref_stride= s->current_picture.linesize[plane_index]; 2644 int ref_stride= s->current_picture.linesize[plane_index];
2695 uint8_t *dst8= s->current_picture.data[plane_index]; 2645 uint8_t *dst8= s->current_picture.data[plane_index];
2696 int w= p->width; 2646 int w= p->width;
2697 int h= p->height; 2647 int h= p->height;
2698 START_TIMER
2699 2648
2700 if(s->keyframe || (s->avctx->debug&512)){ 2649 if(s->keyframe || (s->avctx->debug&512)){
2701 if(mb_y==mb_h) 2650 if(mb_y==mb_h)
2702 return; 2651 return;
2703 2652
2720 2669
2721 return; 2670 return;
2722 } 2671 }
2723 2672
2724 for(mb_x=0; mb_x<=mb_w; mb_x++){ 2673 for(mb_x=0; mb_x<=mb_w; mb_x++){
2725 START_TIMER
2726
2727 add_yblock(s, 0, NULL, buf, dst8, obmc, 2674 add_yblock(s, 0, NULL, buf, dst8, obmc,
2728 block_w*mb_x - block_w/2, 2675 block_w*mb_x - block_w/2,
2729 block_w*mb_y - block_w/2, 2676 block_w*mb_y - block_w/2,
2730 block_w, block_w, 2677 block_w, block_w,
2731 w, h, 2678 w, h,
2732 w, ref_stride, obmc_stride, 2679 w, ref_stride, obmc_stride,
2733 mb_x - 1, mb_y - 1, 2680 mb_x - 1, mb_y - 1,
2734 add, 1, plane_index); 2681 add, 1, plane_index);
2735 2682 }
2736 STOP_TIMER("add_yblock")
2737 }
2738
2739 STOP_TIMER("predict_slice")
2740 } 2683 }
2741 2684
2742 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ 2685 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2743 const int mb_h= s->b_height << s->block_max_depth; 2686 const int mb_h= s->b_height << s->block_max_depth;
2744 int mb_y; 2687 int mb_y;
3306 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); 3249 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3307 } 3250 }
3308 } 3251 }
3309 3252
3310 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ 3253 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3311 const int level= b->level;
3312 const int w= b->width; 3254 const int w= b->width;
3313 const int h= b->height; 3255 const int h= b->height;
3314 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 3256 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3315 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); 3257 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3316 int x,y, thres1, thres2; 3258 int x,y, thres1, thres2;
3317 // START_TIMER
3318 3259
3319 if(s->qlog == LOSSLESS_QLOG){ 3260 if(s->qlog == LOSSLESS_QLOG){
3320 for(y=0; y<h; y++) 3261 for(y=0; y<h; y++)
3321 for(x=0; x<w; x++) 3262 for(x=0; x<w; x++)
3322 dst[x + y*stride]= src[x + y*stride]; 3263 dst[x + y*stride]= src[x + y*stride];
3366 }else 3307 }else
3367 dst[x + y*stride]= 0; 3308 dst[x + y*stride]= 0;
3368 } 3309 }
3369 } 3310 }
3370 } 3311 }
3371 if(level+1 == s->spatial_decomposition_count){
3372 // STOP_TIMER("quantize")
3373 }
3374 } 3312 }
3375 3313
3376 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ 3314 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3377 const int w= b->width; 3315 const int w= b->width;
3378 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 3316 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3379 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 3317 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3380 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; 3318 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3381 int x,y; 3319 int x,y;
3382 START_TIMER
3383 3320
3384 if(s->qlog == LOSSLESS_QLOG) return; 3321 if(s->qlog == LOSSLESS_QLOG) return;
3385 3322
3386 for(y=start_y; y<end_y; y++){ 3323 for(y=start_y; y<end_y; y++){
3387 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); 3324 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3393 }else if(i>0){ 3330 }else if(i>0){
3394 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); 3331 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3395 } 3332 }
3396 } 3333 }
3397 } 3334 }
3398 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3399 STOP_TIMER("dquant")
3400 }
3401 } 3335 }
3402 3336
3403 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ 3337 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3404 const int w= b->width; 3338 const int w= b->width;
3405 const int h= b->height; 3339 const int h= b->height;
3406 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 3340 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3407 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 3341 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3408 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; 3342 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3409 int x,y; 3343 int x,y;
3410 START_TIMER
3411 3344
3412 if(s->qlog == LOSSLESS_QLOG) return; 3345 if(s->qlog == LOSSLESS_QLOG) return;
3413 3346
3414 for(y=0; y<h; y++){ 3347 for(y=0; y<h; y++){
3415 for(x=0; x<w; x++){ 3348 for(x=0; x<w; x++){
3418 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias 3351 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3419 }else if(i>0){ 3352 }else if(i>0){
3420 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); 3353 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3421 } 3354 }
3422 } 3355 }
3423 }
3424 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3425 STOP_TIMER("dquant")
3426 } 3356 }
3427 } 3357 }
3428 3358
3429 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ 3359 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3430 const int w= b->width; 3360 const int w= b->width;
3451 } 3381 }
3452 3382
3453 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ 3383 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3454 const int w= b->width; 3384 const int w= b->width;
3455 int x,y; 3385 int x,y;
3456
3457 // START_TIMER
3458 3386
3459 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning 3387 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3460 IDWTELEM * prev; 3388 IDWTELEM * prev;
3461 3389
3462 if (start_y != 0) 3390 if (start_y != 0)
3478 }else{ 3406 }else{
3479 if(y) line[x] += prev[x]; 3407 if(y) line[x] += prev[x];
3480 } 3408 }
3481 } 3409 }
3482 } 3410 }
3483
3484 // STOP_TIMER("correlate")
3485 } 3411 }
3486 3412
3487 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ 3413 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3488 const int w= b->width; 3414 const int w= b->width;
3489 const int h= b->height; 3415 const int h= b->height;
4437 for(x=0; x<w; x++){ 4363 for(x=0; x<w; x++){
4438 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; 4364 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4439 } 4365 }
4440 } 4366 }
4441 } 4367 }
4442 {START_TIMER
4443 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); 4368 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4444 STOP_TIMER("pred-conv")}
4445 }else{ 4369 }else{
4446 //ME/MC only 4370 //ME/MC only
4447 if(pict->pict_type == I_TYPE){ 4371 if(pict->pict_type == I_TYPE){
4448 for(y=0; y<h; y++){ 4372 for(y=0; y<h; y++){
4449 for(x=0; x<w; x++){ 4373 for(x=0; x<w; x++){
4607 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; 4531 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4608 } 4532 }
4609 } 4533 }
4610 } 4534 }
4611 4535
4612 { START_TIMER 4536 {
4613 for(level=0; level<s->spatial_decomposition_count; level++){ 4537 for(level=0; level<s->spatial_decomposition_count; level++){
4614 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 4538 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4615 SubBand *b= &p->band[level][orientation]; 4539 SubBand *b= &p->band[level][orientation];
4616 unpack_coeffs(s, b, b->parent, orientation); 4540 unpack_coeffs(s, b, b->parent, orientation);
4617 } 4541 }
4618 } 4542 }
4619 STOP_TIMER("unpack coeffs"); 4543 }
4620 } 4544
4621 4545 {
4622 {START_TIMER
4623 const int mb_h= s->b_height << s->block_max_depth; 4546 const int mb_h= s->b_height << s->block_max_depth;
4624 const int block_size = MB_SIZE >> s->block_max_depth; 4547 const int block_size = MB_SIZE >> s->block_max_depth;
4625 const int block_w = plane_index ? block_size/2 : block_size; 4548 const int block_w = plane_index ? block_size/2 : block_size;
4626 int mb_y; 4549 int mb_y;
4627 dwt_compose_t cs[MAX_DECOMPOSITIONS]; 4550 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4637 if (!(s->keyframe || s->avctx->debug&512)){ 4560 if (!(s->keyframe || s->avctx->debug&512)){
4638 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); 4561 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4639 slice_h -= (block_w >> 1); 4562 slice_h -= (block_w >> 1);
4640 } 4563 }
4641 4564
4642 {
4643 START_TIMER
4644 for(level=0; level<s->spatial_decomposition_count; level++){ 4565 for(level=0; level<s->spatial_decomposition_count; level++){
4645 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 4566 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4646 SubBand *b= &p->band[level][orientation]; 4567 SubBand *b= &p->band[level][orientation];
4647 int start_y; 4568 int start_y;
4648 int end_y; 4569 int end_y;
4670 else 4591 else
4671 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); 4592 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4672 } 4593 }
4673 } 4594 }
4674 } 4595 }
4675 STOP_TIMER("decode_subband_slice"); 4596
4676 }
4677
4678 { START_TIMER
4679 for(; yd<slice_h; yd+=4){ 4597 for(; yd<slice_h; yd+=4){
4680 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); 4598 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4681 } 4599 }
4682 STOP_TIMER("idwt slice");}
4683
4684 4600
4685 if(s->qlog == LOSSLESS_QLOG){ 4601 if(s->qlog == LOSSLESS_QLOG){
4686 for(; yq<slice_h && yq<h; yq++){ 4602 for(; yq<slice_h && yq<h; yq++){
4687 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); 4603 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4688 for(x=0; x<w; x++){ 4604 for(x=0; x<w; x++){
4698 while(y < end_y) 4614 while(y < end_y)
4699 slice_buffer_release(&s->sb, y++); 4615 slice_buffer_release(&s->sb, y++);
4700 } 4616 }
4701 4617
4702 slice_buffer_flush(&s->sb); 4618 slice_buffer_flush(&s->sb);
4703 4619 }
4704 STOP_TIMER("idwt + predict_slices")}
4705 } 4620 }
4706 4621
4707 emms_c(); 4622 emms_c();
4708 4623
4709 if(s->last_picture[s->max_ref_frames-1].data[0]){ 4624 if(s->last_picture[s->max_ref_frames-1].data[0]){
4804 memset(s.header_state, 0, sizeof(s.header_state)); 4719 memset(s.header_state, 0, sizeof(s.header_state));
4805 ff_init_range_encoder(&s.c, buffer[0], 256*256); 4720 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4806 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); 4721 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4807 4722
4808 for(i=-256; i<256; i++){ 4723 for(i=-256; i<256; i++){
4809 START_TIMER
4810 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); 4724 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4811 STOP_TIMER("put_symbol")
4812 } 4725 }
4813 ff_rac_terminate(&s.c); 4726 ff_rac_terminate(&s.c);
4814 4727
4815 memset(s.header_state, 0, sizeof(s.header_state)); 4728 memset(s.header_state, 0, sizeof(s.header_state));
4816 ff_init_range_decoder(&s.c, buffer[0], 256*256); 4729 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4817 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); 4730 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4818 4731
4819 for(i=-256; i<256; i++){ 4732 for(i=-256; i<256; i++){
4820 int j; 4733 int j;
4821 START_TIMER
4822 j= get_symbol(&s.c, s.header_state, 1); 4734 j= get_symbol(&s.c, s.header_state, 1);
4823 STOP_TIMER("get_symbol")
4824 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); 4735 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4825 } 4736 }
4826 #endif 4737 #endif
4827 { 4738 {
4828 int level, orientation, x, y; 4739 int level, orientation, x, y;