Mercurial > libavcodec.hg
comparison snow.c @ 6405:7f615bfc21a2 libavcodec
Remove START_TIMER and STOP_TIMER macros.
author | diego |
---|---|
date | Tue, 26 Feb 2008 09:35:21 +0000 |
parents | 3164768539be |
children | 1f70d64c00bc |
comparison legend (row types): equal, deleted, inserted, replaced
6404:a0a1db4738dd | 6405:7f615bfc21a2 |
---|---|
921 | 921 |
922 for(y=-2; y<height; y+=2){ | 922 for(y=-2; y<height; y+=2){ |
923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | 923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; |
924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | 924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; |
925 | 925 |
926 {START_TIMER | |
927 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); | 926 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); |
928 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); | 927 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); |
929 STOP_TIMER("horizontal_decompose53i")} | 928 |
930 | |
931 {START_TIMER | |
932 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); | 929 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); |
933 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); | 930 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); |
934 STOP_TIMER("vertical_decompose53i*")} | |
935 | 931 |
936 b0=b2; | 932 b0=b2; |
937 b1=b3; | 933 b1=b3; |
938 } | 934 } |
939 } | 935 } |
994 | 990 |
995 for(y=-4; y<height; y+=2){ | 991 for(y=-4; y<height; y+=2){ |
996 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | 992 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; |
997 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | 993 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; |
998 | 994 |
999 {START_TIMER | |
1000 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); | 995 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); |
1001 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); | 996 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); |
1002 if(width>400){ | 997 |
1003 STOP_TIMER("horizontal_decompose97i") | |
1004 }} | |
1005 | |
1006 {START_TIMER | |
1007 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); | 998 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); |
1008 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); | 999 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); |
1009 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); | 1000 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); |
1010 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); | 1001 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); |
1011 | |
1012 if(width>400){ | |
1013 STOP_TIMER("vertical_decompose97i") | |
1014 }} | |
1015 | 1002 |
1016 b0=b2; | 1003 b0=b2; |
1017 b1=b3; | 1004 b1=b3; |
1018 b2=b4; | 1005 b2=b4; |
1019 b3=b5; | 1006 b3=b5; |
1112 IDWTELEM *b0= cs->b0; | 1099 IDWTELEM *b0= cs->b0; |
1113 IDWTELEM *b1= cs->b1; | 1100 IDWTELEM *b1= cs->b1; |
1114 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); | 1101 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); |
1115 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); | 1102 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); |
1116 | 1103 |
1117 {START_TIMER | |
1118 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); | 1104 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1119 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | 1105 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); |
1120 STOP_TIMER("vertical_compose53i*")} | 1106 |
1121 | |
1122 {START_TIMER | |
1123 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); | 1107 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1124 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | 1108 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); |
1125 STOP_TIMER("horizontal_compose53i")} | |
1126 | 1109 |
1127 cs->b0 = b2; | 1110 cs->b0 = b2; |
1128 cs->b1 = b3; | 1111 cs->b1 = b3; |
1129 cs->y += 2; | 1112 cs->y += 2; |
1130 } | 1113 } |
1134 IDWTELEM *b0= cs->b0; | 1117 IDWTELEM *b0= cs->b0; |
1135 IDWTELEM *b1= cs->b1; | 1118 IDWTELEM *b1= cs->b1; |
1136 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | 1119 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; |
1137 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | 1120 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; |
1138 | 1121 |
1139 {START_TIMER | |
1140 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); | 1122 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1141 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | 1123 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); |
1142 STOP_TIMER("vertical_compose53i*")} | 1124 |
1143 | |
1144 {START_TIMER | |
1145 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); | 1125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1146 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | 1126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); |
1147 STOP_TIMER("horizontal_compose53i")} | |
1148 | 1127 |
1149 cs->b0 = b2; | 1128 cs->b0 = b2; |
1150 cs->b1 = b3; | 1129 cs->b1 = b3; |
1151 cs->y += 2; | 1130 cs->y += 2; |
1152 } | 1131 } |
1244 IDWTELEM *b2= cs->b2; | 1223 IDWTELEM *b2= cs->b2; |
1245 IDWTELEM *b3= cs->b3; | 1224 IDWTELEM *b3= cs->b3; |
1246 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); | 1225 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); |
1247 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); | 1226 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); |
1248 | 1227 |
1249 {START_TIMER | |
1250 if(y>0 && y+4<height){ | 1228 if(y>0 && y+4<height){ |
1251 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); | 1229 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); |
1252 }else{ | 1230 }else{ |
1253 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); | 1231 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1254 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | 1232 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); |
1255 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | 1233 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); |
1256 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | 1234 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); |
1257 } | 1235 } |
1258 if(width>400){ | 1236 |
1259 STOP_TIMER("vertical_compose97i")}} | |
1260 | |
1261 {START_TIMER | |
1262 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); | 1237 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); |
1263 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); | 1238 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); |
1264 if(width>400 && y+0<(unsigned)height){ | |
1265 STOP_TIMER("horizontal_compose97i")}} | |
1266 | 1239 |
1267 cs->b0=b2; | 1240 cs->b0=b2; |
1268 cs->b1=b3; | 1241 cs->b1=b3; |
1269 cs->b2=b4; | 1242 cs->b2=b4; |
1270 cs->b3=b5; | 1243 cs->b3=b5; |
1278 IDWTELEM *b2= cs->b2; | 1251 IDWTELEM *b2= cs->b2; |
1279 IDWTELEM *b3= cs->b3; | 1252 IDWTELEM *b3= cs->b3; |
1280 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | 1253 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; |
1281 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | 1254 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; |
1282 | 1255 |
1283 {START_TIMER | |
1284 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); | 1256 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1285 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | 1257 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); |
1286 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | 1258 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); |
1287 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | 1259 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); |
1288 if(width>400){ | 1260 |
1289 STOP_TIMER("vertical_compose97i")}} | |
1290 | |
1291 {START_TIMER | |
1292 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); | 1261 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); |
1293 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); | 1262 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); |
1294 if(width>400 && b0 <= b2){ | |
1295 STOP_TIMER("horizontal_compose97i")}} | |
1296 | 1263 |
1297 cs->b0=b2; | 1264 cs->b0=b2; |
1298 cs->b1=b3; | 1265 cs->b1=b3; |
1299 cs->b2=b4; | 1266 cs->b2=b4; |
1300 cs->b3=b5; | 1267 cs->b3=b5; |
1603 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); | 1570 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
1604 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | 1571 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
1605 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; | 1572 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
1606 int new_index = 0; | 1573 int new_index = 0; |
1607 | 1574 |
1608 START_TIMER | |
1609 | |
1610 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ | 1575 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ |
1611 qadd= 0; | 1576 qadd= 0; |
1612 qmul= 1<<QEXPSHIFT; | 1577 qmul= 1<<QEXPSHIFT; |
1613 } | 1578 } |
1614 | 1579 |
1631 line[x] = (t^u) - u; | 1596 line[x] = (t^u) - u; |
1632 | 1597 |
1633 v = b->x_coeff[new_index].coeff; | 1598 v = b->x_coeff[new_index].coeff; |
1634 x = b->x_coeff[new_index++].x; | 1599 x = b->x_coeff[new_index++].x; |
1635 } | 1600 } |
1636 } | |
1637 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){ | |
1638 STOP_TIMER("decode_subband") | |
1639 } | 1601 } |
1640 | 1602 |
1641 /* Save our variables for the next slice. */ | 1603 /* Save our variables for the next slice. */ |
1642 save_state[0] = new_index; | 1604 save_state[0] = new_index; |
1643 | 1605 |
2183 int16_t tmpIt [64*(32+HTAPS_MAX)]; | 2145 int16_t tmpIt [64*(32+HTAPS_MAX)]; |
2184 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; | 2146 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; |
2185 int16_t *tmpI= tmpIt; | 2147 int16_t *tmpI= tmpIt; |
2186 uint8_t *tmp2= tmp2t[0]; | 2148 uint8_t *tmp2= tmp2t[0]; |
2187 const uint8_t *hpel[11]; | 2149 const uint8_t *hpel[11]; |
2188 START_TIMER | |
2189 assert(dx<16 && dy<16); | 2150 assert(dx<16 && dy<16); |
2190 r= brane[dx + 16*dy]&15; | 2151 r= brane[dx + 16*dy]&15; |
2191 l= brane[dx + 16*dy]>>4; | 2152 l= brane[dx + 16*dy]>>4; |
2192 | 2153 |
2193 b= needs[l] | needs[r]; | 2154 b= needs[l] | needs[r]; |
2322 src1+=stride; | 2283 src1+=stride; |
2323 src2+=stride; | 2284 src2+=stride; |
2324 dst +=stride; | 2285 dst +=stride; |
2325 } | 2286 } |
2326 } | 2287 } |
2327 STOP_TIMER("mc_block") | |
2328 } | 2288 } |
2329 | 2289 |
2330 #define mca(dx,dy,b_w)\ | 2290 #define mca(dx,dy,b_w)\ |
2331 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ | 2291 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ |
2332 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ | 2292 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ |
2579 else dst[x + y*dst_stride] -= v; | 2539 else dst[x + y*dst_stride] -= v; |
2580 } | 2540 } |
2581 } | 2541 } |
2582 #else | 2542 #else |
2583 if(sliced){ | 2543 if(sliced){ |
2584 START_TIMER | |
2585 | |
2586 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | 2544 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
2587 STOP_TIMER("inner_add_yblock") | |
2588 }else | 2545 }else |
2589 for(y=0; y<b_h; y++){ | 2546 for(y=0; y<b_h; y++){ |
2590 //FIXME ugly misuse of obmc_stride | 2547 //FIXME ugly misuse of obmc_stride |
2591 const uint8_t *obmc1= obmc + y*obmc_stride; | 2548 const uint8_t *obmc1= obmc + y*obmc_stride; |
2592 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); | 2549 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
2626 int obmc_stride= plane_index ? block_size : 2*block_size; | 2583 int obmc_stride= plane_index ? block_size : 2*block_size; |
2627 int ref_stride= s->current_picture.linesize[plane_index]; | 2584 int ref_stride= s->current_picture.linesize[plane_index]; |
2628 uint8_t *dst8= s->current_picture.data[plane_index]; | 2585 uint8_t *dst8= s->current_picture.data[plane_index]; |
2629 int w= p->width; | 2586 int w= p->width; |
2630 int h= p->height; | 2587 int h= p->height; |
2631 START_TIMER | |
2632 | 2588 |
2633 if(s->keyframe || (s->avctx->debug&512)){ | 2589 if(s->keyframe || (s->avctx->debug&512)){ |
2634 if(mb_y==mb_h) | 2590 if(mb_y==mb_h) |
2635 return; | 2591 return; |
2636 | 2592 |
2663 | 2619 |
2664 return; | 2620 return; |
2665 } | 2621 } |
2666 | 2622 |
2667 for(mb_x=0; mb_x<=mb_w; mb_x++){ | 2623 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2668 START_TIMER | |
2669 | |
2670 add_yblock(s, 1, sb, old_buffer, dst8, obmc, | 2624 add_yblock(s, 1, sb, old_buffer, dst8, obmc, |
2671 block_w*mb_x - block_w/2, | 2625 block_w*mb_x - block_w/2, |
2672 block_w*mb_y - block_w/2, | 2626 block_w*mb_y - block_w/2, |
2673 block_w, block_w, | 2627 block_w, block_w, |
2674 w, h, | 2628 w, h, |
2675 w, ref_stride, obmc_stride, | 2629 w, ref_stride, obmc_stride, |
2676 mb_x - 1, mb_y - 1, | 2630 mb_x - 1, mb_y - 1, |
2677 add, 0, plane_index); | 2631 add, 0, plane_index); |
2678 | 2632 } |
2679 STOP_TIMER("add_yblock") | |
2680 } | |
2681 | |
2682 STOP_TIMER("predict_slice") | |
2683 } | 2633 } |
2684 | 2634 |
2685 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ | 2635 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ |
2686 Plane *p= &s->plane[plane_index]; | 2636 Plane *p= &s->plane[plane_index]; |
2687 const int mb_w= s->b_width << s->block_max_depth; | 2637 const int mb_w= s->b_width << s->block_max_depth; |
2693 const int obmc_stride= plane_index ? block_size : 2*block_size; | 2643 const int obmc_stride= plane_index ? block_size : 2*block_size; |
2694 int ref_stride= s->current_picture.linesize[plane_index]; | 2644 int ref_stride= s->current_picture.linesize[plane_index]; |
2695 uint8_t *dst8= s->current_picture.data[plane_index]; | 2645 uint8_t *dst8= s->current_picture.data[plane_index]; |
2696 int w= p->width; | 2646 int w= p->width; |
2697 int h= p->height; | 2647 int h= p->height; |
2698 START_TIMER | |
2699 | 2648 |
2700 if(s->keyframe || (s->avctx->debug&512)){ | 2649 if(s->keyframe || (s->avctx->debug&512)){ |
2701 if(mb_y==mb_h) | 2650 if(mb_y==mb_h) |
2702 return; | 2651 return; |
2703 | 2652 |
2720 | 2669 |
2721 return; | 2670 return; |
2722 } | 2671 } |
2723 | 2672 |
2724 for(mb_x=0; mb_x<=mb_w; mb_x++){ | 2673 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2725 START_TIMER | |
2726 | |
2727 add_yblock(s, 0, NULL, buf, dst8, obmc, | 2674 add_yblock(s, 0, NULL, buf, dst8, obmc, |
2728 block_w*mb_x - block_w/2, | 2675 block_w*mb_x - block_w/2, |
2729 block_w*mb_y - block_w/2, | 2676 block_w*mb_y - block_w/2, |
2730 block_w, block_w, | 2677 block_w, block_w, |
2731 w, h, | 2678 w, h, |
2732 w, ref_stride, obmc_stride, | 2679 w, ref_stride, obmc_stride, |
2733 mb_x - 1, mb_y - 1, | 2680 mb_x - 1, mb_y - 1, |
2734 add, 1, plane_index); | 2681 add, 1, plane_index); |
2735 | 2682 } |
2736 STOP_TIMER("add_yblock") | |
2737 } | |
2738 | |
2739 STOP_TIMER("predict_slice") | |
2740 } | 2683 } |
2741 | 2684 |
2742 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ | 2685 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ |
2743 const int mb_h= s->b_height << s->block_max_depth; | 2686 const int mb_h= s->b_height << s->block_max_depth; |
2744 int mb_y; | 2687 int mb_y; |
3306 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); | 3249 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); |
3307 } | 3250 } |
3308 } | 3251 } |
3309 | 3252 |
3310 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ | 3253 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ |
3311 const int level= b->level; | |
3312 const int w= b->width; | 3254 const int w= b->width; |
3313 const int h= b->height; | 3255 const int h= b->height; |
3314 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); | 3256 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
3315 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); | 3257 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); |
3316 int x,y, thres1, thres2; | 3258 int x,y, thres1, thres2; |
3317 // START_TIMER | |
3318 | 3259 |
3319 if(s->qlog == LOSSLESS_QLOG){ | 3260 if(s->qlog == LOSSLESS_QLOG){ |
3320 for(y=0; y<h; y++) | 3261 for(y=0; y<h; y++) |
3321 for(x=0; x<w; x++) | 3262 for(x=0; x<w; x++) |
3322 dst[x + y*stride]= src[x + y*stride]; | 3263 dst[x + y*stride]= src[x + y*stride]; |
3366 }else | 3307 }else |
3367 dst[x + y*stride]= 0; | 3308 dst[x + y*stride]= 0; |
3368 } | 3309 } |
3369 } | 3310 } |
3370 } | 3311 } |
3371 if(level+1 == s->spatial_decomposition_count){ | |
3372 // STOP_TIMER("quantize") | |
3373 } | |
3374 } | 3312 } |
3375 | 3313 |
3376 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ | 3314 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ |
3377 const int w= b->width; | 3315 const int w= b->width; |
3378 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); | 3316 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
3379 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | 3317 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3380 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; | 3318 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
3381 int x,y; | 3319 int x,y; |
3382 START_TIMER | |
3383 | 3320 |
3384 if(s->qlog == LOSSLESS_QLOG) return; | 3321 if(s->qlog == LOSSLESS_QLOG) return; |
3385 | 3322 |
3386 for(y=start_y; y<end_y; y++){ | 3323 for(y=start_y; y<end_y; y++){ |
3387 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); | 3324 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
3393 }else if(i>0){ | 3330 }else if(i>0){ |
3394 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); | 3331 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); |
3395 } | 3332 } |
3396 } | 3333 } |
3397 } | 3334 } |
3398 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ | |
3399 STOP_TIMER("dquant") | |
3400 } | |
3401 } | 3335 } |
3402 | 3336 |
3403 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ | 3337 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ |
3404 const int w= b->width; | 3338 const int w= b->width; |
3405 const int h= b->height; | 3339 const int h= b->height; |
3406 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); | 3340 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
3407 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | 3341 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3408 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; | 3342 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
3409 int x,y; | 3343 int x,y; |
3410 START_TIMER | |
3411 | 3344 |
3412 if(s->qlog == LOSSLESS_QLOG) return; | 3345 if(s->qlog == LOSSLESS_QLOG) return; |
3413 | 3346 |
3414 for(y=0; y<h; y++){ | 3347 for(y=0; y<h; y++){ |
3415 for(x=0; x<w; x++){ | 3348 for(x=0; x<w; x++){ |
3418 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | 3351 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias |
3419 }else if(i>0){ | 3352 }else if(i>0){ |
3420 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); | 3353 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); |
3421 } | 3354 } |
3422 } | 3355 } |
3423 } | |
3424 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ | |
3425 STOP_TIMER("dquant") | |
3426 } | 3356 } |
3427 } | 3357 } |
3428 | 3358 |
3429 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ | 3359 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
3430 const int w= b->width; | 3360 const int w= b->width; |
3451 } | 3381 } |
3452 | 3382 |
3453 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ | 3383 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ |
3454 const int w= b->width; | 3384 const int w= b->width; |
3455 int x,y; | 3385 int x,y; |
3456 | |
3457 // START_TIMER | |
3458 | 3386 |
3459 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning | 3387 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning |
3460 IDWTELEM * prev; | 3388 IDWTELEM * prev; |
3461 | 3389 |
3462 if (start_y != 0) | 3390 if (start_y != 0) |
3478 }else{ | 3406 }else{ |
3479 if(y) line[x] += prev[x]; | 3407 if(y) line[x] += prev[x]; |
3480 } | 3408 } |
3481 } | 3409 } |
3482 } | 3410 } |
3483 | |
3484 // STOP_TIMER("correlate") | |
3485 } | 3411 } |
3486 | 3412 |
3487 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ | 3413 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
3488 const int w= b->width; | 3414 const int w= b->width; |
3489 const int h= b->height; | 3415 const int h= b->height; |
4437 for(x=0; x<w; x++){ | 4363 for(x=0; x<w; x++){ |
4438 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; | 4364 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; |
4439 } | 4365 } |
4440 } | 4366 } |
4441 } | 4367 } |
4442 {START_TIMER | |
4443 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | 4368 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
4444 STOP_TIMER("pred-conv")} | |
4445 }else{ | 4369 }else{ |
4446 //ME/MC only | 4370 //ME/MC only |
4447 if(pict->pict_type == I_TYPE){ | 4371 if(pict->pict_type == I_TYPE){ |
4448 for(y=0; y<h; y++){ | 4372 for(y=0; y<h; y++){ |
4449 for(x=0; x<w; x++){ | 4373 for(x=0; x<w; x++){ |
4607 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; | 4531 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; |
4608 } | 4532 } |
4609 } | 4533 } |
4610 } | 4534 } |
4611 | 4535 |
4612 { START_TIMER | 4536 { |
4613 for(level=0; level<s->spatial_decomposition_count; level++){ | 4537 for(level=0; level<s->spatial_decomposition_count; level++){ |
4614 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | 4538 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
4615 SubBand *b= &p->band[level][orientation]; | 4539 SubBand *b= &p->band[level][orientation]; |
4616 unpack_coeffs(s, b, b->parent, orientation); | 4540 unpack_coeffs(s, b, b->parent, orientation); |
4617 } | 4541 } |
4618 } | 4542 } |
4619 STOP_TIMER("unpack coeffs"); | 4543 } |
4620 } | 4544 |
4621 | 4545 { |
4622 {START_TIMER | |
4623 const int mb_h= s->b_height << s->block_max_depth; | 4546 const int mb_h= s->b_height << s->block_max_depth; |
4624 const int block_size = MB_SIZE >> s->block_max_depth; | 4547 const int block_size = MB_SIZE >> s->block_max_depth; |
4625 const int block_w = plane_index ? block_size/2 : block_size; | 4548 const int block_w = plane_index ? block_size/2 : block_size; |
4626 int mb_y; | 4549 int mb_y; |
4627 dwt_compose_t cs[MAX_DECOMPOSITIONS]; | 4550 dwt_compose_t cs[MAX_DECOMPOSITIONS]; |
4637 if (!(s->keyframe || s->avctx->debug&512)){ | 4560 if (!(s->keyframe || s->avctx->debug&512)){ |
4638 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); | 4561 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); |
4639 slice_h -= (block_w >> 1); | 4562 slice_h -= (block_w >> 1); |
4640 } | 4563 } |
4641 | 4564 |
4642 { | |
4643 START_TIMER | |
4644 for(level=0; level<s->spatial_decomposition_count; level++){ | 4565 for(level=0; level<s->spatial_decomposition_count; level++){ |
4645 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | 4566 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
4646 SubBand *b= &p->band[level][orientation]; | 4567 SubBand *b= &p->band[level][orientation]; |
4647 int start_y; | 4568 int start_y; |
4648 int end_y; | 4569 int end_y; |
4670 else | 4591 else |
4671 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); | 4592 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); |
4672 } | 4593 } |
4673 } | 4594 } |
4674 } | 4595 } |
4675 STOP_TIMER("decode_subband_slice"); | 4596 |
4676 } | |
4677 | |
4678 { START_TIMER | |
4679 for(; yd<slice_h; yd+=4){ | 4597 for(; yd<slice_h; yd+=4){ |
4680 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | 4598 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); |
4681 } | 4599 } |
4682 STOP_TIMER("idwt slice");} | |
4683 | |
4684 | 4600 |
4685 if(s->qlog == LOSSLESS_QLOG){ | 4601 if(s->qlog == LOSSLESS_QLOG){ |
4686 for(; yq<slice_h && yq<h; yq++){ | 4602 for(; yq<slice_h && yq<h; yq++){ |
4687 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); | 4603 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); |
4688 for(x=0; x<w; x++){ | 4604 for(x=0; x<w; x++){ |
4698 while(y < end_y) | 4614 while(y < end_y) |
4699 slice_buffer_release(&s->sb, y++); | 4615 slice_buffer_release(&s->sb, y++); |
4700 } | 4616 } |
4701 | 4617 |
4702 slice_buffer_flush(&s->sb); | 4618 slice_buffer_flush(&s->sb); |
4703 | 4619 } |
4704 STOP_TIMER("idwt + predict_slices")} | |
4705 } | 4620 } |
4706 | 4621 |
4707 emms_c(); | 4622 emms_c(); |
4708 | 4623 |
4709 if(s->last_picture[s->max_ref_frames-1].data[0]){ | 4624 if(s->last_picture[s->max_ref_frames-1].data[0]){ |
4804 memset(s.header_state, 0, sizeof(s.header_state)); | 4719 memset(s.header_state, 0, sizeof(s.header_state)); |
4805 ff_init_range_encoder(&s.c, buffer[0], 256*256); | 4720 ff_init_range_encoder(&s.c, buffer[0], 256*256); |
4806 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); | 4721 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
4807 | 4722 |
4808 for(i=-256; i<256; i++){ | 4723 for(i=-256; i<256; i++){ |
4809 START_TIMER | |
4810 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); | 4724 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); |
4811 STOP_TIMER("put_symbol") | |
4812 } | 4725 } |
4813 ff_rac_terminate(&s.c); | 4726 ff_rac_terminate(&s.c); |
4814 | 4727 |
4815 memset(s.header_state, 0, sizeof(s.header_state)); | 4728 memset(s.header_state, 0, sizeof(s.header_state)); |
4816 ff_init_range_decoder(&s.c, buffer[0], 256*256); | 4729 ff_init_range_decoder(&s.c, buffer[0], 256*256); |
4817 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); | 4730 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
4818 | 4731 |
4819 for(i=-256; i<256; i++){ | 4732 for(i=-256; i<256; i++){ |
4820 int j; | 4733 int j; |
4821 START_TIMER | |
4822 j= get_symbol(&s.c, s.header_state, 1); | 4734 j= get_symbol(&s.c, s.header_state, 1); |
4823 STOP_TIMER("get_symbol") | |
4824 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); | 4735 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); |
4825 } | 4736 } |
4826 #endif | 4737 #endif |
4827 { | 4738 { |
4828 int level, orientation, x, y; | 4739 int level, orientation, x, y; |