Mercurial > libavcodec.hg
comparison snow.c @ 3662:fc714e9a5419 libavcodec
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
author | lorenm |
---|---|
date | Fri, 01 Sep 2006 22:02:38 +0000 |
parents | b4425339894b |
children | acf9ca729bd2 |
comparison
equal
deleted
inserted
replaced
3661:b4425339894b | 3662:fc714e9a5419 |
---|---|
2549 } | 2549 } |
2550 } | 2550 } |
2551 } | 2551 } |
2552 | 2552 |
2553 //FIXME name cleanup (b_w, block_w, b_width stuff) | 2553 //FIXME name cleanup (b_w, block_w, b_width stuff) |
2554 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ | 2554 static always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ |
2555 DWTELEM * dst = NULL; | |
2556 const int b_width = s->b_width << s->block_max_depth; | 2555 const int b_width = s->b_width << s->block_max_depth; |
2557 const int b_height= s->b_height << s->block_max_depth; | 2556 const int b_height= s->b_height << s->block_max_depth; |
2558 const int b_stride= b_width; | 2557 const int b_stride= b_width; |
2559 BlockNode *lt= &s->block[b_x + b_y*b_stride]; | 2558 BlockNode *lt= &s->block[b_x + b_y*b_stride]; |
2560 BlockNode *rt= lt+1; | 2559 BlockNode *rt= lt+1; |
2582 } | 2581 } |
2583 | 2582 |
2584 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 | 2583 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 |
2585 obmc -= src_x; | 2584 obmc -= src_x; |
2586 b_w += src_x; | 2585 b_w += src_x; |
2586 if(!sliced && !offset_dst) | |
2587 dst -= src_x; | |
2587 src_x=0; | 2588 src_x=0; |
2588 }else if(src_x + b_w > w){ | 2589 }else if(src_x + b_w > w){ |
2589 b_w = w - src_x; | 2590 b_w = w - src_x; |
2590 } | 2591 } |
2591 if(src_y<0){ | 2592 if(src_y<0){ |
2592 obmc -= src_y*obmc_stride; | 2593 obmc -= src_y*obmc_stride; |
2593 b_h += src_y; | 2594 b_h += src_y; |
2595 if(!sliced && !offset_dst) | |
2596 dst -= src_y*dst_stride; | |
2594 src_y=0; | 2597 src_y=0; |
2595 }else if(src_y + b_h> h){ | 2598 }else if(src_y + b_h> h){ |
2596 b_h = h - src_y; | 2599 b_h = h - src_y; |
2597 } | 2600 } |
2598 | 2601 |
2599 if(b_w<=0 || b_h<=0) return; | 2602 if(b_w<=0 || b_h<=0) return; |
2600 | 2603 |
2601 assert(src_stride > 2*MB_SIZE + 5); | 2604 assert(src_stride > 2*MB_SIZE + 5); |
2602 // old_dst += src_x + src_y*dst_stride; | 2605 if(!sliced && offset_dst) |
2606 dst += src_x + src_y*dst_stride; | |
2603 dst8+= src_x + src_y*src_stride; | 2607 dst8+= src_x + src_y*src_stride; |
2604 // src += src_x + src_y*src_stride; | 2608 // src += src_x + src_y*src_stride; |
2605 | 2609 |
2606 ptmp= tmp + 3*tmp_step; | 2610 ptmp= tmp + 3*tmp_step; |
2607 block[0]= ptmp; | 2611 block[0]= ptmp; |
2668 if(add) dst[x + y*dst_stride] += v; | 2672 if(add) dst[x + y*dst_stride] += v; |
2669 else dst[x + y*dst_stride] -= v; | 2673 else dst[x + y*dst_stride] -= v; |
2670 } | 2674 } |
2671 } | 2675 } |
2672 #else | 2676 #else |
2673 { | 2677 if(sliced){ |
2674 | 2678 START_TIMER |
2675 START_TIMER | 2679 |
2676 | 2680 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
2677 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | 2681 STOP_TIMER("inner_add_yblock") |
2678 STOP_TIMER("Inner add y block") | 2682 }else |
2679 } | |
2680 #endif | |
2681 } | |
2682 | |
2683 //FIXME name cleanup (b_w, block_w, b_width stuff) | |
2684 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ | |
2685 const int b_width = s->b_width << s->block_max_depth; | |
2686 const int b_height= s->b_height << s->block_max_depth; | |
2687 const int b_stride= b_width; | |
2688 BlockNode *lt= &s->block[b_x + b_y*b_stride]; | |
2689 BlockNode *rt= lt+1; | |
2690 BlockNode *lb= lt+b_stride; | |
2691 BlockNode *rb= lb+1; | |
2692 uint8_t *block[4]; | |
2693 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; | |
2694 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align | |
2695 uint8_t *ptmp; | |
2696 int x,y; | |
2697 | |
2698 if(b_x<0){ | |
2699 lt= rt; | |
2700 lb= rb; | |
2701 }else if(b_x + 1 >= b_width){ | |
2702 rt= lt; | |
2703 rb= lb; | |
2704 } | |
2705 if(b_y<0){ | |
2706 lt= lb; | |
2707 rt= rb; | |
2708 }else if(b_y + 1 >= b_height){ | |
2709 lb= lt; | |
2710 rb= rt; | |
2711 } | |
2712 | |
2713 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 | |
2714 obmc -= src_x; | |
2715 b_w += src_x; | |
2716 if(!offset_dst) | |
2717 dst -= src_x; | |
2718 src_x=0; | |
2719 }else if(src_x + b_w > w){ | |
2720 b_w = w - src_x; | |
2721 } | |
2722 if(src_y<0){ | |
2723 obmc -= src_y*obmc_stride; | |
2724 b_h += src_y; | |
2725 if(!offset_dst) | |
2726 dst -= src_y*dst_stride; | |
2727 src_y=0; | |
2728 }else if(src_y + b_h> h){ | |
2729 b_h = h - src_y; | |
2730 } | |
2731 | |
2732 if(b_w<=0 || b_h<=0) return; | |
2733 | |
2734 assert(src_stride > 2*MB_SIZE + 5); | |
2735 if(offset_dst) | |
2736 dst += src_x + src_y*dst_stride; | |
2737 dst8+= src_x + src_y*src_stride; | |
2738 // src += src_x + src_y*src_stride; | |
2739 | |
2740 ptmp= tmp + 3*tmp_step; | |
2741 block[0]= ptmp; | |
2742 ptmp+=tmp_step; | |
2743 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); | |
2744 | |
2745 if(same_block(lt, rt)){ | |
2746 block[1]= block[0]; | |
2747 }else{ | |
2748 block[1]= ptmp; | |
2749 ptmp+=tmp_step; | |
2750 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); | |
2751 } | |
2752 | |
2753 if(same_block(lt, lb)){ | |
2754 block[2]= block[0]; | |
2755 }else if(same_block(rt, lb)){ | |
2756 block[2]= block[1]; | |
2757 }else{ | |
2758 block[2]= ptmp; | |
2759 ptmp+=tmp_step; | |
2760 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); | |
2761 } | |
2762 | |
2763 if(same_block(lt, rb) ){ | |
2764 block[3]= block[0]; | |
2765 }else if(same_block(rt, rb)){ | |
2766 block[3]= block[1]; | |
2767 }else if(same_block(lb, rb)){ | |
2768 block[3]= block[2]; | |
2769 }else{ | |
2770 block[3]= ptmp; | |
2771 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); | |
2772 } | |
2773 #if 0 | |
2774 for(y=0; y<b_h; y++){ | |
2775 for(x=0; x<b_w; x++){ | |
2776 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2777 if(add) dst[x + y*dst_stride] += v; | |
2778 else dst[x + y*dst_stride] -= v; | |
2779 } | |
2780 } | |
2781 for(y=0; y<b_h; y++){ | |
2782 uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2783 for(x=0; x<b_w; x++){ | |
2784 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2785 if(add) dst[x + y*dst_stride] += v; | |
2786 else dst[x + y*dst_stride] -= v; | |
2787 } | |
2788 } | |
2789 for(y=0; y<b_h; y++){ | |
2790 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2791 for(x=0; x<b_w; x++){ | |
2792 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2793 if(add) dst[x + y*dst_stride] += v; | |
2794 else dst[x + y*dst_stride] -= v; | |
2795 } | |
2796 } | |
2797 for(y=0; y<b_h; y++){ | |
2798 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2799 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2800 for(x=0; x<b_w; x++){ | |
2801 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2802 if(add) dst[x + y*dst_stride] += v; | |
2803 else dst[x + y*dst_stride] -= v; | |
2804 } | |
2805 } | |
2806 #else | |
2807 for(y=0; y<b_h; y++){ | 2683 for(y=0; y<b_h; y++){ |
2808 //FIXME ugly misuse of obmc_stride | 2684 //FIXME ugly misuse of obmc_stride |
2809 uint8_t *obmc1= obmc + y*obmc_stride; | 2685 uint8_t *obmc1= obmc + y*obmc_stride; |
2810 uint8_t *obmc2= obmc1+ (obmc_stride>>1); | 2686 uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
2811 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | 2687 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
2884 } | 2760 } |
2885 | 2761 |
2886 for(mb_x=0; mb_x<=mb_w; mb_x++){ | 2762 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2887 START_TIMER | 2763 START_TIMER |
2888 | 2764 |
2889 add_yblock_buffered(s, sb, old_buffer, dst8, obmc, | 2765 add_yblock(s, 1, sb, old_buffer, dst8, obmc, |
2890 block_w*mb_x - block_w/2, | 2766 block_w*mb_x - block_w/2, |
2891 block_w*mb_y - block_w/2, | 2767 block_w*mb_y - block_w/2, |
2892 block_w, block_w, | 2768 block_w, block_w, |
2893 w, h, | 2769 w, h, |
2894 w, ref_stride, obmc_stride, | 2770 w, ref_stride, obmc_stride, |
2895 mb_x - 1, mb_y - 1, | 2771 mb_x - 1, mb_y - 1, |
2896 add, plane_index); | 2772 add, 0, plane_index); |
2897 | 2773 |
2898 STOP_TIMER("add_yblock") | 2774 STOP_TIMER("add_yblock") |
2899 } | 2775 } |
2900 | 2776 |
2901 STOP_TIMER("predict_slice") | 2777 STOP_TIMER("predict_slice") |
2941 } | 2817 } |
2942 | 2818 |
2943 for(mb_x=0; mb_x<=mb_w; mb_x++){ | 2819 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2944 START_TIMER | 2820 START_TIMER |
2945 | 2821 |
2946 add_yblock(s, buf, dst8, obmc, | 2822 add_yblock(s, 0, NULL, buf, dst8, obmc, |
2947 block_w*mb_x - block_w/2, | 2823 block_w*mb_x - block_w/2, |
2948 block_w*mb_y - block_w/2, | 2824 block_w*mb_y - block_w/2, |
2949 block_w, block_w, | 2825 block_w, block_w, |
2950 w, h, | 2826 w, h, |
2951 w, ref_stride, obmc_stride, | 2827 w, ref_stride, obmc_stride, |
2992 int mb_x2= mb_x + (i &1) - 1; | 2868 int mb_x2= mb_x + (i &1) - 1; |
2993 int mb_y2= mb_y + (i>>1) - 1; | 2869 int mb_y2= mb_y + (i>>1) - 1; |
2994 int x= block_w*mb_x2 + block_w/2; | 2870 int x= block_w*mb_x2 + block_w/2; |
2995 int y= block_w*mb_y2 + block_w/2; | 2871 int y= block_w*mb_y2 + block_w/2; |
2996 | 2872 |
2997 add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, | 2873 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, |
2998 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); | 2874 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); |
2999 | 2875 |
3000 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ | 2876 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ |
3001 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ | 2877 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ |
3002 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; | 2878 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; |
3176 int mb_x2= mb_x + (i%3) - 1; | 3052 int mb_x2= mb_x + (i%3) - 1; |
3177 int mb_y2= mb_y + (i/3) - 1; | 3053 int mb_y2= mb_y + (i/3) - 1; |
3178 int x= block_w*mb_x2 + block_w/2; | 3054 int x= block_w*mb_x2 + block_w/2; |
3179 int y= block_w*mb_y2 + block_w/2; | 3055 int y= block_w*mb_y2 + block_w/2; |
3180 | 3056 |
3181 add_yblock(s, zero_dst, dst, obmc, | 3057 add_yblock(s, 0, NULL, zero_dst, dst, obmc, |
3182 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); | 3058 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); |
3183 | 3059 |
3184 //FIXME find a cleaner/simpler way to skip the outside stuff | 3060 //FIXME find a cleaner/simpler way to skip the outside stuff |
3185 for(y2= y; y2<0; y2++) | 3061 for(y2= y; y2<0; y2++) |
3186 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | 3062 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); |