comparison rv34.c @ 10118:4dafbd9c8918 libavcodec

Zeroing pic->motion_val in RV3/4 causes alignment problems on some 64-bit architectures, since the stride is a multiple of 4 but not of 8; therefore, split the fill_rectangle() calls so that they operate on 32-bit words instead of 64-bit ones.
author kostya
date Sun, 30 Aug 2009 06:30:53 +0000
parents d0f2f4e4436c
children e37de8b92b10
comparison
equal deleted inserted replaced
10117:d0f2f4e4436c 10118:4dafbd9c8918
34 #include "rv34vlc.h" 34 #include "rv34vlc.h"
35 #include "rv34data.h" 35 #include "rv34data.h"
36 #include "rv34.h" 36 #include "rv34.h"
37 37
38 //#define DEBUG 38 //#define DEBUG
39
40 #define ZERO8x2(dst, stride) \
41 fill_rectangle(dst, 1, 2, stride, 0, 4); \
42 fill_rectangle(((uint8_t*)(dst))+4, 1, 2, stride, 0, 4); \
39 43
40 /** translation of RV30/40 macroblock types to lavc ones */ 44 /** translation of RV30/40 macroblock types to lavc ones */
41 static const int rv34_mb_type_to_lavc[12] = { 45 static const int rv34_mb_type_to_lavc[12] = {
42 MB_TYPE_INTRA, 46 MB_TYPE_INTRA,
43 MB_TYPE_INTRA16x16 | MB_TYPE_SEPARATE_DC, 47 MB_TYPE_INTRA16x16 | MB_TYPE_SEPARATE_DC,
582 for(i = 0; i < 2; i++){ 586 for(i = 0; i < 2; i++){
583 cur_pic->motion_val[dir][mv_pos + i + j*s->b8_stride][0] = mx; 587 cur_pic->motion_val[dir][mv_pos + i + j*s->b8_stride][0] = mx;
584 cur_pic->motion_val[dir][mv_pos + i + j*s->b8_stride][1] = my; 588 cur_pic->motion_val[dir][mv_pos + i + j*s->b8_stride][1] = my;
585 } 589 }
586 } 590 }
587 if(block_type == RV34_MB_B_BACKWARD || block_type == RV34_MB_B_FORWARD) 591 if(block_type == RV34_MB_B_BACKWARD || block_type == RV34_MB_B_FORWARD){
588 fill_rectangle(cur_pic->motion_val[!dir][mv_pos], 2, 2, s->b8_stride, 0, 4); 592 ZERO8x2(cur_pic->motion_val[!dir][mv_pos], s->b8_stride);
593 }
589 } 594 }
590 595
591 /** 596 /**
592 * motion vector prediction - RV3 version 597 * motion vector prediction - RV3 version
593 */ 598 */
804 r->dmv[i][1] = svq3_get_se_golomb(gb); 809 r->dmv[i][1] = svq3_get_se_golomb(gb);
805 } 810 }
806 switch(block_type){ 811 switch(block_type){
807 case RV34_MB_TYPE_INTRA: 812 case RV34_MB_TYPE_INTRA:
808 case RV34_MB_TYPE_INTRA16x16: 813 case RV34_MB_TYPE_INTRA16x16:
809 fill_rectangle(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, 2, s->b8_stride, 0, 4); 814 ZERO8x2(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], s->b8_stride);
810 return 0; 815 return 0;
811 case RV34_MB_SKIP: 816 case RV34_MB_SKIP:
812 if(s->pict_type == FF_P_TYPE){ 817 if(s->pict_type == FF_P_TYPE){
813 fill_rectangle(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, 2, s->b8_stride, 0, 4); 818 ZERO8x2(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], s->b8_stride);
814 rv34_mc_1mv (r, block_type, 0, 0, 0, 2, 2, 0); 819 rv34_mc_1mv (r, block_type, 0, 0, 0, 2, 2, 0);
815 break; 820 break;
816 } 821 }
817 case RV34_MB_B_DIRECT: 822 case RV34_MB_B_DIRECT:
818 //surprisingly, it uses motion scheme from next reference frame 823 //surprisingly, it uses motion scheme from next reference frame
819 next_bt = s->next_picture_ptr->mb_type[s->mb_x + s->mb_y * s->mb_stride]; 824 next_bt = s->next_picture_ptr->mb_type[s->mb_x + s->mb_y * s->mb_stride];
820 if(IS_INTRA(next_bt) || IS_SKIP(next_bt)){ 825 if(IS_INTRA(next_bt) || IS_SKIP(next_bt)){
821 fill_rectangle(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, 2, s->b8_stride, 0, 4); 826 ZERO8x2(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], s->b8_stride);
822 fill_rectangle(s->current_picture_ptr->motion_val[1][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, 2, s->b8_stride, 0, 4); 827 ZERO8x2(s->current_picture_ptr->motion_val[1][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], s->b8_stride);
823 }else 828 }else
824 for(j = 0; j < 2; j++) 829 for(j = 0; j < 2; j++)
825 for(i = 0; i < 2; i++) 830 for(i = 0; i < 2; i++)
826 for(k = 0; k < 2; k++) 831 for(k = 0; k < 2; k++)
827 for(l = 0; l < 2; l++) 832 for(l = 0; l < 2; l++)
828 s->current_picture_ptr->motion_val[l][mv_pos + i + j*s->b8_stride][k] = calc_add_mv(r, l, s->next_picture_ptr->motion_val[0][mv_pos + i + j*s->b8_stride][k]); 833 s->current_picture_ptr->motion_val[l][mv_pos + i + j*s->b8_stride][k] = calc_add_mv(r, l, s->next_picture_ptr->motion_val[0][mv_pos + i + j*s->b8_stride][k]);
829 if(!(IS_16X8(next_bt) || IS_8X16(next_bt) || IS_8X8(next_bt))) //we can use whole macroblock MC 834 if(!(IS_16X8(next_bt) || IS_8X16(next_bt) || IS_8X8(next_bt))) //we can use whole macroblock MC
830 rv34_mc_2mv(r, block_type); 835 rv34_mc_2mv(r, block_type);
831 else 836 else
832 rv34_mc_2mv_skip(r); 837 rv34_mc_2mv_skip(r);
833 fill_rectangle(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, 2, s->b8_stride, 0, 4); 838 ZERO8x2(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], s->b8_stride);
834 break; 839 break;
835 case RV34_MB_P_16x16: 840 case RV34_MB_P_16x16:
836 case RV34_MB_P_MIX16x16: 841 case RV34_MB_P_MIX16x16:
837 rv34_pred_mv(r, block_type, 0, 0); 842 rv34_pred_mv(r, block_type, 0, 0);
838 rv34_mc_1mv (r, block_type, 0, 0, 0, 2, 2, 0); 843 rv34_mc_1mv (r, block_type, 0, 0, 0, 2, 2, 0);