comparison motion_est.c @ 1799:95612d423fde libavcodec

Multithreaded/SMP motion estimation. Multithreaded/SMP encoding for MPEG1/MPEG2/MPEG4/H263. All pthread-specific code is in pthread.c. To try it, run configure --enable-pthreads and ffmpeg ... -threads <num>. The internal thread API is a simple AVCodecContext.execute() callback, which executes a given function pointer with different arguments and returns after all of them have finished; that way, no mutexes or other thread mess is needed outside pthread.c.
author michael
date Fri, 13 Feb 2004 17:54:10 +0000
parents 5f42b809bc04
children 129236143f2e
comparison
equal deleted inserted replaced
1798:a3da4b429984 1799:95612d423fde
803 P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1]; 803 P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
804 804
805 if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift); 805 if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
806 806
807 /* special case for first line */ 807 /* special case for first line */
808 if (s->mb_y == 0 && block<2) { 808 if (s->first_slice_line && block<2) {
809 pred_x4= P_LEFT[0]; 809 pred_x4= P_LEFT[0];
810 pred_y4= P_LEFT[1]; 810 pred_y4= P_LEFT[1];
811 } else { 811 } else {
812 P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0]; 812 P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0];
813 P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1]; 813 P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1];
843 if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0] 843 if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]
844 && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE){ 844 && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE){
845 int dxy; 845 int dxy;
846 const int offset= ((block&1) + (block>>1)*stride)*8; 846 const int offset= ((block&1) + (block>>1)*stride)*8;
847 uint8_t *dest_y = s->me.scratchpad + offset; 847 uint8_t *dest_y = s->me.scratchpad + offset;
848
849 if(s->quarter_sample){ 848 if(s->quarter_sample){
850 uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride; 849 uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride;
851 dxy = ((my4 & 3) << 2) | (mx4 & 3); 850 dxy = ((my4 & 3) << 2) | (mx4 & 3);
852 851
853 if(s->no_rounding) 852 if(s->no_rounding)
854 s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , s->linesize); 853 s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , stride);
855 else 854 else
856 s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride); 855 s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride);
857 }else{ 856 }else{
858 uint8_t *ref= ref_data[0] + (mx4>>1) + (my4>>1)*stride; 857 uint8_t *ref= ref_data[0] + (mx4>>1) + (my4>>1)*stride;
859 dxy = ((my4 & 1) << 1) | (mx4 & 1); 858 dxy = ((my4 & 1) << 1) | (mx4 & 1);
964 if(P_LEFT[0] > (s->me.xmax<<1)) P_LEFT[0] = (s->me.xmax<<1); 963 if(P_LEFT[0] > (s->me.xmax<<1)) P_LEFT[0] = (s->me.xmax<<1);
965 964
966 pred_x= P_LEFT[0]; 965 pred_x= P_LEFT[0];
967 pred_y= P_LEFT[1]; 966 pred_y= P_LEFT[1];
968 967
969 if(s->mb_y){ 968 if(!s->first_slice_line){
970 P_TOP[0] = mv_table[xy - mot_stride][0]; 969 P_TOP[0] = mv_table[xy - mot_stride][0];
971 P_TOP[1] = mv_table[xy - mot_stride][1]; 970 P_TOP[1] = mv_table[xy - mot_stride][1];
972 P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0]; 971 P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
973 P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1]; 972 P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
974 if(P_TOP[1] > (s->me.ymax<<1)) P_TOP[1] = (s->me.ymax<<1); 973 if(P_TOP[1] > (s->me.ymax<<1)) P_TOP[1] = (s->me.ymax<<1);
1113 P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0]; 1112 P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
1114 P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1]; 1113 P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
1115 1114
1116 if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift); 1115 if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
1117 1116
1118 if(mb_y) { 1117 if(!s->first_slice_line) {
1119 P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0]; 1118 P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0];
1120 P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1]; 1119 P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1];
1121 P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0]; 1120 P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
1122 P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1]; 1121 P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
1123 if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1] = (s->me.ymax<<shift); 1122 if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1] = (s->me.ymax<<shift);
1162 //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); 1161 //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
1163 pic->mb_var [s->mb_stride * mb_y + mb_x] = varc; 1162 pic->mb_var [s->mb_stride * mb_y + mb_x] = varc;
1164 pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = vard; 1163 pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = vard;
1165 pic->mb_mean [s->mb_stride * mb_y + mb_x] = (sum+128)>>8; 1164 pic->mb_mean [s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
1166 // pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin; 1165 // pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
1167 pic->mb_var_sum += varc; 1166 s->mb_var_sum_temp += varc;
1168 pic->mc_mb_var_sum += vard; 1167 s->mc_mb_var_sum_temp += vard;
1169 //printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); 1168 //printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
1170 1169
1171 #if 0 1170 #if 0
1172 printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n", 1171 printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
1173 varc, s->avg_mb_var, sum, vard, mx - xx, my - yy); 1172 varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
1324 P_LEFT[1] = s->p_mv_table[xy + 1][1]; 1323 P_LEFT[1] = s->p_mv_table[xy + 1][1];
1325 1324
1326 if(P_LEFT[0] < (s->me.xmin<<shift)) P_LEFT[0] = (s->me.xmin<<shift); 1325 if(P_LEFT[0] < (s->me.xmin<<shift)) P_LEFT[0] = (s->me.xmin<<shift);
1327 1326
1328 /* special case for first line */ 1327 /* special case for first line */
1329 if (mb_y == s->mb_height-1) { 1328 if (s->first_slice_line) {
1330 pred_x= P_LEFT[0]; 1329 pred_x= P_LEFT[0];
1331 pred_y= P_LEFT[1]; 1330 pred_y= P_LEFT[1];
1332 P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]= 1331 P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]=
1333 P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME 1332 P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME
1334 } else { 1333 } else {
1407 P_LEFT[1] = mv_table[mot_xy - 1][1]; 1406 P_LEFT[1] = mv_table[mot_xy - 1][1];
1408 1407
1409 if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift); 1408 if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
1410 1409
1411 /* special case for first line */ 1410 /* special case for first line */
1412 if (mb_y) { 1411 if (!s->first_slice_line) {
1413 P_TOP[0] = mv_table[mot_xy - mot_stride ][0]; 1412 P_TOP[0] = mv_table[mot_xy - mot_stride ][0];
1414 P_TOP[1] = mv_table[mot_xy - mot_stride ][1]; 1413 P_TOP[1] = mv_table[mot_xy - mot_stride ][1];
1415 P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1 ][0]; 1414 P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1 ][0];
1416 P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1 ][1]; 1415 P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1 ][1];
1417 if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1]= (s->me.ymax<<shift); 1416 if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1]= (s->me.ymax<<shift);
1608 1607
1609 P_LEFT[0] = clip(mv_table[mot_xy - 1][0], xmin<<shift, xmax<<shift); 1608 P_LEFT[0] = clip(mv_table[mot_xy - 1][0], xmin<<shift, xmax<<shift);
1610 P_LEFT[1] = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift); 1609 P_LEFT[1] = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift);
1611 1610
1612 /* special case for first line */ 1611 /* special case for first line */
1613 if (mb_y) { 1612 if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as its cliped
1614 P_TOP[0] = clip(mv_table[mot_xy - mot_stride ][0], xmin<<shift, xmax<<shift); 1613 P_TOP[0] = clip(mv_table[mot_xy - mot_stride ][0], xmin<<shift, xmax<<shift);
1615 P_TOP[1] = clip(mv_table[mot_xy - mot_stride ][1], ymin<<shift, ymax<<shift); 1614 P_TOP[1] = clip(mv_table[mot_xy - mot_stride ][1], ymin<<shift, ymax<<shift);
1616 P_TOPRIGHT[0] = clip(mv_table[mot_xy - mot_stride + 1 ][0], xmin<<shift, xmax<<shift); 1615 P_TOPRIGHT[0] = clip(mv_table[mot_xy - mot_stride + 1 ][0], xmin<<shift, xmax<<shift);
1617 P_TOPRIGHT[1] = clip(mv_table[mot_xy - mot_stride + 1 ][1], ymin<<shift, ymax<<shift); 1616 P_TOPRIGHT[1] = clip(mv_table[mot_xy - mot_stride + 1 ][1], ymin<<shift, ymax<<shift);
1618 1617
1725 score=bimin; 1724 score=bimin;
1726 type= CANDIDATE_MB_TYPE_BACKWARD_I; 1725 type= CANDIDATE_MB_TYPE_BACKWARD_I;
1727 } 1726 }
1728 1727
1729 score= ((unsigned)(score*score + 128*256))>>16; 1728 score= ((unsigned)(score*score + 128*256))>>16;
1730 s->current_picture.mc_mb_var_sum += score; 1729 s->mc_mb_var_sum_temp += score;
1731 s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE 1730 s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
1732 } 1731 }
1733 1732
1734 if(s->avctx->mb_decision > FF_MB_DECISION_SIMPLE){ 1733 if(s->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
1735 type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT; 1734 type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;