comparison mpegvideo.c @ 1718:fdd1bc71da55 libavcodec

more trellis quant optimizations
author michael
date Fri, 02 Jan 2004 19:22:00 +0000
parents da5d64a0fa02
children 4e72fb256b25
comparison
equal deleted inserted replaced
1717:6a7e68899d8a 1718:fdd1bc71da55
4640 static int dct_quantize_trellis_c(MpegEncContext *s, 4640 static int dct_quantize_trellis_c(MpegEncContext *s,
4641 DCTELEM *block, int n, 4641 DCTELEM *block, int n,
4642 int qscale, int *overflow){ 4642 int qscale, int *overflow){
4643 const int *qmat; 4643 const int *qmat;
4644 const uint8_t *scantable= s->intra_scantable.scantable; 4644 const uint8_t *scantable= s->intra_scantable.scantable;
4645 const uint8_t *perm_scantable= s->intra_scantable.permutated;
4645 int max=0; 4646 int max=0;
4646 unsigned int threshold1, threshold2; 4647 unsigned int threshold1, threshold2;
4647 int bias=0; 4648 int bias=0;
4648 int run_tab[65]; 4649 int run_tab[65];
4649 int level_tab[65]; 4650 int level_tab[65];
4650 int score_tab[65]; 4651 int score_tab[65];
4652 int survivor[65];
4653 int survivor_count;
4651 int last_run=0; 4654 int last_run=0;
4652 int last_level=0; 4655 int last_level=0;
4653 int last_score= 0; 4656 int last_score= 0;
4654 int last_i= 0; 4657 int last_i;
4655 int coeff[2][64]; 4658 int coeff[2][64];
4656 int coeff_count[64]; 4659 int coeff_count[64];
4657 int qmul, qadd, start_i, last_non_zero, i, dc; 4660 int qmul, qadd, start_i, last_non_zero, i, dc;
4658 const int esc_length= s->ac_esc_length; 4661 const int esc_length= s->ac_esc_length;
4659 uint8_t * length; 4662 uint8_t * length;
4660 uint8_t * last_length; 4663 uint8_t * last_length;
4661 int score_limit=0;
4662 int left_limit= 0;
4663 const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6); 4664 const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
4664 const int patch_table= s->out_format == FMT_MPEG1 && !s->mb_intra;
4665 4665
4666 s->dsp.fdct (block); 4666 s->dsp.fdct (block);
4667 4667
4668 if(s->dct_error_sum) 4668 if(s->dct_error_sum)
4669 ff_denoise_dct(s, block); 4669 ff_denoise_dct(s, block);
4698 last_non_zero = -1; 4698 last_non_zero = -1;
4699 qmat = s->q_inter_matrix[qscale]; 4699 qmat = s->q_inter_matrix[qscale];
4700 length = s->inter_ac_vlc_length; 4700 length = s->inter_ac_vlc_length;
4701 last_length= s->inter_ac_vlc_last_length; 4701 last_length= s->inter_ac_vlc_last_length;
4702 } 4702 }
4703 last_i= start_i;
4703 4704
4704 threshold1= (1<<QMAT_SHIFT) - bias - 1; 4705 threshold1= (1<<QMAT_SHIFT) - bias - 1;
4705 threshold2= (threshold1<<1); 4706 threshold2= (threshold1<<1);
4706 4707
4707 for(i=63; i>=start_i; i--) { 4708 for(i=63; i>=start_i; i--) {
4714 } 4715 }
4715 } 4716 }
4716 4717
4717 for(i=start_i; i<=last_non_zero; i++) { 4718 for(i=start_i; i<=last_non_zero; i++) {
4718 const int j = scantable[i]; 4719 const int j = scantable[i];
4719 const int k= i-start_i;
4720 int level = block[j] * qmat[j]; 4720 int level = block[j] * qmat[j];
4721 4721
4722 // if( bias+level >= (1<<(QMAT_SHIFT - 3)) 4722 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
4723 // || bias-level >= (1<<(QMAT_SHIFT - 3))){ 4723 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
4724 if(((unsigned)(level+threshold1))>threshold2){ 4724 if(((unsigned)(level+threshold1))>threshold2){
4725 if(level>0){ 4725 if(level>0){
4726 level= (bias + level)>>QMAT_SHIFT; 4726 level= (bias + level)>>QMAT_SHIFT;
4727 coeff[0][k]= level; 4727 coeff[0][i]= level;
4728 coeff[1][k]= level-1; 4728 coeff[1][i]= level-1;
4729 // coeff[2][k]= level-2; 4729 // coeff[2][k]= level-2;
4730 }else{ 4730 }else{
4731 level= (bias - level)>>QMAT_SHIFT; 4731 level= (bias - level)>>QMAT_SHIFT;
4732 coeff[0][k]= -level; 4732 coeff[0][i]= -level;
4733 coeff[1][k]= -level+1; 4733 coeff[1][i]= -level+1;
4734 // coeff[2][k]= -level+2; 4734 // coeff[2][k]= -level+2;
4735 } 4735 }
4736 coeff_count[k]= FFMIN(level, 2); 4736 coeff_count[i]= FFMIN(level, 2);
4737 assert(coeff_count[k]); 4737 assert(coeff_count[i]);
4738 max |=level; 4738 max |=level;
4739 }else{ 4739 }else{
4740 coeff[0][k]= (level>>31)|1; 4740 coeff[0][i]= (level>>31)|1;
4741 coeff_count[k]= 1; 4741 coeff_count[i]= 1;
4742 } 4742 }
4743 } 4743 }
4744 4744
4745 *overflow= s->max_qcoeff < max; //overflow might have happend 4745 *overflow= s->max_qcoeff < max; //overflow might have happend
4746 4746
4747 if(last_non_zero < start_i){ 4747 if(last_non_zero < start_i){
4748 memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM)); 4748 memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4749 return last_non_zero; 4749 return last_non_zero;
4750 } 4750 }
4751 4751
4752 score_tab[0]= 0; 4752 score_tab[start_i]= 0;
4753 4753 survivor[0]= start_i;
4754 if(patch_table){ 4754 survivor_count= 1;
4755 // length[UNI_AC_ENC_INDEX(0, 63)]= 4755
4756 // length[UNI_AC_ENC_INDEX(0, 65)]= 2; 4756 for(i=start_i; i<=last_non_zero; i++){
4757 } 4757 int level_index, j;
4758 4758 const int dct_coeff= ABS(block[ scantable[i] ]);
4759 for(i=0; i<=last_non_zero - start_i; i++){
4760 int level_index, run, j;
4761 const int dct_coeff= ABS(block[ scantable[i + start_i] ]);
4762 const int zero_distoration= dct_coeff*dct_coeff; 4759 const int zero_distoration= dct_coeff*dct_coeff;
4763 int best_score=256*256*256*120; 4760 int best_score=256*256*256*120;
4764
4765 for(level_index=0; level_index < coeff_count[i]; level_index++){ 4761 for(level_index=0; level_index < coeff_count[i]; level_index++){
4766 int distoration; 4762 int distoration;
4767 int level= coeff[level_index][i]; 4763 int level= coeff[level_index][i];
4768 const int alevel= ABS(level); 4764 const int alevel= ABS(level);
4769 int unquant_coeff; 4765 int unquant_coeff;
4771 assert(level); 4767 assert(level);
4772 4768
4773 if(s->out_format == FMT_H263){ 4769 if(s->out_format == FMT_H263){
4774 unquant_coeff= alevel*qmul + qadd; 4770 unquant_coeff= alevel*qmul + qadd;
4775 }else{ //MPEG1 4771 }else{ //MPEG1
4776 j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize 4772 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
4777 if(s->mb_intra){ 4773 if(s->mb_intra){
4778 unquant_coeff = (int)( alevel * qscale * s->intra_matrix[j]) >> 3; 4774 unquant_coeff = (int)( alevel * qscale * s->intra_matrix[j]) >> 3;
4779 unquant_coeff = (unquant_coeff - 1) | 1; 4775 unquant_coeff = (unquant_coeff - 1) | 1;
4780 }else{ 4776 }else{
4781 unquant_coeff = ((( alevel << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4; 4777 unquant_coeff = ((( alevel << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4785 } 4781 }
4786 4782
4787 distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration; 4783 distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
4788 level+=64; 4784 level+=64;
4789 if((level&(~127)) == 0){ 4785 if((level&(~127)) == 0){
4790 for(run=0; run<=i - left_limit; run++){ 4786 for(j=survivor_count-1; j>=0; j--){
4787 int run= i - survivor[j];
4791 int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda; 4788 int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4792 score += score_tab[i-run]; 4789 score += score_tab[i-run];
4793 4790
4794 if(score < best_score){ 4791 if(score < best_score){
4795 best_score= 4792 best_score= score;
4796 score_tab[i+1]= score;
4797 run_tab[i+1]= run; 4793 run_tab[i+1]= run;
4798 level_tab[i+1]= level-64; 4794 level_tab[i+1]= level-64;
4799 } 4795 }
4800 } 4796 }
4801 4797
4802 if(s->out_format == FMT_H263){ 4798 if(s->out_format == FMT_H263){
4803 for(run=0; run<=i - left_limit; run++){ 4799 for(j=survivor_count-1; j>=0; j--){
4800 int run= i - survivor[j];
4804 int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda; 4801 int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4805 score += score_tab[i-run]; 4802 score += score_tab[i-run];
4806 if(score < last_score){ 4803 if(score < last_score){
4807 last_score= score; 4804 last_score= score;
4808 last_run= run; 4805 last_run= run;
4811 } 4808 }
4812 } 4809 }
4813 } 4810 }
4814 }else{ 4811 }else{
4815 distoration += esc_length*lambda; 4812 distoration += esc_length*lambda;
4816 for(run=0; run<=i - left_limit; run++){ 4813 for(j=survivor_count-1; j>=0; j--){
4814 int run= i - survivor[j];
4817 int score= distoration + score_tab[i-run]; 4815 int score= distoration + score_tab[i-run];
4818 4816
4819 if(score < best_score){ 4817 if(score < best_score){
4820 best_score= 4818 best_score= score;
4821 score_tab[i+1]= score;
4822 run_tab[i+1]= run; 4819 run_tab[i+1]= run;
4823 level_tab[i+1]= level-64; 4820 level_tab[i+1]= level-64;
4824 } 4821 }
4825 } 4822 }
4826 4823
4827 if(s->out_format == FMT_H263){ 4824 if(s->out_format == FMT_H263){
4828 for(run=0; run<=i - left_limit; run++){ 4825 for(j=survivor_count-1; j>=0; j--){
4826 int run= i - survivor[j];
4829 int score= distoration + score_tab[i-run]; 4827 int score= distoration + score_tab[i-run];
4830 if(score < last_score){ 4828 if(score < last_score){
4831 last_score= score; 4829 last_score= score;
4832 last_run= run; 4830 last_run= run;
4833 last_level= level-64; 4831 last_level= level-64;
4835 } 4833 }
4836 } 4834 }
4837 } 4835 }
4838 } 4836 }
4839 } 4837 }
4840
4841 if(score_tab[i+1] < score_limit)
4842 score_limit= score_tab[i+1];
4843 4838
4839 score_tab[i+1]= best_score;
4840
4844 //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level 4841 //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
4845 while(score_tab[ left_limit ] > score_limit + lambda) left_limit++; 4842 if(last_non_zero <= 27){
4846 4843 for(; survivor_count; survivor_count--){
4847 if(patch_table){ 4844 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4848 // length[UNI_AC_ENC_INDEX(0, 63)]= 4845 break;
4849 // length[UNI_AC_ENC_INDEX(0, 65)]= 3; 4846 }
4850 } 4847 }else{
4848 for(; survivor_count; survivor_count--){
4849 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4850 break;
4851 }
4852 }
4853
4854 survivor[ survivor_count++ ]= i+1;
4851 } 4855 }
4852 4856
4853 if(s->out_format != FMT_H263){ 4857 if(s->out_format != FMT_H263){
4854 last_score= 256*256*256*120; 4858 last_score= 256*256*256*120;
4855 for(i= left_limit; i<=last_non_zero - start_i + 1; i++){ 4859 for(i= survivor[0]; i<=last_non_zero + 1; i++){
4856 int score= score_tab[i]; 4860 int score= score_tab[i];
4857 if(i) score += lambda*2; //FIXME exacter? 4861 if(i) score += lambda*2; //FIXME exacter?
4858 4862
4859 if(score < last_score){ 4863 if(score < last_score){
4860 last_score= score; 4864 last_score= score;
4866 } 4870 }
4867 4871
4868 s->coded_score[n] = last_score; 4872 s->coded_score[n] = last_score;
4869 4873
4870 dc= ABS(block[0]); 4874 dc= ABS(block[0]);
4871 last_non_zero= last_i - 1 + start_i; 4875 last_non_zero= last_i - 1;
4872 memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM)); 4876 memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4873 4877
4874 if(last_non_zero < start_i) 4878 if(last_non_zero < start_i)
4875 return last_non_zero; 4879 return last_non_zero;
4876 4880
4908 else return last_non_zero; 4912 else return last_non_zero;
4909 } 4913 }
4910 4914
4911 i= last_i; 4915 i= last_i;
4912 assert(last_level); 4916 assert(last_level);
4913 //FIXME use permutated scantable 4917
4914 block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level; 4918 block[ perm_scantable[last_non_zero] ]= last_level;
4915 i -= last_run + 1; 4919 i -= last_run + 1;
4916 4920
4917 for(;i>0 ; i -= run_tab[i] + 1){ 4921 for(; i>start_i; i -= run_tab[i] + 1){
4918 const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ]; 4922 block[ perm_scantable[i-1] ]= level_tab[i];
4919
4920 block[j]= level_tab[i];
4921 assert(block[j]);
4922 } 4923 }
4923 4924
4924 return last_non_zero; 4925 return last_non_zero;
4925 } 4926 }
4926 4927