Mercurial > libavcodec.hg
comparison mpegvideo.c @ 1718:fdd1bc71da55 libavcodec
more trellis quant optimizations
author | michael |
---|---|
date | Fri, 02 Jan 2004 19:22:00 +0000 |
parents | da5d64a0fa02 |
children | 4e72fb256b25 |
comparison
equal
deleted
inserted
replaced
1717:6a7e68899d8a | 1718:fdd1bc71da55 |
---|---|
4640 static int dct_quantize_trellis_c(MpegEncContext *s, | 4640 static int dct_quantize_trellis_c(MpegEncContext *s, |
4641 DCTELEM *block, int n, | 4641 DCTELEM *block, int n, |
4642 int qscale, int *overflow){ | 4642 int qscale, int *overflow){ |
4643 const int *qmat; | 4643 const int *qmat; |
4644 const uint8_t *scantable= s->intra_scantable.scantable; | 4644 const uint8_t *scantable= s->intra_scantable.scantable; |
4645 const uint8_t *perm_scantable= s->intra_scantable.permutated; | |
4645 int max=0; | 4646 int max=0; |
4646 unsigned int threshold1, threshold2; | 4647 unsigned int threshold1, threshold2; |
4647 int bias=0; | 4648 int bias=0; |
4648 int run_tab[65]; | 4649 int run_tab[65]; |
4649 int level_tab[65]; | 4650 int level_tab[65]; |
4650 int score_tab[65]; | 4651 int score_tab[65]; |
4652 int survivor[65]; | |
4653 int survivor_count; | |
4651 int last_run=0; | 4654 int last_run=0; |
4652 int last_level=0; | 4655 int last_level=0; |
4653 int last_score= 0; | 4656 int last_score= 0; |
4654 int last_i= 0; | 4657 int last_i; |
4655 int coeff[2][64]; | 4658 int coeff[2][64]; |
4656 int coeff_count[64]; | 4659 int coeff_count[64]; |
4657 int qmul, qadd, start_i, last_non_zero, i, dc; | 4660 int qmul, qadd, start_i, last_non_zero, i, dc; |
4658 const int esc_length= s->ac_esc_length; | 4661 const int esc_length= s->ac_esc_length; |
4659 uint8_t * length; | 4662 uint8_t * length; |
4660 uint8_t * last_length; | 4663 uint8_t * last_length; |
4661 int score_limit=0; | |
4662 int left_limit= 0; | |
4663 const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6); | 4664 const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6); |
4664 const int patch_table= s->out_format == FMT_MPEG1 && !s->mb_intra; | |
4665 | 4665 |
4666 s->dsp.fdct (block); | 4666 s->dsp.fdct (block); |
4667 | 4667 |
4668 if(s->dct_error_sum) | 4668 if(s->dct_error_sum) |
4669 ff_denoise_dct(s, block); | 4669 ff_denoise_dct(s, block); |
4698 last_non_zero = -1; | 4698 last_non_zero = -1; |
4699 qmat = s->q_inter_matrix[qscale]; | 4699 qmat = s->q_inter_matrix[qscale]; |
4700 length = s->inter_ac_vlc_length; | 4700 length = s->inter_ac_vlc_length; |
4701 last_length= s->inter_ac_vlc_last_length; | 4701 last_length= s->inter_ac_vlc_last_length; |
4702 } | 4702 } |
4703 last_i= start_i; | |
4703 | 4704 |
4704 threshold1= (1<<QMAT_SHIFT) - bias - 1; | 4705 threshold1= (1<<QMAT_SHIFT) - bias - 1; |
4705 threshold2= (threshold1<<1); | 4706 threshold2= (threshold1<<1); |
4706 | 4707 |
4707 for(i=63; i>=start_i; i--) { | 4708 for(i=63; i>=start_i; i--) { |
4714 } | 4715 } |
4715 } | 4716 } |
4716 | 4717 |
4717 for(i=start_i; i<=last_non_zero; i++) { | 4718 for(i=start_i; i<=last_non_zero; i++) { |
4718 const int j = scantable[i]; | 4719 const int j = scantable[i]; |
4719 const int k= i-start_i; | |
4720 int level = block[j] * qmat[j]; | 4720 int level = block[j] * qmat[j]; |
4721 | 4721 |
4722 // if( bias+level >= (1<<(QMAT_SHIFT - 3)) | 4722 // if( bias+level >= (1<<(QMAT_SHIFT - 3)) |
4723 // || bias-level >= (1<<(QMAT_SHIFT - 3))){ | 4723 // || bias-level >= (1<<(QMAT_SHIFT - 3))){ |
4724 if(((unsigned)(level+threshold1))>threshold2){ | 4724 if(((unsigned)(level+threshold1))>threshold2){ |
4725 if(level>0){ | 4725 if(level>0){ |
4726 level= (bias + level)>>QMAT_SHIFT; | 4726 level= (bias + level)>>QMAT_SHIFT; |
4727 coeff[0][k]= level; | 4727 coeff[0][i]= level; |
4728 coeff[1][k]= level-1; | 4728 coeff[1][i]= level-1; |
4729 // coeff[2][k]= level-2; | 4729 // coeff[2][k]= level-2; |
4730 }else{ | 4730 }else{ |
4731 level= (bias - level)>>QMAT_SHIFT; | 4731 level= (bias - level)>>QMAT_SHIFT; |
4732 coeff[0][k]= -level; | 4732 coeff[0][i]= -level; |
4733 coeff[1][k]= -level+1; | 4733 coeff[1][i]= -level+1; |
4734 // coeff[2][k]= -level+2; | 4734 // coeff[2][k]= -level+2; |
4735 } | 4735 } |
4736 coeff_count[k]= FFMIN(level, 2); | 4736 coeff_count[i]= FFMIN(level, 2); |
4737 assert(coeff_count[k]); | 4737 assert(coeff_count[i]); |
4738 max |=level; | 4738 max |=level; |
4739 }else{ | 4739 }else{ |
4740 coeff[0][k]= (level>>31)|1; | 4740 coeff[0][i]= (level>>31)|1; |
4741 coeff_count[k]= 1; | 4741 coeff_count[i]= 1; |
4742 } | 4742 } |
4743 } | 4743 } |
4744 | 4744 |
4745 *overflow= s->max_qcoeff < max; //overflow might have happend | 4745 *overflow= s->max_qcoeff < max; //overflow might have happend |
4746 | 4746 |
4747 if(last_non_zero < start_i){ | 4747 if(last_non_zero < start_i){ |
4748 memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM)); | 4748 memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM)); |
4749 return last_non_zero; | 4749 return last_non_zero; |
4750 } | 4750 } |
4751 | 4751 |
4752 score_tab[0]= 0; | 4752 score_tab[start_i]= 0; |
4753 | 4753 survivor[0]= start_i; |
4754 if(patch_table){ | 4754 survivor_count= 1; |
4755 // length[UNI_AC_ENC_INDEX(0, 63)]= | 4755 |
4756 // length[UNI_AC_ENC_INDEX(0, 65)]= 2; | 4756 for(i=start_i; i<=last_non_zero; i++){ |
4757 } | 4757 int level_index, j; |
4758 | 4758 const int dct_coeff= ABS(block[ scantable[i] ]); |
4759 for(i=0; i<=last_non_zero - start_i; i++){ | |
4760 int level_index, run, j; | |
4761 const int dct_coeff= ABS(block[ scantable[i + start_i] ]); | |
4762 const int zero_distoration= dct_coeff*dct_coeff; | 4759 const int zero_distoration= dct_coeff*dct_coeff; |
4763 int best_score=256*256*256*120; | 4760 int best_score=256*256*256*120; |
4764 | |
4765 for(level_index=0; level_index < coeff_count[i]; level_index++){ | 4761 for(level_index=0; level_index < coeff_count[i]; level_index++){ |
4766 int distoration; | 4762 int distoration; |
4767 int level= coeff[level_index][i]; | 4763 int level= coeff[level_index][i]; |
4768 const int alevel= ABS(level); | 4764 const int alevel= ABS(level); |
4769 int unquant_coeff; | 4765 int unquant_coeff; |
4771 assert(level); | 4767 assert(level); |
4772 | 4768 |
4773 if(s->out_format == FMT_H263){ | 4769 if(s->out_format == FMT_H263){ |
4774 unquant_coeff= alevel*qmul + qadd; | 4770 unquant_coeff= alevel*qmul + qadd; |
4775 }else{ //MPEG1 | 4771 }else{ //MPEG1 |
4776 j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize | 4772 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize |
4777 if(s->mb_intra){ | 4773 if(s->mb_intra){ |
4778 unquant_coeff = (int)( alevel * qscale * s->intra_matrix[j]) >> 3; | 4774 unquant_coeff = (int)( alevel * qscale * s->intra_matrix[j]) >> 3; |
4779 unquant_coeff = (unquant_coeff - 1) | 1; | 4775 unquant_coeff = (unquant_coeff - 1) | 1; |
4780 }else{ | 4776 }else{ |
4781 unquant_coeff = ((( alevel << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4; | 4777 unquant_coeff = ((( alevel << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4; |
4785 } | 4781 } |
4786 | 4782 |
4787 distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration; | 4783 distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration; |
4788 level+=64; | 4784 level+=64; |
4789 if((level&(~127)) == 0){ | 4785 if((level&(~127)) == 0){ |
4790 for(run=0; run<=i - left_limit; run++){ | 4786 for(j=survivor_count-1; j>=0; j--){ |
4787 int run= i - survivor[j]; | |
4791 int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda; | 4788 int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda; |
4792 score += score_tab[i-run]; | 4789 score += score_tab[i-run]; |
4793 | 4790 |
4794 if(score < best_score){ | 4791 if(score < best_score){ |
4795 best_score= | 4792 best_score= score; |
4796 score_tab[i+1]= score; | |
4797 run_tab[i+1]= run; | 4793 run_tab[i+1]= run; |
4798 level_tab[i+1]= level-64; | 4794 level_tab[i+1]= level-64; |
4799 } | 4795 } |
4800 } | 4796 } |
4801 | 4797 |
4802 if(s->out_format == FMT_H263){ | 4798 if(s->out_format == FMT_H263){ |
4803 for(run=0; run<=i - left_limit; run++){ | 4799 for(j=survivor_count-1; j>=0; j--){ |
4800 int run= i - survivor[j]; | |
4804 int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda; | 4801 int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda; |
4805 score += score_tab[i-run]; | 4802 score += score_tab[i-run]; |
4806 if(score < last_score){ | 4803 if(score < last_score){ |
4807 last_score= score; | 4804 last_score= score; |
4808 last_run= run; | 4805 last_run= run; |
4811 } | 4808 } |
4812 } | 4809 } |
4813 } | 4810 } |
4814 }else{ | 4811 }else{ |
4815 distoration += esc_length*lambda; | 4812 distoration += esc_length*lambda; |
4816 for(run=0; run<=i - left_limit; run++){ | 4813 for(j=survivor_count-1; j>=0; j--){ |
4814 int run= i - survivor[j]; | |
4817 int score= distoration + score_tab[i-run]; | 4815 int score= distoration + score_tab[i-run]; |
4818 | 4816 |
4819 if(score < best_score){ | 4817 if(score < best_score){ |
4820 best_score= | 4818 best_score= score; |
4821 score_tab[i+1]= score; | |
4822 run_tab[i+1]= run; | 4819 run_tab[i+1]= run; |
4823 level_tab[i+1]= level-64; | 4820 level_tab[i+1]= level-64; |
4824 } | 4821 } |
4825 } | 4822 } |
4826 | 4823 |
4827 if(s->out_format == FMT_H263){ | 4824 if(s->out_format == FMT_H263){ |
4828 for(run=0; run<=i - left_limit; run++){ | 4825 for(j=survivor_count-1; j>=0; j--){ |
4826 int run= i - survivor[j]; | |
4829 int score= distoration + score_tab[i-run]; | 4827 int score= distoration + score_tab[i-run]; |
4830 if(score < last_score){ | 4828 if(score < last_score){ |
4831 last_score= score; | 4829 last_score= score; |
4832 last_run= run; | 4830 last_run= run; |
4833 last_level= level-64; | 4831 last_level= level-64; |
4835 } | 4833 } |
4836 } | 4834 } |
4837 } | 4835 } |
4838 } | 4836 } |
4839 } | 4837 } |
4840 | |
4841 if(score_tab[i+1] < score_limit) | |
4842 score_limit= score_tab[i+1]; | |
4843 | 4838 |
4839 score_tab[i+1]= best_score; | |
4840 | |
4844 //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level | 4841 //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level |
4845 while(score_tab[ left_limit ] > score_limit + lambda) left_limit++; | 4842 if(last_non_zero <= 27){ |
4846 | 4843 for(; survivor_count; survivor_count--){ |
4847 if(patch_table){ | 4844 if(score_tab[ survivor[survivor_count-1] ] <= best_score) |
4848 // length[UNI_AC_ENC_INDEX(0, 63)]= | 4845 break; |
4849 // length[UNI_AC_ENC_INDEX(0, 65)]= 3; | 4846 } |
4850 } | 4847 }else{ |
4848 for(; survivor_count; survivor_count--){ | |
4849 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda) | |
4850 break; | |
4851 } | |
4852 } | |
4853 | |
4854 survivor[ survivor_count++ ]= i+1; | |
4851 } | 4855 } |
4852 | 4856 |
4853 if(s->out_format != FMT_H263){ | 4857 if(s->out_format != FMT_H263){ |
4854 last_score= 256*256*256*120; | 4858 last_score= 256*256*256*120; |
4855 for(i= left_limit; i<=last_non_zero - start_i + 1; i++){ | 4859 for(i= survivor[0]; i<=last_non_zero + 1; i++){ |
4856 int score= score_tab[i]; | 4860 int score= score_tab[i]; |
4857 if(i) score += lambda*2; //FIXME exacter? | 4861 if(i) score += lambda*2; //FIXME exacter? |
4858 | 4862 |
4859 if(score < last_score){ | 4863 if(score < last_score){ |
4860 last_score= score; | 4864 last_score= score; |
4866 } | 4870 } |
4867 | 4871 |
4868 s->coded_score[n] = last_score; | 4872 s->coded_score[n] = last_score; |
4869 | 4873 |
4870 dc= ABS(block[0]); | 4874 dc= ABS(block[0]); |
4871 last_non_zero= last_i - 1 + start_i; | 4875 last_non_zero= last_i - 1; |
4872 memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM)); | 4876 memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM)); |
4873 | 4877 |
4874 if(last_non_zero < start_i) | 4878 if(last_non_zero < start_i) |
4875 return last_non_zero; | 4879 return last_non_zero; |
4876 | 4880 |
4908 else return last_non_zero; | 4912 else return last_non_zero; |
4909 } | 4913 } |
4910 | 4914 |
4911 i= last_i; | 4915 i= last_i; |
4912 assert(last_level); | 4916 assert(last_level); |
4913 //FIXME use permutated scantable | 4917 |
4914 block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level; | 4918 block[ perm_scantable[last_non_zero] ]= last_level; |
4915 i -= last_run + 1; | 4919 i -= last_run + 1; |
4916 | 4920 |
4917 for(;i>0 ; i -= run_tab[i] + 1){ | 4921 for(; i>start_i; i -= run_tab[i] + 1){ |
4918 const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ]; | 4922 block[ perm_scantable[i-1] ]= level_tab[i]; |
4919 | |
4920 block[j]= level_tab[i]; | |
4921 assert(block[j]); | |
4922 } | 4923 } |
4923 | 4924 |
4924 return last_non_zero; | 4925 return last_non_zero; |
4925 } | 4926 } |
4926 | 4927 |