Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 791:4f61ca80b6c1 libavcodec
better deblocking filter
author | michael |
---|---|
date | Tue, 29 Oct 2002 18:35:15 +0000 |
parents | 4914252c963a |
children | 2d1283d511b7 |
comparison
equal
deleted
inserted
replaced
790:b9156f8e6747 | 791:4f61ca80b6c1 |
---|---|
57 (the if/else stuff per block is slowing things down) | 57 (the if/else stuff per block is slowing things down) |
58 compare the quality & speed of all filters | 58 compare the quality & speed of all filters |
59 split this huge file | 59 split this huge file |
60 optimize c versions | 60 optimize c versions |
61 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks | 61 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
62 put fastmemcpy back | |
63 ... | 62 ... |
64 */ | 63 */ |
65 | 64 |
66 //Changelog: use the CVS log | 65 //Changelog: use the CVS log |
67 | 66 |
147 uint8_t *tempSrcBlock; | 146 uint8_t *tempSrcBlock; |
148 uint8_t *deintTemp; | 147 uint8_t *deintTemp; |
149 | 148 |
150 uint64_t __attribute__((aligned(8))) pQPb; | 149 uint64_t __attribute__((aligned(8))) pQPb; |
151 uint64_t __attribute__((aligned(8))) pQPb2; | 150 uint64_t __attribute__((aligned(8))) pQPb2; |
151 | |
152 uint64_t __attribute__((aligned(8))) mmxDcOffset[32]; | |
153 uint64_t __attribute__((aligned(8))) mmxDcThreshold[32]; | |
152 | 154 |
153 uint64_t __attribute__((aligned(8))) mmxDcOffset; | 155 QP_STORE_T *nonBQPTable; |
154 uint64_t __attribute__((aligned(8))) mmxDcThreshold; | 156 |
155 | |
156 int QP; | 157 int QP; |
157 int dcOffset; | 158 int nonBQP; |
158 int dcThreshold; | |
159 | 159 |
160 int frameNum; | 160 int frameNum; |
161 | 161 |
162 PPMode ppMode; | 162 PPMode ppMode; |
163 } PPContext; | 163 } PPContext; |
245 */ | 245 */ |
246 static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) | 246 static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) |
247 { | 247 { |
248 int numEq= 0; | 248 int numEq= 0; |
249 int y; | 249 int y; |
250 const int dcOffset= c->dcOffset; | 250 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1; |
251 const int dcThreshold= c->dcThreshold; | 251 const int dcThreshold= dcOffset*2 + 1; |
252 for(y=0; y<BLOCK_SIZE; y++) | 252 for(y=0; y<BLOCK_SIZE; y++) |
253 { | 253 { |
254 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; | 254 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; |
255 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; | 255 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; |
256 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; | 256 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; |
267 * Check if the middle 8x8 Block in the given 8x16 block is flat | 267 * Check if the middle 8x8 Block in the given 8x16 block is flat |
268 */ | 268 */ |
269 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ | 269 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ |
270 int numEq= 0; | 270 int numEq= 0; |
271 int y; | 271 int y; |
272 const int dcOffset= c->dcOffset; | 272 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1; |
273 const int dcThreshold= c->dcThreshold; | 273 const int dcThreshold= dcOffset*2 + 1; |
274 src+= stride*4; // src points to begin of the 8x8 Block | 274 src+= stride*4; // src points to begin of the 8x8 Block |
275 for(y=0; y<BLOCK_SIZE-1; y++) | 275 for(y=0; y<BLOCK_SIZE-1; y++) |
276 { | 276 { |
277 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; | 277 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; |
278 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; | 278 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; |
723 } | 723 } |
724 } | 724 } |
725 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK) | 725 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK) |
726 { | 726 { |
727 int o; | 727 int o; |
728 ppMode.maxDcDiff=1; | 728 ppMode.baseDcDiff=256/4; |
729 // hFlatnessThreshold= 40; | 729 // hFlatnessThreshold= 40; |
730 // vFlatnessThreshold= 40; | 730 // vFlatnessThreshold= 40; |
731 | 731 |
732 for(o=0; options[o]!=NULL && o<2; o++) | 732 for(o=0; options[o]!=NULL && o<2; o++) |
733 { | 733 { |
734 char *tail; | 734 char *tail; |
735 int val= strtol(options[o], &tail, 0); | 735 int val= strtol(options[o], &tail, 0); |
736 if(tail==options[o]) break; | 736 if(tail==options[o]) break; |
737 | 737 |
738 numOfUnknownOptions--; | 738 numOfUnknownOptions--; |
739 if(o==0) ppMode.maxDcDiff= val; | 739 if(o==0) ppMode.baseDcDiff= val; |
740 else ppMode.flatnessThreshold= val; | 740 else ppMode.flatnessThreshold= val; |
741 } | 741 } |
742 } | 742 } |
743 else if(filters[i].mask == FORCE_QUANT) | 743 else if(filters[i].mask == FORCE_QUANT) |
744 { | 744 { |
766 } | 766 } |
767 | 767 |
768 void *getPPContext(int width, int height){ | 768 void *getPPContext(int width, int height){ |
769 PPContext *c= memalign(32, sizeof(PPContext)); | 769 PPContext *c= memalign(32, sizeof(PPContext)); |
770 int i; | 770 int i; |
771 int mbWidth = (width+15)>>4; | |
772 int mbHeight= (height+15)>>4; | |
771 | 773 |
772 c->tempBlocks= (uint8_t*)memalign(8, 2*16*8); | 774 c->tempBlocks= (uint8_t*)memalign(8, 2*16*8); |
773 c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t)); | 775 c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t)); |
774 for(i=0; i<256; i++) | 776 for(i=0; i<256; i++) |
775 c->yHistogram[i]= width*height/64*15/256; | 777 c->yHistogram[i]= width*height/64*15/256; |
787 c->tempDst= (uint8_t*)memalign(8, 1024*24); | 789 c->tempDst= (uint8_t*)memalign(8, 1024*24); |
788 c->tempSrc= (uint8_t*)memalign(8, 1024*24); | 790 c->tempSrc= (uint8_t*)memalign(8, 1024*24); |
789 c->tempDstBlock= (uint8_t*)memalign(8, 1024*24); | 791 c->tempDstBlock= (uint8_t*)memalign(8, 1024*24); |
790 c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24); | 792 c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24); |
791 c->deintTemp= (uint8_t*)memalign(8, width+16); | 793 c->deintTemp= (uint8_t*)memalign(8, width+16); |
794 c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T)); | |
795 memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T)); | |
792 | 796 |
793 c->frameNum=-1; | 797 c->frameNum=-1; |
794 | 798 |
795 return c; | 799 return c; |
796 } | 800 } |
807 free(c->tempDst); | 811 free(c->tempDst); |
808 free(c->tempSrc); | 812 free(c->tempSrc); |
809 free(c->tempDstBlock); | 813 free(c->tempDstBlock); |
810 free(c->tempSrcBlock); | 814 free(c->tempSrcBlock); |
811 free(c->deintTemp); | 815 free(c->deintTemp); |
816 free(c->nonBQPTable); | |
812 | 817 |
813 free(c); | 818 free(c); |
814 } | 819 } |
815 | 820 |
816 //FIXME move this shit away from here | 821 //FIXME move this shit away from here |
839 } | 844 } |
840 | 845 |
841 | 846 |
842 void postprocess(uint8_t * src[3], int srcStride[3], | 847 void postprocess(uint8_t * src[3], int srcStride[3], |
843 uint8_t * dst[3], int dstStride[3], | 848 uint8_t * dst[3], int dstStride[3], |
844 int horizontalSize, int verticalSize, | 849 int width, int height, |
845 QP_STORE_T *QP_store, int QPStride, | 850 QP_STORE_T *QP_store, int QPStride, |
846 PPMode *mode, void *c) | 851 PPMode *mode, void *vc, int pict_type) |
847 { | 852 { |
848 | 853 int mbWidth = (width+15)>>4; |
854 int mbHeight= (height+15)>>4; | |
849 QP_STORE_T quantArray[2048/8]; | 855 QP_STORE_T quantArray[2048/8]; |
856 PPContext *c = (PPContext*)vc; | |
850 | 857 |
851 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) | 858 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) |
852 { | 859 { |
853 int i; | 860 int i; |
854 QP_store= quantArray; | 861 QP_store= quantArray; |
856 if(mode->lumMode & FORCE_QUANT) | 863 if(mode->lumMode & FORCE_QUANT) |
857 for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant; | 864 for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant; |
858 else | 865 else |
859 for(i=0; i<2048/8; i++) quantArray[i]= 1; | 866 for(i=0; i<2048/8; i++) quantArray[i]= 1; |
860 } | 867 } |
868 if(0){ | |
869 int x,y; | |
870 for(y=0; y<mbHeight; y++){ | |
871 for(x=0; x<mbWidth; x++){ | |
872 printf("%2d ", QP_store[x + y*QPStride]); | |
873 } | |
874 printf("\n"); | |
875 } | |
876 printf("\n"); | |
877 } | |
878 //printf("pict_type:%d\n", pict_type); | |
879 if(pict_type!=3) | |
880 { | |
881 int x,y; | |
882 for(y=0; y<mbHeight; y++){ | |
883 for(x=0; x<mbWidth; x++){ | |
884 int qscale= QP_store[x + y*QPStride]; | |
885 if(qscale&~31) | |
886 qscale=31; | |
887 c->nonBQPTable[y*mbWidth + x]= qscale; | |
888 } | |
889 } | |
890 } | |
861 | 891 |
862 if(firstTime2 && verbose) | 892 if(firstTime2 && verbose) |
863 { | 893 { |
864 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); | 894 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); |
865 firstTime2=0; | 895 firstTime2=0; |
866 } | 896 } |
867 | 897 |
868 postProcess(src[0], srcStride[0], dst[0], dstStride[0], | 898 postProcess(src[0], srcStride[0], dst[0], dstStride[0], |
869 horizontalSize, verticalSize, QP_store, QPStride, 0, mode, c); | 899 width, height, QP_store, QPStride, 0, mode, c); |
870 | 900 |
871 horizontalSize = (horizontalSize+1)>> 1; | 901 width = (width +1)>>1; |
872 verticalSize = (verticalSize+1)>>1; | 902 height = (height+1)>>1; |
873 | 903 |
874 if(mode->chromMode) | 904 if(mode->chromMode) |
875 { | 905 { |
876 postProcess(src[1], srcStride[1], dst[1], dstStride[1], | 906 postProcess(src[1], srcStride[1], dst[1], dstStride[1], |
877 horizontalSize, verticalSize, QP_store, QPStride, 1, mode, c); | 907 width, height, QP_store, QPStride, 1, mode, c); |
878 postProcess(src[2], srcStride[2], dst[2], dstStride[2], | 908 postProcess(src[2], srcStride[2], dst[2], dstStride[2], |
879 horizontalSize, verticalSize, QP_store, QPStride, 2, mode, c); | 909 width, height, QP_store, QPStride, 2, mode, c); |
880 } | 910 } |
881 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) | 911 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) |
882 { | 912 { |
883 memcpy(dst[1], src[1], srcStride[1]*verticalSize); | 913 memcpy(dst[1], src[1], srcStride[1]*height); |
884 memcpy(dst[2], src[2], srcStride[2]*verticalSize); | 914 memcpy(dst[2], src[2], srcStride[2]*height); |
885 } | 915 } |
886 else | 916 else |
887 { | 917 { |
888 int y; | 918 int y; |
889 for(y=0; y<verticalSize; y++) | 919 for(y=0; y<height; y++) |
890 { | 920 { |
891 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), horizontalSize); | 921 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); |
892 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), horizontalSize); | 922 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); |
893 } | 923 } |
894 } | 924 } |
895 } | 925 } |
896 | 926 |