comparison libpostproc/postprocess.c @ 791:4f61ca80b6c1 libavcodec

better deblocking filter
author michael
date Tue, 29 Oct 2002 18:35:15 +0000
parents 4914252c963a
children 2d1283d511b7
comparison
equal deleted inserted replaced
790:b9156f8e6747 791:4f61ca80b6c1
57 (the if/else stuff per block is slowing things down) 57 (the if/else stuff per block is slowing things down)
58 compare the quality & speed of all filters 58 compare the quality & speed of all filters
59 split this huge file 59 split this huge file
60 optimize c versions 60 optimize c versions
61 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks 61 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
62 put fastmemcpy back
63 ... 62 ...
64 */ 63 */
65 64
66 //Changelog: use the CVS log 65 //Changelog: use the CVS log
67 66
147 uint8_t *tempSrcBlock; 146 uint8_t *tempSrcBlock;
148 uint8_t *deintTemp; 147 uint8_t *deintTemp;
149 148
150 uint64_t __attribute__((aligned(8))) pQPb; 149 uint64_t __attribute__((aligned(8))) pQPb;
151 uint64_t __attribute__((aligned(8))) pQPb2; 150 uint64_t __attribute__((aligned(8))) pQPb2;
151
152 uint64_t __attribute__((aligned(8))) mmxDcOffset[32];
153 uint64_t __attribute__((aligned(8))) mmxDcThreshold[32];
152 154
153 uint64_t __attribute__((aligned(8))) mmxDcOffset; 155 QP_STORE_T *nonBQPTable;
154 uint64_t __attribute__((aligned(8))) mmxDcThreshold; 156
155
156 int QP; 157 int QP;
157 int dcOffset; 158 int nonBQP;
158 int dcThreshold;
159 159
160 int frameNum; 160 int frameNum;
161 161
162 PPMode ppMode; 162 PPMode ppMode;
163 } PPContext; 163 } PPContext;
245 */ 245 */
246 static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) 246 static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
247 { 247 {
248 int numEq= 0; 248 int numEq= 0;
249 int y; 249 int y;
250 const int dcOffset= c->dcOffset; 250 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
251 const int dcThreshold= c->dcThreshold; 251 const int dcThreshold= dcOffset*2 + 1;
252 for(y=0; y<BLOCK_SIZE; y++) 252 for(y=0; y<BLOCK_SIZE; y++)
253 { 253 {
254 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; 254 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
255 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; 255 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
256 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; 256 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
267 * Check if the middle 8x8 Block in the given 8x16 block is flat 267 * Check if the middle 8x8 Block in the given 8x16 block is flat
268 */ 268 */
269 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ 269 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
270 int numEq= 0; 270 int numEq= 0;
271 int y; 271 int y;
272 const int dcOffset= c->dcOffset; 272 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
273 const int dcThreshold= c->dcThreshold; 273 const int dcThreshold= dcOffset*2 + 1;
274 src+= stride*4; // src points to begin of the 8x8 Block 274 src+= stride*4; // src points to begin of the 8x8 Block
275 for(y=0; y<BLOCK_SIZE-1; y++) 275 for(y=0; y<BLOCK_SIZE-1; y++)
276 { 276 {
277 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; 277 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
278 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; 278 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
723 } 723 }
724 } 724 }
725 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK) 725 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
726 { 726 {
727 int o; 727 int o;
728 ppMode.maxDcDiff=1; 728 ppMode.baseDcDiff=256/4;
729 // hFlatnessThreshold= 40; 729 // hFlatnessThreshold= 40;
730 // vFlatnessThreshold= 40; 730 // vFlatnessThreshold= 40;
731 731
732 for(o=0; options[o]!=NULL && o<2; o++) 732 for(o=0; options[o]!=NULL && o<2; o++)
733 { 733 {
734 char *tail; 734 char *tail;
735 int val= strtol(options[o], &tail, 0); 735 int val= strtol(options[o], &tail, 0);
736 if(tail==options[o]) break; 736 if(tail==options[o]) break;
737 737
738 numOfUnknownOptions--; 738 numOfUnknownOptions--;
739 if(o==0) ppMode.maxDcDiff= val; 739 if(o==0) ppMode.baseDcDiff= val;
740 else ppMode.flatnessThreshold= val; 740 else ppMode.flatnessThreshold= val;
741 } 741 }
742 } 742 }
743 else if(filters[i].mask == FORCE_QUANT) 743 else if(filters[i].mask == FORCE_QUANT)
744 { 744 {
766 } 766 }
767 767
768 void *getPPContext(int width, int height){ 768 void *getPPContext(int width, int height){
769 PPContext *c= memalign(32, sizeof(PPContext)); 769 PPContext *c= memalign(32, sizeof(PPContext));
770 int i; 770 int i;
771 int mbWidth = (width+15)>>4;
772 int mbHeight= (height+15)>>4;
771 773
772 c->tempBlocks= (uint8_t*)memalign(8, 2*16*8); 774 c->tempBlocks= (uint8_t*)memalign(8, 2*16*8);
773 c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t)); 775 c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t));
774 for(i=0; i<256; i++) 776 for(i=0; i<256; i++)
775 c->yHistogram[i]= width*height/64*15/256; 777 c->yHistogram[i]= width*height/64*15/256;
787 c->tempDst= (uint8_t*)memalign(8, 1024*24); 789 c->tempDst= (uint8_t*)memalign(8, 1024*24);
788 c->tempSrc= (uint8_t*)memalign(8, 1024*24); 790 c->tempSrc= (uint8_t*)memalign(8, 1024*24);
789 c->tempDstBlock= (uint8_t*)memalign(8, 1024*24); 791 c->tempDstBlock= (uint8_t*)memalign(8, 1024*24);
790 c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24); 792 c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
791 c->deintTemp= (uint8_t*)memalign(8, width+16); 793 c->deintTemp= (uint8_t*)memalign(8, width+16);
794 c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T));
795 memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T));
792 796
793 c->frameNum=-1; 797 c->frameNum=-1;
794 798
795 return c; 799 return c;
796 } 800 }
807 free(c->tempDst); 811 free(c->tempDst);
808 free(c->tempSrc); 812 free(c->tempSrc);
809 free(c->tempDstBlock); 813 free(c->tempDstBlock);
810 free(c->tempSrcBlock); 814 free(c->tempSrcBlock);
811 free(c->deintTemp); 815 free(c->deintTemp);
816 free(c->nonBQPTable);
812 817
813 free(c); 818 free(c);
814 } 819 }
815 820
816 //FIXME move this shit away from here 821 //FIXME move this shit away from here
839 } 844 }
840 845
841 846
842 void postprocess(uint8_t * src[3], int srcStride[3], 847 void postprocess(uint8_t * src[3], int srcStride[3],
843 uint8_t * dst[3], int dstStride[3], 848 uint8_t * dst[3], int dstStride[3],
844 int horizontalSize, int verticalSize, 849 int width, int height,
845 QP_STORE_T *QP_store, int QPStride, 850 QP_STORE_T *QP_store, int QPStride,
846 PPMode *mode, void *c) 851 PPMode *mode, void *vc, int pict_type)
847 { 852 {
848 853 int mbWidth = (width+15)>>4;
854 int mbHeight= (height+15)>>4;
849 QP_STORE_T quantArray[2048/8]; 855 QP_STORE_T quantArray[2048/8];
856 PPContext *c = (PPContext*)vc;
850 857
851 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 858 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
852 { 859 {
853 int i; 860 int i;
854 QP_store= quantArray; 861 QP_store= quantArray;
856 if(mode->lumMode & FORCE_QUANT) 863 if(mode->lumMode & FORCE_QUANT)
857 for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant; 864 for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant;
858 else 865 else
859 for(i=0; i<2048/8; i++) quantArray[i]= 1; 866 for(i=0; i<2048/8; i++) quantArray[i]= 1;
860 } 867 }
868 if(0){
869 int x,y;
870 for(y=0; y<mbHeight; y++){
871 for(x=0; x<mbWidth; x++){
872 printf("%2d ", QP_store[x + y*QPStride]);
873 }
874 printf("\n");
875 }
876 printf("\n");
877 }
878 //printf("pict_type:%d\n", pict_type);
879 if(pict_type!=3)
880 {
881 int x,y;
882 for(y=0; y<mbHeight; y++){
883 for(x=0; x<mbWidth; x++){
884 int qscale= QP_store[x + y*QPStride];
885 if(qscale&~31)
886 qscale=31;
887 c->nonBQPTable[y*mbWidth + x]= qscale;
888 }
889 }
890 }
861 891
862 if(firstTime2 && verbose) 892 if(firstTime2 && verbose)
863 { 893 {
864 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); 894 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
865 firstTime2=0; 895 firstTime2=0;
866 } 896 }
867 897
868 postProcess(src[0], srcStride[0], dst[0], dstStride[0], 898 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
869 horizontalSize, verticalSize, QP_store, QPStride, 0, mode, c); 899 width, height, QP_store, QPStride, 0, mode, c);
870 900
871 horizontalSize = (horizontalSize+1)>> 1; 901 width = (width +1)>>1;
872 verticalSize = (verticalSize+1)>>1; 902 height = (height+1)>>1;
873 903
874 if(mode->chromMode) 904 if(mode->chromMode)
875 { 905 {
876 postProcess(src[1], srcStride[1], dst[1], dstStride[1], 906 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
877 horizontalSize, verticalSize, QP_store, QPStride, 1, mode, c); 907 width, height, QP_store, QPStride, 1, mode, c);
878 postProcess(src[2], srcStride[2], dst[2], dstStride[2], 908 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
879 horizontalSize, verticalSize, QP_store, QPStride, 2, mode, c); 909 width, height, QP_store, QPStride, 2, mode, c);
880 } 910 }
881 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) 911 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
882 { 912 {
883 memcpy(dst[1], src[1], srcStride[1]*verticalSize); 913 memcpy(dst[1], src[1], srcStride[1]*height);
884 memcpy(dst[2], src[2], srcStride[2]*verticalSize); 914 memcpy(dst[2], src[2], srcStride[2]*height);
885 } 915 }
886 else 916 else
887 { 917 {
888 int y; 918 int y;
889 for(y=0; y<verticalSize; y++) 919 for(y=0; y<height; y++)
890 { 920 {
891 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), horizontalSize); 921 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
892 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), horizontalSize); 922 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
893 } 923 }
894 } 924 }
895 } 925 }
896 926