Mercurial > mplayer.hg
comparison postproc/postprocess.c @ 2285:4840e356d0d3
fixed a bug in the tmp buffer
fixed the color range for yuv
fixed the width %8!=0 bug (another 1% speed loss)
author | michael |
---|---|
date | Fri, 19 Oct 2001 13:41:38 +0000 |
parents | f7c1485b33be |
children | 58b3f908201d |
comparison
equal
deleted
inserted
replaced
2284:0c5fea3a0b91 | 2285:4840e356d0d3 |
---|---|
120 static uint64_t temp2=0; | 120 static uint64_t temp2=0; |
121 static uint64_t temp3=0; | 121 static uint64_t temp3=0; |
122 static uint64_t temp4=0; | 122 static uint64_t temp4=0; |
123 static uint64_t temp5=0; | 123 static uint64_t temp5=0; |
124 static uint64_t pQPb=0; | 124 static uint64_t pQPb=0; |
125 static uint8_t tempBlock[16*16]; | 125 static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data |
126 | 126 |
127 int hFlatnessThreshold= 56 - 16; | 127 int hFlatnessThreshold= 56 - 16; |
128 int vFlatnessThreshold= 56 - 16; | 128 int vFlatnessThreshold= 56 - 16; |
129 | 129 |
130 //amount of "black" u r willing to loose to get a brightness corrected picture | 130 //amount of "black" u r willing to loose to get a brightness corrected picture |
131 double maxClippedThreshold= 0.01; | 131 double maxClippedThreshold= 0.01; |
132 | 132 |
133 int maxAllowedY=255; | 133 int maxAllowedY=255; |
134 //FIXME can never make a movie´s black brighter (anyone needs that?) | 134 //FIXME can never make a movie´s black brighter (anyone needs that?) |
135 int minAllowedY=0; | 135 int minAllowedY=16; |
136 | 136 |
137 #ifdef TIMING | 137 #ifdef TIMING |
138 static inline long long rdtsc() | 138 static inline long long rdtsc() |
139 { | 139 { |
140 long long l; | 140 long long l; |
2396 | 2396 |
2397 /* Temporary buffers for handling the last row(s) */ | 2397 /* Temporary buffers for handling the last row(s) */ |
2398 static uint8_t *tempDst= NULL; | 2398 static uint8_t *tempDst= NULL; |
2399 static uint8_t *tempSrc= NULL; | 2399 static uint8_t *tempSrc= NULL; |
2400 | 2400 |
2401 /* Temporary buffers for handling the last block */ | |
2402 static uint8_t *tempDstBlock= NULL; | |
2403 static uint8_t *tempSrcBlock= NULL; | |
2404 | |
2405 uint8_t *dstBlockPtrBackup; | |
2406 uint8_t *srcBlockPtrBackup; | |
2407 | |
2401 #ifdef TIMING | 2408 #ifdef TIMING |
2402 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; | 2409 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; |
2403 sumTime= rdtsc(); | 2410 sumTime= rdtsc(); |
2404 #endif | 2411 #endif |
2405 | 2412 |
2406 if(tempDst==NULL) | 2413 if(tempDst==NULL) |
2407 { | 2414 { |
2408 tempDst= (uint8_t*)memalign(8, 1024*24); | 2415 tempDst= (uint8_t*)memalign(8, 1024*24); |
2409 tempSrc= (uint8_t*)memalign(8, 1024*24); | 2416 tempSrc= (uint8_t*)memalign(8, 1024*24); |
2417 tempDstBlock= (uint8_t*)memalign(8, 1024*24); | |
2418 tempSrcBlock= (uint8_t*)memalign(8, 1024*24); | |
2410 } | 2419 } |
2411 | 2420 |
2412 if(!yHistogram) | 2421 if(!yHistogram) |
2413 { | 2422 { |
2414 int i; | 2423 int i; |
2415 yHistogram= (uint64_t*)malloc(8*256); | 2424 yHistogram= (uint64_t*)malloc(8*256); |
2416 for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256; | 2425 for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256; |
2426 | |
2427 if(mode & FULL_Y_RANGE) | |
2428 { | |
2429 maxAllowedY=255; | |
2430 minAllowedY=0; | |
2431 } | |
2417 } | 2432 } |
2418 | 2433 |
2419 if(!isColor) | 2434 if(!isColor) |
2420 { | 2435 { |
2421 uint64_t sum= 0; | 2436 uint64_t sum= 0; |
2503 memcpy(tempDst, dstBlock, dstStride*MIN(height-y, 5) ); | 2518 memcpy(tempDst, dstBlock, dstStride*MIN(height-y, 5) ); |
2504 dstBlock= tempDst; | 2519 dstBlock= tempDst; |
2505 srcBlock= tempSrc; | 2520 srcBlock= tempSrc; |
2506 } | 2521 } |
2507 | 2522 |
2523 // From this point on it is guranteed that we can read and write 16 lines downward | |
2508 // finish 1 block before the next otherwise we´ll might have a problem | 2524 // finish 1 block before the next otherwise we´ll might have a problem |
2509 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing | 2525 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing |
2510 for(x=0; x<width; x+=BLOCK_SIZE) | 2526 for(x=0; x<width; x+=BLOCK_SIZE) |
2511 { | 2527 { |
2512 const int stride= dstStride; | 2528 const int stride= dstStride; |
2542 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); | 2558 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); |
2543 */ | 2559 */ |
2544 #endif | 2560 #endif |
2545 | 2561 |
2546 if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++; | 2562 if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++; |
2563 | |
2564 //can we mess with a 8x16 block, if not use a temp buffer, yes again | |
2565 if(x+7 >= width) | |
2566 { | |
2567 int i; | |
2568 dstBlockPtrBackup= dstBlock; | |
2569 srcBlockPtrBackup= srcBlock; | |
2570 | |
2571 for(i=0;i<BLOCK_SIZE*2; i++) | |
2572 { | |
2573 memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x); | |
2574 memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x); | |
2575 } | |
2576 | |
2577 dstBlock= tempDstBlock; | |
2578 srcBlock= tempSrcBlock; | |
2579 } | |
2547 | 2580 |
2548 blockCopy(dstBlock + dstStride*5, dstStride, | 2581 blockCopy(dstBlock + dstStride*5, dstStride, |
2549 srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX); | 2582 srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX); |
2550 | 2583 |
2551 if(mode & LINEAR_IPOL_DEINT_FILTER) | 2584 if(mode & LINEAR_IPOL_DEINT_FILTER) |
2591 T0=T1; | 2624 T0=T1; |
2592 #endif | 2625 #endif |
2593 } | 2626 } |
2594 | 2627 |
2595 /* check if we have a previous block to deblock it with dstBlock */ | 2628 /* check if we have a previous block to deblock it with dstBlock */ |
2596 if(x - 8 >= 0 && x<width) | 2629 if(x - 8 >= 0) |
2597 { | 2630 { |
2598 #ifdef MORE_TIMING | 2631 #ifdef MORE_TIMING |
2599 T0= rdtsc(); | 2632 T0= rdtsc(); |
2600 #endif | 2633 #endif |
2601 if(mode & H_DEBLOCK) | 2634 if(mode & H_DEBLOCK) |
2622 } | 2655 } |
2623 else if(y!=0) | 2656 else if(y!=0) |
2624 dering(dstBlock - stride*9 + width-9, stride, QP); | 2657 dering(dstBlock - stride*9 + width-9, stride, QP); |
2625 //FIXME dering filter will not be applied to last block (bottom right) | 2658 //FIXME dering filter will not be applied to last block (bottom right) |
2626 | 2659 |
2660 /* did we use a tmp-block buffer */ | |
2661 if(x+7 >= width) | |
2662 { | |
2663 int i; | |
2664 dstBlock= dstBlockPtrBackup; | |
2665 srcBlock= srcBlockPtrBackup; | |
2666 | |
2667 for(i=0;i<BLOCK_SIZE*2; i++) | |
2668 { | |
2669 memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x); | |
2670 } | |
2671 } | |
2672 | |
2627 dstBlock+=8; | 2673 dstBlock+=8; |
2628 srcBlock+=8; | 2674 srcBlock+=8; |
2629 } | 2675 } |
2630 | 2676 |
2631 /* did we use a tmp buffer */ | 2677 /* did we use a tmp buffer */ |
2632 if(y+15 > height) | 2678 if(y+15 >= height) |
2633 { | 2679 { |
2634 uint8_t *dstBlock= &(dst[y*dstStride]); | 2680 uint8_t *dstBlock= &(dst[y*dstStride]); |
2635 memcpy(dstBlock, tempDst, dstStride*(height-y) ); | 2681 memcpy(dstBlock, tempDst, dstStride*(height-y) ); |
2636 } | 2682 } |
2637 } | 2683 } |