comparison postprocess.c @ 95:c24dab9bca80 libpostproc

cosmetics: Fix indentation to be 4 spaces and consistently place {}.
author diego
date Sat, 22 Mar 2008 15:46:34 +0000
parents 7c86da6ce78e
children 2653d9f33b8a
comparison
equal deleted inserted replaced
94:094413c45b0f 95:c24dab9bca80
114 DECLARE_ASM_CONST(8, int, deringThreshold)= 20; 114 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
115 115
116 116
117 static struct PPFilter filters[]= 117 static struct PPFilter filters[]=
118 { 118 {
119 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, 119 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
120 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, 120 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
121 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER}, 121 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
122 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ 122 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
123 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, 123 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
124 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, 124 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
125 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, 125 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
126 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, 126 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
127 {"dr", "dering", 1, 5, 6, DERING}, 127 {"dr", "dering", 1, 5, 6, DERING},
128 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, 128 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
129 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, 129 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
130 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, 130 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
131 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, 131 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
132 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, 132 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
133 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, 133 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
134 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, 134 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
135 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, 135 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
136 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, 136 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
137 {NULL, NULL,0,0,0,0} //End Marker 137 {NULL, NULL,0,0,0,0} //End Marker
138 }; 138 };
139 139
140 static const char *replaceTable[]= 140 static const char *replaceTable[]=
141 { 141 {
142 "default", "hb:a,vb:a,dr:a", 142 "default", "hb:a,vb:a,dr:a",
143 "de", "hb:a,vb:a,dr:a", 143 "de", "hb:a,vb:a,dr:a",
144 "fast", "h1:a,v1:a,dr:a", 144 "fast", "h1:a,v1:a,dr:a",
145 "fa", "h1:a,v1:a,dr:a", 145 "fa", "h1:a,v1:a,dr:a",
146 "ac", "ha:a:128:7,va:a,dr:a", 146 "ac", "ha:a:128:7,va:a,dr:a",
147 NULL //End Marker 147 NULL //End Marker
148 }; 148 };
149 149
150 150
151 #if defined(ARCH_X86) 151 #if defined(ARCH_X86)
152 static inline void prefetchnta(void *p) 152 static inline void prefetchnta(void *p)
153 { 153 {
154 asm volatile( "prefetchnta (%0)\n\t" 154 asm volatile( "prefetchnta (%0)\n\t"
155 : : "r" (p) 155 : : "r" (p)
156 ); 156 );
157 } 157 }
158 158
159 static inline void prefetcht0(void *p) 159 static inline void prefetcht0(void *p)
160 { 160 {
161 asm volatile( "prefetcht0 (%0)\n\t" 161 asm volatile( "prefetcht0 (%0)\n\t"
162 : : "r" (p) 162 : : "r" (p)
163 ); 163 );
164 } 164 }
165 165
166 static inline void prefetcht1(void *p) 166 static inline void prefetcht1(void *p)
167 { 167 {
168 asm volatile( "prefetcht1 (%0)\n\t" 168 asm volatile( "prefetcht1 (%0)\n\t"
169 : : "r" (p) 169 : : "r" (p)
170 ); 170 );
171 } 171 }
172 172
173 static inline void prefetcht2(void *p) 173 static inline void prefetcht2(void *p)
174 { 174 {
175 asm volatile( "prefetcht2 (%0)\n\t" 175 asm volatile( "prefetcht2 (%0)\n\t"
176 : : "r" (p) 176 : : "r" (p)
177 ); 177 );
178 } 178 }
179 #endif 179 #endif
180 180
181 // The horizontal Functions exist only in C because the MMX code is faster with vertical filters and transposing 181 // The horizontal Functions exist only in C because the MMX code is faster with vertical filters and transposing
182 182
183 /** 183 /**
184 * Check if the given 8x8 Block is mostly "flat" 184 * Check if the given 8x8 Block is mostly "flat"
185 */ 185 */
186 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) 186 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
187 { 187 {
188 int numEq= 0; 188 int numEq= 0;
189 int y; 189 int y;
190 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; 190 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
191 const int dcThreshold= dcOffset*2 + 1; 191 const int dcThreshold= dcOffset*2 + 1;
192 192
193 for(y=0; y<BLOCK_SIZE; y++) 193 for(y=0; y<BLOCK_SIZE; y++){
194 { 194 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
195 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; 195 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
196 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; 196 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
197 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; 197 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
198 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; 198 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
199 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; 199 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
200 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; 200 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
201 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; 201 src+= stride;
202 src+= stride; 202 }
203 } 203 return numEq > c->ppMode.flatnessThreshold;
204 return numEq > c->ppMode.flatnessThreshold;
205 } 204 }
206 205
207 /** 206 /**
208 * Check if the middle 8x8 Block in the given 8x16 block is flat 207 * Check if the middle 8x8 Block in the given 8x16 block is flat
209 */ 208 */
210 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ 209 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
211 int numEq= 0; 210 {
212 int y; 211 int numEq= 0;
213 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; 212 int y;
214 const int dcThreshold= dcOffset*2 + 1; 213 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
215 214 const int dcThreshold= dcOffset*2 + 1;
216 src+= stride*4; // src points to begin of the 8x8 Block 215
217 for(y=0; y<BLOCK_SIZE-1; y++) 216 src+= stride*4; // src points to begin of the 8x8 Block
218 { 217 for(y=0; y<BLOCK_SIZE-1; y++){
219 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; 218 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; 219 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; 220 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; 221 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; 222 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; 223 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; 224 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
226 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; 225 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
227 src+= stride; 226 src+= stride;
228 } 227 }
229 return numEq > c->ppMode.flatnessThreshold; 228 return numEq > c->ppMode.flatnessThreshold;
230 } 229 }
231 230
232 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP) 231 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
233 { 232 {
234 int i; 233 int i;
235 #if 1 234 #if 1
236 for(i=0; i<2; i++){ 235 for(i=0; i<2; i++){
237 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0; 236 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
238 src += stride; 237 src += stride;
239 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0; 238 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
240 src += stride; 239 src += stride;
241 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0; 240 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
242 src += stride; 241 src += stride;
243 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0; 242 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
244 src += stride; 243 src += stride;
245 } 244 }
246 #else 245 #else
247 for(i=0; i<8; i++){ 246 for(i=0; i<8; i++){
248 if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0; 247 if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
249 src += stride; 248 src += stride;
250 } 249 }
251 #endif 250 #endif
252 return 1; 251 return 1;
253 } 252 }
254 253
255 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) 254 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
256 { 255 {
257 #if 1 256 #if 1
258 #if 1 257 #if 1
259 int x; 258 int x;
260 src+= stride*4; 259 src+= stride*4;
261 for(x=0; x<BLOCK_SIZE; x+=4) 260 for(x=0; x<BLOCK_SIZE; x+=4){
262 { 261 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
263 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; 262 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
264 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; 263 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
265 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; 264 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
266 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; 265 }
267 }
268 #else 266 #else
269 int x; 267 int x;
270 src+= stride*3; 268 src+= stride*3;
271 for(x=0; x<BLOCK_SIZE; x++) 269 for(x=0; x<BLOCK_SIZE; x++){
272 { 270 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
273 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; 271 }
274 } 272 #endif
275 #endif 273 return 1;
276 return 1;
277 #else 274 #else
278 int x; 275 int x;
279 src+= stride*4; 276 src+= stride*4;
280 for(x=0; x<BLOCK_SIZE; x++) 277 for(x=0; x<BLOCK_SIZE; x++){
281 { 278 int min=255;
282 int min=255; 279 int max=0;
283 int max=0; 280 int y;
284 int y; 281 for(y=0; y<8; y++){
285 for(y=0; y<8; y++){ 282 int v= src[x + y*stride];
286 int v= src[x + y*stride]; 283 if(v>max) max=v;
287 if(v>max) max=v; 284 if(v<min) min=v;
288 if(v<min) min=v; 285 }
289 } 286 if(max-min > 2*QP) return 0;
290 if(max-min > 2*QP) return 0; 287 }
291 } 288 return 1;
292 return 1; 289 #endif
293 #endif 290 }
294 } 291
295 292 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
296 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ 293 {
297 if( isHorizDC_C(src, stride, c) ){ 294 if( isHorizDC_C(src, stride, c) ){
298 if( isHorizMinMaxOk_C(src, stride, c->QP) ) 295 if( isHorizMinMaxOk_C(src, stride, c->QP) )
299 return 1; 296 return 1;
300 else 297 else
301 return 0; 298 return 0;
302 }else{ 299 }else{
303 return 2; 300 return 2;
304 } 301 }
305 } 302 }
306 303
307 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ 304 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
308 if( isVertDC_C(src, stride, c) ){ 305 {
309 if( isVertMinMaxOk_C(src, stride, c->QP) ) 306 if( isVertDC_C(src, stride, c) ){
310 return 1; 307 if( isVertMinMaxOk_C(src, stride, c->QP) )
311 else 308 return 1;
312 return 0; 309 else
313 }else{ 310 return 0;
314 return 2; 311 }else{
315 } 312 return 2;
313 }
316 } 314 }
317 315
318 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) 316 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
319 { 317 {
320 int y; 318 int y;
321 for(y=0; y<BLOCK_SIZE; y++) 319 for(y=0; y<BLOCK_SIZE; y++){
322 { 320 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
323 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); 321
324 322 if(FFABS(middleEnergy) < 8*c->QP){
325 if(FFABS(middleEnergy) < 8*c->QP) 323 const int q=(dst[3] - dst[4])/2;
326 { 324 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
327 const int q=(dst[3] - dst[4])/2; 325 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
328 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); 326
329 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); 327 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
330 328 d= FFMAX(d, 0);
331 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) ); 329
332 d= FFMAX(d, 0); 330 d= (5*d + 32) >> 6;
333 331 d*= FFSIGN(-middleEnergy);
334 d= (5*d + 32) >> 6; 332
335 d*= FFSIGN(-middleEnergy); 333 if(q>0)
336 334 {
337 if(q>0) 335 d= d<0 ? 0 : d;
338 { 336 d= d>q ? q : d;
339 d= d<0 ? 0 : d; 337 }
340 d= d>q ? q : d; 338 else
341 } 339 {
342 else 340 d= d>0 ? 0 : d;
343 { 341 d= d<q ? q : d;
344 d= d>0 ? 0 : d; 342 }
345 d= d<q ? q : d; 343
346 } 344 dst[3]-= d;
347 345 dst[4]+= d;
348 dst[3]-= d; 346 }
349 dst[4]+= d; 347 dst+= stride;
350 } 348 }
351 dst+= stride;
352 }
353 } 349 }
354 350
355 /** 351 /**
356 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) 352 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
357 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) 353 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
358 */ 354 */
359 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) 355 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
360 { 356 {
361 int y; 357 int y;
362 for(y=0; y<BLOCK_SIZE; y++) 358 for(y=0; y<BLOCK_SIZE; y++){
363 { 359 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
364 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; 360 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
365 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; 361
366 362 int sums[10];
367 int sums[10]; 363 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
368 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; 364 sums[1] = sums[0] - first + dst[3];
369 sums[1] = sums[0] - first + dst[3]; 365 sums[2] = sums[1] - first + dst[4];
370 sums[2] = sums[1] - first + dst[4]; 366 sums[3] = sums[2] - first + dst[5];
371 sums[3] = sums[2] - first + dst[5]; 367 sums[4] = sums[3] - first + dst[6];
372 sums[4] = sums[3] - first + dst[6]; 368 sums[5] = sums[4] - dst[0] + dst[7];
373 sums[5] = sums[4] - dst[0] + dst[7]; 369 sums[6] = sums[5] - dst[1] + last;
374 sums[6] = sums[5] - dst[1] + last; 370 sums[7] = sums[6] - dst[2] + last;
375 sums[7] = sums[6] - dst[2] + last; 371 sums[8] = sums[7] - dst[3] + last;
376 sums[8] = sums[7] - dst[3] + last; 372 sums[9] = sums[8] - dst[4] + last;
377 sums[9] = sums[8] - dst[4] + last; 373
378 374 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
379 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; 375 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
380 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; 376 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
381 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; 377 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
382 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; 378 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
383 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; 379 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
384 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; 380 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
385 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; 381 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
386 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; 382
387 383 dst+= stride;
388 dst+= stride; 384 }
389 }
390 } 385 }
391 386
392 /** 387 /**
393 * Experimental Filter 1 (Horizontal) 388 * Experimental Filter 1 (Horizontal)
394 * will not damage linear gradients 389 * will not damage linear gradients
397 * MMX2 version does correct clipping, C version does not 392 * MMX2 version does correct clipping, C version does not
398 * not identical with the vertical one 393 * not identical with the vertical one
399 */ 394 */
400 static inline void horizX1Filter(uint8_t *src, int stride, int QP) 395 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
401 { 396 {
402 int y; 397 int y;
403 static uint64_t *lut= NULL; 398 static uint64_t *lut= NULL;
404 if(lut==NULL) 399 if(lut==NULL)
400 {
401 int i;
402 lut = av_malloc(256*8);
403 for(i=0; i<256; i++)
405 { 404 {
406 int i; 405 int v= i < 128 ? 2*i : 2*(i-256);
407 lut = av_malloc(256*8);
408 for(i=0; i<256; i++)
409 {
410 int v= i < 128 ? 2*i : 2*(i-256);
411 /* 406 /*
412 //Simulate 112242211 9-Tap filter 407 //Simulate 112242211 9-Tap filter
413 uint64_t a= (v/16) & 0xFF; 408 uint64_t a= (v/16) & 0xFF;
414 uint64_t b= (v/8) & 0xFF; 409 uint64_t b= (v/8) & 0xFF;
415 uint64_t c= (v/4) & 0xFF; 410 uint64_t c= (v/4) & 0xFF;
416 uint64_t d= (3*v/8) & 0xFF; 411 uint64_t d= (3*v/8) & 0xFF;
417 */ 412 */
418 //Simulate piecewise linear interpolation 413 //Simulate piecewise linear interpolation
419 uint64_t a= (v/16) & 0xFF; 414 uint64_t a= (v/16) & 0xFF;
420 uint64_t b= (v*3/16) & 0xFF; 415 uint64_t b= (v*3/16) & 0xFF;
421 uint64_t c= (v*5/16) & 0xFF; 416 uint64_t c= (v*5/16) & 0xFF;
422 uint64_t d= (7*v/16) & 0xFF; 417 uint64_t d= (7*v/16) & 0xFF;
423 uint64_t A= (0x100 - a)&0xFF; 418 uint64_t A= (0x100 - a)&0xFF;
424 uint64_t B= (0x100 - b)&0xFF; 419 uint64_t B= (0x100 - b)&0xFF;
425 uint64_t C= (0x100 - c)&0xFF; 420 uint64_t C= (0x100 - c)&0xFF;
426 uint64_t D= (0x100 - c)&0xFF; 421 uint64_t D= (0x100 - c)&0xFF;
427 422
428 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | 423 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
429 (D<<24) | (C<<16) | (B<<8) | (A); 424 (D<<24) | (C<<16) | (B<<8) | (A);
430 //lut[i] = (v<<32) | (v<<24); 425 //lut[i] = (v<<32) | (v<<24);
431 } 426 }
432 } 427 }
433 428
434 for(y=0; y<BLOCK_SIZE; y++) 429 for(y=0; y<BLOCK_SIZE; y++){
435 { 430 int a= src[1] - src[2];
436 int a= src[1] - src[2]; 431 int b= src[3] - src[4];
437 int b= src[3] - src[4]; 432 int c= src[5] - src[6];
438 int c= src[5] - src[6]; 433
439 434 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
440 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0); 435
441 436 if(d < QP){
442 if(d < QP) 437 int v = d * FFSIGN(-b);
443 { 438
444 int v = d * FFSIGN(-b); 439 src[1] +=v/8;
445 440 src[2] +=v/4;
446 src[1] +=v/8; 441 src[3] +=3*v/8;
447 src[2] +=v/4; 442 src[4] -=3*v/8;
448 src[3] +=3*v/8; 443 src[5] -=v/4;
449 src[4] -=3*v/8; 444 src[6] -=v/8;
450 src[5] -=v/4; 445 }
451 src[6] -=v/8; 446 src+=stride;
452 447 }
453 }
454 src+=stride;
455 }
456 } 448 }
457 449
458 /** 450 /**
459 * accurate deblock filter 451 * accurate deblock filter
460 */ 452 */
461 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ 453 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
462 int y; 454 int y;
463 const int QP= c->QP; 455 const int QP= c->QP;
464 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; 456 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
465 const int dcThreshold= dcOffset*2 + 1; 457 const int dcThreshold= dcOffset*2 + 1;
466 //START_TIMER 458 //START_TIMER
467 src+= step*4; // src points to begin of the 8x8 Block 459 src+= step*4; // src points to begin of the 8x8 Block
468 for(y=0; y<8; y++){ 460 for(y=0; y<8; y++){
469 int numEq= 0; 461 int numEq= 0;
470 462
471 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; 463 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
472 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; 464 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
473 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; 465 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
474 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; 466 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
475 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; 467 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
476 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; 468 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
477 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; 469 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
478 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; 470 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
479 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; 471 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
480 if(numEq > c->ppMode.flatnessThreshold){ 472 if(numEq > c->ppMode.flatnessThreshold){
481 int min, max, x; 473 int min, max, x;
482 474
483 if(src[0] > src[step]){ 475 if(src[0] > src[step]){
484 max= src[0]; 476 max= src[0];
485 min= src[step]; 477 min= src[step];
486 }else{ 478 }else{
487 max= src[step]; 479 max= src[step];
488 min= src[0]; 480 min= src[0];
489 } 481 }
490 for(x=2; x<8; x+=2){ 482 for(x=2; x<8; x+=2){
491 if(src[x*step] > src[(x+1)*step]){ 483 if(src[x*step] > src[(x+1)*step]){
492 if(src[x *step] > max) max= src[ x *step]; 484 if(src[x *step] > max) max= src[ x *step];
493 if(src[(x+1)*step] < min) min= src[(x+1)*step]; 485 if(src[(x+1)*step] < min) min= src[(x+1)*step];
494 }else{
495 if(src[(x+1)*step] > max) max= src[(x+1)*step];
496 if(src[ x *step] < min) min= src[ x *step];
497 }
498 }
499 if(max-min < 2*QP){
500 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
501 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
502
503 int sums[10];
504 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
505 sums[1] = sums[0] - first + src[3*step];
506 sums[2] = sums[1] - first + src[4*step];
507 sums[3] = sums[2] - first + src[5*step];
508 sums[4] = sums[3] - first + src[6*step];
509 sums[5] = sums[4] - src[0*step] + src[7*step];
510 sums[6] = sums[5] - src[1*step] + last;
511 sums[7] = sums[6] - src[2*step] + last;
512 sums[8] = sums[7] - src[3*step] + last;
513 sums[9] = sums[8] - src[4*step] + last;
514
515 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
516 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
517 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
518 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
519 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
520 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
521 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
522 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
523 }
524 }else{ 486 }else{
525 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); 487 if(src[(x+1)*step] > max) max= src[(x+1)*step];
526 488 if(src[ x *step] < min) min= src[ x *step];
527 if(FFABS(middleEnergy) < 8*QP)
528 {
529 const int q=(src[3*step] - src[4*step])/2;
530 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
531 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
532
533 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
534 d= FFMAX(d, 0);
535
536 d= (5*d + 32) >> 6;
537 d*= FFSIGN(-middleEnergy);
538
539 if(q>0)
540 {
541 d= d<0 ? 0 : d;
542 d= d>q ? q : d;
543 }
544 else
545 {
546 d= d>0 ? 0 : d;
547 d= d<q ? q : d;
548 }
549
550 src[3*step]-= d;
551 src[4*step]+= d;
552 }
553 } 489 }
554 490 }
555 src += stride; 491 if(max-min < 2*QP){
556 } 492 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
493 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
494
495 int sums[10];
496 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
497 sums[1] = sums[0] - first + src[3*step];
498 sums[2] = sums[1] - first + src[4*step];
499 sums[3] = sums[2] - first + src[5*step];
500 sums[4] = sums[3] - first + src[6*step];
501 sums[5] = sums[4] - src[0*step] + src[7*step];
502 sums[6] = sums[5] - src[1*step] + last;
503 sums[7] = sums[6] - src[2*step] + last;
504 sums[8] = sums[7] - src[3*step] + last;
505 sums[9] = sums[8] - src[4*step] + last;
506
507 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
508 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
509 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
510 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
511 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
512 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
513 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
514 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
515 }
516 }else{
517 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
518
519 if(FFABS(middleEnergy) < 8*QP){
520 const int q=(src[3*step] - src[4*step])/2;
521 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
522 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
523
524 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
525 d= FFMAX(d, 0);
526
527 d= (5*d + 32) >> 6;
528 d*= FFSIGN(-middleEnergy);
529
530 if(q>0){
531 d= d<0 ? 0 : d;
532 d= d>q ? q : d;
533 }else{
534 d= d>0 ? 0 : d;
535 d= d<q ? q : d;
536 }
537
538 src[3*step]-= d;
539 src[4*step]+= d;
540 }
541 }
542
543 src += stride;
544 }
557 /*if(step==16){ 545 /*if(step==16){
558 STOP_TIMER("step16") 546 STOP_TIMER("step16")
559 }else{ 547 }else{
560 STOP_TIMER("stepX") 548 STOP_TIMER("stepX")
561 }*/ 549 }*/
640 // minor note: the HAVE_xyz is messed up after that line so do not use it. 628 // minor note: the HAVE_xyz is messed up after that line so do not use it.
641 629
642 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, 630 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
643 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc) 631 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
644 { 632 {
645 PPContext *c= (PPContext *)vc; 633 PPContext *c= (PPContext *)vc;
646 PPMode *ppMode= (PPMode *)vm; 634 PPMode *ppMode= (PPMode *)vm;
647 c->ppMode= *ppMode; //FIXME 635 c->ppMode= *ppMode; //FIXME
648 636
649 // Using ifs here as they are faster than function pointers although the 637 // Using ifs here as they are faster than function pointers although the
650 // difference would not be measurable here but it is much better because 638 // difference would not be measurable here but it is much better because
651 // someone might exchange the CPU without restarting MPlayer ;) 639 // someone might exchange the CPU without restarting MPlayer ;)
652 #ifdef RUNTIME_CPUDETECT 640 #ifdef RUNTIME_CPUDETECT
653 #if defined(ARCH_X86) 641 #if defined(ARCH_X86)
654 // ordered per speed fastest first 642 // ordered per speed fastest first
655 if(c->cpuCaps & PP_CPU_CAPS_MMX2) 643 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
656 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 644 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) 645 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
658 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 646 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
659 else if(c->cpuCaps & PP_CPU_CAPS_MMX) 647 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
660 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 648 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
661 else 649 else
662 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 650 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
663 #else 651 #else
664 #ifdef HAVE_ALTIVEC 652 #ifdef HAVE_ALTIVEC
665 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) 653 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
666 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 654 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
667 else 655 else
668 #endif 656 #endif
669 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 657 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670 #endif 658 #endif
671 #else //RUNTIME_CPUDETECT 659 #else //RUNTIME_CPUDETECT
672 #ifdef HAVE_MMX2 660 #ifdef HAVE_MMX2
673 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 661 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
674 #elif defined (HAVE_3DNOW) 662 #elif defined (HAVE_3DNOW)
675 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 663 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
676 #elif defined (HAVE_MMX) 664 #elif defined (HAVE_MMX)
677 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 665 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
678 #elif defined (HAVE_ALTIVEC) 666 #elif defined (HAVE_ALTIVEC)
679 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 667 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
680 #else 668 #else
681 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 669 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
682 #endif 670 #endif
683 #endif //!RUNTIME_CPUDETECT 671 #endif //!RUNTIME_CPUDETECT
684 } 672 }
685 673
686 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, 674 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
736 "\n" 724 "\n"
737 ; 725 ;
738 726
739 pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality) 727 pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
740 { 728 {
741 char temp[GET_MODE_BUFFER_SIZE]; 729 char temp[GET_MODE_BUFFER_SIZE];
742 char *p= temp; 730 char *p= temp;
743 static const char filterDelimiters[] = ",/"; 731 static const char filterDelimiters[] = ",/";
744 static const char optionDelimiters[] = ":"; 732 static const char optionDelimiters[] = ":";
745 struct PPMode *ppMode; 733 struct PPMode *ppMode;
746 char *filterToken; 734 char *filterToken;
747 735
748 ppMode= av_malloc(sizeof(PPMode)); 736 ppMode= av_malloc(sizeof(PPMode));
749 737
750 ppMode->lumMode= 0; 738 ppMode->lumMode= 0;
751 ppMode->chromMode= 0; 739 ppMode->chromMode= 0;
752 ppMode->maxTmpNoise[0]= 700; 740 ppMode->maxTmpNoise[0]= 700;
753 ppMode->maxTmpNoise[1]= 1500; 741 ppMode->maxTmpNoise[1]= 1500;
754 ppMode->maxTmpNoise[2]= 3000; 742 ppMode->maxTmpNoise[2]= 3000;
755 ppMode->maxAllowedY= 234; 743 ppMode->maxAllowedY= 234;
756 ppMode->minAllowedY= 16; 744 ppMode->minAllowedY= 16;
757 ppMode->baseDcDiff= 256/8; 745 ppMode->baseDcDiff= 256/8;
758 ppMode->flatnessThreshold= 56-16-1; 746 ppMode->flatnessThreshold= 56-16-1;
759 ppMode->maxClippedThreshold= 0.01; 747 ppMode->maxClippedThreshold= 0.01;
760 ppMode->error=0; 748 ppMode->error=0;
761 749
762 strncpy(temp, name, GET_MODE_BUFFER_SIZE); 750 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
763 751
764 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name); 752 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
765 753
766 for(;;){ 754 for(;;){
767 char *filterName; 755 char *filterName;
768 int q= 1000000; //PP_QUALITY_MAX; 756 int q= 1000000; //PP_QUALITY_MAX;
769 int chrom=-1; 757 int chrom=-1;
770 int luma=-1; 758 int luma=-1;
771 char *option; 759 char *option;
772 char *options[OPTIONS_ARRAY_SIZE]; 760 char *options[OPTIONS_ARRAY_SIZE];
773 int i; 761 int i;
774 int filterNameOk=0; 762 int filterNameOk=0;
775 int numOfUnknownOptions=0; 763 int numOfUnknownOptions=0;
776 int enable=1; //does the user want us to enabled or disabled the filter 764 int enable=1; //does the user want us to enabled or disabled the filter
777 765
778 filterToken= strtok(p, filterDelimiters); 766 filterToken= strtok(p, filterDelimiters);
779 if(filterToken == NULL) break; 767 if(filterToken == NULL) break;
780 p+= strlen(filterToken) + 1; // p points to next filterToken 768 p+= strlen(filterToken) + 1; // p points to next filterToken
781 filterName= strtok(filterToken, optionDelimiters); 769 filterName= strtok(filterToken, optionDelimiters);
782 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName); 770 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
783 771
784 if(*filterName == '-') 772 if(*filterName == '-'){
773 enable=0;
774 filterName++;
775 }
776
777 for(;;){ //for all options
778 option= strtok(NULL, optionDelimiters);
779 if(option == NULL) break;
780
781 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
782 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
783 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
784 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
785 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
786 else{
787 options[numOfUnknownOptions] = option;
788 numOfUnknownOptions++;
789 }
790 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
791 }
792 options[numOfUnknownOptions] = NULL;
793
794 /* replace stuff from the replace Table */
795 for(i=0; replaceTable[2*i]!=NULL; i++){
796 if(!strcmp(replaceTable[2*i], filterName)){
797 int newlen= strlen(replaceTable[2*i + 1]);
798 int plen;
799 int spaceLeft;
800
801 if(p==NULL) p= temp, *p=0; //last filter
802 else p--, *p=','; //not last filter
803
804 plen= strlen(p);
805 spaceLeft= p - temp + plen;
806 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
807 ppMode->error++;
808 break;
809 }
810 memmove(p + newlen, p, plen+1);
811 memcpy(p, replaceTable[2*i + 1], newlen);
812 filterNameOk=1;
813 }
814 }
815
816 for(i=0; filters[i].shortName!=NULL; i++){
817 if( !strcmp(filters[i].longName, filterName)
818 || !strcmp(filters[i].shortName, filterName)){
819 ppMode->lumMode &= ~filters[i].mask;
820 ppMode->chromMode &= ~filters[i].mask;
821
822 filterNameOk=1;
823 if(!enable) break; // user wants to disable it
824
825 if(q >= filters[i].minLumQuality && luma)
826 ppMode->lumMode|= filters[i].mask;
827 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
828 if(q >= filters[i].minChromQuality)
829 ppMode->chromMode|= filters[i].mask;
830
831 if(filters[i].mask == LEVEL_FIX){
832 int o;
833 ppMode->minAllowedY= 16;
834 ppMode->maxAllowedY= 234;
835 for(o=0; options[o]!=NULL; o++){
836 if( !strcmp(options[o],"fullyrange")
837 ||!strcmp(options[o],"f")){
838 ppMode->minAllowedY= 0;
839 ppMode->maxAllowedY= 255;
840 numOfUnknownOptions--;
841 }
842 }
843 }
844 else if(filters[i].mask == TEMP_NOISE_FILTER)
785 { 845 {
786 enable=0; 846 int o;
787 filterName++; 847 int numOfNoises=0;
848
849 for(o=0; options[o]!=NULL; o++){
850 char *tail;
851 ppMode->maxTmpNoise[numOfNoises]=
852 strtol(options[o], &tail, 0);
853 if(tail!=options[o]){
854 numOfNoises++;
855 numOfUnknownOptions--;
856 if(numOfNoises >= 3) break;
857 }
858 }
788 } 859 }
789 860 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
790 for(;;){ //for all options 861 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
791 option= strtok(NULL, optionDelimiters); 862 int o;
792 if(option == NULL) break; 863
793 864 for(o=0; options[o]!=NULL && o<2; o++){
794 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option); 865 char *tail;
795 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; 866 int val= strtol(options[o], &tail, 0);
796 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; 867 if(tail==options[o]) break;
797 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; 868
798 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; 869 numOfUnknownOptions--;
799 else 870 if(o==0) ppMode->baseDcDiff= val;
800 { 871 else ppMode->flatnessThreshold= val;
801 options[numOfUnknownOptions] = option; 872 }
802 numOfUnknownOptions++;
803 }
804 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
805 } 873 }
806 options[numOfUnknownOptions] = NULL; 874 else if(filters[i].mask == FORCE_QUANT){
807 875 int o;
808 /* replace stuff from the replace Table */ 876 ppMode->forcedQuant= 15;
809 for(i=0; replaceTable[2*i]!=NULL; i++) 877
810 { 878 for(o=0; options[o]!=NULL && o<1; o++){
811 if(!strcmp(replaceTable[2*i], filterName)) 879 char *tail;
812 { 880 int val= strtol(options[o], &tail, 0);
813 int newlen= strlen(replaceTable[2*i + 1]); 881 if(tail==options[o]) break;
814 int plen; 882
815 int spaceLeft; 883 numOfUnknownOptions--;
816 884 ppMode->forcedQuant= val;
817 if(p==NULL) p= temp, *p=0; //last filter 885 }
818 else p--, *p=','; //not last filter
819
820 plen= strlen(p);
821 spaceLeft= p - temp + plen;
822 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
823 {
824 ppMode->error++;
825 break;
826 }
827 memmove(p + newlen, p, plen+1);
828 memcpy(p, replaceTable[2*i + 1], newlen);
829 filterNameOk=1;
830 }
831 } 886 }
832 887 }
833 for(i=0; filters[i].shortName!=NULL; i++) 888 }
834 { 889 if(!filterNameOk) ppMode->error++;
835 if( !strcmp(filters[i].longName, filterName) 890 ppMode->error += numOfUnknownOptions;
836 || !strcmp(filters[i].shortName, filterName)) 891 }
837 { 892
838 ppMode->lumMode &= ~filters[i].mask; 893 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
839 ppMode->chromMode &= ~filters[i].mask; 894 if(ppMode->error){
840 895 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
841 filterNameOk=1; 896 av_free(ppMode);
842 if(!enable) break; // user wants to disable it 897 return NULL;
843 898 }
844 if(q >= filters[i].minLumQuality && luma) 899 return ppMode;
845 ppMode->lumMode|= filters[i].mask;
846 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
847 if(q >= filters[i].minChromQuality)
848 ppMode->chromMode|= filters[i].mask;
849
850 if(filters[i].mask == LEVEL_FIX)
851 {
852 int o;
853 ppMode->minAllowedY= 16;
854 ppMode->maxAllowedY= 234;
855 for(o=0; options[o]!=NULL; o++)
856 {
857 if( !strcmp(options[o],"fullyrange")
858 ||!strcmp(options[o],"f"))
859 {
860 ppMode->minAllowedY= 0;
861 ppMode->maxAllowedY= 255;
862 numOfUnknownOptions--;
863 }
864 }
865 }
866 else if(filters[i].mask == TEMP_NOISE_FILTER)
867 {
868 int o;
869 int numOfNoises=0;
870
871 for(o=0; options[o]!=NULL; o++)
872 {
873 char *tail;
874 ppMode->maxTmpNoise[numOfNoises]=
875 strtol(options[o], &tail, 0);
876 if(tail!=options[o])
877 {
878 numOfNoises++;
879 numOfUnknownOptions--;
880 if(numOfNoises >= 3) break;
881 }
882 }
883 }
884 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
885 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
886 {
887 int o;
888
889 for(o=0; options[o]!=NULL && o<2; o++)
890 {
891 char *tail;
892 int val= strtol(options[o], &tail, 0);
893 if(tail==options[o]) break;
894
895 numOfUnknownOptions--;
896 if(o==0) ppMode->baseDcDiff= val;
897 else ppMode->flatnessThreshold= val;
898 }
899 }
900 else if(filters[i].mask == FORCE_QUANT)
901 {
902 int o;
903 ppMode->forcedQuant= 15;
904
905 for(o=0; options[o]!=NULL && o<1; o++)
906 {
907 char *tail;
908 int val= strtol(options[o], &tail, 0);
909 if(tail==options[o]) break;
910
911 numOfUnknownOptions--;
912 ppMode->forcedQuant= val;
913 }
914 }
915 }
916 }
917 if(!filterNameOk) ppMode->error++;
918 ppMode->error += numOfUnknownOptions;
919 }
920
921 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
922 if(ppMode->error)
923 {
924 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
925 av_free(ppMode);
926 return NULL;
927 }
928 return ppMode;
929 } 900 }
930 901
931 void pp_free_mode(pp_mode_t *mode){ 902 void pp_free_mode(pp_mode_t *mode){
932 av_free(mode); 903 av_free(mode);
933 } 904 }
934 905
935 static void reallocAlign(void **p, int alignment, int size){ 906 static void reallocAlign(void **p, int alignment, int size){
936 av_free(*p); 907 av_free(*p);
937 *p= av_mallocz(size); 908 *p= av_mallocz(size);
938 } 909 }
939 910
940 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){ 911 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
941 int mbWidth = (width+15)>>4; 912 int mbWidth = (width+15)>>4;
942 int mbHeight= (height+15)>>4; 913 int mbHeight= (height+15)>>4;
943 int i; 914 int i;
944 915
945 c->stride= stride; 916 c->stride= stride;
946 c->qpStride= qpStride; 917 c->qpStride= qpStride;
947 918
948 reallocAlign((void **)&c->tempDst, 8, stride*24); 919 reallocAlign((void **)&c->tempDst, 8, stride*24);
949 reallocAlign((void **)&c->tempSrc, 8, stride*24); 920 reallocAlign((void **)&c->tempSrc, 8, stride*24);
950 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); 921 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
951 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); 922 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
952 for(i=0; i<256; i++) 923 for(i=0; i<256; i++)
953 c->yHistogram[i]= width*height/64*15/256; 924 c->yHistogram[i]= width*height/64*15/256;
954 925
955 for(i=0; i<3; i++) 926 for(i=0; i<3; i++){
956 { 927 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
957 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end. 928 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
958 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); 929 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
959 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size 930 }
960 } 931
961 932 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
962 reallocAlign((void **)&c->deintTemp, 8, 2*width+32); 933 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
963 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); 934 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
964 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); 935 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
965 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
966 } 936 }
967 937
968 static const char * context_to_name(void * ptr) { 938 static const char * context_to_name(void * ptr) {
969 return "postproc"; 939 return "postproc";
970 } 940 }
971 941
972 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL }; 942 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
973 943
974 pp_context_t *pp_get_context(int width, int height, int cpuCaps){ 944 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
975 PPContext *c= av_malloc(sizeof(PPContext)); 945 PPContext *c= av_malloc(sizeof(PPContext));
976 int stride= (width+15)&(~15); //assumed / will realloc if needed 946 int stride= (width+15)&(~15); //assumed / will realloc if needed
977 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed 947 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
978 948
979 memset(c, 0, sizeof(PPContext)); 949 memset(c, 0, sizeof(PPContext));
980 c->av_class = &av_codec_context_class; 950 c->av_class = &av_codec_context_class;
981 c->cpuCaps= cpuCaps; 951 c->cpuCaps= cpuCaps;
982 if(cpuCaps&PP_FORMAT){ 952 if(cpuCaps&PP_FORMAT){
983 c->hChromaSubSample= cpuCaps&0x3; 953 c->hChromaSubSample= cpuCaps&0x3;
984 c->vChromaSubSample= (cpuCaps>>4)&0x3; 954 c->vChromaSubSample= (cpuCaps>>4)&0x3;
985 }else{ 955 }else{
986 c->hChromaSubSample= 1; 956 c->hChromaSubSample= 1;
987 c->vChromaSubSample= 1; 957 c->vChromaSubSample= 1;
988 } 958 }
989 959
990 reallocBuffers(c, width, height, stride, qpStride); 960 reallocBuffers(c, width, height, stride, qpStride);
991 961
992 c->frameNum=-1; 962 c->frameNum=-1;
993 963
994 return c; 964 return c;
995 } 965 }
996 966
997 void pp_free_context(void *vc){ 967 void pp_free_context(void *vc){
998 PPContext *c = (PPContext*)vc; 968 PPContext *c = (PPContext*)vc;
969 int i;
970
971 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
972 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
973
974 av_free(c->tempBlocks);
975 av_free(c->yHistogram);
976 av_free(c->tempDst);
977 av_free(c->tempSrc);
978 av_free(c->deintTemp);
979 av_free(c->stdQPTable);
980 av_free(c->nonBQPTable);
981 av_free(c->forcedQPTable);
982
983 memset(c, 0, sizeof(PPContext));
984
985 av_free(c);
986 }
987
988 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
989 uint8_t * dst[3], const int dstStride[3],
990 int width, int height,
991 const QP_STORE_T *QP_store, int QPStride,
992 pp_mode_t *vm, void *vc, int pict_type)
993 {
994 int mbWidth = (width+15)>>4;
995 int mbHeight= (height+15)>>4;
996 PPMode *mode = (PPMode*)vm;
997 PPContext *c = (PPContext*)vc;
998 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
999 int absQPStride = FFABS(QPStride);
1000
1001 // c->stride and c->QPStride are always positive
1002 if(c->stride < minStride || c->qpStride < absQPStride)
1003 reallocBuffers(c, width, height,
1004 FFMAX(minStride, c->stride),
1005 FFMAX(c->qpStride, absQPStride));
1006
1007 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
999 int i; 1008 int i;
1000 1009 QP_store= c->forcedQPTable;
1001 for(i=0; i<3; i++) av_free(c->tempBlured[i]); 1010 absQPStride = QPStride = 0;
1002 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]); 1011 if(mode->lumMode & FORCE_QUANT)
1003 1012 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1004 av_free(c->tempBlocks); 1013 else
1005 av_free(c->yHistogram); 1014 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1006 av_free(c->tempDst); 1015 }
1007 av_free(c->tempSrc); 1016
1008 av_free(c->deintTemp); 1017 if(pict_type & PP_PICT_TYPE_QP2){
1009 av_free(c->stdQPTable); 1018 int i;
1010 av_free(c->nonBQPTable); 1019 const int count= mbHeight * absQPStride;
1011 av_free(c->forcedQPTable); 1020 for(i=0; i<(count>>2); i++){
1012 1021 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1013 memset(c, 0, sizeof(PPContext)); 1022 }
1014 1023 for(i<<=2; i<count; i++){
1015 av_free(c); 1024 c->stdQPTable[i] = QP_store[i]>>1;
1016 } 1025 }
1017 1026 QP_store= c->stdQPTable;
1018 void pp_postprocess(const uint8_t * src[3], const int srcStride[3], 1027 QPStride= absQPStride;
1019 uint8_t * dst[3], const int dstStride[3], 1028 }
1020 int width, int height, 1029
1021 const QP_STORE_T *QP_store, int QPStride, 1030 if(0){
1022 pp_mode_t *vm, void *vc, int pict_type) 1031 int x,y;
1023 { 1032 for(y=0; y<mbHeight; y++){
1024 int mbWidth = (width+15)>>4; 1033 for(x=0; x<mbWidth; x++){
1025 int mbHeight= (height+15)>>4; 1034 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1026 PPMode *mode = (PPMode*)vm; 1035 }
1027 PPContext *c = (PPContext*)vc; 1036 av_log(c, AV_LOG_INFO, "\n");
1028 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0])); 1037 }
1029 int absQPStride = FFABS(QPStride); 1038 av_log(c, AV_LOG_INFO, "\n");
1030 1039 }
1031 // c->stride and c->QPStride are always positive 1040
1032 if(c->stride < minStride || c->qpStride < absQPStride) 1041 if((pict_type&7)!=3){
1033 reallocBuffers(c, width, height, 1042 if (QPStride >= 0){
1034 FFMAX(minStride, c->stride), 1043 int i;
1035 FFMAX(c->qpStride, absQPStride)); 1044 const int count= mbHeight * QPStride;
1036 1045 for(i=0; i<(count>>2); i++){
1037 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 1046 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1038 { 1047 }
1039 int i; 1048 for(i<<=2; i<count; i++){
1040 QP_store= c->forcedQPTable; 1049 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1041 absQPStride = QPStride = 0; 1050 }
1042 if(mode->lumMode & FORCE_QUANT) 1051 } else {
1043 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant; 1052 int i,j;
1044 else 1053 for(i=0; i<mbHeight; i++) {
1045 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1; 1054 for(j=0; j<absQPStride; j++) {
1046 } 1055 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1047
1048 if(pict_type & PP_PICT_TYPE_QP2){
1049 int i;
1050 const int count= mbHeight * absQPStride;
1051 for(i=0; i<(count>>2); i++){
1052 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1053 } 1056 }
1054 for(i<<=2; i<count; i++){ 1057 }
1055 c->stdQPTable[i] = QP_store[i]>>1; 1058 }
1056 } 1059 }
1057 QP_store= c->stdQPTable; 1060
1058 QPStride= absQPStride; 1061 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1059 } 1062 mode->lumMode, mode->chromMode);
1060 1063
1061 if(0){ 1064 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1062 int x,y;
1063 for(y=0; y<mbHeight; y++){
1064 for(x=0; x<mbWidth; x++){
1065 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1066 }
1067 av_log(c, AV_LOG_INFO, "\n");
1068 }
1069 av_log(c, AV_LOG_INFO, "\n");
1070 }
1071
1072 if((pict_type&7)!=3)
1073 {
1074 if (QPStride >= 0) {
1075 int i;
1076 const int count= mbHeight * QPStride;
1077 for(i=0; i<(count>>2); i++){
1078 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1079 }
1080 for(i<<=2; i<count; i++){
1081 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1082 }
1083 } else {
1084 int i,j;
1085 for(i=0; i<mbHeight; i++) {
1086 for(j=0; j<absQPStride; j++) {
1087 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1088 }
1089 }
1090 }
1091 }
1092
1093 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1094 mode->lumMode, mode->chromMode);
1095
1096 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1097 width, height, QP_store, QPStride, 0, mode, c); 1065 width, height, QP_store, QPStride, 0, mode, c);
1098 1066
1099 width = (width )>>c->hChromaSubSample; 1067 width = (width )>>c->hChromaSubSample;
1100 height = (height)>>c->vChromaSubSample; 1068 height = (height)>>c->vChromaSubSample;
1101 1069
1102 if(mode->chromMode) 1070 if(mode->chromMode){
1103 { 1071 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1104 postProcess(src[1], srcStride[1], dst[1], dstStride[1], 1072 width, height, QP_store, QPStride, 1, mode, c);
1105 width, height, QP_store, QPStride, 1, mode, c); 1073 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1106 postProcess(src[2], srcStride[2], dst[2], dstStride[2], 1074 width, height, QP_store, QPStride, 2, mode, c);
1107 width, height, QP_store, QPStride, 2, mode, c); 1075 }
1108 } 1076 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1109 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) 1077 linecpy(dst[1], src[1], height, srcStride[1]);
1110 { 1078 linecpy(dst[2], src[2], height, srcStride[2]);
1111 linecpy(dst[1], src[1], height, srcStride[1]); 1079 }else{
1112 linecpy(dst[2], src[2], height, srcStride[2]); 1080 int y;
1113 } 1081 for(y=0; y<height; y++){
1114 else 1082 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1115 { 1083 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1116 int y; 1084 }
1117 for(y=0; y<height; y++) 1085 }
1118 { 1086 }
1119 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); 1087
1120 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1121 }
1122 }
1123 }
1124