Mercurial > libpostproc.hg
annotate postprocess.c @ 66:7737e39e74f3 libpostproc
Make pp_help a constant pointer to constant characters, moves it partially
in .rodata (the actual string) and partially in .data.relro (the pointer),
but doesn't change ABI.
Patch by Diego 'Flameeyes' Petten flameeyes at gmail com
author | benoit |
---|---|
date | Fri, 04 Jan 2008 07:47:32 +0000 |
parents | 5bef666de27d |
children | fb2657d1e70d |
rev | line source |
---|---|
0 | 1 /* |
22
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at) |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
3 * |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org> |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
5 * |
23 | 6 * This file is part of FFmpeg. |
22
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
7 * |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or modify |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
9 * it under the terms of the GNU General Public License as published by |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
10 * the Free Software Foundation; either version 2 of the License, or |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
11 * (at your option) any later version. |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
12 * |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
16 * GNU General Public License for more details. |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
17 * |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
18 * You should have received a copy of the GNU General Public License |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
19 * along with FFmpeg; if not, write to the Free Software |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
da3bfee1fa67
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
18
diff
changeset
|
21 */ |
0 | 22 |
23 /** | |
24 * @file postprocess.c | |
25 * postprocessing. | |
26 */ | |
27 | |
28 /* | |
29 C MMX MMX2 3DNow AltiVec | |
30 isVertDC Ec Ec Ec | |
31 isVertMinMaxOk Ec Ec Ec | |
32 doVertLowPass E e e Ec | |
33 doVertDefFilter Ec Ec e e Ec | |
34 isHorizDC Ec Ec Ec | |
35 isHorizMinMaxOk a E Ec | |
36 doHorizLowPass E e e Ec | |
37 doHorizDefFilter Ec Ec e e Ec | |
38 do_a_deblock Ec E Ec E | |
39 deRing E e e* Ecp | |
40 Vertical RKAlgo1 E a a | |
41 Horizontal RKAlgo1 a a | |
42 Vertical X1# a E E | |
43 Horizontal X1# a E E | |
44 LinIpolDeinterlace e E E* | |
45 CubicIpolDeinterlace a e e* | |
46 LinBlendDeinterlace e E E* | |
47 MedianDeinterlace# E Ec Ec | |
48 TempDeNoiser# E e e Ec | |
49 | |
48 | 50 * i do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work |
51 # more or less selfinvented filters so the exactness is not too meaningful | |
0 | 52 E = Exact implementation |
53 e = almost exact implementation (slightly different rounding,...) | |
54 a = alternative / approximate impl | |
55 c = checked against the other implementations (-vo md5) | |
56 p = partially optimized, still some work to do | |
57 */ | |
58 | |
59 /* | |
60 TODO: | |
61 reduce the time wasted on the mem transfer | |
62 unroll stuff if instructions depend too much on the prior one | |
63 move YScale thing to the end instead of fixing QP | |
64 write a faster and higher quality deblocking filter :) | |
65 make the mainloop more flexible (variable number of blocks at once | |
66 (the if/else stuff per block is slowing things down) | |
67 compare the quality & speed of all filters | |
68 split this huge file | |
69 optimize c versions | |
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks | |
71 ... | |
72 */ | |
73 | |
4 | 74 //Changelog: use the Subversion log |
0 | 75 |
76 #include "config.h" | |
16 | 77 #include "avutil.h" |
0 | 78 #include <inttypes.h> |
79 #include <stdio.h> | |
80 #include <stdlib.h> | |
81 #include <string.h> | |
82 #ifdef HAVE_MALLOC_H | |
83 #include <malloc.h> | |
84 #endif | |
85 //#undef HAVE_MMX2 | |
86 //#define HAVE_3DNOW | |
87 //#undef HAVE_MMX | |
88 //#undef ARCH_X86 | |
89 //#define DEBUG_BRIGHTNESS | |
90 #include "postprocess.h" | |
91 #include "postprocess_internal.h" | |
92 | |
93 #ifdef HAVE_ALTIVEC_H | |
94 #include <altivec.h> | |
95 #endif | |
96 | |
97 #define GET_MODE_BUFFER_SIZE 500 | |
98 #define OPTIONS_ARRAY_SIZE 10 | |
99 #define BLOCK_SIZE 8 | |
100 #define TEMP_STRIDE 8 | |
101 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet | |
102 | |
/*
 * x86-only 64-bit constants. Each wNN value repeats 0x00NN in all four
 * 16-bit lanes and each bNN value repeats 0xNN in all eight bytes.
 * None of them is referenced by the C code in this part of the file.
 * NOTE(review): presumably consumed by the inline-asm templates included
 * further down, and DECLARE_ALIGNED(8, ...) presumably requests 8-byte
 * alignment -- both are defined outside this view, confirm there.
 */
36 | 103 #if defined(ARCH_X86) |
45 | 104 static DECLARE_ALIGNED(8, uint64_t attribute_used, w05)= 0x0005000500050005LL; |
105 static DECLARE_ALIGNED(8, uint64_t attribute_used, w04)= 0x0004000400040004LL; | |
106 static DECLARE_ALIGNED(8, uint64_t attribute_used, w20)= 0x0020002000200020LL; | |
107 static DECLARE_ALIGNED(8, uint64_t attribute_used, b00)= 0x0000000000000000LL; | |
108 static DECLARE_ALIGNED(8, uint64_t attribute_used, b01)= 0x0101010101010101LL; | |
109 static DECLARE_ALIGNED(8, uint64_t attribute_used, b02)= 0x0202020202020202LL; | |
110 static DECLARE_ALIGNED(8, uint64_t attribute_used, b08)= 0x0808080808080808LL; | |
111 static DECLARE_ALIGNED(8, uint64_t attribute_used, b80)= 0x8080808080808080LL; | |
0 | 112 #endif |
113 | |
/*
 * clip_tab points 256 entries into the 768-entry clip_table, so indices
 * -256..511 of clip_tab stay inside the array.
 * NOTE(review): presumably a saturation lookup table that is filled
 * elsewhere -- the initialisation is not visible here, confirm.
 */
114 static uint8_t clip_table[3*256]; | |
115 static uint8_t * const clip_tab= clip_table + 256; | |
116 | |
/*
 * Constant 20, not referenced by the C code visible here.
 * NOTE(review): attribute_used suggests it is only referenced from inline
 * asm in the included templates -- confirm.
 */
117 static const int attribute_used deringThreshold= 20; | |
118 | |
119 | |
/*
 * Table of the filters that can be selected by name. Every entry holds a
 * short name, a long name, three integers and a filter constant; the list
 * is terminated by an entry whose names are NULL and whose numbers are 0.
 * The two RK1 entries are commented out.
 * NOTE(review): the meaning of the three integers is defined by
 * struct PPFilter, which is declared outside this view -- confirm there.
 */
120 static struct PPFilter filters[]= | |
121 { | |
122 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, | |
123 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, | |
124 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER}, | |
125 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ | |
126 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, | |
127 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, | |
128 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, | |
129 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, | |
130 {"dr", "dering", 1, 5, 6, DERING}, | |
131 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, | |
132 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, | |
133 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, | |
134 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, | |
135 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, | |
136 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, | |
137 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, | |
138 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, | |
139 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, | |
140 {NULL, NULL,0,0,0,0} //End Marker | |
141 }; | |
142 | |
5
a4a3c84fe72f
Add const to (mostly) char* and make some functions static, which aren't used
diego
parents:
4
diff
changeset
|
/*
 * Shorthand expansion table, stored as consecutive (name, replacement)
 * string pairs and terminated by a single NULL.
 * pp_get_mode_by_name_and_quality() compares the even entries with the
 * current filter name and takes the following odd entry as its replacement.
 */
143 static const char *replaceTable[]= |
0 | 144 { |
145 "default", "hdeblock:a,vdeblock:a,dering:a", | |
146 "de", "hdeblock:a,vdeblock:a,dering:a", | |
147 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a", | |
148 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a", | |
149 "ac", "ha:a:128:7,va:a,dering:a", | |
150 NULL //End Marker | |
151 }; | |
152 | |
153 | |
#if defined(ARCH_X86)
/*
 * Thin wrappers around the x86 software-prefetch instructions. Each helper
 * emits exactly one prefetch instruction for the address p and does nothing
 * else.
 *
 * __asm__ is used instead of the bare asm keyword: asm is a GNU extension
 * that is not recognised when the compiler runs in strict ISO mode
 * (-std=c99 / -std=c11), while __asm__ is always accepted.
 * The pointer is const-qualified because the helpers never write through it.
 */

/** Emit "prefetchnta" for the address p. */
static inline void prefetchnta(const void *p)
{
    __asm__ volatile( "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit "prefetcht0" for the address p. */
static inline void prefetcht0(const void *p)
{
    __asm__ volatile( "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit "prefetcht1" for the address p. */
static inline void prefetcht1(const void *p)
{
    __asm__ volatile( "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit "prefetcht2" for the address p. */
static inline void prefetcht2(const void *p)
{
    __asm__ volatile( "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
#endif
183 | |
184 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing | |
185 | |
186 /** | |
187 * Check if the given 8x8 Block is mostly "flat" | |
188 */ | |
189 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) | |
190 { | |
191 int numEq= 0; | |
192 int y; | |
193 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | |
194 const int dcThreshold= dcOffset*2 + 1; | |
195 | |
196 for(y=0; y<BLOCK_SIZE; y++) | |
197 { | |
198 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; | |
199 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; | |
200 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; | |
201 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; | |
202 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; | |
203 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; | |
204 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; | |
205 src+= stride; | |
206 } | |
207 return numEq > c->ppMode.flatnessThreshold; | |
208 } | |
209 | |
210 /** | |
211 * Check if the middle 8x8 Block in the given 8x16 block is flat | |
212 */ | |
213 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ | |
214 int numEq= 0; | |
215 int y; | |
216 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | |
217 const int dcThreshold= dcOffset*2 + 1; | |
218 | |
219 src+= stride*4; // src points to begin of the 8x8 Block | |
220 for(y=0; y<BLOCK_SIZE-1; y++) | |
221 { | |
222 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; | |
223 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; | |
224 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; | |
225 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; | |
226 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; | |
227 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; | |
228 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; | |
229 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; | |
230 src+= stride; | |
231 } | |
232 return numEq > c->ppMode.flatnessThreshold; | |
233 } | |
234 | |
/**
 * Sparse horizontal range check over 8 rows.
 *
 * One pixel pair is sampled per row; the column pattern (0,5) (2,7) (4,1)
 * (6,3) is used for rows 0..3 and repeated for rows 4..7.
 *
 * @param src    first pixel of the first row
 * @param stride offset from one row to the next
 * @param QP     quantiser; a pair fails when
 *               (unsigned)(left - right + 2*QP) > 4*QP
 * @return 0 as soon as one sampled pair fails, 1 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t leftCol[4]  = { 0, 2, 4, 6 };
    static const uint8_t rightCol[4] = { 5, 7, 1, 3 };
    int row;

    for(row=0; row<8; row++)
    {
        const uint8_t *line = src + row*stride;
        const int k = row & 3;

        if((unsigned)(line[leftCol[k]] - line[rightCol[k]] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
257 | |
258 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) | |
259 { | |
260 #if 1 | |
261 #if 1 | |
262 int x; | |
263 src+= stride*4; | |
264 for(x=0; x<BLOCK_SIZE; x+=4) | |
265 { | |
266 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; | |
267 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; | |
268 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; | |
269 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; | |
270 } | |
271 #else | |
272 int x; | |
273 src+= stride*3; | |
274 for(x=0; x<BLOCK_SIZE; x++) | |
275 { | |
276 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; | |
277 } | |
278 #endif | |
279 return 1; | |
280 #else | |
281 int x; | |
282 src+= stride*4; | |
283 for(x=0; x<BLOCK_SIZE; x++) | |
284 { | |
285 int min=255; | |
286 int max=0; | |
287 int y; | |
288 for(y=0; y<8; y++){ | |
289 int v= src[x + y*stride]; | |
290 if(v>max) max=v; | |
291 if(v<min) min=v; | |
292 } | |
293 if(max-min > 2*QP) return 0; | |
294 } | |
295 return 1; | |
296 #endif | |
297 } | |
298 | |
299 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ | |
300 if( isHorizDC_C(src, stride, c) ){ | |
301 if( isHorizMinMaxOk_C(src, stride, c->QP) ) | |
302 return 1; | |
303 else | |
304 return 0; | |
305 }else{ | |
306 return 2; | |
307 } | |
308 } | |
309 | |
310 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ | |
311 if( isVertDC_C(src, stride, c) ){ | |
312 if( isVertMinMaxOk_C(src, stride, c->QP) ) | |
313 return 1; | |
314 else | |
315 return 0; | |
316 }else{ | |
317 return 2; | |
318 } | |
319 } | |
320 | |
321 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) | |
322 { | |
323 int y; | |
324 for(y=0; y<BLOCK_SIZE; y++) | |
325 { | |
326 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); | |
327 | |
32 | 328 if(FFABS(middleEnergy) < 8*c->QP) |
0 | 329 { |
330 const int q=(dst[3] - dst[4])/2; | |
331 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); | |
332 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); | |
333 | |
32 | 334 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) ); |
27
c83a71c1729d
Change libpostproc to use the FFMIN/FFMAX macros from libavutil.
diego
parents:
26
diff
changeset
|
335 d= FFMAX(d, 0); |
0 | 336 |
337 d= (5*d + 32) >> 6; | |
31
81ad3274583c
Rename SIGN macro to FFSIGN to avoid clashes with system headers.
diego
parents:
29
diff
changeset
|
338 d*= FFSIGN(-middleEnergy); |
0 | 339 |
340 if(q>0) | |
341 { | |
342 d= d<0 ? 0 : d; | |
343 d= d>q ? q : d; | |
344 } | |
345 else | |
346 { | |
347 d= d>0 ? 0 : d; | |
348 d= d<q ? q : d; | |
349 } | |
350 | |
351 dst[3]-= d; | |
352 dst[4]+= d; | |
353 } | |
354 dst+= stride; | |
355 } | |
356 } | |
357 | |
358 /** | |
359 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | |
360 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | |
361 */ | |
362 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) | |
363 { | |
364 int y; | |
365 for(y=0; y<BLOCK_SIZE; y++) | |
366 { | |
32 | 367 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; |
368 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; | |
0 | 369 |
370 int sums[10]; | |
371 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; | |
372 sums[1] = sums[0] - first + dst[3]; | |
373 sums[2] = sums[1] - first + dst[4]; | |
374 sums[3] = sums[2] - first + dst[5]; | |
375 sums[4] = sums[3] - first + dst[6]; | |
376 sums[5] = sums[4] - dst[0] + dst[7]; | |
377 sums[6] = sums[5] - dst[1] + last; | |
378 sums[7] = sums[6] - dst[2] + last; | |
379 sums[8] = sums[7] - dst[3] + last; | |
380 sums[9] = sums[8] - dst[4] + last; | |
381 | |
382 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; | |
383 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; | |
384 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; | |
385 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; | |
386 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; | |
387 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; | |
388 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; | |
389 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; | |
390 | |
391 dst+= stride; | |
392 } | |
393 } | |
394 | |
395 /** | |
396 * Experimental Filter 1 (Horizontal) | |
397 * will not damage linear gradients | |
398 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter | |
48 | 399 * can only smooth blocks at the expected locations (it cannot smooth them if they did move) |
400 * MMX2 version does correct clipping C version does not | |
0 | 401 * not identical with the vertical one |
402 */ | |
403 static inline void horizX1Filter(uint8_t *src, int stride, int QP) | |
404 { | |
405 int y; | |
406 static uint64_t *lut= NULL; | |
407 if(lut==NULL) | |
408 { | |
409 int i; | |
16 | 410 lut = av_malloc(256*8); |
0 | 411 for(i=0; i<256; i++) |
412 { | |
413 int v= i < 128 ? 2*i : 2*(i-256); | |
414 /* | |
415 //Simulate 112242211 9-Tap filter | |
416 uint64_t a= (v/16) & 0xFF; | |
417 uint64_t b= (v/8) & 0xFF; | |
418 uint64_t c= (v/4) & 0xFF; | |
419 uint64_t d= (3*v/8) & 0xFF; | |
420 */ | |
421 //Simulate piecewise linear interpolation | |
422 uint64_t a= (v/16) & 0xFF; | |
423 uint64_t b= (v*3/16) & 0xFF; | |
424 uint64_t c= (v*5/16) & 0xFF; | |
425 uint64_t d= (7*v/16) & 0xFF; | |
426 uint64_t A= (0x100 - a)&0xFF; | |
427 uint64_t B= (0x100 - b)&0xFF; | |
428 uint64_t C= (0x100 - c)&0xFF; | |
429 uint64_t D= (0x100 - c)&0xFF; | |
430 | |
431 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | | |
432 (D<<24) | (C<<16) | (B<<8) | (A); | |
433 //lut[i] = (v<<32) | (v<<24); | |
434 } | |
435 } | |
436 | |
437 for(y=0; y<BLOCK_SIZE; y++) | |
438 { | |
439 int a= src[1] - src[2]; | |
440 int b= src[3] - src[4]; | |
441 int c= src[5] - src[6]; | |
442 | |
32 | 443 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0); |
0 | 444 |
445 if(d < QP) | |
446 { | |
31
81ad3274583c
Rename SIGN macro to FFSIGN to avoid clashes with system headers.
diego
parents:
29
diff
changeset
|
447 int v = d * FFSIGN(-b); |
0 | 448 |
449 src[1] +=v/8; | |
450 src[2] +=v/4; | |
451 src[3] +=3*v/8; | |
452 src[4] -=3*v/8; | |
453 src[5] -=v/4; | |
454 src[6] -=v/8; | |
455 | |
456 } | |
457 src+=stride; | |
458 } | |
459 } | |
460 | |
461 /** | |
462 * accurate deblock filter | |
463 */ | |
38
63d07317cd7a
rename always_inline to av_always_inline and move to common.h
mru
parents:
36
diff
changeset
|
464 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ |
0 | 465 int y; |
466 const int QP= c->QP; | |
467 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | |
468 const int dcThreshold= dcOffset*2 + 1; | |
469 //START_TIMER | |
470 src+= step*4; // src points to begin of the 8x8 Block | |
471 for(y=0; y<8; y++){ | |
472 int numEq= 0; | |
473 | |
474 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; | |
475 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; | |
476 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; | |
477 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; | |
478 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; | |
479 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; | |
480 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; | |
481 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; | |
482 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; | |
483 if(numEq > c->ppMode.flatnessThreshold){ | |
484 int min, max, x; | |
485 | |
486 if(src[0] > src[step]){ | |
487 max= src[0]; | |
488 min= src[step]; | |
489 }else{ | |
490 max= src[step]; | |
491 min= src[0]; | |
492 } | |
493 for(x=2; x<8; x+=2){ | |
494 if(src[x*step] > src[(x+1)*step]){ | |
495 if(src[x *step] > max) max= src[ x *step]; | |
496 if(src[(x+1)*step] < min) min= src[(x+1)*step]; | |
497 }else{ | |
498 if(src[(x+1)*step] > max) max= src[(x+1)*step]; | |
499 if(src[ x *step] < min) min= src[ x *step]; | |
500 } | |
501 } | |
502 if(max-min < 2*QP){ | |
32 | 503 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; |
504 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; | |
0 | 505 |
506 int sums[10]; | |
507 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; | |
508 sums[1] = sums[0] - first + src[3*step]; | |
509 sums[2] = sums[1] - first + src[4*step]; | |
510 sums[3] = sums[2] - first + src[5*step]; | |
511 sums[4] = sums[3] - first + src[6*step]; | |
512 sums[5] = sums[4] - src[0*step] + src[7*step]; | |
513 sums[6] = sums[5] - src[1*step] + last; | |
514 sums[7] = sums[6] - src[2*step] + last; | |
515 sums[8] = sums[7] - src[3*step] + last; | |
516 sums[9] = sums[8] - src[4*step] + last; | |
517 | |
518 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; | |
519 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; | |
520 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; | |
521 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; | |
522 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; | |
523 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; | |
524 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; | |
525 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; | |
526 } | |
527 }else{ | |
528 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); | |
529 | |
32 | 530 if(FFABS(middleEnergy) < 8*QP) |
0 | 531 { |
532 const int q=(src[3*step] - src[4*step])/2; | |
533 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); | |
534 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); | |
535 | |
32 | 536 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) ); |
27
c83a71c1729d
Change libpostproc to use the FFMIN/FFMAX macros from libavutil.
diego
parents:
26
diff
changeset
|
537 d= FFMAX(d, 0); |
0 | 538 |
539 d= (5*d + 32) >> 6; | |
31
81ad3274583c
Rename SIGN macro to FFSIGN to avoid clashes with system headers.
diego
parents:
29
diff
changeset
|
540 d*= FFSIGN(-middleEnergy); |
0 | 541 |
542 if(q>0) | |
543 { | |
544 d= d<0 ? 0 : d; | |
545 d= d>q ? q : d; | |
546 } | |
547 else | |
548 { | |
549 d= d>0 ? 0 : d; | |
550 d= d<q ? q : d; | |
551 } | |
552 | |
553 src[3*step]-= d; | |
554 src[4*step]+= d; | |
555 } | |
556 } | |
557 | |
558 src += stride; | |
559 } | |
560 /*if(step==16){ | |
561 STOP_TIMER("step16") | |
562 }else{ | |
563 STOP_TIMER("stepX") | |
564 }*/ | |
565 } | |
566 | |
567 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one | |
568 //Plain C versions | |
569 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) | |
570 #define COMPILE_C | |
571 #endif | |
572 | |
573 #ifdef ARCH_POWERPC | |
574 #ifdef HAVE_ALTIVEC | |
575 #define COMPILE_ALTIVEC | |
576 #endif //HAVE_ALTIVEC | |
577 #endif //ARCH_POWERPC | |
578 | |
36 | 579 #if defined(ARCH_X86) |
0 | 580 |
581 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
582 #define COMPILE_MMX | |
583 #endif | |
584 | |
585 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT) | |
586 #define COMPILE_MMX2 | |
587 #endif | |
588 | |
589 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
590 #define COMPILE_3DNOW | |
591 #endif | |
36 | 592 #endif /* defined(ARCH_X86) */ |
0 | 593 |
594 #undef HAVE_MMX | |
595 #undef HAVE_MMX2 | |
596 #undef HAVE_3DNOW | |
597 #undef HAVE_ALTIVEC | |
598 | |
599 #ifdef COMPILE_C | |
600 #undef HAVE_MMX | |
601 #undef HAVE_MMX2 | |
602 #undef HAVE_3DNOW | |
603 #define RENAME(a) a ## _C | |
604 #include "postprocess_template.c" | |
605 #endif | |
606 | |
607 #ifdef ARCH_POWERPC | |
608 #ifdef COMPILE_ALTIVEC | |
609 #undef RENAME | |
610 #define HAVE_ALTIVEC | |
611 #define RENAME(a) a ## _altivec | |
612 #include "postprocess_altivec_template.c" | |
613 #include "postprocess_template.c" | |
614 #endif | |
615 #endif //ARCH_POWERPC | |
616 | |
617 //MMX versions | |
618 #ifdef COMPILE_MMX | |
619 #undef RENAME | |
620 #define HAVE_MMX | |
621 #undef HAVE_MMX2 | |
622 #undef HAVE_3DNOW | |
623 #define RENAME(a) a ## _MMX | |
624 #include "postprocess_template.c" | |
625 #endif | |
626 | |
627 //MMX2 versions | |
628 #ifdef COMPILE_MMX2 | |
629 #undef RENAME | |
630 #define HAVE_MMX | |
631 #define HAVE_MMX2 | |
632 #undef HAVE_3DNOW | |
633 #define RENAME(a) a ## _MMX2 | |
634 #include "postprocess_template.c" | |
635 #endif | |
636 | |
637 //3DNOW versions | |
638 #ifdef COMPILE_3DNOW | |
639 #undef RENAME | |
640 #define HAVE_MMX | |
641 #undef HAVE_MMX2 | |
642 #define HAVE_3DNOW | |
643 #define RENAME(a) a ## _3DNow | |
644 #include "postprocess_template.c" | |
645 #endif | |
646 | |
48 | 647 // minor note: the HAVE_xyz is messed up after that line so do not use it. |
0 | 648 |
649 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | |
650 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc) | |
651 { | |
652 PPContext *c= (PPContext *)vc; | |
653 PPMode *ppMode= (PPMode *)vm; | |
654 c->ppMode= *ppMode; //FIXME | |
655 | |
48 | 656 // Using ifs here as they are faster than function pointers although the |
657 // difference would not be measureable here but it is much better because | |
658 // someone might exchange the CPU whithout restarting MPlayer ;) | |
0 | 659 #ifdef RUNTIME_CPUDETECT |
36 | 660 #if defined(ARCH_X86) |
0 | 661 // ordered per speed fasterst first |
662 if(c->cpuCaps & PP_CPU_CAPS_MMX2) | |
663 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
664 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) | |
665 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
666 else if(c->cpuCaps & PP_CPU_CAPS_MMX) | |
667 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
668 else | |
669 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
670 #else | |
671 #ifdef ARCH_POWERPC | |
672 #ifdef HAVE_ALTIVEC | |
673 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) | |
674 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
675 else | |
676 #endif | |
677 #endif | |
678 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
679 #endif | |
680 #else //RUNTIME_CPUDETECT | |
681 #ifdef HAVE_MMX2 | |
682 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
683 #elif defined (HAVE_3DNOW) | |
684 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
685 #elif defined (HAVE_MMX) | |
686 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
687 #elif defined (HAVE_ALTIVEC) | |
688 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
689 #else | |
690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | |
691 #endif | |
692 #endif //!RUNTIME_CPUDETECT | |
693 } | |
694 | |
695 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | |
696 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); | |
697 | |
698 /* -pp Command line Help | |
699 */ | |
66
7737e39e74f3
Make pp_help a constant pointer to constant characters, moves it partially
benoit
parents:
65
diff
changeset
|
/*
 * Help text for the filter string syntax.
 * The long names printed here are spelled exactly as in the filters[]
 * table ("ahdeblock", "avdeblock", "forcequant"); the earlier text showed
 * "hadeblock", "vadeblock" and "forceQuant", which the table does not contain.
 */
const char *const pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
743 | |
65
5bef666de27d
Make pp_get_mode_by_name_and_quality accept a constant string.
benoit
parents:
64
diff
changeset
|
744 pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality) |
0 | 745 { |
746 char temp[GET_MODE_BUFFER_SIZE]; | |
747 char *p= temp; | |
64
1912d7e2858d
Make filterDelimiters and optionDelimiters two static constant array of
benoit
parents:
56
diff
changeset
|
748 static const char filterDelimiters[] = ",/"; |
1912d7e2858d
Make filterDelimiters and optionDelimiters two static constant array of
benoit
parents:
56
diff
changeset
|
749 static const char optionDelimiters[] = ":"; |
0 | 750 struct PPMode *ppMode; |
751 char *filterToken; | |
752 | |
16 | 753 ppMode= av_malloc(sizeof(PPMode)); |
0 | 754 |
755 ppMode->lumMode= 0; | |
756 ppMode->chromMode= 0; | |
757 ppMode->maxTmpNoise[0]= 700; | |
758 ppMode->maxTmpNoise[1]= 1500; | |
759 ppMode->maxTmpNoise[2]= 3000; | |
760 ppMode->maxAllowedY= 234; | |
761 ppMode->minAllowedY= 16; | |
762 ppMode->baseDcDiff= 256/8; | |
763 ppMode->flatnessThreshold= 56-16-1; | |
764 ppMode->maxClippedThreshold= 0.01; | |
765 ppMode->error=0; | |
766 | |
767 strncpy(temp, name, GET_MODE_BUFFER_SIZE); | |
768 | |
29 | 769 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name); |
0 | 770 |
771 for(;;){ | |
772 char *filterName; | |
773 int q= 1000000; //PP_QUALITY_MAX; | |
774 int chrom=-1; | |
775 int luma=-1; | |
776 char *option; | |
777 char *options[OPTIONS_ARRAY_SIZE]; | |
778 int i; | |
779 int filterNameOk=0; | |
780 int numOfUnknownOptions=0; | |
781 int enable=1; //does the user want us to enabled or disabled the filter | |
782 | |
783 filterToken= strtok(p, filterDelimiters); | |
784 if(filterToken == NULL) break; | |
785 p+= strlen(filterToken) + 1; // p points to next filterToken | |
786 filterName= strtok(filterToken, optionDelimiters); | |
29 | 787 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName); |
0 | 788 |
789 if(*filterName == '-') | |
790 { | |
791 enable=0; | |
792 filterName++; | |
793 } | |
794 | |
795 for(;;){ //for all options | |
796 option= strtok(NULL, optionDelimiters); | |
797 if(option == NULL) break; | |
798 | |
29 | 799 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option); |
0 | 800 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; |
801 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; | |
802 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; | |
803 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; | |
804 else | |
805 { | |
806 options[numOfUnknownOptions] = option; | |
807 numOfUnknownOptions++; | |
808 } | |
809 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; | |
810 } | |
811 options[numOfUnknownOptions] = NULL; | |
812 | |
813 /* replace stuff from the replace Table */ | |
814 for(i=0; replaceTable[2*i]!=NULL; i++) | |
815 { | |
816 if(!strcmp(replaceTable[2*i], filterName)) | |
817 { | |
818 int newlen= strlen(replaceTable[2*i + 1]); | |
819 int plen; | |
820 int spaceLeft; | |
821 | |
822 if(p==NULL) p= temp, *p=0; //last filter | |
823 else p--, *p=','; //not last filter | |
824 | |
825 plen= strlen(p); | |
826 spaceLeft= p - temp + plen; | |
827 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE) | |
828 { | |
829 ppMode->error++; | |
830 break; | |
831 } | |
832 memmove(p + newlen, p, plen+1); | |
833 memcpy(p, replaceTable[2*i + 1], newlen); | |
834 filterNameOk=1; | |
835 } | |
836 } | |
837 | |
838 for(i=0; filters[i].shortName!=NULL; i++) | |
839 { | |
840 if( !strcmp(filters[i].longName, filterName) | |
841 || !strcmp(filters[i].shortName, filterName)) | |
842 { | |
843 ppMode->lumMode &= ~filters[i].mask; | |
844 ppMode->chromMode &= ~filters[i].mask; | |
845 | |
846 filterNameOk=1; | |
847 if(!enable) break; // user wants to disable it | |
848 | |
849 if(q >= filters[i].minLumQuality && luma) | |
850 ppMode->lumMode|= filters[i].mask; | |
851 if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) | |
852 if(q >= filters[i].minChromQuality) | |
853 ppMode->chromMode|= filters[i].mask; | |
854 | |
855 if(filters[i].mask == LEVEL_FIX) | |
856 { | |
857 int o; | |
858 ppMode->minAllowedY= 16; | |
859 ppMode->maxAllowedY= 234; | |
860 for(o=0; options[o]!=NULL; o++) | |
861 { | |
862 if( !strcmp(options[o],"fullyrange") | |
863 ||!strcmp(options[o],"f")) | |
864 { | |
865 ppMode->minAllowedY= 0; | |
866 ppMode->maxAllowedY= 255; | |
867 numOfUnknownOptions--; | |
868 } | |
869 } | |
870 } | |
871 else if(filters[i].mask == TEMP_NOISE_FILTER) | |
872 { | |
873 int o; | |
874 int numOfNoises=0; | |
875 | |
876 for(o=0; options[o]!=NULL; o++) | |
877 { | |
878 char *tail; | |
879 ppMode->maxTmpNoise[numOfNoises]= | |
880 strtol(options[o], &tail, 0); | |
881 if(tail!=options[o]) | |
882 { | |
883 numOfNoises++; | |
884 numOfUnknownOptions--; | |
885 if(numOfNoises >= 3) break; | |
886 } | |
887 } | |
888 } | |
889 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK | |
890 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) | |
891 { | |
892 int o; | |
893 | |
894 for(o=0; options[o]!=NULL && o<2; o++) | |
895 { | |
896 char *tail; | |
897 int val= strtol(options[o], &tail, 0); | |
898 if(tail==options[o]) break; | |
899 | |
900 numOfUnknownOptions--; | |
901 if(o==0) ppMode->baseDcDiff= val; | |
902 else ppMode->flatnessThreshold= val; | |
903 } | |
904 } | |
905 else if(filters[i].mask == FORCE_QUANT) | |
906 { | |
907 int o; | |
908 ppMode->forcedQuant= 15; | |
909 | |
910 for(o=0; options[o]!=NULL && o<1; o++) | |
911 { | |
912 char *tail; | |
913 int val= strtol(options[o], &tail, 0); | |
914 if(tail==options[o]) break; | |
915 | |
916 numOfUnknownOptions--; | |
917 ppMode->forcedQuant= val; | |
918 } | |
919 } | |
920 } | |
921 } | |
922 if(!filterNameOk) ppMode->error++; | |
923 ppMode->error += numOfUnknownOptions; | |
924 } | |
925 | |
29 | 926 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); |
0 | 927 if(ppMode->error) |
928 { | |
29 | 929 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); |
16 | 930 av_free(ppMode); |
0 | 931 return NULL; |
932 } | |
933 return ppMode; | |
934 } | |
935 | |
936 void pp_free_mode(pp_mode_t *mode){ | |
16 | 937 av_free(mode); |
0 | 938 } |
939 | |
/**
 * Replace the buffer stored in *p by a fresh zero-filled one of size bytes.
 * The old buffer is freed and its contents are not preserved.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment currently ignored by this implementation
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
944 | |
945 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){ | |
946 int mbWidth = (width+15)>>4; | |
947 int mbHeight= (height+15)>>4; | |
948 int i; | |
949 | |
950 c->stride= stride; | |
951 c->qpStride= qpStride; | |
952 | |
953 reallocAlign((void **)&c->tempDst, 8, stride*24); | |
954 reallocAlign((void **)&c->tempSrc, 8, stride*24); | |
955 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); | |
956 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); | |
957 for(i=0; i<256; i++) | |
958 c->yHistogram[i]= width*height/64*15/256; | |
959 | |
960 for(i=0; i<3; i++) | |
961 { | |
48 | 962 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end. |
0 | 963 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); |
964 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size | |
965 } | |
966 | |
967 reallocAlign((void **)&c->deintTemp, 8, 2*width+32); | |
968 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); | |
969 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); | |
970 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T)); | |
971 } | |
972 | |
973 static void global_init(void){ | |
974 int i; | |
975 memset(clip_table, 0, 256); | |
976 for(i=256; i<512; i++) | |
977 clip_table[i]= i; | |
978 memset(clip_table+512, 0, 256); | |
979 } | |
980 | |
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr unused
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char contextName[] = "postproc";

    (void)ptr;
    return contextName;
}
984 | |
985 static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL }; | |
986 | |
0 | 987 pp_context_t *pp_get_context(int width, int height, int cpuCaps){ |
16 | 988 PPContext *c= av_malloc(sizeof(PPContext)); |
0 | 989 int stride= (width+15)&(~15); //assumed / will realloc if needed |
990 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed | |
991 | |
992 global_init(); | |
993 | |
994 memset(c, 0, sizeof(PPContext)); | |
29 | 995 c->av_class = &av_codec_context_class; |
0 | 996 c->cpuCaps= cpuCaps; |
997 if(cpuCaps&PP_FORMAT){ | |
998 c->hChromaSubSample= cpuCaps&0x3; | |
999 c->vChromaSubSample= (cpuCaps>>4)&0x3; | |
1000 }else{ | |
1001 c->hChromaSubSample= 1; | |
1002 c->vChromaSubSample= 1; | |
1003 } | |
1004 | |
1005 reallocBuffers(c, width, height, stride, qpStride); | |
1006 | |
1007 c->frameNum=-1; | |
1008 | |
1009 return c; | |
1010 } | |
1011 | |
1012 void pp_free_context(void *vc){ | |
1013 PPContext *c = (PPContext*)vc; | |
1014 int i; | |
1015 | |
16 | 1016 for(i=0; i<3; i++) av_free(c->tempBlured[i]); |
1017 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]); | |
0 | 1018 |
16 | 1019 av_free(c->tempBlocks); |
1020 av_free(c->yHistogram); | |
1021 av_free(c->tempDst); | |
1022 av_free(c->tempSrc); | |
1023 av_free(c->deintTemp); | |
1024 av_free(c->stdQPTable); | |
1025 av_free(c->nonBQPTable); | |
1026 av_free(c->forcedQPTable); | |
0 | 1027 |
1028 memset(c, 0, sizeof(PPContext)); | |
1029 | |
16 | 1030 av_free(c); |
0 | 1031 } |
1032 | |
1033 void pp_postprocess(uint8_t * src[3], int srcStride[3], | |
1034 uint8_t * dst[3], int dstStride[3], | |
1035 int width, int height, | |
1036 QP_STORE_T *QP_store, int QPStride, | |
1037 pp_mode_t *vm, void *vc, int pict_type) | |
1038 { | |
1039 int mbWidth = (width+15)>>4; | |
1040 int mbHeight= (height+15)>>4; | |
1041 PPMode *mode = (PPMode*)vm; | |
1042 PPContext *c = (PPContext*)vc; | |
32 | 1043 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0])); |
1044 int absQPStride = FFABS(QPStride); | |
0 | 1045 |
1046 // c->stride and c->QPStride are always positive | |
1047 if(c->stride < minStride || c->qpStride < absQPStride) | |
1048 reallocBuffers(c, width, height, | |
27
c83a71c1729d
Change libpostproc to use the FFMIN/FFMAX macros from libavutil.
diego
parents:
26
diff
changeset
|
1049 FFMAX(minStride, c->stride), |
c83a71c1729d
Change libpostproc to use the FFMIN/FFMAX macros from libavutil.
diego
parents:
26
diff
changeset
|
1050 FFMAX(c->qpStride, absQPStride)); |
0 | 1051 |
1052 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) | |
1053 { | |
1054 int i; | |
1055 QP_store= c->forcedQPTable; | |
1056 absQPStride = QPStride = 0; | |
1057 if(mode->lumMode & FORCE_QUANT) | |
1058 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; | |
1059 else | |
1060 for(i=0; i<mbWidth; i++) QP_store[i]= 1; | |
1061 } | |
1062 | |
1063 if(pict_type & PP_PICT_TYPE_QP2){ | |
1064 int i; | |
1065 const int count= mbHeight * absQPStride; | |
1066 for(i=0; i<(count>>2); i++){ | |
1067 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; | |
1068 } | |
1069 for(i<<=2; i<count; i++){ | |
1070 c->stdQPTable[i] = QP_store[i]>>1; | |
1071 } | |
1072 QP_store= c->stdQPTable; | |
1073 QPStride= absQPStride; | |
1074 } | |
1075 | |
1076 if(0){ | |
1077 int x,y; | |
1078 for(y=0; y<mbHeight; y++){ | |
1079 for(x=0; x<mbWidth; x++){ | |
29 | 1080 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]); |
0 | 1081 } |
29 | 1082 av_log(c, AV_LOG_INFO, "\n"); |
0 | 1083 } |
29 | 1084 av_log(c, AV_LOG_INFO, "\n"); |
0 | 1085 } |
1086 | |
1087 if((pict_type&7)!=3) | |
1088 { | |
1089 if (QPStride >= 0) { | |
1090 int i; | |
1091 const int count= mbHeight * QPStride; | |
1092 for(i=0; i<(count>>2); i++){ | |
1093 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; | |
1094 } | |
1095 for(i<<=2; i<count; i++){ | |
1096 c->nonBQPTable[i] = QP_store[i] & 0x3F; | |
1097 } | |
1098 } else { | |
1099 int i,j; | |
1100 for(i=0; i<mbHeight; i++) { | |
1101 for(j=0; j<absQPStride; j++) { | |
1102 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F; | |
1103 } | |
1104 } | |
1105 } | |
1106 } | |
1107 | |
29 | 1108 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n", |
1109 mode->lumMode, mode->chromMode); | |
0 | 1110 |
1111 postProcess(src[0], srcStride[0], dst[0], dstStride[0], | |
1112 width, height, QP_store, QPStride, 0, mode, c); | |
1113 | |
1114 width = (width )>>c->hChromaSubSample; | |
1115 height = (height)>>c->vChromaSubSample; | |
1116 | |
1117 if(mode->chromMode) | |
1118 { | |
1119 postProcess(src[1], srcStride[1], dst[1], dstStride[1], | |
1120 width, height, QP_store, QPStride, 1, mode, c); | |
1121 postProcess(src[2], srcStride[2], dst[2], dstStride[2], | |
1122 width, height, QP_store, QPStride, 2, mode, c); | |
1123 } | |
1124 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) | |
1125 { | |
1126 linecpy(dst[1], src[1], height, srcStride[1]); | |
1127 linecpy(dst[2], src[2], height, srcStride[2]); | |
1128 } | |
1129 else | |
1130 { | |
1131 int y; | |
1132 for(y=0; y<height; y++) | |
1133 { | |
1134 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); | |
1135 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); | |
1136 } | |
1137 } | |
1138 } | |
1139 |