Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 2979:bfabfdf9ce55 libavcodec
COSMETICS: tabs --> spaces, some prettyprinting
author | diego |
---|---|
date | Thu, 22 Dec 2005 01:10:11 +0000 |
parents | ef2149182f1c |
children | 0b546eab515d |
comparison
equal
deleted
inserted
replaced
2978:403183bbb505 | 2979:bfabfdf9ce55 |
---|---|
22 * @file postprocess.c | 22 * @file postprocess.c |
23 * postprocessing. | 23 * postprocessing. |
24 */ | 24 */ |
25 | 25 |
26 /* | 26 /* |
27 C MMX MMX2 3DNow AltiVec | 27 C MMX MMX2 3DNow AltiVec |
28 isVertDC Ec Ec Ec | 28 isVertDC Ec Ec Ec |
29 isVertMinMaxOk Ec Ec Ec | 29 isVertMinMaxOk Ec Ec Ec |
30 doVertLowPass E e e Ec | 30 doVertLowPass E e e Ec |
31 doVertDefFilter Ec Ec e e Ec | 31 doVertDefFilter Ec Ec e e Ec |
32 isHorizDC Ec Ec Ec | 32 isHorizDC Ec Ec Ec |
33 isHorizMinMaxOk a E Ec | 33 isHorizMinMaxOk a E Ec |
34 doHorizLowPass E e e Ec | 34 doHorizLowPass E e e Ec |
35 doHorizDefFilter Ec Ec e e Ec | 35 doHorizDefFilter Ec Ec e e Ec |
36 do_a_deblock Ec E Ec E | 36 do_a_deblock Ec E Ec E |
37 deRing E e e* Ecp | 37 deRing E e e* Ecp |
38 Vertical RKAlgo1 E a a | 38 Vertical RKAlgo1 E a a |
39 Horizontal RKAlgo1 a a | 39 Horizontal RKAlgo1 a a |
40 Vertical X1# a E E | 40 Vertical X1# a E E |
41 Horizontal X1# a E E | 41 Horizontal X1# a E E |
42 LinIpolDeinterlace e E E* | 42 LinIpolDeinterlace e E E* |
43 CubicIpolDeinterlace a e e* | 43 CubicIpolDeinterlace a e e* |
44 LinBlendDeinterlace e E E* | 44 LinBlendDeinterlace e E E* |
45 MedianDeinterlace# E Ec Ec | 45 MedianDeinterlace# E Ec Ec |
46 TempDeNoiser# E e e Ec | 46 TempDeNoiser# E e e Ec |
47 | 47 |
48 * I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work | 48 * I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work |
49 # more or less self-invented filters, so the exactness isn't too meaningful | 49 # more or less self-invented filters, so the exactness isn't too meaningful |
50 E = Exact implementation | 50 E = Exact implementation |
51 e = almost exact implementation (slightly different rounding,...) | 51 e = almost exact implementation (slightly different rounding,...) |
59 reduce the time wasted on the mem transfer | 59 reduce the time wasted on the mem transfer |
60 unroll stuff if instructions depend too much on the prior one | 60 unroll stuff if instructions depend too much on the prior one |
61 move YScale thing to the end instead of fixing QP | 61 move YScale thing to the end instead of fixing QP |
62 write a faster and higher quality deblocking filter :) | 62 write a faster and higher quality deblocking filter :) |
63 make the mainloop more flexible (variable number of blocks at once | 63 make the mainloop more flexible (variable number of blocks at once |
64 (the if/else stuff per block is slowing things down) | 64 (the if/else stuff per block is slowing things down) |
65 compare the quality & speed of all filters | 65 compare the quality & speed of all filters |
66 split this huge file | 66 split this huge file |
67 optimize c versions | 67 optimize c versions |
68 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks | 68 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
69 ... | 69 ... |
118 # define attribute_used | 118 # define attribute_used |
119 # define always_inline inline | 119 # define always_inline inline |
120 #endif | 120 #endif |
121 | 121 |
122 #if defined(ARCH_X86) || defined(ARCH_X86_64) | 122 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
123 static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL; | 123 static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL; |
124 static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL; | 124 static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL; |
125 static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL; | 125 static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL; |
126 static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL; | 126 static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL; |
127 static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL; | 127 static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL; |
128 static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL; | 128 static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL; |
129 static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL; | 129 static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL; |
130 static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL; | 130 static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL; |
131 #endif | 131 #endif |
132 | 132 |
133 static uint8_t clip_table[3*256]; | 133 static uint8_t clip_table[3*256]; |
134 static uint8_t * const clip_tab= clip_table + 256; | 134 static uint8_t * const clip_tab= clip_table + 256; |
135 | 135 |
138 static const int attribute_used deringThreshold= 20; | 138 static const int attribute_used deringThreshold= 20; |
139 | 139 |
140 | 140 |
141 static struct PPFilter filters[]= | 141 static struct PPFilter filters[]= |
142 { | 142 { |
143 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, | 143 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, |
144 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, | 144 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, |
145 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER}, | 145 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER}, |
146 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ | 146 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ |
147 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, | 147 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, |
148 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, | 148 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, |
149 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, | 149 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, |
150 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, | 150 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, |
151 {"dr", "dering", 1, 5, 6, DERING}, | 151 {"dr", "dering", 1, 5, 6, DERING}, |
152 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, | 152 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, |
153 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, | 153 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, |
154 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, | 154 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, |
155 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, | 155 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, |
156 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, | 156 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, |
157 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, | 157 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, |
158 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, | 158 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, |
159 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, | 159 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, |
160 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, | 160 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, |
161 {NULL, NULL,0,0,0,0} //End Marker | 161 {NULL, NULL,0,0,0,0} //End Marker |
162 }; | 162 }; |
163 | 163 |
164 static char *replaceTable[]= | 164 static char *replaceTable[]= |
165 { | 165 { |
166 "default", "hdeblock:a,vdeblock:a,dering:a", | 166 "default", "hdeblock:a,vdeblock:a,dering:a", |
167 "de", "hdeblock:a,vdeblock:a,dering:a", | 167 "de", "hdeblock:a,vdeblock:a,dering:a", |
168 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a", | 168 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a", |
169 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a", | 169 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a", |
170 "ac", "ha:a:128:7,va:a,dering:a", | 170 "ac", "ha:a:128:7,va:a,dering:a", |
171 NULL //End Marker | 171 NULL //End Marker |
172 }; | 172 }; |
173 | 173 |
174 | 174 |
175 #if defined(ARCH_X86) || defined(ARCH_X86_64) | 175 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
176 static inline void prefetchnta(void *p) | 176 static inline void prefetchnta(void *p) |
177 { | 177 { |
178 asm volatile( "prefetchnta (%0)\n\t" | 178 asm volatile( "prefetchnta (%0)\n\t" |
179 : : "r" (p) | 179 : : "r" (p) |
180 ); | 180 ); |
181 } | 181 } |
182 | 182 |
183 static inline void prefetcht0(void *p) | 183 static inline void prefetcht0(void *p) |
184 { | 184 { |
185 asm volatile( "prefetcht0 (%0)\n\t" | 185 asm volatile( "prefetcht0 (%0)\n\t" |
186 : : "r" (p) | 186 : : "r" (p) |
187 ); | 187 ); |
188 } | 188 } |
189 | 189 |
190 static inline void prefetcht1(void *p) | 190 static inline void prefetcht1(void *p) |
191 { | 191 { |
192 asm volatile( "prefetcht1 (%0)\n\t" | 192 asm volatile( "prefetcht1 (%0)\n\t" |
193 : : "r" (p) | 193 : : "r" (p) |
194 ); | 194 ); |
195 } | 195 } |
196 | 196 |
197 static inline void prefetcht2(void *p) | 197 static inline void prefetcht2(void *p) |
198 { | 198 { |
199 asm volatile( "prefetcht2 (%0)\n\t" | 199 asm volatile( "prefetcht2 (%0)\n\t" |
200 : : "r" (p) | 200 : : "r" (p) |
201 ); | 201 ); |
202 } | 202 } |
203 #endif | 203 #endif |
204 | 204 |
205 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing | 205 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing |
206 | 206 |
207 /** | 207 /** |
208 * Check if the given 8x8 Block is mostly "flat" | 208 * Check if the given 8x8 Block is mostly "flat" |
209 */ | 209 */ |
210 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) | 210 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) |
211 { | 211 { |
212 int numEq= 0; | 212 int numEq= 0; |
213 int y; | 213 int y; |
214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | 214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
215 const int dcThreshold= dcOffset*2 + 1; | 215 const int dcThreshold= dcOffset*2 + 1; |
216 | 216 |
217 for(y=0; y<BLOCK_SIZE; y++) | 217 for(y=0; y<BLOCK_SIZE; y++) |
218 { | 218 { |
219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; | 219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; |
220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; | 220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; |
221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; | 221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; |
222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; | 222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; |
223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; | 223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; |
224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; | 224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; |
225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; | 225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; |
226 src+= stride; | 226 src+= stride; |
227 } | 227 } |
228 return numEq > c->ppMode.flatnessThreshold; | 228 return numEq > c->ppMode.flatnessThreshold; |
229 } | 229 } |
230 | 230 |
231 /** | 231 /** |
232 * Check if the middle 8x8 Block in the given 8x16 block is flat | 232 * Check if the middle 8x8 Block in the given 8x16 block is flat |
233 */ | 233 */ |
234 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ | 234 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ |
235 int numEq= 0; | 235 int numEq= 0; |
236 int y; | 236 int y; |
237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | 237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
238 const int dcThreshold= dcOffset*2 + 1; | 238 const int dcThreshold= dcOffset*2 + 1; |
239 | 239 |
240 src+= stride*4; // src points to begin of the 8x8 Block | 240 src+= stride*4; // src points to begin of the 8x8 Block |
241 for(y=0; y<BLOCK_SIZE-1; y++) | 241 for(y=0; y<BLOCK_SIZE-1; y++) |
242 { | 242 { |
243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; | 243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; |
244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; | 244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; |
245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; | 245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; |
246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; | 246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; |
247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; | 247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; |
248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; | 248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; |
249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; | 249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; |
250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; | 250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; |
251 src+= stride; | 251 src+= stride; |
252 } | 252 } |
253 return numEq > c->ppMode.flatnessThreshold; | 253 return numEq > c->ppMode.flatnessThreshold; |
254 } | 254 } |
255 | 255 |
256 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP) | 256 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP) |
257 { | 257 { |
258 int i; | 258 int i; |
259 #if 1 | 259 #if 1 |
260 for(i=0; i<2; i++){ | 260 for(i=0; i<2; i++){ |
261 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0; | 261 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0; |
262 src += stride; | 262 src += stride; |
263 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0; | 263 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0; |
264 src += stride; | 264 src += stride; |
265 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0; | 265 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0; |
266 src += stride; | 266 src += stride; |
267 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0; | 267 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0; |
268 src += stride; | 268 src += stride; |
269 } | 269 } |
270 #else | 270 #else |
271 for(i=0; i<8; i++){ | 271 for(i=0; i<8; i++){ |
272 if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0; | 272 if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0; |
273 src += stride; | 273 src += stride; |
274 } | 274 } |
275 #endif | 275 #endif |
276 return 1; | 276 return 1; |
277 } | 277 } |
278 | 278 |
279 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) | 279 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) |
280 { | 280 { |
281 #if 1 | 281 #if 1 |
282 #if 1 | 282 #if 1 |
283 int x; | 283 int x; |
284 src+= stride*4; | 284 src+= stride*4; |
285 for(x=0; x<BLOCK_SIZE; x+=4) | 285 for(x=0; x<BLOCK_SIZE; x+=4) |
286 { | 286 { |
287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; | 287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; |
288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; | 288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; |
289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; | 289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; |
290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; | 290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; |
291 } | 291 } |
292 #else | 292 #else |
293 int x; | 293 int x; |
294 src+= stride*3; | 294 src+= stride*3; |
295 for(x=0; x<BLOCK_SIZE; x++) | 295 for(x=0; x<BLOCK_SIZE; x++) |
296 { | 296 { |
297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; | 297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; |
298 } | 298 } |
299 #endif | 299 #endif |
300 return 1; | 300 return 1; |
301 #else | 301 #else |
302 int x; | 302 int x; |
303 src+= stride*4; | 303 src+= stride*4; |
304 for(x=0; x<BLOCK_SIZE; x++) | 304 for(x=0; x<BLOCK_SIZE; x++) |
305 { | 305 { |
306 int min=255; | 306 int min=255; |
307 int max=0; | 307 int max=0; |
308 int y; | 308 int y; |
309 for(y=0; y<8; y++){ | 309 for(y=0; y<8; y++){ |
310 int v= src[x + y*stride]; | 310 int v= src[x + y*stride]; |
311 if(v>max) max=v; | 311 if(v>max) max=v; |
312 if(v<min) min=v; | 312 if(v<min) min=v; |
313 } | 313 } |
314 if(max-min > 2*QP) return 0; | 314 if(max-min > 2*QP) return 0; |
315 } | 315 } |
316 return 1; | 316 return 1; |
317 #endif | 317 #endif |
318 } | 318 } |
319 | 319 |
320 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ | 320 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ |
321 if( isHorizDC_C(src, stride, c) ){ | 321 if( isHorizDC_C(src, stride, c) ){ |
322 if( isHorizMinMaxOk_C(src, stride, c->QP) ) | 322 if( isHorizMinMaxOk_C(src, stride, c->QP) ) |
323 return 1; | 323 return 1; |
324 else | 324 else |
325 return 0; | 325 return 0; |
326 }else{ | 326 }else{ |
327 return 2; | 327 return 2; |
328 } | 328 } |
329 } | 329 } |
330 | 330 |
331 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ | 331 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ |
332 if( isVertDC_C(src, stride, c) ){ | 332 if( isVertDC_C(src, stride, c) ){ |
333 if( isVertMinMaxOk_C(src, stride, c->QP) ) | 333 if( isVertMinMaxOk_C(src, stride, c->QP) ) |
334 return 1; | 334 return 1; |
335 else | 335 else |
336 return 0; | 336 return 0; |
337 }else{ | 337 }else{ |
338 return 2; | 338 return 2; |
339 } | 339 } |
340 } | 340 } |
341 | 341 |
342 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) | 342 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) |
343 { | 343 { |
344 int y; | 344 int y; |
345 for(y=0; y<BLOCK_SIZE; y++) | 345 for(y=0; y<BLOCK_SIZE; y++) |
346 { | 346 { |
347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); | 347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); |
348 | 348 |
349 if(ABS(middleEnergy) < 8*c->QP) | 349 if(ABS(middleEnergy) < 8*c->QP) |
350 { | 350 { |
351 const int q=(dst[3] - dst[4])/2; | 351 const int q=(dst[3] - dst[4])/2; |
352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); | 352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); |
353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); | 353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); |
354 | 354 |
355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); | 355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); |
356 d= MAX(d, 0); | 356 d= MAX(d, 0); |
357 | 357 |
358 d= (5*d + 32) >> 6; | 358 d= (5*d + 32) >> 6; |
359 d*= SIGN(-middleEnergy); | 359 d*= SIGN(-middleEnergy); |
360 | 360 |
361 if(q>0) | 361 if(q>0) |
362 { | 362 { |
363 d= d<0 ? 0 : d; | 363 d= d<0 ? 0 : d; |
364 d= d>q ? q : d; | 364 d= d>q ? q : d; |
365 } | 365 } |
366 else | 366 else |
367 { | 367 { |
368 d= d>0 ? 0 : d; | 368 d= d>0 ? 0 : d; |
369 d= d<q ? q : d; | 369 d= d<q ? q : d; |
370 } | 370 } |
371 | 371 |
372 dst[3]-= d; | 372 dst[3]-= d; |
373 dst[4]+= d; | 373 dst[4]+= d; |
374 } | 374 } |
375 dst+= stride; | 375 dst+= stride; |
376 } | 376 } |
377 } | 377 } |
378 | 378 |
379 /** | 379 /** |
380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | 380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) |
381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | 381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) |
382 */ | 382 */ |
383 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) | 383 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) |
384 { | 384 { |
385 int y; | 385 int y; |
386 for(y=0; y<BLOCK_SIZE; y++) | 386 for(y=0; y<BLOCK_SIZE; y++) |
387 { | 387 { |
388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; | 388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; |
389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; | 389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; |
390 | 390 |
391 int sums[10]; | 391 int sums[10]; |
392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; | 392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; |
393 sums[1] = sums[0] - first + dst[3]; | 393 sums[1] = sums[0] - first + dst[3]; |
394 sums[2] = sums[1] - first + dst[4]; | 394 sums[2] = sums[1] - first + dst[4]; |
395 sums[3] = sums[2] - first + dst[5]; | 395 sums[3] = sums[2] - first + dst[5]; |
396 sums[4] = sums[3] - first + dst[6]; | 396 sums[4] = sums[3] - first + dst[6]; |
397 sums[5] = sums[4] - dst[0] + dst[7]; | 397 sums[5] = sums[4] - dst[0] + dst[7]; |
398 sums[6] = sums[5] - dst[1] + last; | 398 sums[6] = sums[5] - dst[1] + last; |
399 sums[7] = sums[6] - dst[2] + last; | 399 sums[7] = sums[6] - dst[2] + last; |
400 sums[8] = sums[7] - dst[3] + last; | 400 sums[8] = sums[7] - dst[3] + last; |
401 sums[9] = sums[8] - dst[4] + last; | 401 sums[9] = sums[8] - dst[4] + last; |
402 | 402 |
403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; | 403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; |
404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; | 404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; |
405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; | 405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; |
406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; | 406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; |
407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; | 407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; |
408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; | 408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; |
409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; | 409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; |
410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; | 410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; |
411 | 411 |
412 dst+= stride; | 412 dst+= stride; |
413 } | 413 } |
414 } | 414 } |
415 | 415 |
416 /** | 416 /** |
417 * Experimental Filter 1 (Horizontal) | 417 * Experimental Filter 1 (Horizontal) |
418 * will not damage linear gradients | 418 * will not damage linear gradients |
421 * The MMX2 version does correct clipping; the C version doesn't | 421 * The MMX2 version does correct clipping; the C version doesn't |
422 * not identical with the vertical one | 422 * not identical with the vertical one |
423 */ | 423 */ |
424 static inline void horizX1Filter(uint8_t *src, int stride, int QP) | 424 static inline void horizX1Filter(uint8_t *src, int stride, int QP) |
425 { | 425 { |
426 int y; | 426 int y; |
427 static uint64_t *lut= NULL; | 427 static uint64_t *lut= NULL; |
428 if(lut==NULL) | 428 if(lut==NULL) |
429 { | 429 { |
430 int i; | 430 int i; |
431 lut= (uint64_t*)memalign(8, 256*8); | 431 lut= (uint64_t*)memalign(8, 256*8); |
432 for(i=0; i<256; i++) | 432 for(i=0; i<256; i++) |
433 { | 433 { |
434 int v= i < 128 ? 2*i : 2*(i-256); | 434 int v= i < 128 ? 2*i : 2*(i-256); |
435 /* | 435 /* |
436 //Simulate 112242211 9-Tap filter | 436 //Simulate 112242211 9-Tap filter |
437 uint64_t a= (v/16) & 0xFF; | 437 uint64_t a= (v/16) & 0xFF; |
438 uint64_t b= (v/8) & 0xFF; | 438 uint64_t b= (v/8) & 0xFF; |
439 uint64_t c= (v/4) & 0xFF; | 439 uint64_t c= (v/4) & 0xFF; |
440 uint64_t d= (3*v/8) & 0xFF; | 440 uint64_t d= (3*v/8) & 0xFF; |
441 */ | 441 */ |
442 //Simulate piecewise linear interpolation | 442 //Simulate piecewise linear interpolation |
443 uint64_t a= (v/16) & 0xFF; | 443 uint64_t a= (v/16) & 0xFF; |
444 uint64_t b= (v*3/16) & 0xFF; | 444 uint64_t b= (v*3/16) & 0xFF; |
445 uint64_t c= (v*5/16) & 0xFF; | 445 uint64_t c= (v*5/16) & 0xFF; |
446 uint64_t d= (7*v/16) & 0xFF; | 446 uint64_t d= (7*v/16) & 0xFF; |
447 uint64_t A= (0x100 - a)&0xFF; | 447 uint64_t A= (0x100 - a)&0xFF; |
448 uint64_t B= (0x100 - b)&0xFF; | 448 uint64_t B= (0x100 - b)&0xFF; |
449 uint64_t C= (0x100 - c)&0xFF; | 449 uint64_t C= (0x100 - c)&0xFF; |
450 uint64_t D= (0x100 - c)&0xFF; | 450 uint64_t D= (0x100 - c)&0xFF; |
451 | 451 |
452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | | 452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | |
453 (D<<24) | (C<<16) | (B<<8) | (A); | 453 (D<<24) | (C<<16) | (B<<8) | (A); |
454 //lut[i] = (v<<32) | (v<<24); | 454 //lut[i] = (v<<32) | (v<<24); |
455 } | 455 } |
456 } | 456 } |
457 | 457 |
458 for(y=0; y<BLOCK_SIZE; y++) | 458 for(y=0; y<BLOCK_SIZE; y++) |
459 { | 459 { |
460 int a= src[1] - src[2]; | 460 int a= src[1] - src[2]; |
461 int b= src[3] - src[4]; | 461 int b= src[3] - src[4]; |
462 int c= src[5] - src[6]; | 462 int c= src[5] - src[6]; |
463 | 463 |
464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0); | 464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0); |
465 | 465 |
466 if(d < QP) | 466 if(d < QP) |
467 { | 467 { |
468 int v = d * SIGN(-b); | 468 int v = d * SIGN(-b); |
469 | 469 |
470 src[1] +=v/8; | 470 src[1] +=v/8; |
471 src[2] +=v/4; | 471 src[2] +=v/4; |
472 src[3] +=3*v/8; | 472 src[3] +=3*v/8; |
473 src[4] -=3*v/8; | 473 src[4] -=3*v/8; |
474 src[5] -=v/4; | 474 src[5] -=v/4; |
475 src[6] -=v/8; | 475 src[6] -=v/8; |
476 | 476 |
477 } | 477 } |
478 src+=stride; | 478 src+=stride; |
479 } | 479 } |
480 } | 480 } |
481 | 481 |
482 /** | 482 /** |
483 * accurate deblock filter | 483 * accurate deblock filter |
484 */ | 484 */ |
485 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ | 485 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ |
486 int y; | 486 int y; |
487 const int QP= c->QP; | 487 const int QP= c->QP; |
488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; | 488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
489 const int dcThreshold= dcOffset*2 + 1; | 489 const int dcThreshold= dcOffset*2 + 1; |
490 //START_TIMER | 490 //START_TIMER |
491 src+= step*4; // src points to begin of the 8x8 Block | 491 src+= step*4; // src points to begin of the 8x8 Block |
492 for(y=0; y<8; y++){ | 492 for(y=0; y<8; y++){ |
493 int numEq= 0; | 493 int numEq= 0; |
494 | 494 |
495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; | 495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; |
496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; | 496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; |
497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; | 497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; |
498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; | 498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; |
499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; | 499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; |
500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; | 500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; |
501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; | 501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; |
502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; | 502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; |
503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; | 503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; |
504 if(numEq > c->ppMode.flatnessThreshold){ | 504 if(numEq > c->ppMode.flatnessThreshold){ |
505 int min, max, x; | 505 int min, max, x; |
506 | 506 |
507 if(src[0] > src[step]){ | 507 if(src[0] > src[step]){ |
508 max= src[0]; | 508 max= src[0]; |
509 min= src[step]; | 509 min= src[step]; |
510 }else{ | 510 }else{ |
511 max= src[step]; | 511 max= src[step]; |
512 min= src[0]; | 512 min= src[0]; |
513 } | 513 } |
514 for(x=2; x<8; x+=2){ | 514 for(x=2; x<8; x+=2){ |
515 if(src[x*step] > src[(x+1)*step]){ | 515 if(src[x*step] > src[(x+1)*step]){ |
516 if(src[x *step] > max) max= src[ x *step]; | 516 if(src[x *step] > max) max= src[ x *step]; |
517 if(src[(x+1)*step] < min) min= src[(x+1)*step]; | 517 if(src[(x+1)*step] < min) min= src[(x+1)*step]; |
518 }else{ | 518 }else{ |
519 if(src[(x+1)*step] > max) max= src[(x+1)*step]; | 519 if(src[(x+1)*step] > max) max= src[(x+1)*step]; |
520 if(src[ x *step] < min) min= src[ x *step]; | 520 if(src[ x *step] < min) min= src[ x *step]; |
521 } | 521 } |
522 } | 522 } |
523 if(max-min < 2*QP){ | 523 if(max-min < 2*QP){ |
524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; | 524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; |
525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; | 525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; |
526 | 526 |
527 int sums[10]; | 527 int sums[10]; |
528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; | 528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; |
529 sums[1] = sums[0] - first + src[3*step]; | 529 sums[1] = sums[0] - first + src[3*step]; |
530 sums[2] = sums[1] - first + src[4*step]; | 530 sums[2] = sums[1] - first + src[4*step]; |
531 sums[3] = sums[2] - first + src[5*step]; | 531 sums[3] = sums[2] - first + src[5*step]; |
532 sums[4] = sums[3] - first + src[6*step]; | 532 sums[4] = sums[3] - first + src[6*step]; |
533 sums[5] = sums[4] - src[0*step] + src[7*step]; | 533 sums[5] = sums[4] - src[0*step] + src[7*step]; |
534 sums[6] = sums[5] - src[1*step] + last; | 534 sums[6] = sums[5] - src[1*step] + last; |
535 sums[7] = sums[6] - src[2*step] + last; | 535 sums[7] = sums[6] - src[2*step] + last; |
536 sums[8] = sums[7] - src[3*step] + last; | 536 sums[8] = sums[7] - src[3*step] + last; |
537 sums[9] = sums[8] - src[4*step] + last; | 537 sums[9] = sums[8] - src[4*step] + last; |
538 | 538 |
539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; | 539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; |
540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; | 540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; |
541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; | 541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; |
542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; | 542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; |
543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; | 543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; |
544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; | 544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; |
545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; | 545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; |
546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; | 546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; |
547 } | 547 } |
548 }else{ | 548 }else{ |
549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); | 549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); |
550 | 550 |
551 if(ABS(middleEnergy) < 8*QP) | 551 if(ABS(middleEnergy) < 8*QP) |
552 { | 552 { |
553 const int q=(src[3*step] - src[4*step])/2; | 553 const int q=(src[3*step] - src[4*step])/2; |
554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); | 554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); |
555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); | 555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); |
556 | 556 |
557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); | 557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); |
558 d= MAX(d, 0); | 558 d= MAX(d, 0); |
559 | 559 |
560 d= (5*d + 32) >> 6; | 560 d= (5*d + 32) >> 6; |
561 d*= SIGN(-middleEnergy); | 561 d*= SIGN(-middleEnergy); |
562 | 562 |
563 if(q>0) | 563 if(q>0) |
564 { | 564 { |
565 d= d<0 ? 0 : d; | 565 d= d<0 ? 0 : d; |
566 d= d>q ? q : d; | 566 d= d>q ? q : d; |
567 } | 567 } |
568 else | 568 else |
569 { | 569 { |
570 d= d>0 ? 0 : d; | 570 d= d>0 ? 0 : d; |
571 d= d<q ? q : d; | 571 d= d<q ? q : d; |
572 } | 572 } |
573 | 573 |
574 src[3*step]-= d; | 574 src[3*step]-= d; |
575 src[4*step]+= d; | 575 src[4*step]+= d; |
576 } | 576 } |
577 } | 577 } |
578 | 578 |
579 src += stride; | 579 src += stride; |
580 } | 580 } |
581 /*if(step==16){ | 581 /*if(step==16){ |
582 STOP_TIMER("step16") | 582 STOP_TIMER("step16") |
583 }else{ | 583 }else{ |
584 STOP_TIMER("stepX") | 584 STOP_TIMER("stepX") |
585 }*/ | 585 }*/ |
666 #endif | 666 #endif |
667 | 667 |
// Minor note: the HAVE_xyz macros are messed up after this line, so do not use them below.
669 | 669 |
670 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | 670 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
671 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc) | 671 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc) |
672 { | 672 { |
673 PPContext *c= (PPContext *)vc; | 673 PPContext *c= (PPContext *)vc; |
674 PPMode *ppMode= (PPMode *)vm; | 674 PPMode *ppMode= (PPMode *)vm; |
675 c->ppMode= *ppMode; //FIXME | 675 c->ppMode= *ppMode; //FIXME |
676 | 676 |
677 // useing ifs here as they are faster than function pointers allthough the | 677 // useing ifs here as they are faster than function pointers allthough the |
678 // difference wouldnt be messureable here but its much better because | 678 // difference wouldnt be messureable here but its much better because |
679 // someone might exchange the cpu whithout restarting mplayer ;) | 679 // someone might exchange the cpu whithout restarting mplayer ;) |
680 #ifdef RUNTIME_CPUDETECT | 680 #ifdef RUNTIME_CPUDETECT |
681 #if defined(ARCH_X86) || defined(ARCH_X86_64) | 681 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
682 // ordered per speed fasterst first | 682 // ordered per speed fasterst first |
683 if(c->cpuCaps & PP_CPU_CAPS_MMX2) | 683 if(c->cpuCaps & PP_CPU_CAPS_MMX2) |
684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
685 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) | 685 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) |
686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
687 else if(c->cpuCaps & PP_CPU_CAPS_MMX) | 687 else if(c->cpuCaps & PP_CPU_CAPS_MMX) |
688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
689 else | 689 else |
690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
691 #else | 691 #else |
692 #ifdef ARCH_POWERPC | 692 #ifdef ARCH_POWERPC |
693 #ifdef HAVE_ALTIVEC | 693 #ifdef HAVE_ALTIVEC |
694 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) | 694 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) |
695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
696 else | 696 else |
697 #endif | 697 #endif |
698 #endif | 698 #endif |
699 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 699 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
700 #endif | 700 #endif |
701 #else //RUNTIME_CPUDETECT | 701 #else //RUNTIME_CPUDETECT |
702 #ifdef HAVE_MMX2 | 702 #ifdef HAVE_MMX2 |
703 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 703 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
704 #elif defined (HAVE_3DNOW) | 704 #elif defined (HAVE_3DNOW) |
705 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 705 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
706 #elif defined (HAVE_MMX) | 706 #elif defined (HAVE_MMX) |
707 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 707 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
708 #elif defined (HAVE_ALTIVEC) | 708 #elif defined (HAVE_ALTIVEC) |
709 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 709 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
710 #else | 710 #else |
711 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); | 711 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
712 #endif | 712 #endif |
713 #endif //!RUNTIME_CPUDETECT | 713 #endif //!RUNTIME_CPUDETECT |
714 } | 714 } |
715 | 715 |
716 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | 716 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
717 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); | 717 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); |
718 | 718 |
/* -pp Command line Help
 *
 * Text describing the postprocessing filter string: one row per filter with
 * its short and long name and its options, followed by the usage grammar and
 * a few example strings. The option names listed here are the ones
 * pp_get_mode_by_name_and_quality() compares against while parsing.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha hadeblock (2 threshold) horizontal deblocking filter\n"
"va vadeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forceQuant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
;
763 | 763 |
764 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) | 764 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) |
765 { | 765 { |
766 char temp[GET_MODE_BUFFER_SIZE]; | 766 char temp[GET_MODE_BUFFER_SIZE]; |
767 char *p= temp; | 767 char *p= temp; |
768 char *filterDelimiters= ",/"; | 768 char *filterDelimiters= ",/"; |
769 char *optionDelimiters= ":"; | 769 char *optionDelimiters= ":"; |
770 struct PPMode *ppMode; | 770 struct PPMode *ppMode; |
771 char *filterToken; | 771 char *filterToken; |
772 | 772 |
773 ppMode= memalign(8, sizeof(PPMode)); | 773 ppMode= memalign(8, sizeof(PPMode)); |
774 | 774 |
775 ppMode->lumMode= 0; | 775 ppMode->lumMode= 0; |
776 ppMode->chromMode= 0; | 776 ppMode->chromMode= 0; |
777 ppMode->maxTmpNoise[0]= 700; | 777 ppMode->maxTmpNoise[0]= 700; |
778 ppMode->maxTmpNoise[1]= 1500; | 778 ppMode->maxTmpNoise[1]= 1500; |
779 ppMode->maxTmpNoise[2]= 3000; | 779 ppMode->maxTmpNoise[2]= 3000; |
780 ppMode->maxAllowedY= 234; | 780 ppMode->maxAllowedY= 234; |
781 ppMode->minAllowedY= 16; | 781 ppMode->minAllowedY= 16; |
782 ppMode->baseDcDiff= 256/8; | 782 ppMode->baseDcDiff= 256/8; |
783 ppMode->flatnessThreshold= 56-16-1; | 783 ppMode->flatnessThreshold= 56-16-1; |
784 ppMode->maxClippedThreshold= 0.01; | 784 ppMode->maxClippedThreshold= 0.01; |
785 ppMode->error=0; | 785 ppMode->error=0; |
786 | 786 |
787 strncpy(temp, name, GET_MODE_BUFFER_SIZE); | 787 strncpy(temp, name, GET_MODE_BUFFER_SIZE); |
788 | 788 |
789 if(verbose>1) printf("pp: %s\n", name); | 789 if(verbose>1) printf("pp: %s\n", name); |
790 | 790 |
791 for(;;){ | 791 for(;;){ |
792 char *filterName; | 792 char *filterName; |
793 int q= 1000000; //PP_QUALITY_MAX; | 793 int q= 1000000; //PP_QUALITY_MAX; |
794 int chrom=-1; | 794 int chrom=-1; |
795 int luma=-1; | 795 int luma=-1; |
796 char *option; | 796 char *option; |
797 char *options[OPTIONS_ARRAY_SIZE]; | 797 char *options[OPTIONS_ARRAY_SIZE]; |
798 int i; | 798 int i; |
799 int filterNameOk=0; | 799 int filterNameOk=0; |
800 int numOfUnknownOptions=0; | 800 int numOfUnknownOptions=0; |
801 int enable=1; //does the user want us to enabled or disabled the filter | 801 int enable=1; //does the user want us to enabled or disabled the filter |
802 | 802 |
803 filterToken= strtok(p, filterDelimiters); | 803 filterToken= strtok(p, filterDelimiters); |
804 if(filterToken == NULL) break; | 804 if(filterToken == NULL) break; |
805 p+= strlen(filterToken) + 1; // p points to next filterToken | 805 p+= strlen(filterToken) + 1; // p points to next filterToken |
806 filterName= strtok(filterToken, optionDelimiters); | 806 filterName= strtok(filterToken, optionDelimiters); |
807 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName); | 807 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName); |
808 | 808 |
809 if(*filterName == '-') | 809 if(*filterName == '-') |
810 { | 810 { |
811 enable=0; | 811 enable=0; |
812 filterName++; | 812 filterName++; |
813 } | 813 } |
814 | 814 |
815 for(;;){ //for all options | 815 for(;;){ //for all options |
816 option= strtok(NULL, optionDelimiters); | 816 option= strtok(NULL, optionDelimiters); |
817 if(option == NULL) break; | 817 if(option == NULL) break; |
818 | 818 |
819 if(verbose>1) printf("pp: option: %s\n", option); | 819 if(verbose>1) printf("pp: option: %s\n", option); |
820 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; | 820 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; |
821 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; | 821 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; |
822 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; | 822 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; |
823 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; | 823 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; |
824 else | 824 else |
825 { | 825 { |
826 options[numOfUnknownOptions] = option; | 826 options[numOfUnknownOptions] = option; |
827 numOfUnknownOptions++; | 827 numOfUnknownOptions++; |
828 } | 828 } |
829 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; | 829 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; |
830 } | 830 } |
831 options[numOfUnknownOptions] = NULL; | 831 options[numOfUnknownOptions] = NULL; |
832 | 832 |
833 /* replace stuff from the replace Table */ | 833 /* replace stuff from the replace Table */ |
834 for(i=0; replaceTable[2*i]!=NULL; i++) | 834 for(i=0; replaceTable[2*i]!=NULL; i++) |
835 { | 835 { |
836 if(!strcmp(replaceTable[2*i], filterName)) | 836 if(!strcmp(replaceTable[2*i], filterName)) |
837 { | 837 { |
838 int newlen= strlen(replaceTable[2*i + 1]); | 838 int newlen= strlen(replaceTable[2*i + 1]); |
839 int plen; | 839 int plen; |
840 int spaceLeft; | 840 int spaceLeft; |
841 | 841 |
842 if(p==NULL) p= temp, *p=0; //last filter | 842 if(p==NULL) p= temp, *p=0; //last filter |
843 else p--, *p=','; //not last filter | 843 else p--, *p=','; //not last filter |
844 | 844 |
845 plen= strlen(p); | 845 plen= strlen(p); |
846 spaceLeft= p - temp + plen; | 846 spaceLeft= p - temp + plen; |
847 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE) | 847 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE) |
848 { | 848 { |
849 ppMode->error++; | 849 ppMode->error++; |
850 break; | 850 break; |
851 } | 851 } |
852 memmove(p + newlen, p, plen+1); | 852 memmove(p + newlen, p, plen+1); |
853 memcpy(p, replaceTable[2*i + 1], newlen); | 853 memcpy(p, replaceTable[2*i + 1], newlen); |
854 filterNameOk=1; | 854 filterNameOk=1; |
855 } | 855 } |
856 } | 856 } |
857 | 857 |
858 for(i=0; filters[i].shortName!=NULL; i++) | 858 for(i=0; filters[i].shortName!=NULL; i++) |
859 { | 859 { |
860 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName); | 860 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName); |
861 if( !strcmp(filters[i].longName, filterName) | 861 if( !strcmp(filters[i].longName, filterName) |
862 || !strcmp(filters[i].shortName, filterName)) | 862 || !strcmp(filters[i].shortName, filterName)) |
863 { | 863 { |
864 ppMode->lumMode &= ~filters[i].mask; | 864 ppMode->lumMode &= ~filters[i].mask; |
865 ppMode->chromMode &= ~filters[i].mask; | 865 ppMode->chromMode &= ~filters[i].mask; |
866 | 866 |
867 filterNameOk=1; | 867 filterNameOk=1; |
868 if(!enable) break; // user wants to disable it | 868 if(!enable) break; // user wants to disable it |
869 | 869 |
870 if(q >= filters[i].minLumQuality && luma) | 870 if(q >= filters[i].minLumQuality && luma) |
871 ppMode->lumMode|= filters[i].mask; | 871 ppMode->lumMode|= filters[i].mask; |
872 if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) | 872 if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) |
873 if(q >= filters[i].minChromQuality) | 873 if(q >= filters[i].minChromQuality) |
874 ppMode->chromMode|= filters[i].mask; | 874 ppMode->chromMode|= filters[i].mask; |
875 | 875 |
876 if(filters[i].mask == LEVEL_FIX) | 876 if(filters[i].mask == LEVEL_FIX) |
877 { | 877 { |
878 int o; | 878 int o; |
879 ppMode->minAllowedY= 16; | 879 ppMode->minAllowedY= 16; |
880 ppMode->maxAllowedY= 234; | 880 ppMode->maxAllowedY= 234; |
881 for(o=0; options[o]!=NULL; o++) | 881 for(o=0; options[o]!=NULL; o++) |
882 { | 882 { |
883 if( !strcmp(options[o],"fullyrange") | 883 if( !strcmp(options[o],"fullyrange") |
884 ||!strcmp(options[o],"f")) | 884 ||!strcmp(options[o],"f")) |
885 { | 885 { |
886 ppMode->minAllowedY= 0; | 886 ppMode->minAllowedY= 0; |
887 ppMode->maxAllowedY= 255; | 887 ppMode->maxAllowedY= 255; |
888 numOfUnknownOptions--; | 888 numOfUnknownOptions--; |
889 } | 889 } |
890 } | 890 } |
891 } | 891 } |
892 else if(filters[i].mask == TEMP_NOISE_FILTER) | 892 else if(filters[i].mask == TEMP_NOISE_FILTER) |
893 { | 893 { |
894 int o; | 894 int o; |
895 int numOfNoises=0; | 895 int numOfNoises=0; |
896 | 896 |
897 for(o=0; options[o]!=NULL; o++) | 897 for(o=0; options[o]!=NULL; o++) |
898 { | 898 { |
899 char *tail; | 899 char *tail; |
900 ppMode->maxTmpNoise[numOfNoises]= | 900 ppMode->maxTmpNoise[numOfNoises]= |
901 strtol(options[o], &tail, 0); | 901 strtol(options[o], &tail, 0); |
902 if(tail!=options[o]) | 902 if(tail!=options[o]) |
903 { | 903 { |
904 numOfNoises++; | 904 numOfNoises++; |
905 numOfUnknownOptions--; | 905 numOfUnknownOptions--; |
906 if(numOfNoises >= 3) break; | 906 if(numOfNoises >= 3) break; |
907 } | 907 } |
908 } | 908 } |
909 } | 909 } |
910 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK | 910 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK |
911 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) | 911 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) |
912 { | 912 { |
913 int o; | 913 int o; |
914 | 914 |
915 for(o=0; options[o]!=NULL && o<2; o++) | 915 for(o=0; options[o]!=NULL && o<2; o++) |
916 { | 916 { |
917 char *tail; | 917 char *tail; |
918 int val= strtol(options[o], &tail, 0); | 918 int val= strtol(options[o], &tail, 0); |
919 if(tail==options[o]) break; | 919 if(tail==options[o]) break; |
920 | 920 |
921 numOfUnknownOptions--; | 921 numOfUnknownOptions--; |
922 if(o==0) ppMode->baseDcDiff= val; | 922 if(o==0) ppMode->baseDcDiff= val; |
923 else ppMode->flatnessThreshold= val; | 923 else ppMode->flatnessThreshold= val; |
924 } | 924 } |
925 } | 925 } |
926 else if(filters[i].mask == FORCE_QUANT) | 926 else if(filters[i].mask == FORCE_QUANT) |
927 { | 927 { |
928 int o; | 928 int o; |
929 ppMode->forcedQuant= 15; | 929 ppMode->forcedQuant= 15; |
930 | 930 |
931 for(o=0; options[o]!=NULL && o<1; o++) | 931 for(o=0; options[o]!=NULL && o<1; o++) |
932 { | 932 { |
933 char *tail; | 933 char *tail; |
934 int val= strtol(options[o], &tail, 0); | 934 int val= strtol(options[o], &tail, 0); |
935 if(tail==options[o]) break; | 935 if(tail==options[o]) break; |
936 | 936 |
937 numOfUnknownOptions--; | 937 numOfUnknownOptions--; |
938 ppMode->forcedQuant= val; | 938 ppMode->forcedQuant= val; |
939 } | 939 } |
940 } | 940 } |
941 } | 941 } |
942 } | 942 } |
943 if(!filterNameOk) ppMode->error++; | 943 if(!filterNameOk) ppMode->error++; |
944 ppMode->error += numOfUnknownOptions; | 944 ppMode->error += numOfUnknownOptions; |
945 } | 945 } |
946 | 946 |
947 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); | 947 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); |
948 if(ppMode->error) | 948 if(ppMode->error) |
949 { | 949 { |
950 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); | 950 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); |
951 free(ppMode); | 951 free(ppMode); |
952 return NULL; | 952 return NULL; |
953 } | 953 } |
954 return ppMode; | 954 return ppMode; |
955 } | 955 } |
956 | 956 |
957 void pp_free_mode(pp_mode_t *mode){ | 957 void pp_free_mode(pp_mode_t *mode){ |
958 if(mode) free(mode); | 958 if(mode) free(mode); |
959 } | 959 } |
960 | 960 |
/**
 * Replaces the buffer at *p by a freshly allocated, zero-filled one.
 *
 * The previous buffer (if any) is freed first; its contents are not carried
 * over.
 *
 * @param p         address of the buffer pointer; NULL on return if the
 *                  allocation failed
 * @param alignment alignment passed to memalign()
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
    free(*p);
    *p= memalign(alignment, size);
    // do not clear through a NULL pointer when the allocation failed
    if(*p) memset(*p, 0, size);
}
966 | 966 |
967 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){ | 967 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){ |
968 int mbWidth = (width+15)>>4; | 968 int mbWidth = (width+15)>>4; |
969 int mbHeight= (height+15)>>4; | 969 int mbHeight= (height+15)>>4; |
970 int i; | 970 int i; |
971 | 971 |
972 c->stride= stride; | 972 c->stride= stride; |
973 c->qpStride= qpStride; | 973 c->qpStride= qpStride; |
974 | 974 |
975 reallocAlign((void **)&c->tempDst, 8, stride*24); | 975 reallocAlign((void **)&c->tempDst, 8, stride*24); |
976 reallocAlign((void **)&c->tempSrc, 8, stride*24); | 976 reallocAlign((void **)&c->tempSrc, 8, stride*24); |
977 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); | 977 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); |
978 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); | 978 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); |
979 for(i=0; i<256; i++) | 979 for(i=0; i<256; i++) |
980 c->yHistogram[i]= width*height/64*15/256; | 980 c->yHistogram[i]= width*height/64*15/256; |
981 | 981 |
982 for(i=0; i<3; i++) | 982 for(i=0; i<3; i++) |
983 { | 983 { |
984 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end | 984 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end |
985 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); | 985 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); |
986 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size | 986 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size |
987 } | 987 } |
988 | 988 |
989 reallocAlign((void **)&c->deintTemp, 8, 2*width+32); | 989 reallocAlign((void **)&c->deintTemp, 8, 2*width+32); |
990 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); | 990 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); |
991 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); | 991 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); |
992 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T)); | 992 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T)); |
993 } | 993 } |
994 | 994 |
995 static void global_init(void){ | 995 static void global_init(void){ |
996 int i; | 996 int i; |
997 memset(clip_table, 0, 256); | 997 memset(clip_table, 0, 256); |
998 for(i=256; i<512; i++) | 998 for(i=256; i<512; i++) |
999 clip_table[i]= i; | 999 clip_table[i]= i; |
1000 memset(clip_table+512, 0, 256); | 1000 memset(clip_table+512, 0, 256); |
1001 } | 1001 } |
1002 | 1002 |
1003 pp_context_t *pp_get_context(int width, int height, int cpuCaps){ | 1003 pp_context_t *pp_get_context(int width, int height, int cpuCaps){ |
1004 PPContext *c= memalign(32, sizeof(PPContext)); | 1004 PPContext *c= memalign(32, sizeof(PPContext)); |
1005 int stride= (width+15)&(~15); //assumed / will realloc if needed | 1005 int stride= (width+15)&(~15); //assumed / will realloc if needed |
1006 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed | 1006 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed |
1007 | 1007 |
1008 global_init(); | 1008 global_init(); |
1009 | 1009 |
1010 memset(c, 0, sizeof(PPContext)); | 1010 memset(c, 0, sizeof(PPContext)); |
1011 c->cpuCaps= cpuCaps; | 1011 c->cpuCaps= cpuCaps; |
1012 if(cpuCaps&PP_FORMAT){ | 1012 if(cpuCaps&PP_FORMAT){ |
1013 c->hChromaSubSample= cpuCaps&0x3; | 1013 c->hChromaSubSample= cpuCaps&0x3; |
1014 c->vChromaSubSample= (cpuCaps>>4)&0x3; | 1014 c->vChromaSubSample= (cpuCaps>>4)&0x3; |
1015 }else{ | 1015 }else{ |
1016 c->hChromaSubSample= 1; | 1016 c->hChromaSubSample= 1; |
1017 c->vChromaSubSample= 1; | 1017 c->vChromaSubSample= 1; |
1018 } | 1018 } |
1019 | 1019 |
1020 reallocBuffers(c, width, height, stride, qpStride); | 1020 reallocBuffers(c, width, height, stride, qpStride); |
1021 | 1021 |
1022 c->frameNum=-1; | 1022 c->frameNum=-1; |
1023 | 1023 |
1024 return c; | 1024 return c; |
1025 } | 1025 } |
1026 | 1026 |
1027 void pp_free_context(void *vc){ | 1027 void pp_free_context(void *vc){ |
1028 PPContext *c = (PPContext*)vc; | 1028 PPContext *c = (PPContext*)vc; |
1029 int i; | 1029 int i; |
1030 | 1030 |
1031 for(i=0; i<3; i++) free(c->tempBlured[i]); | 1031 for(i=0; i<3; i++) free(c->tempBlured[i]); |
1032 for(i=0; i<3; i++) free(c->tempBluredPast[i]); | 1032 for(i=0; i<3; i++) free(c->tempBluredPast[i]); |
1033 | 1033 |
1034 free(c->tempBlocks); | 1034 free(c->tempBlocks); |
1035 free(c->yHistogram); | 1035 free(c->yHistogram); |
1036 free(c->tempDst); | 1036 free(c->tempDst); |
1037 free(c->tempSrc); | 1037 free(c->tempSrc); |
1038 free(c->deintTemp); | 1038 free(c->deintTemp); |
1039 free(c->stdQPTable); | 1039 free(c->stdQPTable); |
1040 free(c->nonBQPTable); | 1040 free(c->nonBQPTable); |
1041 free(c->forcedQPTable); | 1041 free(c->forcedQPTable); |
1042 | 1042 |
1043 memset(c, 0, sizeof(PPContext)); | 1043 memset(c, 0, sizeof(PPContext)); |
1044 | 1044 |
1045 free(c); | 1045 free(c); |
1046 } | 1046 } |
1047 | 1047 |
1048 void pp_postprocess(uint8_t * src[3], int srcStride[3], | 1048 void pp_postprocess(uint8_t * src[3], int srcStride[3], |
1049 uint8_t * dst[3], int dstStride[3], | 1049 uint8_t * dst[3], int dstStride[3], |
1050 int width, int height, | 1050 int width, int height, |
1051 QP_STORE_T *QP_store, int QPStride, | 1051 QP_STORE_T *QP_store, int QPStride, |
1052 pp_mode_t *vm, void *vc, int pict_type) | 1052 pp_mode_t *vm, void *vc, int pict_type) |
1053 { | 1053 { |
1054 int mbWidth = (width+15)>>4; | 1054 int mbWidth = (width+15)>>4; |
1055 int mbHeight= (height+15)>>4; | 1055 int mbHeight= (height+15)>>4; |
1056 PPMode *mode = (PPMode*)vm; | 1056 PPMode *mode = (PPMode*)vm; |
1057 PPContext *c = (PPContext*)vc; | 1057 PPContext *c = (PPContext*)vc; |
1058 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0])); | 1058 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0])); |
1059 int absQPStride = ABS(QPStride); | 1059 int absQPStride = ABS(QPStride); |
1060 | 1060 |
1061 // c->stride and c->QPStride are always positive | 1061 // c->stride and c->QPStride are always positive |
1062 if(c->stride < minStride || c->qpStride < absQPStride) | 1062 if(c->stride < minStride || c->qpStride < absQPStride) |
1063 reallocBuffers(c, width, height, | 1063 reallocBuffers(c, width, height, |
1064 MAX(minStride, c->stride), | 1064 MAX(minStride, c->stride), |
1065 MAX(c->qpStride, absQPStride)); | 1065 MAX(c->qpStride, absQPStride)); |
1066 | 1066 |
1067 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) | 1067 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) |
1068 { | 1068 { |
1069 int i; | 1069 int i; |
1070 QP_store= c->forcedQPTable; | 1070 QP_store= c->forcedQPTable; |
1071 absQPStride = QPStride = 0; | 1071 absQPStride = QPStride = 0; |
1072 if(mode->lumMode & FORCE_QUANT) | 1072 if(mode->lumMode & FORCE_QUANT) |
1073 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; | 1073 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; |
1074 else | 1074 else |
1075 for(i=0; i<mbWidth; i++) QP_store[i]= 1; | 1075 for(i=0; i<mbWidth; i++) QP_store[i]= 1; |
1076 } | 1076 } |
1077 //printf("pict_type:%d\n", pict_type); | 1077 //printf("pict_type:%d\n", pict_type); |
1078 | 1078 |
1079 if(pict_type & PP_PICT_TYPE_QP2){ | 1079 if(pict_type & PP_PICT_TYPE_QP2){ |
1080 int i; | 1080 int i; |
1081 const int count= mbHeight * absQPStride; | 1081 const int count= mbHeight * absQPStride; |
1082 for(i=0; i<(count>>2); i++){ | 1082 for(i=0; i<(count>>2); i++){ |
1083 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; | 1083 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; |
1084 } | 1084 } |
1085 for(i<<=2; i<count; i++){ | 1085 for(i<<=2; i<count; i++){ |
1086 c->stdQPTable[i] = QP_store[i]>>1; | 1086 c->stdQPTable[i] = QP_store[i]>>1; |
1087 } | 1087 } |
1088 QP_store= c->stdQPTable; | 1088 QP_store= c->stdQPTable; |
1089 QPStride= absQPStride; | 1089 QPStride= absQPStride; |
1090 } | 1090 } |
1091 | 1091 |
1092 if(0){ | 1092 if(0){ |
1093 int x,y; | 1093 int x,y; |
1094 for(y=0; y<mbHeight; y++){ | 1094 for(y=0; y<mbHeight; y++){ |
1095 for(x=0; x<mbWidth; x++){ | 1095 for(x=0; x<mbWidth; x++){ |
1096 printf("%2d ", QP_store[x + y*QPStride]); | 1096 printf("%2d ", QP_store[x + y*QPStride]); |
1097 } | 1097 } |
1098 printf("\n"); | 1098 printf("\n"); |
1099 } | 1099 } |
1100 printf("\n"); | 1100 printf("\n"); |
1101 } | 1101 } |
1102 | 1102 |
1103 if((pict_type&7)!=3) | 1103 if((pict_type&7)!=3) |
1104 { | 1104 { |
1105 if (QPStride >= 0) { | 1105 if (QPStride >= 0) { |
1106 int i; | 1106 int i; |
1107 const int count= mbHeight * QPStride; | 1107 const int count= mbHeight * QPStride; |
1108 for(i=0; i<(count>>2); i++){ | 1108 for(i=0; i<(count>>2); i++){ |
1109 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; | 1109 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; |
1110 } | 1110 } |
1111 for(i<<=2; i<count; i++){ | 1111 for(i<<=2; i<count; i++){ |
1112 c->nonBQPTable[i] = QP_store[i] & 0x3F; | 1112 c->nonBQPTable[i] = QP_store[i] & 0x3F; |
1113 } | 1113 } |
1114 } else { | 1114 } else { |
1115 int i,j; | 1115 int i,j; |
1116 for(i=0; i<mbHeight; i++) { | 1116 for(i=0; i<mbHeight; i++) { |
1117 for(j=0; j<absQPStride; j++) { | 1117 for(j=0; j<absQPStride; j++) { |
1118 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F; | 1118 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F; |
1119 } | 1119 } |
1120 } | 1120 } |
1121 } | 1121 } |
1122 } | 1122 } |
1123 | 1123 |
1124 if(verbose>2) | 1124 if(verbose>2) |
1125 { | 1125 { |
1126 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); | 1126 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); |
1127 } | 1127 } |
1128 | 1128 |
1129 postProcess(src[0], srcStride[0], dst[0], dstStride[0], | 1129 postProcess(src[0], srcStride[0], dst[0], dstStride[0], |
1130 width, height, QP_store, QPStride, 0, mode, c); | 1130 width, height, QP_store, QPStride, 0, mode, c); |
1131 | 1131 |
1132 width = (width )>>c->hChromaSubSample; | 1132 width = (width )>>c->hChromaSubSample; |
1133 height = (height)>>c->vChromaSubSample; | 1133 height = (height)>>c->vChromaSubSample; |
1134 | 1134 |
1135 if(mode->chromMode) | 1135 if(mode->chromMode) |
1136 { | 1136 { |
1137 postProcess(src[1], srcStride[1], dst[1], dstStride[1], | 1137 postProcess(src[1], srcStride[1], dst[1], dstStride[1], |
1138 width, height, QP_store, QPStride, 1, mode, c); | 1138 width, height, QP_store, QPStride, 1, mode, c); |
1139 postProcess(src[2], srcStride[2], dst[2], dstStride[2], | 1139 postProcess(src[2], srcStride[2], dst[2], dstStride[2], |
1140 width, height, QP_store, QPStride, 2, mode, c); | 1140 width, height, QP_store, QPStride, 2, mode, c); |
1141 } | 1141 } |
1142 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) | 1142 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) |
1143 { | 1143 { |
1144 linecpy(dst[1], src[1], height, srcStride[1]); | 1144 linecpy(dst[1], src[1], height, srcStride[1]); |
1145 linecpy(dst[2], src[2], height, srcStride[2]); | 1145 linecpy(dst[2], src[2], height, srcStride[2]); |
1146 } | 1146 } |
1147 else | 1147 else |
1148 { | 1148 { |
1149 int y; | 1149 int y; |
1150 for(y=0; y<height; y++) | 1150 for(y=0; y<height; y++) |
1151 { | 1151 { |
1152 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); | 1152 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); |
1153 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); | 1153 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); |
1154 } | 1154 } |
1155 } | 1155 } |
1156 } | 1156 } |
1157 | 1157 |