comparison libpostproc/postprocess.c @ 2979:bfabfdf9ce55 libavcodec

COSMETICS: tabs --> spaces, some prettyprinting
author diego
date Thu, 22 Dec 2005 01:10:11 +0000
parents ef2149182f1c
children 0b546eab515d
comparison
equal deleted inserted replaced
2978:403183bbb505 2979:bfabfdf9ce55
22 * @file postprocess.c 22 * @file postprocess.c
23 * postprocessing. 23 * postprocessing.
24 */ 24 */
25 25
26 /* 26 /*
27 C MMX MMX2 3DNow AltiVec 27 C MMX MMX2 3DNow AltiVec
28 isVertDC Ec Ec Ec 28 isVertDC Ec Ec Ec
29 isVertMinMaxOk Ec Ec Ec 29 isVertMinMaxOk Ec Ec Ec
30 doVertLowPass E e e Ec 30 doVertLowPass E e e Ec
31 doVertDefFilter Ec Ec e e Ec 31 doVertDefFilter Ec Ec e e Ec
32 isHorizDC Ec Ec Ec 32 isHorizDC Ec Ec Ec
33 isHorizMinMaxOk a E Ec 33 isHorizMinMaxOk a E Ec
34 doHorizLowPass E e e Ec 34 doHorizLowPass E e e Ec
35 doHorizDefFilter Ec Ec e e Ec 35 doHorizDefFilter Ec Ec e e Ec
36 do_a_deblock Ec E Ec E 36 do_a_deblock Ec E Ec E
37 deRing E e e* Ecp 37 deRing E e e* Ecp
38 Vertical RKAlgo1 E a a 38 Vertical RKAlgo1 E a a
39 Horizontal RKAlgo1 a a 39 Horizontal RKAlgo1 a a
40 Vertical X1# a E E 40 Vertical X1# a E E
41 Horizontal X1# a E E 41 Horizontal X1# a E E
42 LinIpolDeinterlace e E E* 42 LinIpolDeinterlace e E E*
43 CubicIpolDeinterlace a e e* 43 CubicIpolDeinterlace a e e*
44 LinBlendDeinterlace e E E* 44 LinBlendDeinterlace e E E*
45 MedianDeinterlace# E Ec Ec 45 MedianDeinterlace# E Ec Ec
46 TempDeNoiser# E e e Ec 46 TempDeNoiser# E e e Ec
47 47
* I don't have a 3DNow! CPU, so it is untested; nobody has reported that it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
E = Exact implementation
e = almost exact implementation (slightly different rounding, ...)
59 reduce the time wasted on the mem transfer 59 reduce the time wasted on the mem transfer
60 unroll stuff if instructions depend too much on the prior one 60 unroll stuff if instructions depend too much on the prior one
61 move YScale thing to the end instead of fixing QP 61 move YScale thing to the end instead of fixing QP
62 write a faster and higher quality deblocking filter :) 62 write a faster and higher quality deblocking filter :)
63 make the mainloop more flexible (variable number of blocks at once 63 make the mainloop more flexible (variable number of blocks at once
64 (the if/else stuff per block is slowing things down) 64 (the if/else stuff per block is slowing things down)
65 compare the quality & speed of all filters 65 compare the quality & speed of all filters
66 split this huge file 66 split this huge file
67 optimize c versions 67 optimize c versions
68 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks 68 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
69 ... 69 ...
118 # define attribute_used 118 # define attribute_used
119 # define always_inline inline 119 # define always_inline inline
120 #endif 120 #endif
121 121
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * 8-byte aligned 64-bit constants, compiled on x86 / x86-64 only.
 *   wNN: the 16-bit value 0x00NN repeated four times
 *   bNN: the byte 0xNN repeated eight times
 * No C code in this part of the file reads them; presumably they are operands
 * of the inline assembly elsewhere -- confirm before removing any.
 */
static uint64_t __attribute__((aligned(8))) attribute_used w05 = 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) attribute_used w04 = 0x0004000400040004LL;
static uint64_t __attribute__((aligned(8))) attribute_used w20 = 0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) attribute_used b00 = 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) attribute_used b01 = 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) attribute_used b02 = 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) attribute_used b08 = 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) attribute_used b80 = 0x8080808080808080LL;
#endif
132 132
/* Backing storage for the clipping lookup: three consecutive 256-entry ranges. */
static uint8_t clip_table[3 * 256];
/* Points at the middle range, so indices -256 .. 511 all stay inside clip_table. */
static uint8_t *const clip_tab = clip_table + 256;
135 135
138 static const int attribute_used deringThreshold= 20; 138 static const int attribute_used deringThreshold= 20;
139 139
140 140
141 static struct PPFilter filters[]= 141 static struct PPFilter filters[]=
142 { 142 {
143 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, 143 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
144 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, 144 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
145 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER}, 145 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
146 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ 146 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
147 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, 147 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
148 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, 148 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
149 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, 149 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
150 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, 150 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
151 {"dr", "dering", 1, 5, 6, DERING}, 151 {"dr", "dering", 1, 5, 6, DERING},
152 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, 152 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
153 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, 153 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
154 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, 154 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
155 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, 155 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
156 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, 156 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
157 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, 157 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
158 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, 158 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
159 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, 159 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
160 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, 160 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
161 {NULL, NULL,0,0,0,0} //End Marker 161 {NULL, NULL,0,0,0,0} //End Marker
162 }; 162 };
163 163
/*
 * Flat list of string pairs terminated by a single NULL. Judging by the
 * contents, each pair is an alias followed by the filter string it stands
 * for -- confirm against the code that walks this table.
 */
static char *replaceTable[] = {
    "default", "hdeblock:a,vdeblock:a,dering:a",
    "de",      "hdeblock:a,vdeblock:a,dering:a",
    "fast",    "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",      "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",      "ha:a:128:7,va:a,dering:a",
    NULL /* end marker */
};
173 173
174 174
175 #if defined(ARCH_X86) || defined(ARCH_X86_64) 175 #if defined(ARCH_X86) || defined(ARCH_X86_64)
176 static inline void prefetchnta(void *p) 176 static inline void prefetchnta(void *p)
177 { 177 {
178 asm volatile( "prefetchnta (%0)\n\t" 178 asm volatile( "prefetchnta (%0)\n\t"
179 : : "r" (p) 179 : : "r" (p)
180 ); 180 );
181 } 181 }
182 182
183 static inline void prefetcht0(void *p) 183 static inline void prefetcht0(void *p)
184 { 184 {
185 asm volatile( "prefetcht0 (%0)\n\t" 185 asm volatile( "prefetcht0 (%0)\n\t"
186 : : "r" (p) 186 : : "r" (p)
187 ); 187 );
188 } 188 }
189 189
190 static inline void prefetcht1(void *p) 190 static inline void prefetcht1(void *p)
191 { 191 {
192 asm volatile( "prefetcht1 (%0)\n\t" 192 asm volatile( "prefetcht1 (%0)\n\t"
193 : : "r" (p) 193 : : "r" (p)
194 ); 194 );
195 } 195 }
196 196
197 static inline void prefetcht2(void *p) 197 static inline void prefetcht2(void *p)
198 { 198 {
199 asm volatile( "prefetcht2 (%0)\n\t" 199 asm volatile( "prefetcht2 (%0)\n\t"
200 : : "r" (p) 200 : : "r" (p)
201 ); 201 );
202 } 202 }
203 #endif 203 #endif
204 204
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
206 206
207 /** 207 /**
208 * Check if the given 8x8 Block is mostly "flat" 208 * Check if the given 8x8 Block is mostly "flat"
209 */ 209 */
210 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) 210 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
211 { 211 {
212 int numEq= 0; 212 int numEq= 0;
213 int y; 213 int y;
214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; 214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
215 const int dcThreshold= dcOffset*2 + 1; 215 const int dcThreshold= dcOffset*2 + 1;
216 216
217 for(y=0; y<BLOCK_SIZE; y++) 217 for(y=0; y<BLOCK_SIZE; y++)
218 { 218 {
219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; 219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; 220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; 221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; 222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; 223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; 224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; 225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
226 src+= stride; 226 src+= stride;
227 } 227 }
228 return numEq > c->ppMode.flatnessThreshold; 228 return numEq > c->ppMode.flatnessThreshold;
229 } 229 }
230 230
231 /** 231 /**
232 * Check if the middle 8x8 Block in the given 8x16 block is flat 232 * Check if the middle 8x8 Block in the given 8x16 block is flat
233 */ 233 */
234 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ 234 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
235 int numEq= 0; 235 int numEq= 0;
236 int y; 236 int y;
237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; 237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
238 const int dcThreshold= dcOffset*2 + 1; 238 const int dcThreshold= dcOffset*2 + 1;
239 239
240 src+= stride*4; // src points to begin of the 8x8 Block 240 src+= stride*4; // src points to begin of the 8x8 Block
241 for(y=0; y<BLOCK_SIZE-1; y++) 241 for(y=0; y<BLOCK_SIZE-1; y++)
242 { 242 {
243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; 243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; 244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; 245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; 246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; 247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; 248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; 249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; 250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
251 src+= stride; 251 src+= stride;
252 } 252 }
253 return numEq > c->ppMode.flatnessThreshold; 253 return numEq > c->ppMode.flatnessThreshold;
254 } 254 }
255 255
/**
 * Cheap horizontal range test over 8 rows.
 *
 * One pixel pair is sampled per row, with the column pattern
 * (0,5) (2,7) (4,1) (6,3) repeated twice. The test fails as soon as a sampled
 * pair differs by more than 2*QP in either direction (for non-negative QP).
 *
 * @param src    first pixel of the block
 * @param stride offset in bytes from one row to the next
 * @param QP     quantiser value scaling the allowed difference
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    int row;
#if 1
    for (row = 0; row < 8; row++) {
        /* columns cycle 0,2,4,6; the partner column is five to the right, modulo 8 */
        const int colA = (2 * row) & 7;
        const int colB = (colA + 5) & 7;

        if ((unsigned)(src[colA] - src[colB] + 2*QP) > 4*QP)
            return 0;
        src += stride;
    }
#else
    /* alternative: compare the two outermost columns of every row */
    for (row = 0; row < 8; row++) {
        if ((unsigned)(src[0] - src[7] + 2*QP) > 4*QP)
            return 0;
        src += stride;
    }
#endif
    return 1;
}
278 278
279 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) 279 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
280 { 280 {
281 #if 1 281 #if 1
282 #if 1 282 #if 1
283 int x; 283 int x;
284 src+= stride*4; 284 src+= stride*4;
285 for(x=0; x<BLOCK_SIZE; x+=4) 285 for(x=0; x<BLOCK_SIZE; x+=4)
286 { 286 {
287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; 287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; 288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; 289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; 290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
291 } 291 }
292 #else 292 #else
293 int x; 293 int x;
294 src+= stride*3; 294 src+= stride*3;
295 for(x=0; x<BLOCK_SIZE; x++) 295 for(x=0; x<BLOCK_SIZE; x++)
296 { 296 {
297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; 297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
298 } 298 }
299 #endif 299 #endif
300 return 1; 300 return 1;
301 #else 301 #else
302 int x; 302 int x;
303 src+= stride*4; 303 src+= stride*4;
304 for(x=0; x<BLOCK_SIZE; x++) 304 for(x=0; x<BLOCK_SIZE; x++)
305 { 305 {
306 int min=255; 306 int min=255;
307 int max=0; 307 int max=0;
308 int y; 308 int y;
309 for(y=0; y<8; y++){ 309 for(y=0; y<8; y++){
310 int v= src[x + y*stride]; 310 int v= src[x + y*stride];
311 if(v>max) max=v; 311 if(v>max) max=v;
312 if(v<min) min=v; 312 if(v<min) min=v;
313 } 313 }
314 if(max-min > 2*QP) return 0; 314 if(max-min > 2*QP) return 0;
315 } 315 }
316 return 1; 316 return 1;
317 #endif 317 #endif
318 } 318 }
319 319
320 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ 320 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
321 if( isHorizDC_C(src, stride, c) ){ 321 if( isHorizDC_C(src, stride, c) ){
322 if( isHorizMinMaxOk_C(src, stride, c->QP) ) 322 if( isHorizMinMaxOk_C(src, stride, c->QP) )
323 return 1; 323 return 1;
324 else 324 else
325 return 0; 325 return 0;
326 }else{ 326 }else{
327 return 2; 327 return 2;
328 } 328 }
329 } 329 }
330 330
331 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ 331 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
332 if( isVertDC_C(src, stride, c) ){ 332 if( isVertDC_C(src, stride, c) ){
333 if( isVertMinMaxOk_C(src, stride, c->QP) ) 333 if( isVertMinMaxOk_C(src, stride, c->QP) )
334 return 1; 334 return 1;
335 else 335 else
336 return 0; 336 return 0;
337 }else{ 337 }else{
338 return 2; 338 return 2;
339 } 339 }
340 } 340 }
341 341
342 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) 342 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
343 { 343 {
344 int y; 344 int y;
345 for(y=0; y<BLOCK_SIZE; y++) 345 for(y=0; y<BLOCK_SIZE; y++)
346 { 346 {
347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); 347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
348 348
349 if(ABS(middleEnergy) < 8*c->QP) 349 if(ABS(middleEnergy) < 8*c->QP)
350 { 350 {
351 const int q=(dst[3] - dst[4])/2; 351 const int q=(dst[3] - dst[4])/2;
352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); 352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); 353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
354 354
355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); 355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
356 d= MAX(d, 0); 356 d= MAX(d, 0);
357 357
358 d= (5*d + 32) >> 6; 358 d= (5*d + 32) >> 6;
359 d*= SIGN(-middleEnergy); 359 d*= SIGN(-middleEnergy);
360 360
361 if(q>0) 361 if(q>0)
362 { 362 {
363 d= d<0 ? 0 : d; 363 d= d<0 ? 0 : d;
364 d= d>q ? q : d; 364 d= d>q ? q : d;
365 } 365 }
366 else 366 else
367 { 367 {
368 d= d>0 ? 0 : d; 368 d= d>0 ? 0 : d;
369 d= d<q ? q : d; 369 d= d<q ? q : d;
370 } 370 }
371 371
372 dst[3]-= d; 372 dst[3]-= d;
373 dst[4]+= d; 373 dst[4]+= d;
374 } 374 }
375 dst+= stride; 375 dst+= stride;
376 } 376 }
377 } 377 }
378 378
379 /** 379 /**
380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) 380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) 381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
382 */ 382 */
383 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) 383 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
384 { 384 {
385 int y; 385 int y;
386 for(y=0; y<BLOCK_SIZE; y++) 386 for(y=0; y<BLOCK_SIZE; y++)
387 { 387 {
388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; 388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; 389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
390 390
391 int sums[10]; 391 int sums[10];
392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; 392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
393 sums[1] = sums[0] - first + dst[3]; 393 sums[1] = sums[0] - first + dst[3];
394 sums[2] = sums[1] - first + dst[4]; 394 sums[2] = sums[1] - first + dst[4];
395 sums[3] = sums[2] - first + dst[5]; 395 sums[3] = sums[2] - first + dst[5];
396 sums[4] = sums[3] - first + dst[6]; 396 sums[4] = sums[3] - first + dst[6];
397 sums[5] = sums[4] - dst[0] + dst[7]; 397 sums[5] = sums[4] - dst[0] + dst[7];
398 sums[6] = sums[5] - dst[1] + last; 398 sums[6] = sums[5] - dst[1] + last;
399 sums[7] = sums[6] - dst[2] + last; 399 sums[7] = sums[6] - dst[2] + last;
400 sums[8] = sums[7] - dst[3] + last; 400 sums[8] = sums[7] - dst[3] + last;
401 sums[9] = sums[8] - dst[4] + last; 401 sums[9] = sums[8] - dst[4] + last;
402 402
403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; 403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; 404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; 405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; 406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; 407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; 408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; 409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; 410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
411 411
412 dst+= stride; 412 dst+= stride;
413 } 413 }
414 } 414 }
415 415
416 /** 416 /**
417 * Experimental Filter 1 (Horizontal) 417 * Experimental Filter 1 (Horizontal)
418 * will not damage linear gradients 418 * will not damage linear gradients
421 * MMX2 version does correct clipping C version doesnt 421 * MMX2 version does correct clipping C version doesnt
422 * not identical with the vertical one 422 * not identical with the vertical one
423 */ 423 */
424 static inline void horizX1Filter(uint8_t *src, int stride, int QP) 424 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
425 { 425 {
426 int y; 426 int y;
427 static uint64_t *lut= NULL; 427 static uint64_t *lut= NULL;
428 if(lut==NULL) 428 if(lut==NULL)
429 { 429 {
430 int i; 430 int i;
431 lut= (uint64_t*)memalign(8, 256*8); 431 lut= (uint64_t*)memalign(8, 256*8);
432 for(i=0; i<256; i++) 432 for(i=0; i<256; i++)
433 { 433 {
434 int v= i < 128 ? 2*i : 2*(i-256); 434 int v= i < 128 ? 2*i : 2*(i-256);
435 /* 435 /*
436 //Simulate 112242211 9-Tap filter 436 //Simulate 112242211 9-Tap filter
437 uint64_t a= (v/16) & 0xFF; 437 uint64_t a= (v/16) & 0xFF;
438 uint64_t b= (v/8) & 0xFF; 438 uint64_t b= (v/8) & 0xFF;
439 uint64_t c= (v/4) & 0xFF; 439 uint64_t c= (v/4) & 0xFF;
440 uint64_t d= (3*v/8) & 0xFF; 440 uint64_t d= (3*v/8) & 0xFF;
441 */ 441 */
442 //Simulate piecewise linear interpolation 442 //Simulate piecewise linear interpolation
443 uint64_t a= (v/16) & 0xFF; 443 uint64_t a= (v/16) & 0xFF;
444 uint64_t b= (v*3/16) & 0xFF; 444 uint64_t b= (v*3/16) & 0xFF;
445 uint64_t c= (v*5/16) & 0xFF; 445 uint64_t c= (v*5/16) & 0xFF;
446 uint64_t d= (7*v/16) & 0xFF; 446 uint64_t d= (7*v/16) & 0xFF;
447 uint64_t A= (0x100 - a)&0xFF; 447 uint64_t A= (0x100 - a)&0xFF;
448 uint64_t B= (0x100 - b)&0xFF; 448 uint64_t B= (0x100 - b)&0xFF;
449 uint64_t C= (0x100 - c)&0xFF; 449 uint64_t C= (0x100 - c)&0xFF;
450 uint64_t D= (0x100 - c)&0xFF; 450 uint64_t D= (0x100 - c)&0xFF;
451 451
452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | 452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
453 (D<<24) | (C<<16) | (B<<8) | (A); 453 (D<<24) | (C<<16) | (B<<8) | (A);
454 //lut[i] = (v<<32) | (v<<24); 454 //lut[i] = (v<<32) | (v<<24);
455 } 455 }
456 } 456 }
457 457
458 for(y=0; y<BLOCK_SIZE; y++) 458 for(y=0; y<BLOCK_SIZE; y++)
459 { 459 {
460 int a= src[1] - src[2]; 460 int a= src[1] - src[2];
461 int b= src[3] - src[4]; 461 int b= src[3] - src[4];
462 int c= src[5] - src[6]; 462 int c= src[5] - src[6];
463 463
464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0); 464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
465 465
466 if(d < QP) 466 if(d < QP)
467 { 467 {
468 int v = d * SIGN(-b); 468 int v = d * SIGN(-b);
469 469
470 src[1] +=v/8; 470 src[1] +=v/8;
471 src[2] +=v/4; 471 src[2] +=v/4;
472 src[3] +=3*v/8; 472 src[3] +=3*v/8;
473 src[4] -=3*v/8; 473 src[4] -=3*v/8;
474 src[5] -=v/4; 474 src[5] -=v/4;
475 src[6] -=v/8; 475 src[6] -=v/8;
476 476
477 } 477 }
478 src+=stride; 478 src+=stride;
479 } 479 }
480 } 480 }
481 481
482 /** 482 /**
483 * accurate deblock filter 483 * accurate deblock filter
484 */ 484 */
485 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ 485 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
486 int y; 486 int y;
487 const int QP= c->QP; 487 const int QP= c->QP;
488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; 488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
489 const int dcThreshold= dcOffset*2 + 1; 489 const int dcThreshold= dcOffset*2 + 1;
490 //START_TIMER 490 //START_TIMER
491 src+= step*4; // src points to begin of the 8x8 Block 491 src+= step*4; // src points to begin of the 8x8 Block
492 for(y=0; y<8; y++){ 492 for(y=0; y<8; y++){
493 int numEq= 0; 493 int numEq= 0;
494 494
495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; 495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; 496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; 497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; 498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; 499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; 500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; 501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; 502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; 503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
504 if(numEq > c->ppMode.flatnessThreshold){ 504 if(numEq > c->ppMode.flatnessThreshold){
505 int min, max, x; 505 int min, max, x;
506 506
507 if(src[0] > src[step]){ 507 if(src[0] > src[step]){
508 max= src[0]; 508 max= src[0];
509 min= src[step]; 509 min= src[step];
510 }else{ 510 }else{
511 max= src[step]; 511 max= src[step];
512 min= src[0]; 512 min= src[0];
513 } 513 }
514 for(x=2; x<8; x+=2){ 514 for(x=2; x<8; x+=2){
515 if(src[x*step] > src[(x+1)*step]){ 515 if(src[x*step] > src[(x+1)*step]){
516 if(src[x *step] > max) max= src[ x *step]; 516 if(src[x *step] > max) max= src[ x *step];
517 if(src[(x+1)*step] < min) min= src[(x+1)*step]; 517 if(src[(x+1)*step] < min) min= src[(x+1)*step];
518 }else{ 518 }else{
519 if(src[(x+1)*step] > max) max= src[(x+1)*step]; 519 if(src[(x+1)*step] > max) max= src[(x+1)*step];
520 if(src[ x *step] < min) min= src[ x *step]; 520 if(src[ x *step] < min) min= src[ x *step];
521 } 521 }
522 } 522 }
523 if(max-min < 2*QP){ 523 if(max-min < 2*QP){
524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; 524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; 525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
526 526
527 int sums[10]; 527 int sums[10];
528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; 528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
529 sums[1] = sums[0] - first + src[3*step]; 529 sums[1] = sums[0] - first + src[3*step];
530 sums[2] = sums[1] - first + src[4*step]; 530 sums[2] = sums[1] - first + src[4*step];
531 sums[3] = sums[2] - first + src[5*step]; 531 sums[3] = sums[2] - first + src[5*step];
532 sums[4] = sums[3] - first + src[6*step]; 532 sums[4] = sums[3] - first + src[6*step];
533 sums[5] = sums[4] - src[0*step] + src[7*step]; 533 sums[5] = sums[4] - src[0*step] + src[7*step];
534 sums[6] = sums[5] - src[1*step] + last; 534 sums[6] = sums[5] - src[1*step] + last;
535 sums[7] = sums[6] - src[2*step] + last; 535 sums[7] = sums[6] - src[2*step] + last;
536 sums[8] = sums[7] - src[3*step] + last; 536 sums[8] = sums[7] - src[3*step] + last;
537 sums[9] = sums[8] - src[4*step] + last; 537 sums[9] = sums[8] - src[4*step] + last;
538 538
539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; 539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; 540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; 541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; 542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; 543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; 544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; 545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; 546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
547 } 547 }
548 }else{ 548 }else{
549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); 549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
550 550
551 if(ABS(middleEnergy) < 8*QP) 551 if(ABS(middleEnergy) < 8*QP)
552 { 552 {
553 const int q=(src[3*step] - src[4*step])/2; 553 const int q=(src[3*step] - src[4*step])/2;
554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); 554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); 555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
556 556
557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); 557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
558 d= MAX(d, 0); 558 d= MAX(d, 0);
559 559
560 d= (5*d + 32) >> 6; 560 d= (5*d + 32) >> 6;
561 d*= SIGN(-middleEnergy); 561 d*= SIGN(-middleEnergy);
562 562
563 if(q>0) 563 if(q>0)
564 { 564 {
565 d= d<0 ? 0 : d; 565 d= d<0 ? 0 : d;
566 d= d>q ? q : d; 566 d= d>q ? q : d;
567 } 567 }
568 else 568 else
569 { 569 {
570 d= d>0 ? 0 : d; 570 d= d>0 ? 0 : d;
571 d= d<q ? q : d; 571 d= d<q ? q : d;
572 } 572 }
573 573
574 src[3*step]-= d; 574 src[3*step]-= d;
575 src[4*step]+= d; 575 src[4*step]+= d;
576 } 576 }
577 } 577 }
578 578
579 src += stride; 579 src += stride;
580 } 580 }
581 /*if(step==16){ 581 /*if(step==16){
582 STOP_TIMER("step16") 582 STOP_TIMER("step16")
583 }else{ 583 }else{
584 STOP_TIMER("stepX") 584 STOP_TIMER("stepX")
585 }*/ 585 }*/
666 #endif 666 #endif
667 667
// minor note: the HAVE_xyz macros are messed up after this line, so don't use them
669 669
670 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, 670 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
671 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc) 671 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
672 { 672 {
673 PPContext *c= (PPContext *)vc; 673 PPContext *c= (PPContext *)vc;
674 PPMode *ppMode= (PPMode *)vm; 674 PPMode *ppMode= (PPMode *)vm;
675 c->ppMode= *ppMode; //FIXME 675 c->ppMode= *ppMode; //FIXME
676 676
677 // useing ifs here as they are faster than function pointers allthough the 677 // useing ifs here as they are faster than function pointers allthough the
678 // difference wouldnt be messureable here but its much better because 678 // difference wouldnt be messureable here but its much better because
679 // someone might exchange the cpu whithout restarting mplayer ;) 679 // someone might exchange the cpu whithout restarting mplayer ;)
680 #ifdef RUNTIME_CPUDETECT 680 #ifdef RUNTIME_CPUDETECT
681 #if defined(ARCH_X86) || defined(ARCH_X86_64) 681 #if defined(ARCH_X86) || defined(ARCH_X86_64)
682 // ordered per speed fasterst first 682 // ordered per speed fasterst first
683 if(c->cpuCaps & PP_CPU_CAPS_MMX2) 683 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) 685 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687 else if(c->cpuCaps & PP_CPU_CAPS_MMX) 687 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689 else 689 else
690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
691 #else 691 #else
692 #ifdef ARCH_POWERPC 692 #ifdef ARCH_POWERPC
693 #ifdef HAVE_ALTIVEC 693 #ifdef HAVE_ALTIVEC
694 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) 694 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
696 else 696 else
697 #endif 697 #endif
698 #endif 698 #endif
699 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 699 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
700 #endif 700 #endif
701 #else //RUNTIME_CPUDETECT 701 #else //RUNTIME_CPUDETECT
702 #ifdef HAVE_MMX2 702 #ifdef HAVE_MMX2
703 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 703 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
704 #elif defined (HAVE_3DNOW) 704 #elif defined (HAVE_3DNOW)
705 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 705 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
706 #elif defined (HAVE_MMX) 706 #elif defined (HAVE_MMX)
707 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 707 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
708 #elif defined (HAVE_ALTIVEC) 708 #elif defined (HAVE_ALTIVEC)
709 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 709 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
710 #else 710 #else
711 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 711 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
712 #endif 712 #endif
713 #endif //!RUNTIME_CPUDETECT 713 #endif //!RUNTIME_CPUDETECT
714 } 714 }
715 715
716 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, 716 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
717 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); 717 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
718 718
/**
 * Help text for the -pp command line option: the available filters with
 * their short and long names and options, followed by the syntax of a
 * filter string and a few examples.
 */
char *pp_help =
    "Available postprocessing filters:\n"
    "Filters Options\n"
    "short long name short long option Description\n"
    "* * a autoq CPU power dependent enabler\n"
    " c chrom chrominance filtering enabled\n"
    " y nochrom chrominance filtering disabled\n"
    " n noluma luma filtering disabled\n"
    "hb hdeblock (2 threshold) horizontal deblocking filter\n"
    " 1. difference factor: default=32, higher -> more deblocking\n"
    " 2. flatness threshold: default=39, lower -> more deblocking\n"
    " the h & v deblocking filters share these\n"
    " so you can't set different thresholds for h / v\n"
    "vb vdeblock (2 threshold) vertical deblocking filter\n"
    "ha hadeblock (2 threshold) horizontal deblocking filter\n"
    "va vadeblock (2 threshold) vertical deblocking filter\n"
    "h1 x1hdeblock experimental h deblock filter 1\n"
    "v1 x1vdeblock experimental v deblock filter 1\n"
    "dr dering deringing filter\n"
    "al autolevels automatic brightness / contrast\n"
    " f fullyrange stretch luminance to (0..255)\n"
    "lb linblenddeint linear blend deinterlacer\n"
    "li linipoldeint linear interpolating deinterlace\n"
    "ci cubicipoldeint cubic interpolating deinterlacer\n"
    "md mediandeint median deinterlacer\n"
    "fd ffmpegdeint ffmpeg deinterlacer\n"
    "l5 lowpass5 FIR lowpass deinterlacer\n"
    "de default hb:a,vb:a,dr:a\n"
    "fa fast h1:a,v1:a,dr:a\n"
    "ac ha:a:128:7,va:a,dr:a\n"
    "tn tmpnoise (3 threshold) temporal noise reducer\n"
    " 1. <= 2. <= 3. larger -> stronger filtering\n"
    "fq forceQuant <quantizer> force quantizer\n"
    "Usage:\n"
    "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
    "long form example:\n"
    "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
    "short form example:\n"
    "vb:a/hb:a/lb de,-vb\n"
    "more examples:\n"
    "tn:64:128:256\n"
    ;
763 763
764 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) 764 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
765 { 765 {
766 char temp[GET_MODE_BUFFER_SIZE]; 766 char temp[GET_MODE_BUFFER_SIZE];
767 char *p= temp; 767 char *p= temp;
768 char *filterDelimiters= ",/"; 768 char *filterDelimiters= ",/";
769 char *optionDelimiters= ":"; 769 char *optionDelimiters= ":";
770 struct PPMode *ppMode; 770 struct PPMode *ppMode;
771 char *filterToken; 771 char *filterToken;
772 772
773 ppMode= memalign(8, sizeof(PPMode)); 773 ppMode= memalign(8, sizeof(PPMode));
774 774
775 ppMode->lumMode= 0; 775 ppMode->lumMode= 0;
776 ppMode->chromMode= 0; 776 ppMode->chromMode= 0;
777 ppMode->maxTmpNoise[0]= 700; 777 ppMode->maxTmpNoise[0]= 700;
778 ppMode->maxTmpNoise[1]= 1500; 778 ppMode->maxTmpNoise[1]= 1500;
779 ppMode->maxTmpNoise[2]= 3000; 779 ppMode->maxTmpNoise[2]= 3000;
780 ppMode->maxAllowedY= 234; 780 ppMode->maxAllowedY= 234;
781 ppMode->minAllowedY= 16; 781 ppMode->minAllowedY= 16;
782 ppMode->baseDcDiff= 256/8; 782 ppMode->baseDcDiff= 256/8;
783 ppMode->flatnessThreshold= 56-16-1; 783 ppMode->flatnessThreshold= 56-16-1;
784 ppMode->maxClippedThreshold= 0.01; 784 ppMode->maxClippedThreshold= 0.01;
785 ppMode->error=0; 785 ppMode->error=0;
786 786
787 strncpy(temp, name, GET_MODE_BUFFER_SIZE); 787 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
788 788
789 if(verbose>1) printf("pp: %s\n", name); 789 if(verbose>1) printf("pp: %s\n", name);
790 790
791 for(;;){ 791 for(;;){
792 char *filterName; 792 char *filterName;
793 int q= 1000000; //PP_QUALITY_MAX; 793 int q= 1000000; //PP_QUALITY_MAX;
794 int chrom=-1; 794 int chrom=-1;
795 int luma=-1; 795 int luma=-1;
796 char *option; 796 char *option;
797 char *options[OPTIONS_ARRAY_SIZE]; 797 char *options[OPTIONS_ARRAY_SIZE];
798 int i; 798 int i;
799 int filterNameOk=0; 799 int filterNameOk=0;
800 int numOfUnknownOptions=0; 800 int numOfUnknownOptions=0;
801 int enable=1; //does the user want us to enabled or disabled the filter 801 int enable=1; //does the user want us to enabled or disabled the filter
802 802
803 filterToken= strtok(p, filterDelimiters); 803 filterToken= strtok(p, filterDelimiters);
804 if(filterToken == NULL) break; 804 if(filterToken == NULL) break;
805 p+= strlen(filterToken) + 1; // p points to next filterToken 805 p+= strlen(filterToken) + 1; // p points to next filterToken
806 filterName= strtok(filterToken, optionDelimiters); 806 filterName= strtok(filterToken, optionDelimiters);
807 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName); 807 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
808 808
809 if(*filterName == '-') 809 if(*filterName == '-')
810 { 810 {
811 enable=0; 811 enable=0;
812 filterName++; 812 filterName++;
813 } 813 }
814 814
815 for(;;){ //for all options 815 for(;;){ //for all options
816 option= strtok(NULL, optionDelimiters); 816 option= strtok(NULL, optionDelimiters);
817 if(option == NULL) break; 817 if(option == NULL) break;
818 818
819 if(verbose>1) printf("pp: option: %s\n", option); 819 if(verbose>1) printf("pp: option: %s\n", option);
820 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; 820 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
821 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; 821 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
822 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; 822 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
823 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; 823 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
824 else 824 else
825 { 825 {
826 options[numOfUnknownOptions] = option; 826 options[numOfUnknownOptions] = option;
827 numOfUnknownOptions++; 827 numOfUnknownOptions++;
828 } 828 }
829 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; 829 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
830 } 830 }
831 options[numOfUnknownOptions] = NULL; 831 options[numOfUnknownOptions] = NULL;
832 832
833 /* replace stuff from the replace Table */ 833 /* replace stuff from the replace Table */
834 for(i=0; replaceTable[2*i]!=NULL; i++) 834 for(i=0; replaceTable[2*i]!=NULL; i++)
835 { 835 {
836 if(!strcmp(replaceTable[2*i], filterName)) 836 if(!strcmp(replaceTable[2*i], filterName))
837 { 837 {
838 int newlen= strlen(replaceTable[2*i + 1]); 838 int newlen= strlen(replaceTable[2*i + 1]);
839 int plen; 839 int plen;
840 int spaceLeft; 840 int spaceLeft;
841 841
842 if(p==NULL) p= temp, *p=0; //last filter 842 if(p==NULL) p= temp, *p=0; //last filter
843 else p--, *p=','; //not last filter 843 else p--, *p=','; //not last filter
844 844
845 plen= strlen(p); 845 plen= strlen(p);
846 spaceLeft= p - temp + plen; 846 spaceLeft= p - temp + plen;
847 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE) 847 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
848 { 848 {
849 ppMode->error++; 849 ppMode->error++;
850 break; 850 break;
851 } 851 }
852 memmove(p + newlen, p, plen+1); 852 memmove(p + newlen, p, plen+1);
853 memcpy(p, replaceTable[2*i + 1], newlen); 853 memcpy(p, replaceTable[2*i + 1], newlen);
854 filterNameOk=1; 854 filterNameOk=1;
855 } 855 }
856 } 856 }
857 857
858 for(i=0; filters[i].shortName!=NULL; i++) 858 for(i=0; filters[i].shortName!=NULL; i++)
859 { 859 {
860 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName); 860 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
861 if( !strcmp(filters[i].longName, filterName) 861 if( !strcmp(filters[i].longName, filterName)
862 || !strcmp(filters[i].shortName, filterName)) 862 || !strcmp(filters[i].shortName, filterName))
863 { 863 {
864 ppMode->lumMode &= ~filters[i].mask; 864 ppMode->lumMode &= ~filters[i].mask;
865 ppMode->chromMode &= ~filters[i].mask; 865 ppMode->chromMode &= ~filters[i].mask;
866 866
867 filterNameOk=1; 867 filterNameOk=1;
868 if(!enable) break; // user wants to disable it 868 if(!enable) break; // user wants to disable it
869 869
870 if(q >= filters[i].minLumQuality && luma) 870 if(q >= filters[i].minLumQuality && luma)
871 ppMode->lumMode|= filters[i].mask; 871 ppMode->lumMode|= filters[i].mask;
872 if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) 872 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
873 if(q >= filters[i].minChromQuality) 873 if(q >= filters[i].minChromQuality)
874 ppMode->chromMode|= filters[i].mask; 874 ppMode->chromMode|= filters[i].mask;
875 875
876 if(filters[i].mask == LEVEL_FIX) 876 if(filters[i].mask == LEVEL_FIX)
877 { 877 {
878 int o; 878 int o;
879 ppMode->minAllowedY= 16; 879 ppMode->minAllowedY= 16;
880 ppMode->maxAllowedY= 234; 880 ppMode->maxAllowedY= 234;
881 for(o=0; options[o]!=NULL; o++) 881 for(o=0; options[o]!=NULL; o++)
882 { 882 {
883 if( !strcmp(options[o],"fullyrange") 883 if( !strcmp(options[o],"fullyrange")
884 ||!strcmp(options[o],"f")) 884 ||!strcmp(options[o],"f"))
885 { 885 {
886 ppMode->minAllowedY= 0; 886 ppMode->minAllowedY= 0;
887 ppMode->maxAllowedY= 255; 887 ppMode->maxAllowedY= 255;
888 numOfUnknownOptions--; 888 numOfUnknownOptions--;
889 } 889 }
890 } 890 }
891 } 891 }
892 else if(filters[i].mask == TEMP_NOISE_FILTER) 892 else if(filters[i].mask == TEMP_NOISE_FILTER)
893 { 893 {
894 int o; 894 int o;
895 int numOfNoises=0; 895 int numOfNoises=0;
896 896
897 for(o=0; options[o]!=NULL; o++) 897 for(o=0; options[o]!=NULL; o++)
898 { 898 {
899 char *tail; 899 char *tail;
900 ppMode->maxTmpNoise[numOfNoises]= 900 ppMode->maxTmpNoise[numOfNoises]=
901 strtol(options[o], &tail, 0); 901 strtol(options[o], &tail, 0);
902 if(tail!=options[o]) 902 if(tail!=options[o])
903 { 903 {
904 numOfNoises++; 904 numOfNoises++;
905 numOfUnknownOptions--; 905 numOfUnknownOptions--;
906 if(numOfNoises >= 3) break; 906 if(numOfNoises >= 3) break;
907 } 907 }
908 } 908 }
909 } 909 }
910 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK 910 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
911 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) 911 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
912 { 912 {
913 int o; 913 int o;
914 914
915 for(o=0; options[o]!=NULL && o<2; o++) 915 for(o=0; options[o]!=NULL && o<2; o++)
916 { 916 {
917 char *tail; 917 char *tail;
918 int val= strtol(options[o], &tail, 0); 918 int val= strtol(options[o], &tail, 0);
919 if(tail==options[o]) break; 919 if(tail==options[o]) break;
920 920
921 numOfUnknownOptions--; 921 numOfUnknownOptions--;
922 if(o==0) ppMode->baseDcDiff= val; 922 if(o==0) ppMode->baseDcDiff= val;
923 else ppMode->flatnessThreshold= val; 923 else ppMode->flatnessThreshold= val;
924 } 924 }
925 } 925 }
926 else if(filters[i].mask == FORCE_QUANT) 926 else if(filters[i].mask == FORCE_QUANT)
927 { 927 {
928 int o; 928 int o;
929 ppMode->forcedQuant= 15; 929 ppMode->forcedQuant= 15;
930 930
931 for(o=0; options[o]!=NULL && o<1; o++) 931 for(o=0; options[o]!=NULL && o<1; o++)
932 { 932 {
933 char *tail; 933 char *tail;
934 int val= strtol(options[o], &tail, 0); 934 int val= strtol(options[o], &tail, 0);
935 if(tail==options[o]) break; 935 if(tail==options[o]) break;
936 936
937 numOfUnknownOptions--; 937 numOfUnknownOptions--;
938 ppMode->forcedQuant= val; 938 ppMode->forcedQuant= val;
939 } 939 }
940 } 940 }
941 } 941 }
942 } 942 }
943 if(!filterNameOk) ppMode->error++; 943 if(!filterNameOk) ppMode->error++;
944 ppMode->error += numOfUnknownOptions; 944 ppMode->error += numOfUnknownOptions;
945 } 945 }
946 946
947 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); 947 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
948 if(ppMode->error) 948 if(ppMode->error)
949 { 949 {
950 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); 950 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
951 free(ppMode); 951 free(ppMode);
952 return NULL; 952 return NULL;
953 } 953 }
954 return ppMode; 954 return ppMode;
955 } 955 }
956 956
957 void pp_free_mode(pp_mode_t *mode){ 957 void pp_free_mode(pp_mode_t *mode){
958 if(mode) free(mode); 958 if(mode) free(mode);
959 } 959 }
960 960
/**
 * Replaces *p with a freshly allocated, zero-filled, aligned buffer.
 *
 * The previous buffer (if any) is freed and its contents are NOT preserved.
 * If the allocation fails, *p is left NULL.
 *
 * @param p         address of the pointer to replace; *p may be NULL
 * @param alignment alignment passed to memalign()
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
    free(*p);
    *p= memalign(alignment, size);
    if(*p) memset(*p, 0, size); // do not write through a failed allocation
}
966 966
967 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){ 967 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
968 int mbWidth = (width+15)>>4; 968 int mbWidth = (width+15)>>4;
969 int mbHeight= (height+15)>>4; 969 int mbHeight= (height+15)>>4;
970 int i; 970 int i;
971 971
972 c->stride= stride; 972 c->stride= stride;
973 c->qpStride= qpStride; 973 c->qpStride= qpStride;
974 974
975 reallocAlign((void **)&c->tempDst, 8, stride*24); 975 reallocAlign((void **)&c->tempDst, 8, stride*24);
976 reallocAlign((void **)&c->tempSrc, 8, stride*24); 976 reallocAlign((void **)&c->tempSrc, 8, stride*24);
977 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); 977 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
978 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); 978 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
979 for(i=0; i<256; i++) 979 for(i=0; i<256; i++)
980 c->yHistogram[i]= width*height/64*15/256; 980 c->yHistogram[i]= width*height/64*15/256;
981 981
982 for(i=0; i<3; i++) 982 for(i=0; i<3; i++)
983 { 983 {
984 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end 984 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
985 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); 985 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
986 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size 986 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
987 } 987 }
988 988
989 reallocAlign((void **)&c->deintTemp, 8, 2*width+32); 989 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
990 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); 990 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
991 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); 991 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
992 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T)); 992 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
993 } 993 }
994 994
995 static void global_init(void){ 995 static void global_init(void){
996 int i; 996 int i;
997 memset(clip_table, 0, 256); 997 memset(clip_table, 0, 256);
998 for(i=256; i<512; i++) 998 for(i=256; i<512; i++)
999 clip_table[i]= i; 999 clip_table[i]= i;
1000 memset(clip_table+512, 0, 256); 1000 memset(clip_table+512, 0, 256);
1001 } 1001 }
1002 1002
1003 pp_context_t *pp_get_context(int width, int height, int cpuCaps){ 1003 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
1004 PPContext *c= memalign(32, sizeof(PPContext)); 1004 PPContext *c= memalign(32, sizeof(PPContext));
1005 int stride= (width+15)&(~15); //assumed / will realloc if needed 1005 int stride= (width+15)&(~15); //assumed / will realloc if needed
1006 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed 1006 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
1007 1007
1008 global_init(); 1008 global_init();
1009 1009
1010 memset(c, 0, sizeof(PPContext)); 1010 memset(c, 0, sizeof(PPContext));
1011 c->cpuCaps= cpuCaps; 1011 c->cpuCaps= cpuCaps;
1012 if(cpuCaps&PP_FORMAT){ 1012 if(cpuCaps&PP_FORMAT){
1013 c->hChromaSubSample= cpuCaps&0x3; 1013 c->hChromaSubSample= cpuCaps&0x3;
1014 c->vChromaSubSample= (cpuCaps>>4)&0x3; 1014 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1015 }else{ 1015 }else{
1016 c->hChromaSubSample= 1; 1016 c->hChromaSubSample= 1;
1017 c->vChromaSubSample= 1; 1017 c->vChromaSubSample= 1;
1018 } 1018 }
1019 1019
1020 reallocBuffers(c, width, height, stride, qpStride); 1020 reallocBuffers(c, width, height, stride, qpStride);
1021 1021
1022 c->frameNum=-1; 1022 c->frameNum=-1;
1023 1023
1024 return c; 1024 return c;
1025 } 1025 }
1026 1026
1027 void pp_free_context(void *vc){ 1027 void pp_free_context(void *vc){
1028 PPContext *c = (PPContext*)vc; 1028 PPContext *c = (PPContext*)vc;
1029 int i; 1029 int i;
1030 1030
1031 for(i=0; i<3; i++) free(c->tempBlured[i]); 1031 for(i=0; i<3; i++) free(c->tempBlured[i]);
1032 for(i=0; i<3; i++) free(c->tempBluredPast[i]); 1032 for(i=0; i<3; i++) free(c->tempBluredPast[i]);
1033 1033
1034 free(c->tempBlocks); 1034 free(c->tempBlocks);
1035 free(c->yHistogram); 1035 free(c->yHistogram);
1036 free(c->tempDst); 1036 free(c->tempDst);
1037 free(c->tempSrc); 1037 free(c->tempSrc);
1038 free(c->deintTemp); 1038 free(c->deintTemp);
1039 free(c->stdQPTable); 1039 free(c->stdQPTable);
1040 free(c->nonBQPTable); 1040 free(c->nonBQPTable);
1041 free(c->forcedQPTable); 1041 free(c->forcedQPTable);
1042 1042
1043 memset(c, 0, sizeof(PPContext)); 1043 memset(c, 0, sizeof(PPContext));
1044 1044
1045 free(c); 1045 free(c);
1046 } 1046 }
1047 1047
1048 void pp_postprocess(uint8_t * src[3], int srcStride[3], 1048 void pp_postprocess(uint8_t * src[3], int srcStride[3],
1049 uint8_t * dst[3], int dstStride[3], 1049 uint8_t * dst[3], int dstStride[3],
1050 int width, int height, 1050 int width, int height,
1051 QP_STORE_T *QP_store, int QPStride, 1051 QP_STORE_T *QP_store, int QPStride,
1052 pp_mode_t *vm, void *vc, int pict_type) 1052 pp_mode_t *vm, void *vc, int pict_type)
1053 { 1053 {
1054 int mbWidth = (width+15)>>4; 1054 int mbWidth = (width+15)>>4;
1055 int mbHeight= (height+15)>>4; 1055 int mbHeight= (height+15)>>4;
1056 PPMode *mode = (PPMode*)vm; 1056 PPMode *mode = (PPMode*)vm;
1057 PPContext *c = (PPContext*)vc; 1057 PPContext *c = (PPContext*)vc;
1058 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0])); 1058 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
1059 int absQPStride = ABS(QPStride); 1059 int absQPStride = ABS(QPStride);
1060 1060
1061 // c->stride and c->QPStride are always positive 1061 // c->stride and c->QPStride are always positive
1062 if(c->stride < minStride || c->qpStride < absQPStride) 1062 if(c->stride < minStride || c->qpStride < absQPStride)
1063 reallocBuffers(c, width, height, 1063 reallocBuffers(c, width, height,
1064 MAX(minStride, c->stride), 1064 MAX(minStride, c->stride),
1065 MAX(c->qpStride, absQPStride)); 1065 MAX(c->qpStride, absQPStride));
1066 1066
1067 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 1067 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1068 { 1068 {
1069 int i; 1069 int i;
1070 QP_store= c->forcedQPTable; 1070 QP_store= c->forcedQPTable;
1071 absQPStride = QPStride = 0; 1071 absQPStride = QPStride = 0;
1072 if(mode->lumMode & FORCE_QUANT) 1072 if(mode->lumMode & FORCE_QUANT)
1073 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; 1073 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1074 else 1074 else
1075 for(i=0; i<mbWidth; i++) QP_store[i]= 1; 1075 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1076 } 1076 }
1077 //printf("pict_type:%d\n", pict_type); 1077 //printf("pict_type:%d\n", pict_type);
1078 1078
1079 if(pict_type & PP_PICT_TYPE_QP2){ 1079 if(pict_type & PP_PICT_TYPE_QP2){
1080 int i; 1080 int i;
1081 const int count= mbHeight * absQPStride; 1081 const int count= mbHeight * absQPStride;
1082 for(i=0; i<(count>>2); i++){ 1082 for(i=0; i<(count>>2); i++){
1083 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; 1083 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1084 } 1084 }
1085 for(i<<=2; i<count; i++){ 1085 for(i<<=2; i<count; i++){
1086 c->stdQPTable[i] = QP_store[i]>>1; 1086 c->stdQPTable[i] = QP_store[i]>>1;
1087 } 1087 }
1088 QP_store= c->stdQPTable; 1088 QP_store= c->stdQPTable;
1089 QPStride= absQPStride; 1089 QPStride= absQPStride;
1090 } 1090 }
1091 1091
1092 if(0){ 1092 if(0){
1093 int x,y; 1093 int x,y;
1094 for(y=0; y<mbHeight; y++){ 1094 for(y=0; y<mbHeight; y++){
1095 for(x=0; x<mbWidth; x++){ 1095 for(x=0; x<mbWidth; x++){
1096 printf("%2d ", QP_store[x + y*QPStride]); 1096 printf("%2d ", QP_store[x + y*QPStride]);
1097 } 1097 }
1098 printf("\n"); 1098 printf("\n");
1099 } 1099 }
1100 printf("\n"); 1100 printf("\n");
1101 } 1101 }
1102 1102
1103 if((pict_type&7)!=3) 1103 if((pict_type&7)!=3)
1104 { 1104 {
1105 if (QPStride >= 0) { 1105 if (QPStride >= 0) {
1106 int i; 1106 int i;
1107 const int count= mbHeight * QPStride; 1107 const int count= mbHeight * QPStride;
1108 for(i=0; i<(count>>2); i++){ 1108 for(i=0; i<(count>>2); i++){
1109 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; 1109 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1110 } 1110 }
1111 for(i<<=2; i<count; i++){ 1111 for(i<<=2; i<count; i++){
1112 c->nonBQPTable[i] = QP_store[i] & 0x3F; 1112 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1113 } 1113 }
1114 } else { 1114 } else {
1115 int i,j; 1115 int i,j;
1116 for(i=0; i<mbHeight; i++) { 1116 for(i=0; i<mbHeight; i++) {
1117 for(j=0; j<absQPStride; j++) { 1117 for(j=0; j<absQPStride; j++) {
1118 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F; 1118 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1119 } 1119 }
1120 } 1120 }
1121 } 1121 }
1122 } 1122 }
1123 1123
1124 if(verbose>2) 1124 if(verbose>2)
1125 { 1125 {
1126 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode); 1126 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1127 } 1127 }
1128 1128
1129 postProcess(src[0], srcStride[0], dst[0], dstStride[0], 1129 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1130 width, height, QP_store, QPStride, 0, mode, c); 1130 width, height, QP_store, QPStride, 0, mode, c);
1131 1131
1132 width = (width )>>c->hChromaSubSample; 1132 width = (width )>>c->hChromaSubSample;
1133 height = (height)>>c->vChromaSubSample; 1133 height = (height)>>c->vChromaSubSample;
1134 1134
1135 if(mode->chromMode) 1135 if(mode->chromMode)
1136 { 1136 {
1137 postProcess(src[1], srcStride[1], dst[1], dstStride[1], 1137 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1138 width, height, QP_store, QPStride, 1, mode, c); 1138 width, height, QP_store, QPStride, 1, mode, c);
1139 postProcess(src[2], srcStride[2], dst[2], dstStride[2], 1139 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1140 width, height, QP_store, QPStride, 2, mode, c); 1140 width, height, QP_store, QPStride, 2, mode, c);
1141 } 1141 }
1142 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) 1142 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1143 { 1143 {
1144 linecpy(dst[1], src[1], height, srcStride[1]); 1144 linecpy(dst[1], src[1], height, srcStride[1]);
1145 linecpy(dst[2], src[2], height, srcStride[2]); 1145 linecpy(dst[2], src[2], height, srcStride[2]);
1146 } 1146 }
1147 else 1147 else
1148 { 1148 {
1149 int y; 1149 int y;
1150 for(y=0; y<height; y++) 1150 for(y=0; y<height; y++)
1151 { 1151 {
1152 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); 1152 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1153 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); 1153 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1154 } 1154 }
1155 } 1155 }
1156 } 1156 }
1157 1157