Mercurial > libavcodec.hg
comparison dsputil.c @ 1708:dea5b2946999 libavcodec
interlaced motion estimation
interlaced mpeg2 encoding
P & B frames
rate distorted interlaced mb decision
alternate scantable support
4mv encoding fixes (that's also why the regression tests change)
passing height to most dsp functions
interlaced mpeg4 encoding (no direct mode MBs yet)
various related cleanups
disabled old motion estimation algorithms (log, full, ...); they will either be fixed or removed
author | michael |
---|---|
date | Tue, 30 Dec 2003 16:07:57 +0000 |
parents | 1a2db2073848 |
children | a4a5e7521339 |
comparison
equal
deleted
inserted
replaced
1707:027545a2fdbe | 1708:dea5b2946999 |
---|---|
216 for(;i<w; i++){ | 216 for(;i<w; i++){ |
217 dst[i+0]= bswap_32(src[i+0]); | 217 dst[i+0]= bswap_32(src[i+0]); |
218 } | 218 } |
219 } | 219 } |
220 | 220 |
221 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) | 221 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
222 { | 222 { |
223 int s, i; | 223 int s, i; |
224 uint32_t *sq = squareTbl + 256; | 224 uint32_t *sq = squareTbl + 256; |
225 | 225 |
226 s = 0; | 226 s = 0; |
227 for (i = 0; i < 8; i++) { | 227 for (i = 0; i < h; i++) { |
228 s += sq[pix1[0] - pix2[0]]; | 228 s += sq[pix1[0] - pix2[0]]; |
229 s += sq[pix1[1] - pix2[1]]; | 229 s += sq[pix1[1] - pix2[1]]; |
230 s += sq[pix1[2] - pix2[2]]; | 230 s += sq[pix1[2] - pix2[2]]; |
231 s += sq[pix1[3] - pix2[3]]; | 231 s += sq[pix1[3] - pix2[3]]; |
232 s += sq[pix1[4] - pix2[4]]; | 232 s += sq[pix1[4] - pix2[4]]; |
237 pix2 += line_size; | 237 pix2 += line_size; |
238 } | 238 } |
239 return s; | 239 return s; |
240 } | 240 } |
241 | 241 |
242 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size) | 242 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
243 { | 243 { |
244 int s, i; | 244 int s, i; |
245 uint32_t *sq = squareTbl + 256; | 245 uint32_t *sq = squareTbl + 256; |
246 | 246 |
247 s = 0; | 247 s = 0; |
248 for (i = 0; i < 16; i++) { | 248 for (i = 0; i < h; i++) { |
249 s += sq[pix1[ 0] - pix2[ 0]]; | 249 s += sq[pix1[ 0] - pix2[ 0]]; |
250 s += sq[pix1[ 1] - pix2[ 1]]; | 250 s += sq[pix1[ 1] - pix2[ 1]]; |
251 s += sq[pix1[ 2] - pix2[ 2]]; | 251 s += sq[pix1[ 2] - pix2[ 2]]; |
252 s += sq[pix1[ 3] - pix2[ 3]]; | 252 s += sq[pix1[ 3] - pix2[ 3]]; |
253 s += sq[pix1[ 4] - pix2[ 4]]; | 253 s += sq[pix1[ 4] - pix2[ 4]]; |
2329 src[y*stride-2] = p0 - d2; | 2329 src[y*stride-2] = p0 - d2; |
2330 src[y*stride+1] = p3 + d2; | 2330 src[y*stride+1] = p3 + d2; |
2331 } | 2331 } |
2332 } | 2332 } |
2333 | 2333 |
2334 static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size) | 2334 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
2335 { | 2335 { |
2336 int s, i; | 2336 int s, i; |
2337 | 2337 |
2338 s = 0; | 2338 s = 0; |
2339 for(i=0;i<16;i++) { | 2339 for(i=0;i<h;i++) { |
2340 s += abs(pix1[0] - pix2[0]); | 2340 s += abs(pix1[0] - pix2[0]); |
2341 s += abs(pix1[1] - pix2[1]); | 2341 s += abs(pix1[1] - pix2[1]); |
2342 s += abs(pix1[2] - pix2[2]); | 2342 s += abs(pix1[2] - pix2[2]); |
2343 s += abs(pix1[3] - pix2[3]); | 2343 s += abs(pix1[3] - pix2[3]); |
2344 s += abs(pix1[4] - pix2[4]); | 2344 s += abs(pix1[4] - pix2[4]); |
2357 pix2 += line_size; | 2357 pix2 += line_size; |
2358 } | 2358 } |
2359 return s; | 2359 return s; |
2360 } | 2360 } |
2361 | 2361 |
2362 static int pix_abs16x16_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size) | 2362 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
2363 { | 2363 { |
2364 int s, i; | 2364 int s, i; |
2365 | 2365 |
2366 s = 0; | 2366 s = 0; |
2367 for(i=0;i<16;i++) { | 2367 for(i=0;i<h;i++) { |
2368 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); | 2368 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); |
2369 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); | 2369 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); |
2370 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); | 2370 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); |
2371 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); | 2371 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); |
2372 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); | 2372 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); |
2385 pix2 += line_size; | 2385 pix2 += line_size; |
2386 } | 2386 } |
2387 return s; | 2387 return s; |
2388 } | 2388 } |
2389 | 2389 |
2390 static int pix_abs16x16_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size) | 2390 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
2391 { | 2391 { |
2392 int s, i; | 2392 int s, i; |
2393 uint8_t *pix3 = pix2 + line_size; | 2393 uint8_t *pix3 = pix2 + line_size; |
2394 | 2394 |
2395 s = 0; | 2395 s = 0; |
2396 for(i=0;i<16;i++) { | 2396 for(i=0;i<h;i++) { |
2397 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); | 2397 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); |
2398 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); | 2398 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); |
2399 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); | 2399 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); |
2400 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); | 2400 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); |
2401 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); | 2401 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); |
2415 pix3 += line_size; | 2415 pix3 += line_size; |
2416 } | 2416 } |
2417 return s; | 2417 return s; |
2418 } | 2418 } |
2419 | 2419 |
2420 static int pix_abs16x16_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size) | 2420 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
2421 { | 2421 { |
2422 int s, i; | 2422 int s, i; |
2423 uint8_t *pix3 = pix2 + line_size; | 2423 uint8_t *pix3 = pix2 + line_size; |
2424 | 2424 |
2425 s = 0; | 2425 s = 0; |
2426 for(i=0;i<16;i++) { | 2426 for(i=0;i<h;i++) { |
2427 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); | 2427 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); |
2428 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); | 2428 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); |
2429 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); | 2429 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); |
2430 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); | 2430 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); |
2431 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); | 2431 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); |
2445 pix3 += line_size; | 2445 pix3 += line_size; |
2446 } | 2446 } |
2447 return s; | 2447 return s; |
2448 } | 2448 } |
2449 | 2449 |
2450 static inline int pix_abs8x8_c(uint8_t *pix1, uint8_t *pix2, int line_size) | 2450 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
2451 { | 2451 { |
2452 int s, i; | 2452 int s, i; |
2453 | 2453 |
2454 s = 0; | 2454 s = 0; |
2455 for(i=0;i<8;i++) { | 2455 for(i=0;i<h;i++) { |
2456 s += abs(pix1[0] - pix2[0]); | 2456 s += abs(pix1[0] - pix2[0]); |
2457 s += abs(pix1[1] - pix2[1]); | 2457 s += abs(pix1[1] - pix2[1]); |
2458 s += abs(pix1[2] - pix2[2]); | 2458 s += abs(pix1[2] - pix2[2]); |
2459 s += abs(pix1[3] - pix2[3]); | 2459 s += abs(pix1[3] - pix2[3]); |
2460 s += abs(pix1[4] - pix2[4]); | 2460 s += abs(pix1[4] - pix2[4]); |
2465 pix2 += line_size; | 2465 pix2 += line_size; |
2466 } | 2466 } |
2467 return s; | 2467 return s; |
2468 } | 2468 } |
2469 | 2469 |
2470 static int pix_abs8x8_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size) | 2470 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
2471 { | 2471 { |
2472 int s, i; | 2472 int s, i; |
2473 | 2473 |
2474 s = 0; | 2474 s = 0; |
2475 for(i=0;i<8;i++) { | 2475 for(i=0;i<h;i++) { |
2476 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); | 2476 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); |
2477 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); | 2477 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); |
2478 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); | 2478 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); |
2479 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); | 2479 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); |
2480 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); | 2480 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); |
2485 pix2 += line_size; | 2485 pix2 += line_size; |
2486 } | 2486 } |
2487 return s; | 2487 return s; |
2488 } | 2488 } |
2489 | 2489 |
2490 static int pix_abs8x8_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size) | 2490 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
2491 { | 2491 { |
2492 int s, i; | 2492 int s, i; |
2493 uint8_t *pix3 = pix2 + line_size; | 2493 uint8_t *pix3 = pix2 + line_size; |
2494 | 2494 |
2495 s = 0; | 2495 s = 0; |
2496 for(i=0;i<8;i++) { | 2496 for(i=0;i<h;i++) { |
2497 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); | 2497 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); |
2498 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); | 2498 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); |
2499 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); | 2499 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); |
2500 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); | 2500 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); |
2501 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); | 2501 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); |
2507 pix3 += line_size; | 2507 pix3 += line_size; |
2508 } | 2508 } |
2509 return s; | 2509 return s; |
2510 } | 2510 } |
2511 | 2511 |
2512 static int pix_abs8x8_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size) | 2512 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
2513 { | 2513 { |
2514 int s, i; | 2514 int s, i; |
2515 uint8_t *pix3 = pix2 + line_size; | 2515 uint8_t *pix3 = pix2 + line_size; |
2516 | 2516 |
2517 s = 0; | 2517 s = 0; |
2518 for(i=0;i<8;i++) { | 2518 for(i=0;i<h;i++) { |
2519 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); | 2519 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); |
2520 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); | 2520 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); |
2521 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); | 2521 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); |
2522 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); | 2522 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); |
2523 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); | 2523 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); |
2527 pix1 += line_size; | 2527 pix1 += line_size; |
2528 pix2 += line_size; | 2528 pix2 += line_size; |
2529 pix3 += line_size; | 2529 pix3 += line_size; |
2530 } | 2530 } |
2531 return s; | 2531 return s; |
2532 } | |
2533 | |
2534 static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride){ | |
2535 return pix_abs16x16_c(a,b,stride); | |
2536 } | |
2537 | |
2538 static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride){ | |
2539 return pix_abs8x8_c(a,b,stride); | |
2540 } | 2532 } |
2541 | 2533 |
2542 /** | 2534 /** |
2543 * permutes an 8x8 block. | 2535 * permutes an 8x8 block. |
2544 * @param block the block which will be permuted according to the given permutation vector | 2536 * @param block the block which will be permuted according to the given permutation vector |
2639 y= a-b;\ | 2631 y= a-b;\ |
2640 } | 2632 } |
2641 | 2633 |
2642 #define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y))) | 2634 #define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y))) |
2643 | 2635 |
2644 static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){ | 2636 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ |
2645 int i; | 2637 int i; |
2646 int temp[64]; | 2638 int temp[64]; |
2647 int sum=0; | 2639 int sum=0; |
2640 | |
2641 assert(h==8); | |
2648 | 2642 |
2649 for(i=0; i<8; i++){ | 2643 for(i=0; i<8; i++){ |
2650 //FIXME try pointer walks | 2644 //FIXME try pointer walks |
2651 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]); | 2645 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]); |
2652 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]); | 2646 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]); |
2733 } | 2727 } |
2734 | 2728 |
2735 return sum; | 2729 return sum; |
2736 } | 2730 } |
2737 | 2731 |
2738 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ | 2732 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
2739 MpegEncContext * const s= (MpegEncContext *)c; | 2733 MpegEncContext * const s= (MpegEncContext *)c; |
2740 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; | 2734 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; |
2741 DCTELEM * const temp= (DCTELEM*)aligned_temp; | 2735 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
2742 int sum=0, i; | 2736 int sum=0, i; |
2737 | |
2738 assert(h==8); | |
2743 | 2739 |
2744 s->dsp.diff_pixels(temp, src1, src2, stride); | 2740 s->dsp.diff_pixels(temp, src1, src2, stride); |
2745 s->dsp.fdct(temp); | 2741 s->dsp.fdct(temp); |
2746 | 2742 |
2747 for(i=0; i<64; i++) | 2743 for(i=0; i<64; i++) |
2750 return sum; | 2746 return sum; |
2751 } | 2747 } |
2752 | 2748 |
2753 void simple_idct(DCTELEM *block); //FIXME | 2749 void simple_idct(DCTELEM *block); //FIXME |
2754 | 2750 |
2755 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ | 2751 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
2756 MpegEncContext * const s= (MpegEncContext *)c; | 2752 MpegEncContext * const s= (MpegEncContext *)c; |
2757 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8]; | 2753 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8]; |
2758 DCTELEM * const temp= (DCTELEM*)aligned_temp; | 2754 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
2759 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; | 2755 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; |
2760 int sum=0, i; | 2756 int sum=0, i; |
2761 | 2757 |
2758 assert(h==8); | |
2762 s->mb_intra=0; | 2759 s->mb_intra=0; |
2763 | 2760 |
2764 s->dsp.diff_pixels(temp, src1, src2, stride); | 2761 s->dsp.diff_pixels(temp, src1, src2, stride); |
2765 | 2762 |
2766 memcpy(bak, temp, 64*sizeof(DCTELEM)); | 2763 memcpy(bak, temp, 64*sizeof(DCTELEM)); |
2773 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); | 2770 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); |
2774 | 2771 |
2775 return sum; | 2772 return sum; |
2776 } | 2773 } |
2777 | 2774 |
2778 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ | 2775 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
2779 MpegEncContext * const s= (MpegEncContext *)c; | 2776 MpegEncContext * const s= (MpegEncContext *)c; |
2780 const uint8_t *scantable= s->intra_scantable.permutated; | 2777 const uint8_t *scantable= s->intra_scantable.permutated; |
2781 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; | 2778 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; |
2782 uint64_t __align8 aligned_bak[stride]; | 2779 uint64_t __align8 aligned_bak[stride]; |
2783 DCTELEM * const temp= (DCTELEM*)aligned_temp; | 2780 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
2785 int i, last, run, bits, level, distoration, start_i; | 2782 int i, last, run, bits, level, distoration, start_i; |
2786 const int esc_length= s->ac_esc_length; | 2783 const int esc_length= s->ac_esc_length; |
2787 uint8_t * length; | 2784 uint8_t * length; |
2788 uint8_t * last_length; | 2785 uint8_t * last_length; |
2789 | 2786 |
2787 assert(h==8); | |
2788 | |
2790 for(i=0; i<8; i++){ | 2789 for(i=0; i<8; i++){ |
2791 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; | 2790 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; |
2792 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; | 2791 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; |
2793 } | 2792 } |
2794 | 2793 |
2845 s->dct_unquantize_inter(s, temp, 0, s->qscale); | 2844 s->dct_unquantize_inter(s, temp, 0, s->qscale); |
2846 } | 2845 } |
2847 | 2846 |
2848 s->dsp.idct_add(bak, stride, temp); | 2847 s->dsp.idct_add(bak, stride, temp); |
2849 | 2848 |
2850 distoration= s->dsp.sse[1](NULL, bak, src1, stride); | 2849 distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8); |
2851 | 2850 |
2852 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7); | 2851 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7); |
2853 } | 2852 } |
2854 | 2853 |
2855 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ | 2854 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
2856 MpegEncContext * const s= (MpegEncContext *)c; | 2855 MpegEncContext * const s= (MpegEncContext *)c; |
2857 const uint8_t *scantable= s->intra_scantable.permutated; | 2856 const uint8_t *scantable= s->intra_scantable.permutated; |
2858 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; | 2857 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; |
2859 DCTELEM * const temp= (DCTELEM*)aligned_temp; | 2858 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
2860 int i, last, run, bits, level, start_i; | 2859 int i, last, run, bits, level, start_i; |
2861 const int esc_length= s->ac_esc_length; | 2860 const int esc_length= s->ac_esc_length; |
2862 uint8_t * length; | 2861 uint8_t * length; |
2863 uint8_t * last_length; | 2862 uint8_t * last_length; |
2863 | |
2864 assert(h==8); | |
2864 | 2865 |
2865 s->dsp.diff_pixels(temp, src1, src2, stride); | 2866 s->dsp.diff_pixels(temp, src1, src2, stride); |
2866 | 2867 |
2867 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); | 2868 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
2868 | 2869 |
2908 } | 2909 } |
2909 | 2910 |
2910 return bits; | 2911 return bits; |
2911 } | 2912 } |
2912 | 2913 |
2913 | 2914 WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) |
2914 WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c) | 2915 WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) |
2915 WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c) | 2916 WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) |
2916 WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c) | 2917 WARPER8_16_SQ(rd8x8_c, rd16_c) |
2917 WARPER88_1616(rd8x8_c, rd16x16_c) | 2918 WARPER8_16_SQ(bit8x8_c, bit16_c) |
2918 WARPER88_1616(bit8x8_c, bit16x16_c) | |
2919 | 2919 |
2920 /* XXX: those functions should be suppressed ASAP when all IDCTs are | 2920 /* XXX: those functions should be suppressed ASAP when all IDCTs are |
2921 converted */ | 2921 converted */ |
2922 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | 2922 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
2923 { | 2923 { |
2987 c->gmc1 = gmc1_c; | 2987 c->gmc1 = gmc1_c; |
2988 c->gmc = gmc_c; | 2988 c->gmc = gmc_c; |
2989 c->clear_blocks = clear_blocks_c; | 2989 c->clear_blocks = clear_blocks_c; |
2990 c->pix_sum = pix_sum_c; | 2990 c->pix_sum = pix_sum_c; |
2991 c->pix_norm1 = pix_norm1_c; | 2991 c->pix_norm1 = pix_norm1_c; |
2992 c->sse[0]= sse16_c; | |
2993 c->sse[1]= sse8_c; | |
2994 | 2992 |
2995 /* TODO [0] 16 [1] 8 */ | 2993 /* TODO [0] 16 [1] 8 */ |
2996 c->pix_abs16x16 = pix_abs16x16_c; | 2994 c->pix_abs[0][0] = pix_abs16_c; |
2997 c->pix_abs16x16_x2 = pix_abs16x16_x2_c; | 2995 c->pix_abs[0][1] = pix_abs16_x2_c; |
2998 c->pix_abs16x16_y2 = pix_abs16x16_y2_c; | 2996 c->pix_abs[0][2] = pix_abs16_y2_c; |
2999 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c; | 2997 c->pix_abs[0][3] = pix_abs16_xy2_c; |
3000 c->pix_abs8x8 = pix_abs8x8_c; | 2998 c->pix_abs[1][0] = pix_abs8_c; |
3001 c->pix_abs8x8_x2 = pix_abs8x8_x2_c; | 2999 c->pix_abs[1][1] = pix_abs8_x2_c; |
3002 c->pix_abs8x8_y2 = pix_abs8x8_y2_c; | 3000 c->pix_abs[1][2] = pix_abs8_y2_c; |
3003 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c; | 3001 c->pix_abs[1][3] = pix_abs8_xy2_c; |
3004 | 3002 |
3005 #define dspfunc(PFX, IDX, NUM) \ | 3003 #define dspfunc(PFX, IDX, NUM) \ |
3006 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ | 3004 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ |
3007 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \ | 3005 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \ |
3008 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \ | 3006 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \ |
3095 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; | 3093 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; |
3096 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; | 3094 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; |
3097 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; | 3095 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; |
3098 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; | 3096 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; |
3099 | 3097 |
3100 c->hadamard8_diff[0]= hadamard8_diff16_c; | |
3101 c->hadamard8_diff[1]= hadamard8_diff_c; | |
3102 c->hadamard8_abs = hadamard8_abs_c; | 3098 c->hadamard8_abs = hadamard8_abs_c; |
3103 | 3099 |
3104 c->dct_sad[0]= dct_sad16x16_c; | 3100 #define SET_CMP_FUNC(name) \ |
3105 c->dct_sad[1]= dct_sad8x8_c; | 3101 c->name[0]= name ## 16_c;\ |
3106 | 3102 c->name[1]= name ## 8x8_c; |
3107 c->sad[0]= sad16x16_c; | 3103 |
3108 c->sad[1]= sad8x8_c; | 3104 SET_CMP_FUNC(hadamard8_diff) |
3109 | 3105 SET_CMP_FUNC(dct_sad) |
3110 c->quant_psnr[0]= quant_psnr16x16_c; | 3106 c->sad[0]= pix_abs16_c; |
3111 c->quant_psnr[1]= quant_psnr8x8_c; | 3107 c->sad[1]= pix_abs8_c; |
3112 | 3108 c->sse[0]= sse16_c; |
3113 c->rd[0]= rd16x16_c; | 3109 c->sse[1]= sse8_c; |
3114 c->rd[1]= rd8x8_c; | 3110 SET_CMP_FUNC(quant_psnr) |
3115 | 3111 SET_CMP_FUNC(rd) |
3116 c->bit[0]= bit16x16_c; | 3112 SET_CMP_FUNC(bit) |
3117 c->bit[1]= bit8x8_c; | |
3118 | 3113 |
3119 c->add_bytes= add_bytes_c; | 3114 c->add_bytes= add_bytes_c; |
3120 c->diff_bytes= diff_bytes_c; | 3115 c->diff_bytes= diff_bytes_c; |
3121 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; | 3116 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; |
3122 c->bswap_buf= bswap_buf; | 3117 c->bswap_buf= bswap_buf; |