Mercurial > libavcodec.hg
comparison dsputil.c @ 255:db20b987c32d libavcodec
divx5-gmc support
q-pel mc support
neither is totally bugfree yet though :(
author | michaelni |
---|---|
date | Sat, 09 Mar 2002 13:01:16 +0000 |
parents | ddb1a0e94cf4 |
children | 4c1cec7c3c7c |
comparison
equal
deleted
inserted
replaced
254:b4fed8b24e3a | 255:db20b987c32d |
---|---|
25 | 25 |
26 void (*ff_idct)(DCTELEM *block); | 26 void (*ff_idct)(DCTELEM *block); |
27 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); | 27 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); |
28 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | 28 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
29 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | 29 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
30 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); | |
30 | 31 |
31 op_pixels_abs_func pix_abs16x16; | 32 op_pixels_abs_func pix_abs16x16; |
32 op_pixels_abs_func pix_abs16x16_x2; | 33 op_pixels_abs_func pix_abs16x16_x2; |
33 op_pixels_abs_func pix_abs16x16_y2; | 34 op_pixels_abs_func pix_abs16x16_y2; |
34 op_pixels_abs_func pix_abs16x16_xy2; | 35 op_pixels_abs_func pix_abs16x16_xy2; |
342 #undef avg2 | 343 #undef avg2 |
343 #undef avg4 | 344 #undef avg4 |
344 #define avg2(a,b) ((a+b+1)>>1) | 345 #define avg2(a,b) ((a+b+1)>>1) |
345 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) | 346 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) |
346 | 347 |
348 static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder) | |
349 { | |
350 const int A=(16-x16)*(16-y16); | |
351 const int B=( x16)*(16-y16); | |
352 const int C=(16-x16)*( y16); | |
353 const int D=( x16)*( y16); | |
354 int i; | |
355 rounder= 128 - rounder; | |
356 | |
357 for(i=0; i<h; i++) | |
358 { | |
359 dst[0]= (A*src[0] + B*src[1] + C*src[srcStride+0] + D*src[srcStride+1] + rounder)>>8; | |
360 dst[1]= (A*src[1] + B*src[2] + C*src[srcStride+1] + D*src[srcStride+2] + rounder)>>8; | |
361 dst[2]= (A*src[2] + B*src[3] + C*src[srcStride+2] + D*src[srcStride+3] + rounder)>>8; | |
362 dst[3]= (A*src[3] + B*src[4] + C*src[srcStride+3] + D*src[srcStride+4] + rounder)>>8; | |
363 dst[4]= (A*src[4] + B*src[5] + C*src[srcStride+4] + D*src[srcStride+5] + rounder)>>8; | |
364 dst[5]= (A*src[5] + B*src[6] + C*src[srcStride+5] + D*src[srcStride+6] + rounder)>>8; | |
365 dst[6]= (A*src[6] + B*src[7] + C*src[srcStride+6] + D*src[srcStride+7] + rounder)>>8; | |
366 dst[7]= (A*src[7] + B*src[8] + C*src[srcStride+7] + D*src[srcStride+8] + rounder)>>8; | |
367 dst+= srcStride; | |
368 src+= srcStride; | |
369 } | |
370 } | |
371 | |
372 static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r) | |
373 { | |
374 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
375 int i; | |
376 for(i=0; i<h; i++) | |
377 { | |
378 dst[0]= cm[(((src[0]+src[1])*160 - (src[0]+src[2])*48 + (src[1]+src[3])*24 - (src[2]+src[4])*8 + r)>>8)]; | |
379 dst[1]= cm[(((src[1]+src[2])*160 - (src[0]+src[3])*48 + (src[0]+src[4])*24 - (src[1]+src[5])*8 + r)>>8)]; | |
380 dst[2]= cm[(((src[2]+src[3])*160 - (src[1]+src[4])*48 + (src[0]+src[5])*24 - (src[0]+src[6])*8 + r)>>8)]; | |
381 dst[3]= cm[(((src[3]+src[4])*160 - (src[2]+src[5])*48 + (src[1]+src[6])*24 - (src[0]+src[7])*8 + r)>>8)]; | |
382 dst[4]= cm[(((src[4]+src[5])*160 - (src[3]+src[6])*48 + (src[2]+src[7])*24 - (src[1]+src[8])*8 + r)>>8)]; | |
383 dst[5]= cm[(((src[5]+src[6])*160 - (src[4]+src[7])*48 + (src[3]+src[8])*24 - (src[2]+src[8])*8 + r)>>8)]; | |
384 dst[6]= cm[(((src[6]+src[7])*160 - (src[5]+src[8])*48 + (src[4]+src[8])*24 - (src[3]+src[7])*8 + r)>>8)]; | |
385 dst[7]= cm[(((src[7]+src[8])*160 - (src[6]+src[8])*48 + (src[5]+src[7])*24 - (src[4]+src[6])*8 + r)>>8)]; | |
386 dst+=dstStride; | |
387 src+=srcStride; | |
388 } | |
389 } | |
390 | |
391 static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r) | |
392 { | |
393 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
394 int i; | |
395 for(i=0; i<w; i++) | |
396 { | |
397 const int src0= src[0*srcStride]; | |
398 const int src1= src[1*srcStride]; | |
399 const int src2= src[2*srcStride]; | |
400 const int src3= src[3*srcStride]; | |
401 const int src4= src[4*srcStride]; | |
402 const int src5= src[5*srcStride]; | |
403 const int src6= src[6*srcStride]; | |
404 const int src7= src[7*srcStride]; | |
405 const int src8= src[8*srcStride]; | |
406 dst[0*dstStride]= cm[(((src0+src1)*160 - (src0+src2)*48 + (src1+src3)*24 - (src2+src4)*8 + r)>>8)]; | |
407 dst[1*dstStride]= cm[(((src1+src2)*160 - (src0+src3)*48 + (src0+src4)*24 - (src1+src5)*8 + r)>>8)]; | |
408 dst[2*dstStride]= cm[(((src2+src3)*160 - (src1+src4)*48 + (src0+src5)*24 - (src0+src6)*8 + r)>>8)]; | |
409 dst[3*dstStride]= cm[(((src3+src4)*160 - (src2+src5)*48 + (src1+src6)*24 - (src0+src7)*8 + r)>>8)]; | |
410 dst[4*dstStride]= cm[(((src4+src5)*160 - (src3+src6)*48 + (src2+src7)*24 - (src1+src8)*8 + r)>>8)]; | |
411 dst[5*dstStride]= cm[(((src5+src6)*160 - (src4+src7)*48 + (src3+src8)*24 - (src2+src8)*8 + r)>>8)]; | |
412 dst[6*dstStride]= cm[(((src6+src7)*160 - (src5+src8)*48 + (src4+src8)*24 - (src3+src7)*8 + r)>>8)]; | |
413 dst[7*dstStride]= cm[(((src7+src8)*160 - (src6+src8)*48 + (src5+src7)*24 - (src4+src6)*8 + r)>>8)]; | |
414 dst++; | |
415 src++; | |
416 } | |
417 } | |
418 | |
419 static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride) | |
420 { | |
421 int i; | |
422 for(i=0; i<8; i++) | |
423 { | |
424 dst[0]= src[0]; | |
425 dst[1]= src[1]; | |
426 dst[2]= src[2]; | |
427 dst[3]= src[3]; | |
428 dst[4]= src[4]; | |
429 dst[5]= src[5]; | |
430 dst[6]= src[6]; | |
431 dst[7]= src[7]; | |
432 dst+=dstStride; | |
433 src+=srcStride; | |
434 } | |
435 } | |
436 | |
437 static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r) | |
438 { | |
439 int i; | |
440 for(i=0; i<8; i++) | |
441 { | |
442 dst[0]= (src1[0] + src2[0] + r)>>1; | |
443 dst[1]= (src1[1] + src2[1] + r)>>1; | |
444 dst[2]= (src1[2] + src2[2] + r)>>1; | |
445 dst[3]= (src1[3] + src2[3] + r)>>1; | |
446 dst[4]= (src1[4] + src2[4] + r)>>1; | |
447 dst[5]= (src1[5] + src2[5] + r)>>1; | |
448 dst[6]= (src1[6] + src2[6] + r)>>1; | |
449 dst[7]= (src1[7] + src2[7] + r)>>1; | |
450 dst+=dstStride; | |
451 src1+=srcStride; | |
452 src2+=8; | |
453 } | |
454 } | |
455 | |
456 static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r) | |
457 { | |
458 int i; | |
459 for(i=0; i<8; i++) | |
460 { | |
461 dst[0]= (src1[0] + src2[0] + src3[0] + src4[0] + r)>>2; | |
462 dst[1]= (src1[1] + src2[1] + src3[1] + src4[1] + r)>>2; | |
463 dst[2]= (src1[2] + src2[2] + src3[2] + src4[2] + r)>>2; | |
464 dst[3]= (src1[3] + src2[3] + src3[3] + src4[3] + r)>>2; | |
465 dst[4]= (src1[4] + src2[4] + src3[4] + src4[4] + r)>>2; | |
466 dst[5]= (src1[5] + src2[5] + src3[5] + src4[5] + r)>>2; | |
467 dst[6]= (src1[6] + src2[6] + src3[6] + src4[6] + r)>>2; | |
468 dst[7]= (src1[7] + src2[7] + src3[7] + src4[7] + r)>>2; | |
469 dst+=dstStride; | |
470 src1+=srcStride; | |
471 src2+=8; | |
472 src3+=9; | |
473 src4+=8; | |
474 } | |
475 } | |
476 | |
477 #define QPEL_MC(r, name) \ | |
478 static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
479 {\ | |
480 put_block(dst, src, dstStride, srcStride);\ | |
481 }\ | |
482 \ | |
483 static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
484 {\ | |
485 UINT8 half[64];\ | |
486 qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\ | |
487 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ | |
488 }\ | |
489 \ | |
490 static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
491 {\ | |
492 qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\ | |
493 }\ | |
494 \ | |
495 static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
496 {\ | |
497 UINT8 half[64];\ | |
498 qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\ | |
499 avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\ | |
500 }\ | |
501 \ | |
502 static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
503 {\ | |
504 UINT8 half[64];\ | |
505 qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\ | |
506 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ | |
507 }\ | |
508 \ | |
509 static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
510 {\ | |
511 qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\ | |
512 }\ | |
513 \ | |
514 static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
515 {\ | |
516 UINT8 half[64];\ | |
517 qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\ | |
518 avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\ | |
519 }\ | |
520 static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
521 {\ | |
522 UINT8 halfH[72];\ | |
523 UINT8 halfV[72];\ | |
524 UINT8 halfHV[64];\ | |
525 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | |
526 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\ | |
527 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | |
528 avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ | |
529 }\ | |
530 static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
531 {\ | |
532 UINT8 halfH[72];\ | |
533 UINT8 halfV[72];\ | |
534 UINT8 halfHV[64];\ | |
535 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | |
536 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\ | |
537 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | |
538 avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ | |
539 }\ | |
540 static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
541 {\ | |
542 UINT8 halfH[72];\ | |
543 UINT8 halfV[72];\ | |
544 UINT8 halfHV[64];\ | |
545 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | |
546 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\ | |
547 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | |
548 avg4_block(dst, src+srcStride, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ | |
549 }\ | |
550 static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
551 {\ | |
552 UINT8 halfH[72];\ | |
553 UINT8 halfV[72];\ | |
554 UINT8 halfHV[64];\ | |
555 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | |
556 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\ | |
557 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | |
558 avg4_block(dst, src+srcStride+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ | |
559 }\ | |
560 static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
561 {\ | |
562 UINT8 halfH[72];\ | |
563 UINT8 halfHV[64];\ | |
564 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | |
565 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | |
566 avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\ | |
567 }\ | |
568 static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
569 {\ | |
570 UINT8 halfH[72];\ | |
571 UINT8 halfHV[64];\ | |
572 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | |
573 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | |
574 avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\ | |
575 }\ | |
576 static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
577 {\ | |
578 UINT8 halfH[72];\ | |
579 UINT8 halfV[72];\ | |
580 UINT8 halfHV[64];\ | |
581 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | |
582 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\ | |
583 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | |
584 avg2_block(dst, halfV, halfHV, dstStride, 9, 1-r);\ | |
585 }\ | |
586 static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
587 {\ | |
588 UINT8 halfH[72];\ | |
589 UINT8 halfV[72];\ | |
590 UINT8 halfHV[64];\ | |
591 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | |
592 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\ | |
593 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | |
594 avg2_block(dst, halfV+1, halfHV, dstStride, 9, 1-r);\ | |
595 }\ | |
596 static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
597 {\ | |
598 UINT8 halfH[72];\ | |
599 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | |
600 qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 128-r);\ | |
601 }\ | |
602 qpel_mc_func qpel_mc ## name ## _tab[16]={ \ | |
603 qpel_mc00_c ## name, \ | |
604 qpel_mc10_c ## name, \ | |
605 qpel_mc20_c ## name, \ | |
606 qpel_mc30_c ## name, \ | |
607 qpel_mc01_c ## name, \ | |
608 qpel_mc11_c ## name, \ | |
609 qpel_mc21_c ## name, \ | |
610 qpel_mc31_c ## name, \ | |
611 qpel_mc02_c ## name, \ | |
612 qpel_mc12_c ## name, \ | |
613 qpel_mc22_c ## name, \ | |
614 qpel_mc32_c ## name, \ | |
615 qpel_mc03_c ## name, \ | |
616 qpel_mc13_c ## name, \ | |
617 qpel_mc23_c ## name, \ | |
618 qpel_mc33_c ## name, \ | |
619 }; | |
620 | |
621 QPEL_MC(0, _rnd) | |
622 QPEL_MC(1, _no_rnd) | |
623 | |
347 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) | 624 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) |
348 { | 625 { |
349 int s, i; | 626 int s, i; |
350 | 627 |
351 s = 0; | 628 s = 0; |
519 ff_idct = j_rev_dct; | 796 ff_idct = j_rev_dct; |
520 #endif | 797 #endif |
521 get_pixels = get_pixels_c; | 798 get_pixels = get_pixels_c; |
522 put_pixels_clamped = put_pixels_clamped_c; | 799 put_pixels_clamped = put_pixels_clamped_c; |
523 add_pixels_clamped = add_pixels_clamped_c; | 800 add_pixels_clamped = add_pixels_clamped_c; |
801 gmc1= gmc1_c; | |
524 | 802 |
525 pix_abs16x16 = pix_abs16x16_c; | 803 pix_abs16x16 = pix_abs16x16_c; |
526 pix_abs16x16_x2 = pix_abs16x16_x2_c; | 804 pix_abs16x16_x2 = pix_abs16x16_x2_c; |
527 pix_abs16x16_y2 = pix_abs16x16_y2_c; | 805 pix_abs16x16_y2 = pix_abs16x16_y2_c; |
528 pix_abs16x16_xy2 = pix_abs16x16_xy2_c; | 806 pix_abs16x16_xy2 = pix_abs16x16_xy2_c; |