comparison h264.c @ 2633:72e6ffa1f3a5 libavcodec

MMX for H.264 deblocking filter
author lorenm
date Mon, 25 Apr 2005 01:01:41 +0000
parents 202cd69d8d2e
children ef44d24680d1
comparison
equal deleted inserted replaced
2632:67171616ead6 2633:72e6ffa1f3a5
5622 int i, d; 5622 int i, d;
5623 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); 5623 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
5624 const int alpha = alpha_table[index_a]; 5624 const int alpha = alpha_table[index_a];
5625 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; 5625 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
5626 5626
5627 for( i = 0; i < 4; i++ ) { 5627 if( bS[0] < 4 ) {
5628 if( bS[i] == 0 ) { 5628 int tc[4];
5629 pix += 4 * stride; 5629 for(i=0; i<4; i++)
5630 continue; 5630 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
5631 } 5631 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5632 5632 } else {
5633 if( bS[i] < 4 ) { 5633 /* 16px edge length, because bS=4 is triggered by being at
5634 const int tc0 = tc0_table[index_a][bS[i] - 1]; 5634 * the edge of an intra MB, so all 4 bS are the same */
5635 /* 4px edge length */ 5635 for( d = 0; d < 16; d++ ) {
5636 for( d = 0; d < 4; d++ ) {
5637 const int p0 = pix[-1];
5638 const int p1 = pix[-2];
5639 const int p2 = pix[-3];
5640 const int q0 = pix[0];
5641 const int q1 = pix[1];
5642 const int q2 = pix[2];
5643
5644 if( ABS( p0 - q0 ) < alpha &&
5645 ABS( p1 - p0 ) < beta &&
5646 ABS( q1 - q0 ) < beta ) {
5647 int tc = tc0;
5648 int i_delta;
5649
5650 if( ABS( p2 - p0 ) < beta ) {
5651 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5652 tc++;
5653 }
5654 if( ABS( q2 - q0 ) < beta ) {
5655 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5656 tc++;
5657 }
5658
5659 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5660 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
5661 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
5662 tprintf("filter_mb_edgev i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], pix[-3], p1, p0, q0, q1, pix[2], pix[-2], pix[-1], pix[0], pix[1]);
5663 }
5664 pix += stride;
5665 }
5666 }else{
5667 /* 4px edge length */
5668 for( d = 0; d < 4; d++ ) {
5669 const int p0 = pix[-1]; 5636 const int p0 = pix[-1];
5670 const int p1 = pix[-2]; 5637 const int p1 = pix[-2];
5671 const int p2 = pix[-3]; 5638 const int p2 = pix[-3];
5672 5639
5673 const int q0 = pix[0]; 5640 const int q0 = pix[0];
5708 } 5675 }
5709 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]); 5676 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
5710 } 5677 }
5711 pix += stride; 5678 pix += stride;
5712 } 5679 }
5713 }
5714 } 5680 }
5715 } 5681 }
5716 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { 5682 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
5717 int i, d; 5683 int i, d;
5718 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); 5684 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
5719 const int alpha = alpha_table[index_a]; 5685 const int alpha = alpha_table[index_a];
5720 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; 5686 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
5721 5687
5722 for( i = 0; i < 4; i++ ) { 5688 if( bS[0] < 4 ) {
5723 if( bS[i] == 0 ) { 5689 int tc[4];
5724 pix += 2 * stride; 5690 for(i=0; i<4; i++)
5725 continue; 5691 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
5726 } 5692 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5727 5693 } else {
5728 if( bS[i] < 4 ) { 5694 /* 8px edge length, see filter_mb_edgev */
5729 const int tc = tc0_table[index_a][bS[i] - 1] + 1; 5695 for( d = 0; d < 8; d++ ){
5730 /* 2px edge length (because we use same bS than the one for luma) */
5731 for( d = 0; d < 2; d++ ){
5732 const int p0 = pix[-1]; 5696 const int p0 = pix[-1];
5733 const int p1 = pix[-2]; 5697 const int p1 = pix[-2];
5734 const int q0 = pix[0]; 5698 const int q0 = pix[0];
5735 const int q1 = pix[1]; 5699 const int q1 = pix[1];
5736 5700
5737 if( ABS( p0 - q0 ) < alpha && 5701 if( ABS( p0 - q0 ) < alpha &&
5738 ABS( p1 - p0 ) < beta && 5702 ABS( p1 - p0 ) < beta &&
5739 ABS( q1 - q0 ) < beta ) { 5703 ABS( q1 - q0 ) < beta ) {
5740 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5741
5742 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
5743 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
5744 tprintf("filter_mb_edgecv i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
5745 }
5746 pix += stride;
5747 }
5748 }else{
5749 /* 2px edge length (because we use same bS than the one for luma) */
5750 for( d = 0; d < 2; d++ ){
5751 const int p0 = pix[-1];
5752 const int p1 = pix[-2];
5753 const int q0 = pix[0];
5754 const int q1 = pix[1];
5755
5756 if( ABS( p0 - q0 ) < alpha &&
5757 ABS( p1 - p0 ) < beta &&
5758 ABS( q1 - q0 ) < beta ) {
5759 5704
5760 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ 5705 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
5761 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ 5706 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
5762 tprintf("filter_mb_edgecv i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); 5707 tprintf("filter_mb_edgecv i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
5763 } 5708 }
5764 pix += stride; 5709 pix += stride;
5765 } 5710 }
5766 }
5767 } 5711 }
5768 } 5712 }
5769 5713
5770 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { 5714 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
5771 int i; 5715 int i;
5926 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); 5870 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
5927 const int alpha = alpha_table[index_a]; 5871 const int alpha = alpha_table[index_a];
5928 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; 5872 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
5929 const int pix_next = stride; 5873 const int pix_next = stride;
5930 5874
5931 for( i = 0; i < 4; i++ ) { 5875 if( bS[0] < 4 ) {
5932 if( bS[i] == 0 ) { 5876 int tc[4];
5933 pix += 4; 5877 for(i=0; i<4; i++)
5934 continue; 5878 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
5935 } 5879 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
5936 5880 } else {
5937 if( bS[i] < 4 ) { 5881 /* 16px edge length, see filter_mb_edgev */
5938 const int tc0 = tc0_table[index_a][bS[i] - 1]; 5882 for( d = 0; d < 16; d++ ) {
5939 /* 4px edge length */
5940 for( d = 0; d < 4; d++ ) {
5941 const int p0 = pix[-1*pix_next];
5942 const int p1 = pix[-2*pix_next];
5943 const int p2 = pix[-3*pix_next];
5944 const int q0 = pix[0];
5945 const int q1 = pix[1*pix_next];
5946 const int q2 = pix[2*pix_next];
5947
5948 if( ABS( p0 - q0 ) < alpha &&
5949 ABS( p1 - p0 ) < beta &&
5950 ABS( q1 - q0 ) < beta ) {
5951
5952 int tc = tc0;
5953 int i_delta;
5954
5955 if( ABS( p2 - p0 ) < beta ) {
5956 pix[-2*pix_next] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5957 tc++;
5958 }
5959 if( ABS( q2 - q0 ) < beta ) {
5960 pix[pix_next] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5961 tc++;
5962 }
5963
5964 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5965 pix[-pix_next] = clip_uint8( p0 + i_delta ); /* p0' */
5966 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
5967 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
5968 }
5969 pix++;
5970 }
5971 }else{
5972 /* 4px edge length */
5973 for( d = 0; d < 4; d++ ) {
5974 const int p0 = pix[-1*pix_next]; 5883 const int p0 = pix[-1*pix_next];
5975 const int p1 = pix[-2*pix_next]; 5884 const int p1 = pix[-2*pix_next];
5976 const int p2 = pix[-3*pix_next]; 5885 const int p2 = pix[-3*pix_next];
5977 const int q0 = pix[0]; 5886 const int q0 = pix[0];
5978 const int q1 = pix[1*pix_next]; 5887 const int q1 = pix[1*pix_next];
6011 } 5920 }
6012 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]); 5921 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6013 } 5922 }
6014 pix++; 5923 pix++;
6015 } 5924 }
6016 }
6017 } 5925 }
6018 } 5926 }
6019 5927
6020 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { 5928 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6021 int i, d; 5929 int i, d;
6022 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); 5930 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6023 const int alpha = alpha_table[index_a]; 5931 const int alpha = alpha_table[index_a];
6024 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; 5932 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6025 const int pix_next = stride; 5933 const int pix_next = stride;
6026 5934
6027 for( i = 0; i < 4; i++ ) 5935 if( bS[0] < 4 ) {
6028 { 5936 int tc[4];
6029 if( bS[i] == 0 ) { 5937 for(i=0; i<4; i++)
6030 pix += 2; 5938 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6031 continue; 5939 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6032 } 5940 } else {
6033 5941 /* 8px edge length, see filter_mb_edgev */
6034 if( bS[i] < 4 ) { 5942 for( d = 0; d < 8; d++ ) {
6035 int tc = tc0_table[index_a][bS[i] - 1] + 1;
6036 /* 2px edge length (see deblocking_filter_edgecv) */
6037 for( d = 0; d < 2; d++ ) {
6038 const int p0 = pix[-1*pix_next]; 5943 const int p0 = pix[-1*pix_next];
6039 const int p1 = pix[-2*pix_next]; 5944 const int p1 = pix[-2*pix_next];
6040 const int q0 = pix[0]; 5945 const int q0 = pix[0];
6041 const int q1 = pix[1*pix_next]; 5946 const int q1 = pix[1*pix_next];
6042 5947
6043 if( ABS( p0 - q0 ) < alpha && 5948 if( ABS( p0 - q0 ) < alpha &&
6044 ABS( p1 - p0 ) < beta && 5949 ABS( p1 - p0 ) < beta &&
6045 ABS( q1 - q0 ) < beta ) { 5950 ABS( q1 - q0 ) < beta ) {
6046 5951
6047 int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6048
6049 pix[-pix_next] = clip_uint8( p0 + i_delta ); /* p0' */
6050 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6051 tprintf("filter_mb_edgech i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], pix[-3*pix_next], p1, p0, q0, q1, pix[2*pix_next], pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6052 }
6053 pix++;
6054 }
6055 }else{
6056 /* 2px edge length (see deblocking_filter_edgecv) */
6057 for( d = 0; d < 2; d++ ) {
6058 const int p0 = pix[-1*pix_next];
6059 const int p1 = pix[-2*pix_next];
6060 const int q0 = pix[0];
6061 const int q1 = pix[1*pix_next];
6062
6063 if( ABS( p0 - q0 ) < alpha &&
6064 ABS( p1 - p0 ) < beta &&
6065 ABS( q1 - q0 ) < beta ) {
6066
6067 pix[-pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ 5952 pix[-pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6068 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ 5953 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6069 tprintf("filter_mb_edgech i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], pix[-3*pix_next], p1, p0, q0, q1, pix[2*pix_next], pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]); 5954 tprintf("filter_mb_edgech i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], pix[-3*pix_next], p1, p0, q0, q1, pix[2*pix_next], pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6070 } 5955 }
6071 pix++; 5956 pix++;
6072 } 5957 }
6073 }
6074 } 5958 }
6075 } 5959 }
6076 5960
6077 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { 5961 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6078 MpegEncContext * const s = &h->s; 5962 MpegEncContext * const s = &h->s;