comparison mpegvideo.c @ 13:174ef88f619a libavcodec

Use block[] in the structure so that it is aligned on 8 bytes for MMX optimizations; dct_unquantize is now always a function pointer; added a specialized dct_unquantize_h263.
author glantau
date Mon, 30 Jul 2001 23:26:26 +0000
parents 1b4461b5a7fb
children b69fe46fd708
comparison
equal deleted inserted replaced
12:4d50c7d89e0f 13:174ef88f619a
22 #include <string.h> 22 #include <string.h>
23 #include "avcodec.h" 23 #include "avcodec.h"
24 #include "dsputil.h" 24 #include "dsputil.h"
25 #include "mpegvideo.h" 25 #include "mpegvideo.h"
26 26
27 #include "../config.h" 27 static void encode_picture(MpegEncContext *s, int picture_number);
28 28 static void rate_control_init(MpegEncContext *s);
29 #ifdef ARCH_X86 29 static int rate_estimate_qscale(MpegEncContext *s);
30 #include "i386/mpegvideo.c" 30 static void dct_unquantize_mpeg1_c(MpegEncContext *s,
31 #endif 31 DCTELEM *block, int n, int qscale);
32 #ifndef DCT_UNQUANTIZE 32 static void dct_unquantize_h263_c(MpegEncContext *s,
33 #define DCT_UNQUANTIZE(a,b,c,d) dct_unquantize(a,b,c,d) 33 DCTELEM *block, int n, int qscale);
34 #endif 34 static int dct_quantize(MpegEncContext *s, DCTELEM *block, int n, int qscale);
35 35 static int dct_quantize_mmx(MpegEncContext *s,
36 DCTELEM *block, int n,
37 int qscale);
36 #define EDGE_WIDTH 16 38 #define EDGE_WIDTH 16
37 39
38 /* enable all paranoid tests for rounding, overflows, etc... */ 40 /* enable all paranoid tests for rounding, overflows, etc... */
39 //#define PARANOID 41 //#define PARANOID
40 42
57 59
58 static UINT8 h263_chroma_roundtab[16] = { 60 static UINT8 h263_chroma_roundtab[16] = {
59 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 61 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
60 }; 62 };
61 63
62 static void encode_picture(MpegEncContext *s, int picture_number);
63 static void rate_control_init(MpegEncContext *s);
64 static int rate_estimate_qscale(MpegEncContext *s);
65
66 /* default motion estimation */ 64 /* default motion estimation */
67 int motion_estimation_method = ME_LOG; 65 int motion_estimation_method = ME_LOG;
68 66
69 /* XXX: should use variable shift ? */ 67 /* XXX: should use variable shift ? */
70 #define QMAT_SHIFT_MMX 19 68 #define QMAT_SHIFT_MMX 19
96 int MPV_common_init(MpegEncContext *s) 94 int MPV_common_init(MpegEncContext *s)
97 { 95 {
98 int c_size, i; 96 int c_size, i;
99 UINT8 *pict; 97 UINT8 *pict;
100 98
101 #if defined ( HAVE_MMX ) && defined ( BIN_PORTABILITY ) 99 if (s->out_format == FMT_H263)
102 MPV_common_init_mmx(); 100 s->dct_unquantize = dct_unquantize_h263_c;
101 else
102 s->dct_unquantize = dct_unquantize_mpeg1_c;
103
104 #ifdef HAVE_MMX
105 MPV_common_init_mmx(s);
103 #endif 106 #endif
104 s->mb_width = (s->width + 15) / 16; 107 s->mb_width = (s->width + 15) / 16;
105 s->mb_height = (s->height + 15) / 16; 108 s->mb_height = (s->height + 15) / 16;
106 s->linesize = s->mb_width * 16 + 2 * EDGE_WIDTH; 109 s->linesize = s->mb_width * 16 + 2 * EDGE_WIDTH;
107 110
356 memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */ 359 memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
357 } 360 }
358 } 361 }
359 362
360 /* generic function for encode/decode called before a frame is coded/decoded */ 363 /* generic function for encode/decode called before a frame is coded/decoded */
361 #ifndef ARCH_X86
362 void MPV_frame_start(MpegEncContext *s) 364 void MPV_frame_start(MpegEncContext *s)
363 { 365 {
364 int i; 366 int i;
365 UINT8 *tmp; 367 UINT8 *tmp;
366 368
376 s->next_picture[i] = tmp; 378 s->next_picture[i] = tmp;
377 s->current_picture[i] = tmp; 379 s->current_picture[i] = tmp;
378 } 380 }
379 } 381 }
380 } 382 }
381 #endif 383
382 /* generic function for encode/decode called after a frame has been coded/decoded */ 384 /* generic function for encode/decode called after a frame has been coded/decoded */
383 void MPV_frame_end(MpegEncContext *s) 385 void MPV_frame_end(MpegEncContext *s)
384 { 386 {
385 /* draw edge for correct motion prediction if outside */ 387 /* draw edge for correct motion prediction if outside */
386 if (s->pict_type != B_TYPE) { 388 if (s->pict_type != B_TYPE) {
458 else if (a > amax) 460 else if (a > amax)
459 return amax; 461 return amax;
460 else 462 else
461 return a; 463 return a;
462 } 464 }
463
464 static int dct_quantize(MpegEncContext *s, DCTELEM *block, int n, int qscale);
465 static int dct_quantize_mmx(MpegEncContext *s,
466 DCTELEM *block, int n,
467 int qscale);
468 static void dct_unquantize(MpegEncContext *s, DCTELEM *block, int n, int qscale);
469 465
470 /* apply one mpeg motion vector to the three components */ 466 /* apply one mpeg motion vector to the three components */
471 static inline void mpeg_motion(MpegEncContext *s, 467 static inline void mpeg_motion(MpegEncContext *s,
472 UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, 468 UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
473 int dest_offset, 469 int dest_offset,
631 /* put block[] to dest[] */ 627 /* put block[] to dest[] */
632 static inline void put_dct(MpegEncContext *s, 628 static inline void put_dct(MpegEncContext *s,
633 DCTELEM *block, int i, UINT8 *dest, int line_size) 629 DCTELEM *block, int i, UINT8 *dest, int line_size)
634 { 630 {
635 if (!s->mpeg2) 631 if (!s->mpeg2)
636 DCT_UNQUANTIZE(s, block, i, s->qscale); 632 s->dct_unquantize(s, block, i, s->qscale);
637 j_rev_dct (block); 633 j_rev_dct (block);
638 put_pixels_clamped(block, dest, line_size); 634 put_pixels_clamped(block, dest, line_size);
639 } 635 }
640 636
641 /* add block[] to dest[] */ 637 /* add block[] to dest[] */
642 static inline void add_dct(MpegEncContext *s, 638 static inline void add_dct(MpegEncContext *s,
643 DCTELEM *block, int i, UINT8 *dest, int line_size) 639 DCTELEM *block, int i, UINT8 *dest, int line_size)
644 { 640 {
645 if (s->block_last_index[i] >= 0) { 641 if (s->block_last_index[i] >= 0) {
646 if (!s->mpeg2) 642 if (!s->mpeg2)
647 DCT_UNQUANTIZE(s, block, i, s->qscale); 643 s->dct_unquantize(s, block, i, s->qscale);
648 j_rev_dct (block); 644 j_rev_dct (block);
649 add_pixels_clamped(block, dest, line_size); 645 add_pixels_clamped(block, dest, line_size);
650 } 646 }
651 } 647 }
652 648
738 if (!s->intra_only) { 734 if (!s->intra_only) {
739 UINT8 *dest_y, *dest_cb, *dest_cr; 735 UINT8 *dest_y, *dest_cb, *dest_cr;
740 UINT8 *mbskip_ptr; 736 UINT8 *mbskip_ptr;
741 737
742 /* avoid copy if macroblock skipped in last frame too */ 738 /* avoid copy if macroblock skipped in last frame too */
743 if (!s->encoding) { 739 if (!s->encoding && s->pict_type != B_TYPE) {
744 mbskip_ptr = &s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]; 740 mbskip_ptr = &s->mbskip_table[s->mb_y * s->mb_width + s->mb_x];
745 if (s->mb_skiped) { 741 if (s->mb_skiped) {
746 s->mb_skiped = 0; 742 s->mb_skiped = 0;
747 /* if previous was skipped too, then nothing to do ! */ 743 /* if previous was skipped too, then nothing to do ! */
748 if (*mbskip_ptr != 0) 744 if (*mbskip_ptr != 0)
808 804
809 static void encode_picture(MpegEncContext *s, int picture_number) 805 static void encode_picture(MpegEncContext *s, int picture_number)
810 { 806 {
811 int mb_x, mb_y, wrap; 807 int mb_x, mb_y, wrap;
812 UINT8 *ptr; 808 UINT8 *ptr;
813 DCTELEM block[6][64];
814 int i, motion_x, motion_y; 809 int i, motion_x, motion_y;
815 810
816 s->picture_number = picture_number; 811 s->picture_number = picture_number;
817 if (!s->fixed_qscale) 812 if (!s->fixed_qscale)
818 s->qscale = rate_estimate_qscale(s); 813 s->qscale = rate_estimate_qscale(s);
877 } 872 }
878 873
879 /* get the pixels */ 874 /* get the pixels */
880 wrap = s->linesize; 875 wrap = s->linesize;
881 ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16; 876 ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16;
882 get_pixels(block[0], ptr, wrap); 877 get_pixels(s->block[0], ptr, wrap);
883 get_pixels(block[1], ptr + 8, wrap); 878 get_pixels(s->block[1], ptr + 8, wrap);
884 get_pixels(block[2], ptr + 8 * wrap, wrap); 879 get_pixels(s->block[2], ptr + 8 * wrap, wrap);
885 get_pixels(block[3], ptr + 8 * wrap + 8, wrap); 880 get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap);
886 wrap = s->linesize >> 1; 881 wrap = s->linesize >> 1;
887 ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8; 882 ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8;
888 get_pixels(block[4], ptr, wrap); 883 get_pixels(s->block[4], ptr, wrap);
889 884
890 wrap = s->linesize >> 1; 885 wrap = s->linesize >> 1;
891 ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8; 886 ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8;
892 get_pixels(block[5], ptr, wrap); 887 get_pixels(s->block[5], ptr, wrap);
893 888
894 /* subtract previous frame if non intra */ 889 /* subtract previous frame if non intra */
895 if (!s->mb_intra) { 890 if (!s->mb_intra) {
896 int dxy, offset, mx, my; 891 int dxy, offset, mx, my;
897 892
898 dxy = ((motion_y & 1) << 1) | (motion_x & 1); 893 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
899 ptr = s->last_picture[0] + 894 ptr = s->last_picture[0] +
900 ((mb_y * 16 + (motion_y >> 1)) * s->linesize) + 895 ((mb_y * 16 + (motion_y >> 1)) * s->linesize) +
901 (mb_x * 16 + (motion_x >> 1)); 896 (mb_x * 16 + (motion_x >> 1));
902 897
903 sub_pixels_2(block[0], ptr, s->linesize, dxy); 898 sub_pixels_2(s->block[0], ptr, s->linesize, dxy);
904 sub_pixels_2(block[1], ptr + 8, s->linesize, dxy); 899 sub_pixels_2(s->block[1], ptr + 8, s->linesize, dxy);
905 sub_pixels_2(block[2], ptr + s->linesize * 8, s->linesize, dxy); 900 sub_pixels_2(s->block[2], ptr + s->linesize * 8, s->linesize, dxy);
906 sub_pixels_2(block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy); 901 sub_pixels_2(s->block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy);
907 902
908 if (s->out_format == FMT_H263) { 903 if (s->out_format == FMT_H263) {
909 /* special rounding for h263 */ 904 /* special rounding for h263 */
910 dxy = 0; 905 dxy = 0;
911 if ((motion_x & 3) != 0) 906 if ((motion_x & 3) != 0)
921 mx >>= 1; 916 mx >>= 1;
922 my >>= 1; 917 my >>= 1;
923 } 918 }
924 offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx); 919 offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx);
925 ptr = s->last_picture[1] + offset; 920 ptr = s->last_picture[1] + offset;
926 sub_pixels_2(block[4], ptr, s->linesize >> 1, dxy); 921 sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy);
927 ptr = s->last_picture[2] + offset; 922 ptr = s->last_picture[2] + offset;
928 sub_pixels_2(block[5], ptr, s->linesize >> 1, dxy); 923 sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy);
929 } 924 }
930 emms_c(); 925 emms_c();
931 926
932 /* DCT & quantize */ 927 /* DCT & quantize */
933 if (s->h263_msmpeg4) { 928 if (s->h263_msmpeg4) {
941 } 936 }
942 937
943 for(i=0;i<6;i++) { 938 for(i=0;i<6;i++) {
944 int last_index; 939 int last_index;
945 if (av_fdct == jpeg_fdct_ifast) 940 if (av_fdct == jpeg_fdct_ifast)
946 last_index = dct_quantize(s, block[i], i, s->qscale); 941 last_index = dct_quantize(s, s->block[i], i, s->qscale);
947 else 942 else
948 last_index = dct_quantize_mmx(s, block[i], i, s->qscale); 943 last_index = dct_quantize_mmx(s, s->block[i], i, s->qscale);
949 s->block_last_index[i] = last_index; 944 s->block_last_index[i] = last_index;
950 } 945 }
951 946
952 /* huffman encode */ 947 /* huffman encode */
953 switch(s->out_format) { 948 switch(s->out_format) {
954 case FMT_MPEG1: 949 case FMT_MPEG1:
955 mpeg1_encode_mb(s, block, motion_x, motion_y); 950 mpeg1_encode_mb(s, s->block, motion_x, motion_y);
956 break; 951 break;
957 case FMT_H263: 952 case FMT_H263:
958 if (s->h263_msmpeg4) 953 if (s->h263_msmpeg4)
959 msmpeg4_encode_mb(s, block, motion_x, motion_y); 954 msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
960 else 955 else
961 h263_encode_mb(s, block, motion_x, motion_y); 956 h263_encode_mb(s, s->block, motion_x, motion_y);
962 break; 957 break;
963 case FMT_MJPEG: 958 case FMT_MJPEG:
964 mjpeg_encode_mb(s, block); 959 mjpeg_encode_mb(s, s->block);
965 break; 960 break;
966 } 961 }
967 962
968 /* decompress blocks so that we keep the state of the decoder */ 963 /* decompress blocks so that we keep the state of the decoder */
969 s->mv[0][0][0] = motion_x; 964 s->mv[0][0][0] = motion_x;
970 s->mv[0][0][1] = motion_y; 965 s->mv[0][0][1] = motion_y;
971 966
972 MPV_decode_mb(s, block); 967 MPV_decode_mb(s, s->block);
973 } 968 }
974 } 969 }
975 } 970 }
976 971
977 static int dct_quantize(MpegEncContext *s, 972 static int dct_quantize(MpegEncContext *s,
1119 } 1114 }
1120 } 1115 }
1121 return last_non_zero; 1116 return last_non_zero;
1122 } 1117 }
1123 1118
1124 #ifndef HAVE_DCT_UNQUANTIZE 1119 static void dct_unquantize_mpeg1_c(MpegEncContext *s,
1125 static void dct_unquantize(MpegEncContext *s, 1120 DCTELEM *block, int n, int qscale)
1126 DCTELEM *block, int n, int qscale)
1127 { 1121 {
1128 int i, level; 1122 int i, level;
1129 const UINT16 *quant_matrix; 1123 const UINT16 *quant_matrix;
1130 1124
1131 if (s->mb_intra) { 1125 if (s->mb_intra) {
1132 if (n < 4) 1126 if (n < 4)
1133 block[0] = block[0] * s->y_dc_scale; 1127 block[0] = block[0] * s->y_dc_scale;
1134 else 1128 else
1135 block[0] = block[0] * s->c_dc_scale; 1129 block[0] = block[0] * s->c_dc_scale;
1136 if (s->out_format == FMT_H263) {
1137 i = 1;
1138 goto unquant_even;
1139 }
1140 /* XXX: only mpeg1 */ 1130 /* XXX: only mpeg1 */
1141 quant_matrix = s->intra_matrix; 1131 quant_matrix = s->intra_matrix;
1142 for(i=1;i<64;i++) { 1132 for(i=1;i<64;i++) {
1143 level = block[i]; 1133 level = block[i];
1144 if (level) { 1134 if (level) {
1158 block[i] = level; 1148 block[i] = level;
1159 } 1149 }
1160 } 1150 }
1161 } else { 1151 } else {
1162 i = 0; 1152 i = 0;
1163 unquant_even:
1164 quant_matrix = s->non_intra_matrix; 1153 quant_matrix = s->non_intra_matrix;
1165 for(;i<64;i++) { 1154 for(;i<64;i++) {
1166 level = block[i]; 1155 level = block[i];
1167 if (level) { 1156 if (level) {
1168 if (level < 0) { 1157 if (level < 0) {
1183 block[i] = level; 1172 block[i] = level;
1184 } 1173 }
1185 } 1174 }
1186 } 1175 }
1187 } 1176 }
1188 #endif 1177
1178 static void dct_unquantize_h263_c(MpegEncContext *s,
1179 DCTELEM *block, int n, int qscale)
1180 {
1181 int i, level, qmul, qadd;
1182
1183 if (s->mb_intra) {
1184 if (n < 4)
1185 block[0] = block[0] * s->y_dc_scale;
1186 else
1187 block[0] = block[0] * s->c_dc_scale;
1188 i = 1;
1189 } else {
1190 i = 0;
1191 }
1192
1193 qmul = s->qscale << 1;
1194 qadd = (s->qscale - 1) | 1;
1195
1196 for(;i<64;i++) {
1197 level = block[i];
1198 if (level) {
1199 if (level < 0) {
1200 level = level * qmul - qadd;
1201 } else {
1202 level = level * qmul + qadd;
1203 }
1204 #ifdef PARANOID
1205 if (level < -2048 || level > 2047)
1206 fprintf(stderr, "unquant error %d %d\n", i, level);
1207 #endif
1208 block[i] = level;
1209 }
1210 }
1211 }
1189 1212
1190 /* rate control */ 1213 /* rate control */
1191 1214
1192 /* an I frame is I_FRAME_SIZE_RATIO bigger than a P frame */ 1215 /* an I frame is I_FRAME_SIZE_RATIO bigger than a P frame */
1193 #define I_FRAME_SIZE_RATIO 3.0 1216 #define I_FRAME_SIZE_RATIO 3.0