comparison mpegvideo.c @ 1092:f59c3f66363b libavcodec

MpegEncContext.(i)dct_* -> DspContext.(i)dct_* bitexact cleanup
author michaelni
date Mon, 03 Mar 2003 14:54:00 +0000
parents 7e79a58954b1
children 0ed00edd4d74
comparison
equal deleted inserted replaced
1091:03df246fb06b 1092:f59c3f66363b
22 #include <ctype.h> 22 #include <ctype.h>
23 #include <limits.h> 23 #include <limits.h>
24 #include "avcodec.h" 24 #include "avcodec.h"
25 #include "dsputil.h" 25 #include "dsputil.h"
26 #include "mpegvideo.h" 26 #include "mpegvideo.h"
27 #include "simple_idct.h"
28 27
29 #ifdef USE_FASTMEMCPY 28 #ifdef USE_FASTMEMCPY
30 #include "fastmemcpy.h" 29 #include "fastmemcpy.h"
31 #endif 30 #endif
32 31
70 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, 69 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
71 8867 , 12299, 11585, 10426, 8867, 6967, 4799, 2446, 70 8867 , 12299, 11585, 10426, 8867, 6967, 4799, 2446,
72 4520 , 6270, 5906, 5315, 4520, 3552, 2446, 1247 71 4520 , 6270, 5906, 5315, 4520, 3552, 2446, 1247
73 }; 72 };
74 73
75 /* Input permutation for the simple_idct_mmx */
76 static const uint8_t simple_mmx_permutation[64]={
77 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
78 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
79 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
80 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
81 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
82 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
83 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
84 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
85 };
86
87 static const uint8_t h263_chroma_roundtab[16] = { 74 static const uint8_t h263_chroma_roundtab[16] = {
88 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 75 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
89 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 76 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
90 }; 77 };
91 78
100 { 87 {
101 int qscale; 88 int qscale;
102 89
103 for(qscale=qmin; qscale<=qmax; qscale++){ 90 for(qscale=qmin; qscale<=qmax; qscale++){
104 int i; 91 int i;
105 if (s->fdct == ff_jpeg_fdct_islow) { 92 if (s->dsp.fdct == ff_jpeg_fdct_islow) {
106 for(i=0;i<64;i++) { 93 for(i=0;i<64;i++) {
107 const int j= s->idct_permutation[i]; 94 const int j= s->dsp.idct_permutation[i];
108 /* 16 <= qscale * quant_matrix[i] <= 7905 */ 95 /* 16 <= qscale * quant_matrix[i] <= 7905 */
109 /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ 96 /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
110 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ 97 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
111 /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ 98 /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
112 99
113 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 100 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
114 (qscale * quant_matrix[j])); 101 (qscale * quant_matrix[j]));
115 } 102 }
116 } else if (s->fdct == fdct_ifast) { 103 } else if (s->dsp.fdct == fdct_ifast) {
117 for(i=0;i<64;i++) { 104 for(i=0;i<64;i++) {
118 const int j= s->idct_permutation[i]; 105 const int j= s->dsp.idct_permutation[i];
119 /* 16 <= qscale * quant_matrix[i] <= 7905 */ 106 /* 16 <= qscale * quant_matrix[i] <= 7905 */
120 /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ 107 /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
121 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ 108 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
122 /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ 109 /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
123 110
124 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 111 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
125 (aanscales[i] * qscale * quant_matrix[j])); 112 (aanscales[i] * qscale * quant_matrix[j]));
126 } 113 }
127 } else { 114 } else {
128 for(i=0;i<64;i++) { 115 for(i=0;i<64;i++) {
129 const int j= s->idct_permutation[i]; 116 const int j= s->dsp.idct_permutation[i];
130 /* We can safely suppose that 16 <= quant_matrix[i] <= 255 117 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
131 So 16 <= qscale * quant_matrix[i] <= 7905 118 So 16 <= qscale * quant_matrix[i] <= 7905
132 so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 119 so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
133 so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67 120 so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67
134 */ 121 */
161 st->scantable= src_scantable; 148 st->scantable= src_scantable;
162 149
163 for(i=0; i<64; i++){ 150 for(i=0; i<64; i++){
164 int j; 151 int j;
165 j = src_scantable[i]; 152 j = src_scantable[i];
166 st->permutated[i] = s->idct_permutation[j]; 153 st->permutated[i] = s->dsp.idct_permutation[j];
167 #ifdef ARCH_POWERPC 154 #ifdef ARCH_POWERPC
168 st->inverse[j] = i; 155 st->inverse[j] = i;
169 #endif 156 #endif
170 } 157 }
171 158
176 if(j>end) end=j; 163 if(j>end) end=j;
177 st->raster_end[i]= end; 164 st->raster_end[i]= end;
178 } 165 }
179 } 166 }
180 167
181 /* XXX: those functions should be suppressed ASAP when all IDCTs are
182 converted */
183 // *FIXME* this is ugly hack using local static
184 static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
185 static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
186 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
187 {
188 j_rev_dct (block);
189 ff_put_pixels_clamped(block, dest, line_size);
190 }
191 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
192 {
193 j_rev_dct (block);
194 ff_add_pixels_clamped(block, dest, line_size);
195 }
196
197 /* init common dct for both encoder and decoder */ 168 /* init common dct for both encoder and decoder */
198 int DCT_common_init(MpegEncContext *s) 169 int DCT_common_init(MpegEncContext *s)
199 { 170 {
200 int i;
201
202 ff_put_pixels_clamped = s->dsp.put_pixels_clamped;
203 ff_add_pixels_clamped = s->dsp.add_pixels_clamped;
204
205 s->dct_unquantize_h263 = dct_unquantize_h263_c; 171 s->dct_unquantize_h263 = dct_unquantize_h263_c;
206 s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c; 172 s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
207 s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c; 173 s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
174
208 #ifdef CONFIG_ENCODERS 175 #ifdef CONFIG_ENCODERS
209 s->dct_quantize= dct_quantize_c; 176 s->dct_quantize= dct_quantize_c;
210 177 #endif
211 if(s->avctx->dct_algo==FF_DCT_FASTINT)
212 s->fdct = fdct_ifast;
213 else
214 s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
215 #endif //CONFIG_ENCODERS
216
217 if(s->avctx->idct_algo==FF_IDCT_INT){
218 s->idct_put= ff_jref_idct_put;
219 s->idct_add= ff_jref_idct_add;
220 s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
221 }else{ //accurate/default
222 s->idct_put= simple_idct_put;
223 s->idct_add= simple_idct_add;
224 s->idct_permutation_type= FF_NO_IDCT_PERM;
225 }
226 178
227 #ifdef HAVE_MMX 179 #ifdef HAVE_MMX
228 MPV_common_init_mmx(s); 180 MPV_common_init_mmx(s);
229 #endif 181 #endif
230 #ifdef ARCH_ALPHA 182 #ifdef ARCH_ALPHA
250 s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_* 202 s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
251 } 203 }
252 204
253 #endif //CONFIG_ENCODERS 205 #endif //CONFIG_ENCODERS
254 206
255 switch(s->idct_permutation_type){
256 case FF_NO_IDCT_PERM:
257 for(i=0; i<64; i++)
258 s->idct_permutation[i]= i;
259 break;
260 case FF_LIBMPEG2_IDCT_PERM:
261 for(i=0; i<64; i++)
262 s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
263 break;
264 case FF_SIMPLE_IDCT_PERM:
265 for(i=0; i<64; i++)
266 s->idct_permutation[i]= simple_mmx_permutation[i];
267 break;
268 case FF_TRANSPOSE_IDCT_PERM:
269 for(i=0; i<64; i++)
270 s->idct_permutation[i]= ((i&7)<<3) | (i>>3);
271 break;
272 default:
273 fprintf(stderr, "Internal error, IDCT permutation not set\n");
274 return -1;
275 }
276
277
278 /* load & permutate scantables 207 /* load & permutate scantables
279 note: only wmv uses different ones 208 note: only wmv uses different ones
280 */ 209 */
281 ff_init_scantable(s, &s->inter_scantable , ff_zigzag_direct); 210 ff_init_scantable(s, &s->inter_scantable , ff_zigzag_direct);
282 ff_init_scantable(s, &s->intra_scantable , ff_zigzag_direct); 211 ff_init_scantable(s, &s->intra_scantable , ff_zigzag_direct);
382 /* init common structure for both encoder and decoder */ 311 /* init common structure for both encoder and decoder */
383 int MPV_common_init(MpegEncContext *s) 312 int MPV_common_init(MpegEncContext *s)
384 { 313 {
385 int y_size, c_size, yc_size, i; 314 int y_size, c_size, yc_size, i;
386 315
387 dsputil_init(&s->dsp, s->avctx->dsp_mask); 316 dsputil_init(&s->dsp, s->avctx);
388 DCT_common_init(s); 317 DCT_common_init(s);
389 318
390 s->flags= s->avctx->flags; 319 s->flags= s->avctx->flags;
391 320
392 s->mb_width = (s->width + 15) / 16; 321 s->mb_width = (s->width + 15) / 16;
766 ff_mpeg1_encode_init(s); 695 ff_mpeg1_encode_init(s);
767 #endif 696 #endif
768 697
769 /* init default q matrix */ 698 /* init default q matrix */
770 for(i=0;i<64;i++) { 699 for(i=0;i<64;i++) {
771 int j= s->idct_permutation[i]; 700 int j= s->dsp.idct_permutation[i];
772 #ifdef CONFIG_RISKY 701 #ifdef CONFIG_RISKY
773 if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){ 702 if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
774 s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i]; 703 s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775 s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i]; 704 s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776 }else if(s->out_format == FMT_H263){ 705 }else if(s->out_format == FMT_H263){
1936 /* put block[] to dest[] */ 1865 /* put block[] to dest[] */
1937 static inline void put_dct(MpegEncContext *s, 1866 static inline void put_dct(MpegEncContext *s,
1938 DCTELEM *block, int i, uint8_t *dest, int line_size) 1867 DCTELEM *block, int i, uint8_t *dest, int line_size)
1939 { 1868 {
1940 s->dct_unquantize(s, block, i, s->qscale); 1869 s->dct_unquantize(s, block, i, s->qscale);
1941 s->idct_put (dest, line_size, block); 1870 s->dsp.idct_put (dest, line_size, block);
1942 } 1871 }
1943 1872
1944 /* add block[] to dest[] */ 1873 /* add block[] to dest[] */
1945 static inline void add_dct(MpegEncContext *s, 1874 static inline void add_dct(MpegEncContext *s,
1946 DCTELEM *block, int i, uint8_t *dest, int line_size) 1875 DCTELEM *block, int i, uint8_t *dest, int line_size)
1947 { 1876 {
1948 if (s->block_last_index[i] >= 0) { 1877 if (s->block_last_index[i] >= 0) {
1949 s->idct_add (dest, line_size, block); 1878 s->dsp.idct_add (dest, line_size, block);
1950 } 1879 }
1951 } 1880 }
1952 1881
1953 static inline void add_dequant_dct(MpegEncContext *s, 1882 static inline void add_dequant_dct(MpegEncContext *s,
1954 DCTELEM *block, int i, uint8_t *dest, int line_size) 1883 DCTELEM *block, int i, uint8_t *dest, int line_size)
1955 { 1884 {
1956 if (s->block_last_index[i] >= 0) { 1885 if (s->block_last_index[i] >= 0) {
1957 s->dct_unquantize(s, block, i, s->qscale); 1886 s->dct_unquantize(s, block, i, s->qscale);
1958 1887
1959 s->idct_add (dest, line_size, block); 1888 s->dsp.idct_add (dest, line_size, block);
1960 } 1889 }
1961 } 1890 }
1962 1891
1963 /** 1892 /**
1964 * cleans dc, ac, coded_block for the current non intra MB 1893 * cleans dc, ac, coded_block for the current non intra MB
2191 if(!(s->flags&CODEC_FLAG_GRAY)){ 2120 if(!(s->flags&CODEC_FLAG_GRAY)){
2192 put_dct(s, block[4], 4, dest_cb, s->uvlinesize); 2121 put_dct(s, block[4], 4, dest_cb, s->uvlinesize);
2193 put_dct(s, block[5], 5, dest_cr, s->uvlinesize); 2122 put_dct(s, block[5], 5, dest_cr, s->uvlinesize);
2194 } 2123 }
2195 }else{ 2124 }else{
2196 s->idct_put(dest_y , dct_linesize, block[0]); 2125 s->dsp.idct_put(dest_y , dct_linesize, block[0]);
2197 s->idct_put(dest_y + 8, dct_linesize, block[1]); 2126 s->dsp.idct_put(dest_y + 8, dct_linesize, block[1]);
2198 s->idct_put(dest_y + dct_offset , dct_linesize, block[2]); 2127 s->dsp.idct_put(dest_y + dct_offset , dct_linesize, block[2]);
2199 s->idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]); 2128 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
2200 2129
2201 if(!(s->flags&CODEC_FLAG_GRAY)){ 2130 if(!(s->flags&CODEC_FLAG_GRAY)){
2202 s->idct_put(dest_cb, s->uvlinesize, block[4]); 2131 s->dsp.idct_put(dest_cb, s->uvlinesize, block[4]);
2203 s->idct_put(dest_cr, s->uvlinesize, block[5]); 2132 s->dsp.idct_put(dest_cr, s->uvlinesize, block[5]);
2204 } 2133 }
2205 } 2134 }
2206 } 2135 }
2207 } 2136 }
2208 } 2137 }
3038 2967
3039 if (s->out_format == FMT_MJPEG) { 2968 if (s->out_format == FMT_MJPEG) {
3040 /* for mjpeg, we do include qscale in the matrix */ 2969 /* for mjpeg, we do include qscale in the matrix */
3041 s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0]; 2970 s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
3042 for(i=1;i<64;i++){ 2971 for(i=1;i<64;i++){
3043 int j= s->idct_permutation[i]; 2972 int j= s->dsp.idct_permutation[i];
3044 2973
3045 s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); 2974 s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3046 } 2975 }
3047 convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 2976 convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3048 s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8); 2977 s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
3547 uint8_t * length; 3476 uint8_t * length;
3548 uint8_t * last_length; 3477 uint8_t * last_length;
3549 int score_limit=0; 3478 int score_limit=0;
3550 int left_limit= 0; 3479 int left_limit= 0;
3551 3480
3552 s->fdct (block); 3481 s->dsp.fdct (block);
3553 3482
3554 qmul= qscale*16; 3483 qmul= qscale*16;
3555 qadd= ((qscale-1)|1)*8; 3484 qadd= ((qscale-1)|1)*8;
3556 3485
3557 if (s->mb_intra) { 3486 if (s->mb_intra) {
3646 unquant_coeff= level*qmul + qadd; 3575 unquant_coeff= level*qmul + qadd;
3647 }else{ 3576 }else{
3648 unquant_coeff= level*qmul - qadd; 3577 unquant_coeff= level*qmul - qadd;
3649 } 3578 }
3650 }else{ //MPEG1 3579 }else{ //MPEG1
3651 j= s->idct_permutation[ scantable[i + start_i] ]; //FIXME optimize 3580 j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
3652 if(s->mb_intra){ 3581 if(s->mb_intra){
3653 if (level < 0) { 3582 if (level < 0) {
3654 unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3; 3583 unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3;
3655 unquant_coeff = -((unquant_coeff - 1) | 1); 3584 unquant_coeff = -((unquant_coeff - 1) | 1);
3656 } else { 3585 } else {
3758 return last_non_zero; 3687 return last_non_zero;
3759 3688
3760 i= last_i; 3689 i= last_i;
3761 assert(last_level); 3690 assert(last_level);
3762 //FIXME use permutated scantable 3691 //FIXME use permutated scantable
3763 block[ s->idct_permutation[ scantable[last_non_zero] ] ]= last_level; 3692 block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level;
3764 i -= last_run + 1; 3693 i -= last_run + 1;
3765 3694
3766 for(;i>0 ; i -= run_tab[i] + 1){ 3695 for(;i>0 ; i -= run_tab[i] + 1){
3767 const int j= s->idct_permutation[ scantable[i - 1 + start_i] ]; 3696 const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ];
3768 3697
3769 block[j]= level_tab[i]; 3698 block[j]= level_tab[i];
3770 assert(block[j]); 3699 assert(block[j]);
3771 } 3700 }
3772 3701
3782 const uint8_t *scantable= s->intra_scantable.scantable; 3711 const uint8_t *scantable= s->intra_scantable.scantable;
3783 int bias; 3712 int bias;
3784 int max=0; 3713 int max=0;
3785 unsigned int threshold1, threshold2; 3714 unsigned int threshold1, threshold2;
3786 3715
3787 s->fdct (block); 3716 s->dsp.fdct (block);
3788 3717
3789 if (s->mb_intra) { 3718 if (s->mb_intra) {
3790 if (!s->h263_aic) { 3719 if (!s->h263_aic) {
3791 if (n < 4) 3720 if (n < 4)
3792 q = s->y_dc_scale; 3721 q = s->y_dc_scale;
3834 } 3763 }
3835 } 3764 }
3836 *overflow= s->max_qcoeff < max; //overflow might have happened 3765 *overflow= s->max_qcoeff < max; //overflow might have happened
3837 3766
3838 /* we need this permutation so that the coefficients are in the order the IDCT expects; we only permute the !=0 elements */ 3767 /* we need this permutation so that the coefficients are in the order the IDCT expects; we only permute the !=0 elements */
3839 if (s->idct_permutation_type != FF_NO_IDCT_PERM) 3768 if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
3840 ff_block_permute(block, s->idct_permutation, scantable, last_non_zero); 3769 ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
3841 3770
3842 return last_non_zero; 3771 return last_non_zero;
3843 } 3772 }
3844 3773
3845 #endif //CONFIG_ENCODERS 3774 #endif //CONFIG_ENCODERS