comparison ffv1.c @ 9548:c005889b5389 libavcodec

Support more than 8 bits per component for YUV in FFV1. With gcc on a Duron, decoding becomes slower and encoding becomes faster. Some inlining overrides, such as av_flatten, are added to keep inlining similar to before.
author michael
date Fri, 24 Apr 2009 04:10:32 +0000
parents df1827eb3aaa
children 4fd9ac9978e5
comparison
equal deleted inserted replaced
9547:df1827eb3aaa 9548:c005889b5389
54 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 54 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 55 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
56 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 56 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
57 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 57 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
58 }; 58 };
59
60 static const int8_t quant5_10bit[256]={
61 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
65 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
66 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
67 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
68 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
69 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
70 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
71 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
72 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
73 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,
74 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
75 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
76 -1,-1,-1,-1,-1,-1,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,
77 };
78
59 static const int8_t quant5[256]={ 79 static const int8_t quant5[256]={
60 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 80 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
61 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 81 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
62 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
63 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 83 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 128 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
109 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 129 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
110 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, 130 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
111 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, 131 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
112 }; 132 };
133 static const int8_t quant9_10bit[256]={
134 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
135 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
136 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
137 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
138 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
139 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
140 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
141 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
142 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
145 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
146 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,
147 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
148 -3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
149 -2,-2,-2,-2,-1,-1,-1,-1,-1,-1,-1,-1,-0,-0,-0,-0,
150 };
151
113 static const int8_t quant11[256]={ 152 static const int8_t quant11[256]={
114 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 153 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
115 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 154 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 155 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
117 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 156 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
222 261
223 if(v){ 262 if(v){
224 const int a= FFABS(v); 263 const int a= FFABS(v);
225 const int e= av_log2(a); 264 const int e= av_log2(a);
226 put_rac(c, state+0, 0); 265 put_rac(c, state+0, 0);
227 266 if(e<=9){
228 assert(e<=9);
229
230 for(i=0; i<e; i++){ 267 for(i=0; i<e; i++){
231 put_rac(c, state+1+i, 1); //1..10 268 put_rac(c, state+1+i, 1); //1..10
232 } 269 }
233 put_rac(c, state+1+i, 0); 270 put_rac(c, state+1+i, 0);
234 271
236 put_rac(c, state+22+i, (a>>i)&1); //22..31 273 put_rac(c, state+22+i, (a>>i)&1); //22..31
237 } 274 }
238 275
239 if(is_signed) 276 if(is_signed)
240 put_rac(c, state+11 + e, v < 0); //11..21 277 put_rac(c, state+11 + e, v < 0); //11..21
278 }else{
279 for(i=0; i<e; i++){
280 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
281 }
282 put_rac(c, state+1+9, 0);
283
284 for(i=e-1; i>=0; i--){
285 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
286 }
287
288 if(is_signed)
289 put_rac(c, state+11 + 10, v < 0); //11..21
290 }
241 }else{ 291 }else{
242 put_rac(c, state+0, 1); 292 put_rac(c, state+0, 1);
243 } 293 }
244 } 294 }
245 295
246 static void av_noinline put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ 296 static void av_noinline put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
247 put_symbol_inline(c, state, v, is_signed); 297 put_symbol_inline(c, state, v, is_signed);
248 } 298 }
249 299
250 static inline int get_symbol_inline(RangeCoder *c, uint8_t *state, int is_signed){ 300 static inline av_flatten int get_symbol_inline(RangeCoder *c, uint8_t *state, int is_signed){
251 if(get_rac(c, state+0)) 301 if(get_rac(c, state+0))
252 return 0; 302 return 0;
253 else{ 303 else{
254 int i, e, a; 304 int i, e, a;
255 e= 0; 305 e= 0;
256 while(get_rac(c, state+1 + e) && e<9){ //1..10 306 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
257 e++; 307 e++;
258 } 308 }
259 309
260 a= 1; 310 a= 1;
261 for(i=e-1; i>=0; i--){ 311 for(i=e-1; i>=0; i--){
262 a += a + get_rac(c, state+22 + i); //22..31 312 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
263 } 313 }
264 314
265 e= -(is_signed && get_rac(c, state+11 + e)); //11..21 315 e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
266 return (a^e)-e; 316 return (a^e)-e;
267 } 317 }
268 } 318 }
269 319
270 static int av_noinline get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ 320 static int av_noinline get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
449 sample[i]= sample_buffer[(h+i-y)%ring_size]+3; 499 sample[i]= sample_buffer[(h+i-y)%ring_size]+3;
450 500
451 sample[0][-1]= sample[1][0 ]; 501 sample[0][-1]= sample[1][0 ];
452 sample[1][ w]= sample[1][w-1]; 502 sample[1][ w]= sample[1][w-1];
453 //{START_TIMER 503 //{START_TIMER
504 if(s->avctx->bits_per_raw_sample<=8){
454 for(x=0; x<w; x++){ 505 for(x=0; x<w; x++){
455 sample[0][x]= src[x + stride*y]; 506 sample[0][x]= src[x + stride*y];
456 } 507 }
457 encode_line(s, w, sample, plane_index, 8); 508 encode_line(s, w, sample, plane_index, 8);
509 }else{
510 for(x=0; x<w; x++){
511 sample[0][x]= ((uint16_t*)(src + stride*y))[x] >> (16 - s->avctx->bits_per_raw_sample);
512 }
513 encode_line(s, w, sample, plane_index, s->avctx->bits_per_raw_sample);
514 }
458 //STOP_TIMER("encode line")} 515 //STOP_TIMER("encode line")}
459 } 516 }
460 } 517 }
461 518
462 static void encode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int stride){ 519 static void encode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int stride){
521 memset(state, 128, sizeof(state)); 578 memset(state, 128, sizeof(state));
522 579
523 put_symbol(c, state, f->version, 0); 580 put_symbol(c, state, f->version, 0);
524 put_symbol(c, state, f->avctx->coder_type, 0); 581 put_symbol(c, state, f->avctx->coder_type, 0);
525 put_symbol(c, state, f->colorspace, 0); //YUV cs type 582 put_symbol(c, state, f->colorspace, 0); //YUV cs type
583 if(f->version>0)
584 put_symbol(c, state, f->avctx->bits_per_raw_sample, 0);
526 put_rac(c, state, 1); //chroma planes 585 put_rac(c, state, 1); //chroma planes
527 put_symbol(c, state, f->chroma_h_shift, 0); 586 put_symbol(c, state, f->chroma_h_shift, 0);
528 put_symbol(c, state, f->chroma_v_shift, 0); 587 put_symbol(c, state, f->chroma_v_shift, 0);
529 put_rac(c, state, 0); //no transparency plane 588 put_rac(c, state, 0); //no transparency plane
530 589
560 s->version=0; 619 s->version=0;
561 s->ac= avctx->coder_type; 620 s->ac= avctx->coder_type;
562 621
563 s->plane_count=2; 622 s->plane_count=2;
564 for(i=0; i<256; i++){ 623 for(i=0; i<256; i++){
624 if(avctx->bits_per_raw_sample <=8){
565 s->quant_table[0][i]= quant11[i]; 625 s->quant_table[0][i]= quant11[i];
566 s->quant_table[1][i]= 11*quant11[i]; 626 s->quant_table[1][i]= 11*quant11[i];
567 if(avctx->context_model==0){ 627 if(avctx->context_model==0){
568 s->quant_table[2][i]= 11*11*quant11[i]; 628 s->quant_table[2][i]= 11*11*quant11[i];
569 s->quant_table[3][i]= 629 s->quant_table[3][i]=
571 }else{ 631 }else{
572 s->quant_table[2][i]= 11*11*quant5 [i]; 632 s->quant_table[2][i]= 11*11*quant5 [i];
573 s->quant_table[3][i]= 5*11*11*quant5 [i]; 633 s->quant_table[3][i]= 5*11*11*quant5 [i];
574 s->quant_table[4][i]= 5*5*11*11*quant5 [i]; 634 s->quant_table[4][i]= 5*5*11*11*quant5 [i];
575 } 635 }
636 }else{
637 s->quant_table[0][i]= quant9_10bit[i];
638 s->quant_table[1][i]= 11*quant9_10bit[i];
639 if(avctx->context_model==0){
640 s->quant_table[2][i]= 11*11*quant9_10bit[i];
641 s->quant_table[3][i]=
642 s->quant_table[4][i]=0;
643 }else{
644 s->quant_table[2][i]= 11*11*quant5_10bit[i];
645 s->quant_table[3][i]= 5*11*11*quant5_10bit[i];
646 s->quant_table[4][i]= 5*5*11*11*quant5_10bit[i];
647 }
648 }
576 } 649 }
577 650
578 for(i=0; i<s->plane_count; i++){ 651 for(i=0; i<s->plane_count; i++){
579 PlaneContext * const p= &s->plane[i]; 652 PlaneContext * const p= &s->plane[i];
580 653
591 } 664 }
592 } 665 }
593 666
594 avctx->coded_frame= &s->picture; 667 avctx->coded_frame= &s->picture;
595 switch(avctx->pix_fmt){ 668 switch(avctx->pix_fmt){
669 case PIX_FMT_YUV444P16:
670 case PIX_FMT_YUV422P16:
671 case PIX_FMT_YUV420P16:
672 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
673 av_log(avctx, AV_LOG_ERROR, "More than 8 bit per component is still experimental and no gurantee is yet made for future compatibility\n"
674 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
675 return -1;
676 }
677 if(avctx->bits_per_raw_sample <=8){
678 av_log(avctx, AV_LOG_ERROR, "bits_per_raw_sample inavlid\n");
679 return -1;
680 }
681 s->version= 1;
596 case PIX_FMT_YUV444P: 682 case PIX_FMT_YUV444P:
597 case PIX_FMT_YUV422P: 683 case PIX_FMT_YUV422P:
598 case PIX_FMT_YUV420P: 684 case PIX_FMT_YUV420P:
599 case PIX_FMT_YUV411P: 685 case PIX_FMT_YUV411P:
600 case PIX_FMT_YUV410P: 686 case PIX_FMT_YUV410P:
786 872
787 sample[1][-1]= sample[0][0 ]; 873 sample[1][-1]= sample[0][0 ];
788 sample[0][ w]= sample[0][w-1]; 874 sample[0][ w]= sample[0][w-1];
789 875
790 //{START_TIMER 876 //{START_TIMER
877 if(s->avctx->bits_per_raw_sample <= 8){
791 decode_line(s, w, sample, plane_index, 8); 878 decode_line(s, w, sample, plane_index, 8);
792 for(x=0; x<w; x++){ 879 for(x=0; x<w; x++){
793 src[x + stride*y]= sample[1][x]; 880 src[x + stride*y]= sample[1][x];
881 }
882 }else{
883 decode_line(s, w, sample, plane_index, s->avctx->bits_per_raw_sample);
884 for(x=0; x<w; x++){
885 ((uint16_t*)(src + stride*y))[x]= sample[1][x] << (16 - s->avctx->bits_per_raw_sample);
886 }
794 } 887 }
795 //STOP_TIMER("decode-line")} 888 //STOP_TIMER("decode-line")}
796 } 889 }
797 } 890 }
798 891
875 memset(state, 128, sizeof(state)); 968 memset(state, 128, sizeof(state));
876 969
877 f->version= get_symbol(c, state, 0); 970 f->version= get_symbol(c, state, 0);
878 f->ac= f->avctx->coder_type= get_symbol(c, state, 0); 971 f->ac= f->avctx->coder_type= get_symbol(c, state, 0);
879 f->colorspace= get_symbol(c, state, 0); //YUV cs type 972 f->colorspace= get_symbol(c, state, 0); //YUV cs type
973 if(f->version>0)
974 f->avctx->bits_per_raw_sample= get_symbol(c, state, 0);
880 get_rac(c, state); //no chroma = false 975 get_rac(c, state); //no chroma = false
881 f->chroma_h_shift= get_symbol(c, state, 0); 976 f->chroma_h_shift= get_symbol(c, state, 0);
882 f->chroma_v_shift= get_symbol(c, state, 0); 977 f->chroma_v_shift= get_symbol(c, state, 0);
883 get_rac(c, state); //transparency plane 978 get_rac(c, state); //transparency plane
884 f->plane_count= 2; 979 f->plane_count= 2;
885 980
886 if(f->colorspace==0){ 981 if(f->colorspace==0){
982 if(f->avctx->bits_per_raw_sample<=8){
887 switch(16*f->chroma_h_shift + f->chroma_v_shift){ 983 switch(16*f->chroma_h_shift + f->chroma_v_shift){
888 case 0x00: f->avctx->pix_fmt= PIX_FMT_YUV444P; break; 984 case 0x00: f->avctx->pix_fmt= PIX_FMT_YUV444P; break;
889 case 0x10: f->avctx->pix_fmt= PIX_FMT_YUV422P; break; 985 case 0x10: f->avctx->pix_fmt= PIX_FMT_YUV422P; break;
890 case 0x11: f->avctx->pix_fmt= PIX_FMT_YUV420P; break; 986 case 0x11: f->avctx->pix_fmt= PIX_FMT_YUV420P; break;
891 case 0x20: f->avctx->pix_fmt= PIX_FMT_YUV411P; break; 987 case 0x20: f->avctx->pix_fmt= PIX_FMT_YUV411P; break;
892 case 0x22: f->avctx->pix_fmt= PIX_FMT_YUV410P; break; 988 case 0x22: f->avctx->pix_fmt= PIX_FMT_YUV410P; break;
893 default: 989 default:
894 av_log(f->avctx, AV_LOG_ERROR, "format not supported\n"); 990 av_log(f->avctx, AV_LOG_ERROR, "format not supported\n");
895 return -1; 991 return -1;
992 }
993 }else{
994 switch(16*f->chroma_h_shift + f->chroma_v_shift){
995 case 0x00: f->avctx->pix_fmt= PIX_FMT_YUV444P16; break;
996 case 0x10: f->avctx->pix_fmt= PIX_FMT_YUV422P16; break;
997 case 0x11: f->avctx->pix_fmt= PIX_FMT_YUV420P16; break;
998 default:
999 av_log(f->avctx, AV_LOG_ERROR, "format not supported\n");
1000 return -1;
1001 }
896 } 1002 }
897 }else if(f->colorspace==1){ 1003 }else if(f->colorspace==1){
898 if(f->chroma_h_shift || f->chroma_v_shift){ 1004 if(f->chroma_h_shift || f->chroma_v_shift){
899 av_log(f->avctx, AV_LOG_ERROR, "chroma subsampling not supported in this colorspace\n"); 1005 av_log(f->avctx, AV_LOG_ERROR, "chroma subsampling not supported in this colorspace\n");
900 return -1; 1006 return -1;
1040 CODEC_ID_FFV1, 1146 CODEC_ID_FFV1,
1041 sizeof(FFV1Context), 1147 sizeof(FFV1Context),
1042 encode_init, 1148 encode_init,
1043 encode_frame, 1149 encode_frame,
1044 common_end, 1150 common_end,
1045 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_YUV444P, PIX_FMT_YUV422P, PIX_FMT_YUV411P, PIX_FMT_YUV410P, PIX_FMT_RGB32, PIX_FMT_NONE}, 1151 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_YUV444P, PIX_FMT_YUV422P, PIX_FMT_YUV411P, PIX_FMT_YUV410P, PIX_FMT_RGB32, PIX_FMT_YUV420P16, PIX_FMT_YUV422P16, PIX_FMT_YUV444P16, PIX_FMT_NONE},
1046 .long_name= NULL_IF_CONFIG_SMALL("FFmpeg codec #1"), 1152 .long_name= NULL_IF_CONFIG_SMALL("FFmpeg codec #1"),
1047 }; 1153 };
1048 #endif 1154 #endif