Mercurial > libavcodec.hg
comparison snow.c @ 5587:3ae03eacbe9f libavcodec
use 16bit IDWT (a SIMD implementation of it should be >2x faster than with
the old 32bit code)
disable mmx/sse2 optimizations as they need a rewrite now
author | michael |
---|---|
date | Sat, 25 Aug 2007 03:00:51 +0000 |
parents | a6f5ed050335 |
children | effa59ca89b3 |
comparison
equal
deleted
inserted
replaced
5586:f065fc609145 | 5587:3ae03eacbe9f |
---|---|
405 int stride; | 405 int stride; |
406 int width; | 406 int width; |
407 int height; | 407 int height; |
408 int qlog; ///< log(qscale)/log[2^(1/6)] | 408 int qlog; ///< log(qscale)/log[2^(1/6)] |
409 DWTELEM *buf; | 409 DWTELEM *buf; |
410 IDWTELEM *ibuf; | |
410 int buf_x_offset; | 411 int buf_x_offset; |
411 int buf_y_offset; | 412 int buf_y_offset; |
412 int stride_line; ///< Stride measured in lines, not pixels. | 413 int stride_line; ///< Stride measured in lines, not pixels. |
413 x_and_coeff * x_coeff; | 414 x_and_coeff * x_coeff; |
414 struct SubBand *parent; | 415 struct SubBand *parent; |
446 int max_ref_frames; | 447 int max_ref_frames; |
447 int ref_frames; | 448 int ref_frames; |
448 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; | 449 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; |
449 uint32_t *ref_scores[MAX_REF_FRAMES]; | 450 uint32_t *ref_scores[MAX_REF_FRAMES]; |
450 DWTELEM *spatial_dwt_buffer; | 451 DWTELEM *spatial_dwt_buffer; |
452 IDWTELEM *spatial_idwt_buffer; | |
451 int colorspace_type; | 453 int colorspace_type; |
452 int chroma_h_shift; | 454 int chroma_h_shift; |
453 int chroma_v_shift; | 455 int chroma_v_shift; |
454 int spatial_scalability; | 456 int spatial_scalability; |
455 int qlog; | 457 int qlog; |
475 | 477 |
476 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) | 478 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
477 }SnowContext; | 479 }SnowContext; |
478 | 480 |
479 typedef struct { | 481 typedef struct { |
480 DWTELEM *b0; | 482 IDWTELEM *b0; |
481 DWTELEM *b1; | 483 IDWTELEM *b1; |
482 DWTELEM *b2; | 484 IDWTELEM *b2; |
483 DWTELEM *b3; | 485 IDWTELEM *b3; |
484 int y; | 486 int y; |
485 } dwt_compose_t; | 487 } dwt_compose_t; |
486 | 488 |
487 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) | 489 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) |
488 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) | 490 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) |
489 | 491 |
490 static void iterative_me(SnowContext *s); | 492 static void iterative_me(SnowContext *s); |
491 | 493 |
492 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer) | 494 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) |
493 { | 495 { |
494 int i; | 496 int i; |
495 | 497 |
496 buf->base_buffer = base_buffer; | 498 buf->base_buffer = base_buffer; |
497 buf->line_count = line_count; | 499 buf->line_count = line_count; |
498 buf->line_width = line_width; | 500 buf->line_width = line_width; |
499 buf->data_count = max_allocated_lines; | 501 buf->data_count = max_allocated_lines; |
500 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count); | 502 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); |
501 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines); | 503 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); |
502 | 504 |
503 for (i = 0; i < max_allocated_lines; i++) | 505 for (i = 0; i < max_allocated_lines; i++) |
504 { | 506 { |
505 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width); | 507 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); |
506 } | 508 } |
507 | 509 |
508 buf->data_stack_top = max_allocated_lines - 1; | 510 buf->data_stack_top = max_allocated_lines - 1; |
509 } | 511 } |
510 | 512 |
511 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) | 513 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) |
512 { | 514 { |
513 int offset; | 515 int offset; |
514 DWTELEM * buffer; | 516 IDWTELEM * buffer; |
515 | 517 |
516 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); | 518 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); |
517 | 519 |
518 assert(buf->data_stack_top >= 0); | 520 assert(buf->data_stack_top >= 0); |
519 // assert(!buf->line[line]); | 521 // assert(!buf->line[line]); |
531 } | 533 } |
532 | 534 |
533 static void slice_buffer_release(slice_buffer * buf, int line) | 535 static void slice_buffer_release(slice_buffer * buf, int line) |
534 { | 536 { |
535 int offset; | 537 int offset; |
536 DWTELEM * buffer; | 538 IDWTELEM * buffer; |
537 | 539 |
538 assert(line >= 0 && line < buf->line_count); | 540 assert(line >= 0 && line < buf->line_count); |
539 assert(buf->line[line]); | 541 assert(buf->line[line]); |
540 | 542 |
541 offset = buf->line_width * line; | 543 offset = buf->line_width * line; |
734 if(mirror_right){ | 736 if(mirror_right){ |
735 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse); | 737 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse); |
736 } | 738 } |
737 } | 739 } |
738 | 740 |
741 static av_always_inline void inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ | |
742 const int mirror_left= !highpass; | |
743 const int mirror_right= (width&1) ^ highpass; | |
744 const int w= (width>>1) - 1 + (highpass & width); | |
745 int i; | |
746 | |
747 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) | |
748 if(mirror_left){ | |
749 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); | |
750 dst += dst_step; | |
751 src += src_step; | |
752 } | |
753 | |
754 for(i=0; i<w; i++){ | |
755 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse); | |
756 } | |
757 | |
758 if(mirror_right){ | |
759 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse); | |
760 } | |
761 } | |
762 | |
763 | |
739 #ifndef lift5 | 764 #ifndef lift5 |
740 static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ | 765 static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ |
741 const int mirror_left= !highpass; | 766 const int mirror_left= !highpass; |
742 const int mirror_right= (width&1) ^ highpass; | 767 const int mirror_right= (width&1) ^ highpass; |
743 const int w= (width>>1) - 1 + (highpass & width); | 768 const int w= (width>>1) - 1 + (highpass & width); |
768 } | 793 } |
769 #endif | 794 #endif |
770 | 795 |
771 #ifndef liftS | 796 #ifndef liftS |
772 static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ | 797 static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ |
798 const int mirror_left= !highpass; | |
799 const int mirror_right= (width&1) ^ highpass; | |
800 const int w= (width>>1) - 1 + (highpass & width); | |
801 int i; | |
802 | |
803 assert(shift == 4); | |
804 #define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
805 if(mirror_left){ | |
806 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); | |
807 dst += dst_step; | |
808 src += src_step; | |
809 } | |
810 | |
811 for(i=0; i<w; i++){ | |
812 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse); | |
813 } | |
814 | |
815 if(mirror_right){ | |
816 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
817 } | |
818 } | |
819 static av_always_inline void inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ | |
773 const int mirror_left= !highpass; | 820 const int mirror_left= !highpass; |
774 const int mirror_right= (width&1) ^ highpass; | 821 const int mirror_right= (width&1) ^ highpass; |
775 const int w= (width>>1) - 1 + (highpass & width); | 822 const int w= (width>>1) - 1 + (highpass & width); |
776 int i; | 823 int i; |
777 | 824 |
961 if(width&1) | 1008 if(width&1) |
962 temp[x ]= b[2*x ]; | 1009 temp[x ]= b[2*x ]; |
963 memcpy(b, temp, width*sizeof(int)); | 1010 memcpy(b, temp, width*sizeof(int)); |
964 } | 1011 } |
965 | 1012 |
966 static void horizontal_composeX(DWTELEM *b, int width){ | 1013 static void horizontal_composeX(IDWTELEM *b, int width){ |
967 DWTELEM temp[width]; | 1014 IDWTELEM temp[width]; |
968 const int width2= width>>1; | 1015 const int width2= width>>1; |
969 int x; | 1016 int x; |
970 const int w2= (width+1)>>1; | 1017 const int w2= (width+1)>>1; |
971 | 1018 |
972 memcpy(temp, b, width*sizeof(int)); | 1019 memcpy(temp, b, width*sizeof(IDWTELEM)); |
973 for(x=0; x<width2; x++){ | 1020 for(x=0; x<width2; x++){ |
974 b[2*x ]= temp[x ]; | 1021 b[2*x ]= temp[x ]; |
975 b[2*x + 1]= temp[x+w2]; | 1022 b[2*x + 1]= temp[x+w2]; |
976 } | 1023 } |
977 if(width&1) | 1024 if(width&1) |
1000 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0); | 1047 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0); |
1001 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0); | 1048 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0); |
1002 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0); | 1049 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0); |
1003 } | 1050 } |
1004 | 1051 |
1005 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){ | 1052 static void spatial_composeX(IDWTELEM *buffer, int width, int height, int stride){ |
1006 int x, y; | 1053 int x, y; |
1007 | 1054 |
1008 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1); | 1055 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1); |
1009 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1); | 1056 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1); |
1010 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1); | 1057 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1); |
1209 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break; | 1256 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break; |
1210 } | 1257 } |
1211 } | 1258 } |
1212 } | 1259 } |
1213 | 1260 |
1214 static void horizontal_compose53i(DWTELEM *b, int width){ | 1261 static void horizontal_compose53i(IDWTELEM *b, int width){ |
1215 DWTELEM temp[width]; | 1262 IDWTELEM temp[width]; |
1216 const int width2= width>>1; | 1263 const int width2= width>>1; |
1217 const int w2= (width+1)>>1; | 1264 const int w2= (width+1)>>1; |
1218 int x; | 1265 int x; |
1219 | 1266 |
1220 #if 0 | 1267 #if 0 |
1245 A3 -= A2; | 1292 A3 -= A2; |
1246 A2 += (A1 + A3 + 2)>>2; | 1293 A2 += (A1 + A3 + 2)>>2; |
1247 b[width -1] = A3; | 1294 b[width -1] = A3; |
1248 b[width2-1] = A2; | 1295 b[width2-1] = A2; |
1249 #else | 1296 #else |
1250 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); | 1297 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); |
1251 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); | 1298 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); |
1252 #endif | 1299 #endif |
1253 for(x=0; x<width2; x++){ | 1300 for(x=0; x<width2; x++){ |
1254 b[2*x ]= temp[x ]; | 1301 b[2*x ]= temp[x ]; |
1255 b[2*x + 1]= temp[x+w2]; | 1302 b[2*x + 1]= temp[x+w2]; |
1256 } | 1303 } |
1257 if(width&1) | 1304 if(width&1) |
1258 b[2*x ]= temp[x ]; | 1305 b[2*x ]= temp[x ]; |
1259 } | 1306 } |
1260 | 1307 |
1261 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ | 1308 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
1262 int i; | 1309 int i; |
1263 | 1310 |
1264 for(i=0; i<width; i++){ | 1311 for(i=0; i<width; i++){ |
1265 b1[i] += (b0[i] + b2[i])>>1; | 1312 b1[i] += (b0[i] + b2[i])>>1; |
1266 } | 1313 } |
1267 } | 1314 } |
1268 | 1315 |
1269 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ | 1316 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
1270 int i; | 1317 int i; |
1271 | 1318 |
1272 for(i=0; i<width; i++){ | 1319 for(i=0; i<width; i++){ |
1273 b1[i] -= (b0[i] + b2[i] + 2)>>2; | 1320 b1[i] -= (b0[i] + b2[i] + 2)>>2; |
1274 } | 1321 } |
1278 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); | 1325 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); |
1279 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); | 1326 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); |
1280 cs->y = -1; | 1327 cs->y = -1; |
1281 } | 1328 } |
1282 | 1329 |
1283 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ | 1330 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){ |
1284 cs->b0 = buffer + mirror(-1-1, height-1)*stride; | 1331 cs->b0 = buffer + mirror(-1-1, height-1)*stride; |
1285 cs->b1 = buffer + mirror(-1 , height-1)*stride; | 1332 cs->b1 = buffer + mirror(-1 , height-1)*stride; |
1286 cs->y = -1; | 1333 cs->y = -1; |
1287 } | 1334 } |
1288 | 1335 |
1289 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ | 1336 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
1290 int y= cs->y; | 1337 int y= cs->y; |
1291 | 1338 |
1292 DWTELEM *b0= cs->b0; | 1339 IDWTELEM *b0= cs->b0; |
1293 DWTELEM *b1= cs->b1; | 1340 IDWTELEM *b1= cs->b1; |
1294 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); | 1341 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); |
1295 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); | 1342 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); |
1296 | 1343 |
1297 {START_TIMER | 1344 {START_TIMER |
1298 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); | 1345 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1299 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | 1346 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); |
1300 STOP_TIMER("vertical_compose53i*")} | 1347 STOP_TIMER("vertical_compose53i*")} |
1307 cs->b0 = b2; | 1354 cs->b0 = b2; |
1308 cs->b1 = b3; | 1355 cs->b1 = b3; |
1309 cs->y += 2; | 1356 cs->y += 2; |
1310 } | 1357 } |
1311 | 1358 |
1312 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ | 1359 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){ |
1313 int y= cs->y; | 1360 int y= cs->y; |
1314 DWTELEM *b0= cs->b0; | 1361 IDWTELEM *b0= cs->b0; |
1315 DWTELEM *b1= cs->b1; | 1362 IDWTELEM *b1= cs->b1; |
1316 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | 1363 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; |
1317 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | 1364 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; |
1318 | 1365 |
1319 {START_TIMER | 1366 {START_TIMER |
1320 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); | 1367 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1321 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | 1368 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); |
1322 STOP_TIMER("vertical_compose53i*")} | 1369 STOP_TIMER("vertical_compose53i*")} |
1329 cs->b0 = b2; | 1376 cs->b0 = b2; |
1330 cs->b1 = b3; | 1377 cs->b1 = b3; |
1331 cs->y += 2; | 1378 cs->y += 2; |
1332 } | 1379 } |
1333 | 1380 |
1334 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){ | 1381 static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ |
1335 dwt_compose_t cs; | 1382 dwt_compose_t cs; |
1336 spatial_compose53i_init(&cs, buffer, height, stride); | 1383 spatial_compose53i_init(&cs, buffer, height, stride); |
1337 while(cs.y <= height) | 1384 while(cs.y <= height) |
1338 spatial_compose53i_dy(&cs, buffer, width, height, stride); | 1385 spatial_compose53i_dy(&cs, buffer, width, height, stride); |
1339 } | 1386 } |
1340 | 1387 |
1341 | 1388 |
1342 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){ | 1389 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ |
1343 DWTELEM temp[width]; | 1390 IDWTELEM temp[width]; |
1344 const int w2= (width+1)>>1; | 1391 const int w2= (width+1)>>1; |
1345 | 1392 |
1346 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); | 1393 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); |
1347 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); | 1394 inv_lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); |
1348 liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); | 1395 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); |
1349 lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); | 1396 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); |
1350 } | 1397 } |
1351 | 1398 |
1352 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ | 1399 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
1353 int i; | 1400 int i; |
1354 | 1401 |
1355 for(i=0; i<width; i++){ | 1402 for(i=0; i<width; i++){ |
1356 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | 1403 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; |
1357 } | 1404 } |
1358 } | 1405 } |
1359 | 1406 |
1360 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ | 1407 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
1361 int i; | 1408 int i; |
1362 | 1409 |
1363 for(i=0; i<width; i++){ | 1410 for(i=0; i<width; i++){ |
1364 #ifdef lift5 | 1411 #ifdef lift5 |
1365 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | 1412 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; |
1370 b1[i] -= (r+W_CO)>>W_CS; | 1417 b1[i] -= (r+W_CO)>>W_CS; |
1371 #endif | 1418 #endif |
1372 } | 1419 } |
1373 } | 1420 } |
1374 | 1421 |
1375 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ | 1422 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
1376 int i; | 1423 int i; |
1377 | 1424 |
1378 for(i=0; i<width; i++){ | 1425 for(i=0; i<width; i++){ |
1379 #ifdef liftS | 1426 #ifdef liftS |
1380 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; | 1427 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
1382 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; | 1429 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; |
1383 #endif | 1430 #endif |
1384 } | 1431 } |
1385 } | 1432 } |
1386 | 1433 |
1387 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ | 1434 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
1388 int i; | 1435 int i; |
1389 | 1436 |
1390 for(i=0; i<width; i++){ | 1437 for(i=0; i<width; i++){ |
1391 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | 1438 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; |
1392 } | 1439 } |
1393 } | 1440 } |
1394 | 1441 |
1395 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){ | 1442 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ |
1396 int i; | 1443 int i; |
1397 | 1444 |
1398 for(i=0; i<width; i++){ | 1445 for(i=0; i<width; i++){ |
1399 #ifndef lift5 | 1446 #ifndef lift5 |
1400 int r; | 1447 int r; |
1423 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); | 1470 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); |
1424 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); | 1471 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); |
1425 cs->y = -3; | 1472 cs->y = -3; |
1426 } | 1473 } |
1427 | 1474 |
1428 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ | 1475 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){ |
1429 cs->b0 = buffer + mirror(-3-1, height-1)*stride; | 1476 cs->b0 = buffer + mirror(-3-1, height-1)*stride; |
1430 cs->b1 = buffer + mirror(-3 , height-1)*stride; | 1477 cs->b1 = buffer + mirror(-3 , height-1)*stride; |
1431 cs->b2 = buffer + mirror(-3+1, height-1)*stride; | 1478 cs->b2 = buffer + mirror(-3+1, height-1)*stride; |
1432 cs->b3 = buffer + mirror(-3+2, height-1)*stride; | 1479 cs->b3 = buffer + mirror(-3+2, height-1)*stride; |
1433 cs->y = -3; | 1480 cs->y = -3; |
1434 } | 1481 } |
1435 | 1482 |
1436 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ | 1483 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
1437 int y = cs->y; | 1484 int y = cs->y; |
1438 | 1485 |
1439 DWTELEM *b0= cs->b0; | 1486 IDWTELEM *b0= cs->b0; |
1440 DWTELEM *b1= cs->b1; | 1487 IDWTELEM *b1= cs->b1; |
1441 DWTELEM *b2= cs->b2; | 1488 IDWTELEM *b2= cs->b2; |
1442 DWTELEM *b3= cs->b3; | 1489 IDWTELEM *b3= cs->b3; |
1443 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); | 1490 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); |
1444 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); | 1491 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); |
1445 | 1492 |
1446 {START_TIMER | 1493 {START_TIMER |
1447 if(y>0 && y+4<height){ | 1494 if(y>0 && y+4<height){ |
1448 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); | 1495 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); |
1449 }else{ | 1496 }else{ |
1466 cs->b2=b4; | 1513 cs->b2=b4; |
1467 cs->b3=b5; | 1514 cs->b3=b5; |
1468 cs->y += 2; | 1515 cs->y += 2; |
1469 } | 1516 } |
1470 | 1517 |
1471 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ | 1518 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){ |
1472 int y = cs->y; | 1519 int y = cs->y; |
1473 DWTELEM *b0= cs->b0; | 1520 IDWTELEM *b0= cs->b0; |
1474 DWTELEM *b1= cs->b1; | 1521 IDWTELEM *b1= cs->b1; |
1475 DWTELEM *b2= cs->b2; | 1522 IDWTELEM *b2= cs->b2; |
1476 DWTELEM *b3= cs->b3; | 1523 IDWTELEM *b3= cs->b3; |
1477 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | 1524 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; |
1478 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | 1525 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; |
1479 | 1526 |
1480 {START_TIMER | 1527 {START_TIMER |
1481 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); | 1528 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1482 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | 1529 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); |
1483 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | 1530 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); |
1496 cs->b2=b4; | 1543 cs->b2=b4; |
1497 cs->b3=b5; | 1544 cs->b3=b5; |
1498 cs->y += 2; | 1545 cs->y += 2; |
1499 } | 1546 } |
1500 | 1547 |
1501 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){ | 1548 static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ |
1502 dwt_compose_t cs; | 1549 dwt_compose_t cs; |
1503 spatial_compose97i_init(&cs, buffer, height, stride); | 1550 spatial_compose97i_init(&cs, buffer, height, stride); |
1504 while(cs.y <= height) | 1551 while(cs.y <= height) |
1505 spatial_compose97i_dy(&cs, buffer, width, height, stride); | 1552 spatial_compose97i_dy(&cs, buffer, width, height, stride); |
1506 } | 1553 } |
1516 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break; | 1563 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break; |
1517 } | 1564 } |
1518 } | 1565 } |
1519 } | 1566 } |
1520 | 1567 |
1521 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ | 1568 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
1522 int level; | 1569 int level; |
1523 for(level=decomposition_count-1; level>=0; level--){ | 1570 for(level=decomposition_count-1; level>=0; level--){ |
1524 switch(type){ | 1571 switch(type){ |
1525 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; | 1572 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; |
1526 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | 1573 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; |
1528 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break; | 1575 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break; |
1529 } | 1576 } |
1530 } | 1577 } |
1531 } | 1578 } |
1532 | 1579 |
1533 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ | 1580 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ |
1534 const int support = type==1 ? 3 : 5; | 1581 const int support = type==1 ? 3 : 5; |
1535 int level; | 1582 int level; |
1536 if(type==2) return; | 1583 if(type==2) return; |
1537 | 1584 |
1538 for(level=decomposition_count-1; level>=0; level--){ | 1585 for(level=decomposition_count-1; level>=0; level--){ |
1564 } | 1611 } |
1565 } | 1612 } |
1566 } | 1613 } |
1567 } | 1614 } |
1568 | 1615 |
1569 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ | 1616 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
1570 if(type==2){ | 1617 if(type==2){ |
1571 int level; | 1618 int level; |
1572 for(level=decomposition_count-1; level>=0; level--) | 1619 for(level=decomposition_count-1; level>=0; level--) |
1573 spatial_composeX (buffer, width>>level, height>>level, stride<<level); | 1620 spatial_composeX (buffer, width>>level, height>>level, stride<<level); |
1574 }else{ | 1621 }else{ |
1578 for(y=0; y<height; y+=4) | 1625 for(y=0; y<height; y+=4) |
1579 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | 1626 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); |
1580 } | 1627 } |
1581 } | 1628 } |
1582 | 1629 |
1583 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ | 1630 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
1584 const int w= b->width; | 1631 const int w= b->width; |
1585 const int h= b->height; | 1632 const int h= b->height; |
1586 int x, y; | 1633 int x, y; |
1587 | 1634 |
1588 if(1){ | 1635 if(1){ |
1698 } | 1745 } |
1699 } | 1746 } |
1700 return 0; | 1747 return 0; |
1701 } | 1748 } |
1702 | 1749 |
1703 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ | 1750 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
1704 // encode_subband_qtree(s, b, src, parent, stride, orientation); | 1751 // encode_subband_qtree(s, b, src, parent, stride, orientation); |
1705 // encode_subband_z0run(s, b, src, parent, stride, orientation); | 1752 // encode_subband_z0run(s, b, src, parent, stride, orientation); |
1706 return encode_subband_c0run(s, b, src, parent, stride, orientation); | 1753 return encode_subband_c0run(s, b, src, parent, stride, orientation); |
1707 // encode_subband_dzr(s, b, src, parent, stride, orientation); | 1754 // encode_subband_dzr(s, b, src, parent, stride, orientation); |
1708 } | 1755 } |
1815 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; | 1862 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
1816 int new_index = 0; | 1863 int new_index = 0; |
1817 | 1864 |
1818 START_TIMER | 1865 START_TIMER |
1819 | 1866 |
1820 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){ | 1867 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ |
1821 qadd= 0; | 1868 qadd= 0; |
1822 qmul= 1<<QEXPSHIFT; | 1869 qmul= 1<<QEXPSHIFT; |
1823 } | 1870 } |
1824 | 1871 |
1825 /* If we are on the second or later slice, restore our index. */ | 1872 /* If we are on the second or later slice, restore our index. */ |
1828 | 1875 |
1829 | 1876 |
1830 for(y=start_y; y<h; y++){ | 1877 for(y=start_y; y<h; y++){ |
1831 int x = 0; | 1878 int x = 0; |
1832 int v; | 1879 int v; |
1833 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; | 1880 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; |
1834 memset(line, 0, b->width*sizeof(DWTELEM)); | 1881 memset(line, 0, b->width*sizeof(IDWTELEM)); |
1835 v = b->x_coeff[new_index].coeff; | 1882 v = b->x_coeff[new_index].coeff; |
1836 x = b->x_coeff[new_index++].x; | 1883 x = b->x_coeff[new_index++].x; |
1837 while(x < w) | 1884 while(x < w) |
1838 { | 1885 { |
1839 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; | 1886 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; |
2517 } | 2564 } |
2518 | 2565 |
2519 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | 2566 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, |
2520 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | 2567 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ |
2521 int y, x; | 2568 int y, x; |
2522 DWTELEM * dst; | 2569 IDWTELEM * dst; |
2523 for(y=0; y<b_h; y++){ | 2570 for(y=0; y<b_h; y++){ |
2524 //FIXME ugly misuse of obmc_stride | 2571 //FIXME ugly misuse of obmc_stride |
2525 const uint8_t *obmc1= obmc + y*obmc_stride; | 2572 const uint8_t *obmc1= obmc + y*obmc_stride; |
2526 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); | 2573 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
2527 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | 2574 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
2533 +obmc3[x] * block[1][x + y*src_stride] | 2580 +obmc3[x] * block[1][x + y*src_stride] |
2534 +obmc4[x] * block[0][x + y*src_stride]; | 2581 +obmc4[x] * block[0][x + y*src_stride]; |
2535 | 2582 |
2536 v <<= 8 - LOG2_OBMC_MAX; | 2583 v <<= 8 - LOG2_OBMC_MAX; |
2537 if(FRAC_BITS != 8){ | 2584 if(FRAC_BITS != 8){ |
2538 v += 1<<(7 - FRAC_BITS); | |
2539 v >>= 8 - FRAC_BITS; | 2585 v >>= 8 - FRAC_BITS; |
2540 } | 2586 } |
2541 if(add){ | 2587 if(add){ |
2542 v += dst[x + src_x]; | 2588 v += dst[x + src_x]; |
2543 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | 2589 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
2549 } | 2595 } |
2550 } | 2596 } |
2551 } | 2597 } |
2552 | 2598 |
2553 //FIXME name clenup (b_w, block_w, b_width stuff) | 2599 //FIXME name clenup (b_w, block_w, b_width stuff) |
2554 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ | 2600 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ |
2555 const int b_width = s->b_width << s->block_max_depth; | 2601 const int b_width = s->b_width << s->block_max_depth; |
2556 const int b_height= s->b_height << s->block_max_depth; | 2602 const int b_height= s->b_height << s->block_max_depth; |
2557 const int b_stride= b_width; | 2603 const int b_stride= b_width; |
2558 BlockNode *lt= &s->block[b_x + b_y*b_stride]; | 2604 BlockNode *lt= &s->block[b_x + b_y*b_stride]; |
2559 BlockNode *rt= lt+1; | 2605 BlockNode *rt= lt+1; |
2692 +obmc3[x] * block[1][x + y*src_stride] | 2738 +obmc3[x] * block[1][x + y*src_stride] |
2693 +obmc4[x] * block[0][x + y*src_stride]; | 2739 +obmc4[x] * block[0][x + y*src_stride]; |
2694 | 2740 |
2695 v <<= 8 - LOG2_OBMC_MAX; | 2741 v <<= 8 - LOG2_OBMC_MAX; |
2696 if(FRAC_BITS != 8){ | 2742 if(FRAC_BITS != 8){ |
2697 v += 1<<(7 - FRAC_BITS); | |
2698 v >>= 8 - FRAC_BITS; | 2743 v >>= 8 - FRAC_BITS; |
2699 } | 2744 } |
2700 if(add){ | 2745 if(add){ |
2701 v += dst[x + y*dst_stride]; | 2746 v += dst[x + y*dst_stride]; |
2702 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | 2747 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
2708 } | 2753 } |
2709 } | 2754 } |
2710 #endif | 2755 #endif |
2711 } | 2756 } |
2712 | 2757 |
2713 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){ | 2758 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ |
2714 Plane *p= &s->plane[plane_index]; | 2759 Plane *p= &s->plane[plane_index]; |
2715 const int mb_w= s->b_width << s->block_max_depth; | 2760 const int mb_w= s->b_width << s->block_max_depth; |
2716 const int mb_h= s->b_height << s->block_max_depth; | 2761 const int mb_h= s->b_height << s->block_max_depth; |
2717 int x, y, mb_x; | 2762 int x, y, mb_x; |
2718 int block_size = MB_SIZE >> s->block_max_depth; | 2763 int block_size = MB_SIZE >> s->block_max_depth; |
2731 | 2776 |
2732 if(add){ | 2777 if(add){ |
2733 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) | 2778 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) |
2734 { | 2779 { |
2735 // DWTELEM * line = slice_buffer_get_line(sb, y); | 2780 // DWTELEM * line = slice_buffer_get_line(sb, y); |
2736 DWTELEM * line = sb->line[y]; | 2781 IDWTELEM * line = sb->line[y]; |
2737 for(x=0; x<w; x++) | 2782 for(x=0; x<w; x++) |
2738 { | 2783 { |
2739 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | 2784 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
2740 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | 2785 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
2741 v >>= FRAC_BITS; | 2786 v >>= FRAC_BITS; |
2745 } | 2790 } |
2746 }else{ | 2791 }else{ |
2747 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) | 2792 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) |
2748 { | 2793 { |
2749 // DWTELEM * line = slice_buffer_get_line(sb, y); | 2794 // DWTELEM * line = slice_buffer_get_line(sb, y); |
2750 DWTELEM * line = sb->line[y]; | 2795 IDWTELEM * line = sb->line[y]; |
2751 for(x=0; x<w; x++) | 2796 for(x=0; x<w; x++) |
2752 { | 2797 { |
2753 line[x] -= 128 << FRAC_BITS; | 2798 line[x] -= 128 << FRAC_BITS; |
2754 // buf[x + y*w]-= 128<<FRAC_BITS; | 2799 // buf[x + y*w]-= 128<<FRAC_BITS; |
2755 } | 2800 } |
2775 } | 2820 } |
2776 | 2821 |
2777 STOP_TIMER("predict_slice") | 2822 STOP_TIMER("predict_slice") |
2778 } | 2823 } |
2779 | 2824 |
2780 static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ | 2825 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ |
2781 Plane *p= &s->plane[plane_index]; | 2826 Plane *p= &s->plane[plane_index]; |
2782 const int mb_w= s->b_width << s->block_max_depth; | 2827 const int mb_w= s->b_width << s->block_max_depth; |
2783 const int mb_h= s->b_height << s->block_max_depth; | 2828 const int mb_h= s->b_height << s->block_max_depth; |
2784 int x, y, mb_x; | 2829 int x, y, mb_x; |
2785 int block_size = MB_SIZE >> s->block_max_depth; | 2830 int block_size = MB_SIZE >> s->block_max_depth; |
2832 } | 2877 } |
2833 | 2878 |
2834 STOP_TIMER("predict_slice") | 2879 STOP_TIMER("predict_slice") |
2835 } | 2880 } |
2836 | 2881 |
2837 static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ | 2882 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ |
2838 const int mb_h= s->b_height << s->block_max_depth; | 2883 const int mb_h= s->b_height << s->block_max_depth; |
2839 int mb_y; | 2884 int mb_y; |
2840 for(mb_y=0; mb_y<=mb_h; mb_y++) | 2885 for(mb_y=0; mb_y<=mb_h; mb_y++) |
2841 predict_slice(s, buf, plane_index, add, mb_y); | 2886 predict_slice(s, buf, plane_index, add, mb_y); |
2842 } | 2887 } |
2848 const int block_w = plane_index ? block_size/2 : block_size; | 2893 const int block_w = plane_index ? block_size/2 : block_size; |
2849 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | 2894 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
2850 const int obmc_stride= plane_index ? block_size : 2*block_size; | 2895 const int obmc_stride= plane_index ? block_size : 2*block_size; |
2851 const int ref_stride= s->current_picture.linesize[plane_index]; | 2896 const int ref_stride= s->current_picture.linesize[plane_index]; |
2852 uint8_t *src= s-> input_picture.data[plane_index]; | 2897 uint8_t *src= s-> input_picture.data[plane_index]; |
2853 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; | 2898 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned |
2854 const int b_stride = s->b_width << s->block_max_depth; | 2899 const int b_stride = s->b_width << s->block_max_depth; |
2855 const int w= p->width; | 2900 const int w= p->width; |
2856 const int h= p->height; | 2901 const int h= p->height; |
2857 int index= mb_x + mb_y*b_stride; | 2902 int index= mb_x + mb_y*b_stride; |
2858 BlockNode *b= &s->block[index]; | 2903 BlockNode *b= &s->block[index]; |
2860 int ab=0; | 2905 int ab=0; |
2861 int aa=0; | 2906 int aa=0; |
2862 | 2907 |
2863 b->type|= BLOCK_INTRA; | 2908 b->type|= BLOCK_INTRA; |
2864 b->color[plane_index]= 0; | 2909 b->color[plane_index]= 0; |
2865 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM)); | 2910 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM)); |
2866 | 2911 |
2867 for(i=0; i<4; i++){ | 2912 for(i=0; i<4; i++){ |
2868 int mb_x2= mb_x + (i &1) - 1; | 2913 int mb_x2= mb_x + (i &1) - 1; |
2869 int mb_y2= mb_y + (i>>1) - 1; | 2914 int mb_y2= mb_y + (i>>1) - 1; |
2870 int x= block_w*mb_x2 + block_w/2; | 2915 int x= block_w*mb_x2 + block_w/2; |
2940 const int block_w = plane_index ? block_size/2 : block_size; | 2985 const int block_w = plane_index ? block_size/2 : block_size; |
2941 const int obmc_stride= plane_index ? block_size : 2*block_size; | 2986 const int obmc_stride= plane_index ? block_size : 2*block_size; |
2942 const int ref_stride= s->current_picture.linesize[plane_index]; | 2987 const int ref_stride= s->current_picture.linesize[plane_index]; |
2943 uint8_t *dst= s->current_picture.data[plane_index]; | 2988 uint8_t *dst= s->current_picture.data[plane_index]; |
2944 uint8_t *src= s-> input_picture.data[plane_index]; | 2989 uint8_t *src= s-> input_picture.data[plane_index]; |
2945 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; | 2990 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; |
2946 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment | 2991 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment |
2947 uint8_t tmp[ref_stride*(2*MB_SIZE+5)]; | 2992 uint8_t tmp[ref_stride*(2*MB_SIZE+5)]; |
2948 const int b_stride = s->b_width << s->block_max_depth; | 2993 const int b_stride = s->b_width << s->block_max_depth; |
2949 const int b_height = s->b_height<< s->block_max_depth; | 2994 const int b_height = s->b_height<< s->block_max_depth; |
2950 const int w= p->width; | 2995 const int w= p->width; |
2962 | 3007 |
2963 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); | 3008 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); |
2964 | 3009 |
2965 for(y=y0; y<y1; y++){ | 3010 for(y=y0; y<y1; y++){ |
2966 const uint8_t *obmc1= obmc_edged + y*obmc_stride; | 3011 const uint8_t *obmc1= obmc_edged + y*obmc_stride; |
2967 const DWTELEM *pred1 = pred + y*obmc_stride; | 3012 const IDWTELEM *pred1 = pred + y*obmc_stride; |
2968 uint8_t *cur1 = cur + y*ref_stride; | 3013 uint8_t *cur1 = cur + y*ref_stride; |
2969 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; | 3014 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; |
2970 for(x=x0; x<x1; x++){ | 3015 for(x=x0; x<x1; x++){ |
3016 #if FRAC_BITS >= LOG2_OBMC_MAX | |
2971 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); | 3017 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); |
3018 #else | |
3019 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS); | |
3020 #endif | |
2972 v = (v + pred1[x]) >> FRAC_BITS; | 3021 v = (v + pred1[x]) >> FRAC_BITS; |
2973 if(v&(~255)) v= ~(v>>31); | 3022 if(v&(~255)) v= ~(v>>31); |
2974 dst1[x] = v; | 3023 dst1[x] = v; |
2975 } | 3024 } |
2976 } | 3025 } |
3036 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | 3085 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
3037 const int obmc_stride= plane_index ? block_size : 2*block_size; | 3086 const int obmc_stride= plane_index ? block_size : 2*block_size; |
3038 const int ref_stride= s->current_picture.linesize[plane_index]; | 3087 const int ref_stride= s->current_picture.linesize[plane_index]; |
3039 uint8_t *dst= s->current_picture.data[plane_index]; | 3088 uint8_t *dst= s->current_picture.data[plane_index]; |
3040 uint8_t *src= s-> input_picture.data[plane_index]; | 3089 uint8_t *src= s-> input_picture.data[plane_index]; |
3041 static const DWTELEM zero_dst[4096]; //FIXME | 3090 static const IDWTELEM zero_dst[4096]; //FIXME |
3042 const int b_stride = s->b_width << s->block_max_depth; | 3091 const int b_stride = s->b_width << s->block_max_depth; |
3043 const int w= p->width; | 3092 const int w= p->width; |
3044 const int h= p->height; | 3093 const int h= p->height; |
3045 int distortion= 0; | 3094 int distortion= 0; |
3046 int rate= 0; | 3095 int rate= 0; |
3396 } | 3445 } |
3397 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); | 3446 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); |
3398 } | 3447 } |
3399 } | 3448 } |
3400 | 3449 |
3401 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){ | 3450 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ |
3402 const int level= b->level; | 3451 const int level= b->level; |
3403 const int w= b->width; | 3452 const int w= b->width; |
3404 const int h= b->height; | 3453 const int h= b->height; |
3405 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); | 3454 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
3406 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); | 3455 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); |
3407 int x,y, thres1, thres2; | 3456 int x,y, thres1, thres2; |
3408 // START_TIMER | 3457 // START_TIMER |
3409 | 3458 |
3410 if(s->qlog == LOSSLESS_QLOG) return; | 3459 if(s->qlog == LOSSLESS_QLOG){ |
3460 for(y=0; y<h; y++) | |
3461 for(x=0; x<w; x++) | |
3462 dst[x + y*stride]= src[x + y*stride]; | |
3463 return; | |
3464 } | |
3411 | 3465 |
3412 bias= bias ? 0 : (3*qmul)>>3; | 3466 bias= bias ? 0 : (3*qmul)>>3; |
3413 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; | 3467 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; |
3414 thres2= 2*thres1; | 3468 thres2= 2*thres1; |
3415 | 3469 |
3420 | 3474 |
3421 if((unsigned)(i+thres1) > thres2){ | 3475 if((unsigned)(i+thres1) > thres2){ |
3422 if(i>=0){ | 3476 if(i>=0){ |
3423 i<<= QEXPSHIFT; | 3477 i<<= QEXPSHIFT; |
3424 i/= qmul; //FIXME optimize | 3478 i/= qmul; //FIXME optimize |
3425 src[x + y*stride]= i; | 3479 dst[x + y*stride]= i; |
3426 }else{ | 3480 }else{ |
3427 i= -i; | 3481 i= -i; |
3428 i<<= QEXPSHIFT; | 3482 i<<= QEXPSHIFT; |
3429 i/= qmul; //FIXME optimize | 3483 i/= qmul; //FIXME optimize |
3430 src[x + y*stride]= -i; | 3484 dst[x + y*stride]= -i; |
3431 } | 3485 } |
3432 }else | 3486 }else |
3433 src[x + y*stride]= 0; | 3487 dst[x + y*stride]= 0; |
3434 } | 3488 } |
3435 } | 3489 } |
3436 }else{ | 3490 }else{ |
3437 for(y=0; y<h; y++){ | 3491 for(y=0; y<h; y++){ |
3438 for(x=0; x<w; x++){ | 3492 for(x=0; x<w; x++){ |
3440 | 3494 |
3441 if((unsigned)(i+thres1) > thres2){ | 3495 if((unsigned)(i+thres1) > thres2){ |
3442 if(i>=0){ | 3496 if(i>=0){ |
3443 i<<= QEXPSHIFT; | 3497 i<<= QEXPSHIFT; |
3444 i= (i + bias) / qmul; //FIXME optimize | 3498 i= (i + bias) / qmul; //FIXME optimize |
3445 src[x + y*stride]= i; | 3499 dst[x + y*stride]= i; |
3446 }else{ | 3500 }else{ |
3447 i= -i; | 3501 i= -i; |
3448 i<<= QEXPSHIFT; | 3502 i<<= QEXPSHIFT; |
3449 i= (i + bias) / qmul; //FIXME optimize | 3503 i= (i + bias) / qmul; //FIXME optimize |
3450 src[x + y*stride]= -i; | 3504 dst[x + y*stride]= -i; |
3451 } | 3505 } |
3452 }else | 3506 }else |
3453 src[x + y*stride]= 0; | 3507 dst[x + y*stride]= 0; |
3454 } | 3508 } |
3455 } | 3509 } |
3456 } | 3510 } |
3457 if(level+1 == s->spatial_decomposition_count){ | 3511 if(level+1 == s->spatial_decomposition_count){ |
3458 // STOP_TIMER("quantize") | 3512 // STOP_TIMER("quantize") |
3459 } | 3513 } |
3460 } | 3514 } |
3461 | 3515 |
3462 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){ | 3516 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ |
3463 const int w= b->width; | 3517 const int w= b->width; |
3464 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); | 3518 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
3465 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | 3519 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3466 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; | 3520 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
3467 int x,y; | 3521 int x,y; |
3469 | 3523 |
3470 if(s->qlog == LOSSLESS_QLOG) return; | 3524 if(s->qlog == LOSSLESS_QLOG) return; |
3471 | 3525 |
3472 for(y=start_y; y<end_y; y++){ | 3526 for(y=start_y; y<end_y; y++){ |
3473 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); | 3527 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
3474 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | 3528 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
3475 for(x=0; x<w; x++){ | 3529 for(x=0; x<w; x++){ |
3476 int i= line[x]; | 3530 int i= line[x]; |
3477 if(i<0){ | 3531 if(i<0){ |
3478 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | 3532 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias |
3479 }else if(i>0){ | 3533 }else if(i>0){ |
3484 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ | 3538 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ |
3485 STOP_TIMER("dquant") | 3539 STOP_TIMER("dquant") |
3486 } | 3540 } |
3487 } | 3541 } |
3488 | 3542 |
3489 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){ | 3543 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ |
3490 const int w= b->width; | 3544 const int w= b->width; |
3491 const int h= b->height; | 3545 const int h= b->height; |
3492 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); | 3546 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
3493 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | 3547 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3494 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; | 3548 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
3510 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ | 3564 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ |
3511 STOP_TIMER("dquant") | 3565 STOP_TIMER("dquant") |
3512 } | 3566 } |
3513 } | 3567 } |
3514 | 3568 |
3515 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){ | 3569 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
3516 const int w= b->width; | 3570 const int w= b->width; |
3517 const int h= b->height; | 3571 const int h= b->height; |
3518 int x,y; | 3572 int x,y; |
3519 | 3573 |
3520 for(y=h-1; y>=0; y--){ | 3574 for(y=h-1; y>=0; y--){ |
3534 } | 3588 } |
3535 } | 3589 } |
3536 } | 3590 } |
3537 } | 3591 } |
3538 | 3592 |
3539 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ | 3593 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ |
3540 const int w= b->width; | 3594 const int w= b->width; |
3541 int x,y; | 3595 int x,y; |
3542 | 3596 |
3543 // START_TIMER | 3597 // START_TIMER |
3544 | 3598 |
3545 DWTELEM * line=0; // silence silly "could be used without having been initialized" warning | 3599 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning |
3546 DWTELEM * prev; | 3600 IDWTELEM * prev; |
3547 | 3601 |
3548 if (start_y != 0) | 3602 if (start_y != 0) |
3549 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | 3603 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
3550 | 3604 |
3551 for(y=start_y; y<end_y; y++){ | 3605 for(y=start_y; y<end_y; y++){ |
3568 } | 3622 } |
3569 | 3623 |
3570 // STOP_TIMER("correlate") | 3624 // STOP_TIMER("correlate") |
3571 } | 3625 } |
3572 | 3626 |
3573 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){ | 3627 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
3574 const int w= b->width; | 3628 const int w= b->width; |
3575 const int h= b->height; | 3629 const int h= b->height; |
3576 int x,y; | 3630 int x,y; |
3577 | 3631 |
3578 for(y=0; y<h; y++){ | 3632 for(y=0; y<h; y++){ |
3778 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); | 3832 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); |
3779 | 3833 |
3780 width= s->avctx->width; | 3834 width= s->avctx->width; |
3781 height= s->avctx->height; | 3835 height= s->avctx->height; |
3782 | 3836 |
3783 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); | 3837 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); |
3838 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here | |
3784 | 3839 |
3785 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; | 3840 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; |
3786 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0; | 3841 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0; |
3787 | 3842 |
3788 for(plane_index=0; plane_index<3; plane_index++){ | 3843 for(plane_index=0; plane_index<3; plane_index++){ |
3816 } | 3871 } |
3817 if(orientation>1){ | 3872 if(orientation>1){ |
3818 b->buf += b->stride>>1; | 3873 b->buf += b->stride>>1; |
3819 b->buf_y_offset = b->stride_line >> 1; | 3874 b->buf_y_offset = b->stride_line >> 1; |
3820 } | 3875 } |
3876 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); | |
3821 | 3877 |
3822 if(level) | 3878 if(level) |
3823 b->parent= &s->plane[plane_index].band[level-1][orientation]; | 3879 b->parent= &s->plane[plane_index].band[level-1][orientation]; |
3824 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); | 3880 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); |
3825 } | 3881 } |
3858 int level, orientation, delta_qlog; | 3914 int level, orientation, delta_qlog; |
3859 | 3915 |
3860 for(level=0; level<s->spatial_decomposition_count; level++){ | 3916 for(level=0; level<s->spatial_decomposition_count; level++){ |
3861 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | 3917 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
3862 SubBand *b= &s->plane[0].band[level][orientation]; | 3918 SubBand *b= &s->plane[0].band[level][orientation]; |
3863 DWTELEM *buf= b->buf; | 3919 IDWTELEM *buf= b->ibuf; |
3864 const int w= b->width; | 3920 const int w= b->width; |
3865 const int h= b->height; | 3921 const int h= b->height; |
3866 const int stride= b->stride; | 3922 const int stride= b->stride; |
3867 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); | 3923 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); |
3868 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | 3924 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3869 const int qdiv= (1<<16)/qmul; | 3925 const int qdiv= (1<<16)/qmul; |
3870 int x, y; | 3926 int x, y; |
3927 //FIXME this is ugly | |
3928 for(y=0; y<h; y++) | |
3929 for(x=0; x<w; x++) | |
3930 buf[x+y*stride]= b->buf[x+y*stride]; | |
3871 if(orientation==0) | 3931 if(orientation==0) |
3872 decorrelate(s, b, buf, stride, 1, 0); | 3932 decorrelate(s, b, buf, stride, 1, 0); |
3873 for(y=0; y<h; y++) | 3933 for(y=0; y<h; y++) |
3874 for(x=0; x<w; x++) | 3934 for(x=0; x<w; x++) |
3875 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; | 3935 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; |
3876 if(orientation==0) | |
3877 correlate(s, b, buf, stride, 1, 0); | |
3878 } | 3936 } |
3879 } | 3937 } |
3880 | 3938 |
3881 /* ugly, ratecontrol just takes a sqrt again */ | 3939 /* ugly, ratecontrol just takes a sqrt again */ |
3882 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; | 3940 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; |
3905 int level, orientation, x, y; | 3963 int level, orientation, x, y; |
3906 | 3964 |
3907 for(level=0; level<s->spatial_decomposition_count; level++){ | 3965 for(level=0; level<s->spatial_decomposition_count; level++){ |
3908 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | 3966 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
3909 SubBand *b= &p->band[level][orientation]; | 3967 SubBand *b= &p->band[level][orientation]; |
3910 DWTELEM *buf= b->buf; | 3968 IDWTELEM *ibuf= b->ibuf; |
3911 int64_t error=0; | 3969 int64_t error=0; |
3912 | 3970 |
3913 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height); | 3971 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height); |
3914 buf[b->width/2 + b->height/2*b->stride]= 256*256; | 3972 ibuf[b->width/2 + b->height/2*b->stride]= 256*16; |
3915 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); | 3973 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); |
3916 for(y=0; y<height; y++){ | 3974 for(y=0; y<height; y++){ |
3917 for(x=0; x<width; x++){ | 3975 for(x=0; x<width; x++){ |
3918 int64_t d= s->spatial_dwt_buffer[x + y*width]; | 3976 int64_t d= s->spatial_idwt_buffer[x + y*width]*16; |
3919 error += d*d; | 3977 error += d*d; |
3920 } | 3978 } |
3921 } | 3979 } |
3922 | 3980 |
3923 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); | 3981 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); |
4170 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ | 4228 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ |
4171 //FIXME optimize | 4229 //FIXME optimize |
4172 if(pict->data[plane_index]) //FIXME gray hack | 4230 if(pict->data[plane_index]) //FIXME gray hack |
4173 for(y=0; y<h; y++){ | 4231 for(y=0; y<h; y++){ |
4174 for(x=0; x<w; x++){ | 4232 for(x=0; x<w; x++){ |
4175 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; | 4233 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; |
4176 } | 4234 } |
4177 } | 4235 } |
4178 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0); | 4236 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); |
4179 | 4237 |
4180 if( plane_index==0 | 4238 if( plane_index==0 |
4181 && pict->pict_type == P_TYPE | 4239 && pict->pict_type == P_TYPE |
4182 && !(avctx->flags&CODEC_FLAG_PASS2) | 4240 && !(avctx->flags&CODEC_FLAG_PASS2) |
4183 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ | 4241 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ |
4190 } | 4248 } |
4191 | 4249 |
4192 if(s->qlog == LOSSLESS_QLOG){ | 4250 if(s->qlog == LOSSLESS_QLOG){ |
4193 for(y=0; y<h; y++){ | 4251 for(y=0; y<h; y++){ |
4194 for(x=0; x<w; x++){ | 4252 for(x=0; x<w; x++){ |
4195 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; | 4253 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; |
4196 } | 4254 } |
4197 } | 4255 } |
4198 }else{ | 4256 }else{ |
4199 for(y=0; y<h; y++){ | 4257 for(y=0; y<h; y++){ |
4200 for(x=0; x<w; x++){ | 4258 for(x=0; x<w; x++){ |
4201 s->spatial_dwt_buffer[y*w + x]<<=ENCODER_EXTRA_BITS; | 4259 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS; |
4202 } | 4260 } |
4203 } | 4261 } |
4204 } | 4262 } |
4205 | 4263 |
4206 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | 4264 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); |
4221 | 4279 |
4222 for(level=0; level<s->spatial_decomposition_count; level++){ | 4280 for(level=0; level<s->spatial_decomposition_count; level++){ |
4223 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | 4281 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
4224 SubBand *b= &p->band[level][orientation]; | 4282 SubBand *b= &p->band[level][orientation]; |
4225 | 4283 |
4226 quantize(s, b, b->buf, b->stride, s->qbias); | 4284 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); |
4227 if(orientation==0) | 4285 if(orientation==0) |
4228 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0); | 4286 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0); |
4229 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation); | 4287 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); |
4230 assert(b->parent==NULL || b->parent->stride == b->stride*2); | 4288 assert(b->parent==NULL || b->parent->stride == b->stride*2); |
4231 if(orientation==0) | 4289 if(orientation==0) |
4232 correlate(s, b, b->buf, b->stride, 1, 0); | 4290 correlate(s, b, b->ibuf, b->stride, 1, 0); |
4233 } | 4291 } |
4234 } | 4292 } |
4235 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits); | 4293 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits); |
4236 | 4294 |
4237 for(level=0; level<s->spatial_decomposition_count; level++){ | 4295 for(level=0; level<s->spatial_decomposition_count; level++){ |
4238 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | 4296 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
4239 SubBand *b= &p->band[level][orientation]; | 4297 SubBand *b= &p->band[level][orientation]; |
4240 | 4298 |
4241 dequantize(s, b, b->buf, b->stride); | 4299 dequantize(s, b, b->ibuf, b->stride); |
4242 } | 4300 } |
4243 } | 4301 } |
4244 | 4302 |
4245 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | 4303 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); |
4246 if(s->qlog == LOSSLESS_QLOG){ | 4304 if(s->qlog == LOSSLESS_QLOG){ |
4247 for(y=0; y<h; y++){ | 4305 for(y=0; y<h; y++){ |
4248 for(x=0; x<w; x++){ | 4306 for(x=0; x<w; x++){ |
4249 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS; | 4307 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; |
4250 } | 4308 } |
4251 } | 4309 } |
4252 } | 4310 } |
4253 {START_TIMER | 4311 {START_TIMER |
4254 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); | 4312 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
4255 STOP_TIMER("pred-conv")} | 4313 STOP_TIMER("pred-conv")} |
4256 }else{ | 4314 }else{ |
4257 //ME/MC only | 4315 //ME/MC only |
4258 if(pict->pict_type == I_TYPE){ | 4316 if(pict->pict_type == I_TYPE){ |
4259 for(y=0; y<h; y++){ | 4317 for(y=0; y<h; y++){ |
4261 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= | 4319 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= |
4262 pict->data[plane_index][y*pict->linesize[plane_index] + x]; | 4320 pict->data[plane_index][y*pict->linesize[plane_index] + x]; |
4263 } | 4321 } |
4264 } | 4322 } |
4265 }else{ | 4323 }else{ |
4266 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | 4324 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); |
4267 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); | 4325 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
4268 } | 4326 } |
4269 } | 4327 } |
4270 if(s->avctx->flags&CODEC_FLAG_PSNR){ | 4328 if(s->avctx->flags&CODEC_FLAG_PSNR){ |
4271 int64_t error= 0; | 4329 int64_t error= 0; |
4272 | 4330 |
4312 | 4370 |
4313 static void common_end(SnowContext *s){ | 4371 static void common_end(SnowContext *s){ |
4314 int plane_index, level, orientation, i; | 4372 int plane_index, level, orientation, i; |
4315 | 4373 |
4316 av_freep(&s->spatial_dwt_buffer); | 4374 av_freep(&s->spatial_dwt_buffer); |
4375 av_freep(&s->spatial_idwt_buffer); | |
4317 | 4376 |
4318 av_freep(&s->m.me.scratchpad); | 4377 av_freep(&s->m.me.scratchpad); |
4319 av_freep(&s->m.me.map); | 4378 av_freep(&s->m.me.map); |
4320 av_freep(&s->m.me.score_map); | 4379 av_freep(&s->m.me.score_map); |
4321 av_freep(&s->m.obmc_scratchpad); | 4380 av_freep(&s->m.obmc_scratchpad); |
4358 avctx->pix_fmt= PIX_FMT_YUV420P; | 4417 avctx->pix_fmt= PIX_FMT_YUV420P; |
4359 | 4418 |
4360 common_init(avctx); | 4419 common_init(avctx); |
4361 | 4420 |
4362 block_size = MB_SIZE >> s->block_max_depth; | 4421 block_size = MB_SIZE >> s->block_max_depth; |
4363 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer); | 4422 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_idwt_buffer); |
4364 | 4423 |
4365 return 0; | 4424 return 0; |
4366 } | 4425 } |
4367 | 4426 |
4368 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ | 4427 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ |
4393 int x, y; | 4452 int x, y; |
4394 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ | 4453 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ |
4395 | 4454 |
4396 if(s->avctx->debug&2048){ | 4455 if(s->avctx->debug&2048){ |
4397 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | 4456 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); |
4398 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); | 4457 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
4399 | 4458 |
4400 for(y=0; y<h; y++){ | 4459 for(y=0; y<h; y++){ |
4401 for(x=0; x<w; x++){ | 4460 for(x=0; x<w; x++){ |
4402 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; | 4461 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; |
4403 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; | 4462 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; |
4458 if (orientation == 0){ | 4517 if (orientation == 0){ |
4459 SubBand * correlate_band = &p->band[0][0]; | 4518 SubBand * correlate_band = &p->band[0][0]; |
4460 int correlate_end_y = FFMIN(b->height, end_y + 1); | 4519 int correlate_end_y = FFMIN(b->height, end_y + 1); |
4461 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); | 4520 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); |
4462 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); | 4521 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); |
4463 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); | 4522 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); |
4464 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y); | 4523 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); |
4465 } | 4524 } |
4466 else | 4525 else |
4467 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); | 4526 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); |
4468 } | 4527 } |
4469 } | 4528 } |
4478 STOP_TIMER("idwt slice");} | 4537 STOP_TIMER("idwt slice");} |
4479 | 4538 |
4480 | 4539 |
4481 if(s->qlog == LOSSLESS_QLOG){ | 4540 if(s->qlog == LOSSLESS_QLOG){ |
4482 for(; yq<slice_h && yq<h; yq++){ | 4541 for(; yq<slice_h && yq<h; yq++){ |
4483 DWTELEM * line = slice_buffer_get_line(&s->sb, yq); | 4542 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); |
4484 for(x=0; x<w; x++){ | 4543 for(x=0; x<w; x++){ |
4485 line[x] <<= FRAC_BITS; | 4544 line[x] <<= FRAC_BITS; |
4486 } | 4545 } |
4487 } | 4546 } |
4488 } | 4547 } |
4489 | 4548 |
4490 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y); | 4549 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); |
4491 | 4550 |
4492 y = FFMIN(p->height, slice_starty); | 4551 y = FFMIN(p->height, slice_starty); |
4493 end_y = FFMIN(p->height, slice_h); | 4552 end_y = FFMIN(p->height, slice_h); |
4494 while(y < end_y) | 4553 while(y < end_y) |
4495 slice_buffer_release(&s->sb, y++); | 4554 slice_buffer_release(&s->sb, y++); |