comparison svq3.c @ 1265:09165461996f libavcodec

optimize
author michaelni
date Thu, 15 May 2003 00:43:43 +0000
parents 2fa34e615c76
children 85b71f9f7450
comparison
legend: equal | deleted | inserted | replaced
left column: 1264:2fa34e615c76 (parent) | right column: 1265:09165461996f (this changeset)
41 /** 41 /**
42 * @file svq3.c 42 * @file svq3.c
43 * svq3 decoder. 43 * svq3 decoder.
44 */ 44 */
45 45
46 #define FULLPEL_MODE 1
47 #define HALFPEL_MODE 2
48 #define THIRDPEL_MODE 3
49
46 /* dual scan (from some older h264 draft) 50 /* dual scan (from some older h264 draft)
47 o-->o-->o o 51 o-->o-->o o
48 | /| 52 | /|
49 o o o / o 53 o o o / o
50 | / | |/ | 54 | / | |/ |
372 break; 376 break;
373 } 377 }
374 } 378 }
375 379
376 static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y, 380 static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y,
377 int width, int height, int mx, int my) { 381 int width, int height, int mx, int my, int dxy) {
378 uint8_t *src, *dest; 382 uint8_t *src, *dest;
379 int i, emu = 0; 383 int i, emu = 0;
380 const int sx = ((unsigned) (mx + 0x7FFFFFFE)) % 6; 384
381 const int sy = ((unsigned) (my + 0x7FFFFFFE)) % 6; 385 mx += x;
382 const int dxy= 6*sy + sx; 386 my += y;
383 387
384 /* decode and clip motion vector to frame border (+16) */
385 mx = x + (mx - sx) / 6;
386 my = y + (my - sy) / 6;
387
388 if (mx < 0 || mx >= (s->width - width - 1) || 388 if (mx < 0 || mx >= (s->width - width - 1) ||
389 my < 0 || my >= (s->height - height - 1)) { 389 my < 0 || my >= (s->height - height - 1)) {
390 390
391 if ((s->flags & CODEC_FLAG_EMU_EDGE)) { 391 if ((s->flags & CODEC_FLAG_EMU_EDGE)) {
392 emu = 1; 392 emu = 1;
439 h->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF; 439 h->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
440 h->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF; 440 h->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
441 h->topright_samples_available = 0xFFFF; 441 h->topright_samples_available = 0xFFFF;
442 442
443 if (mb_type == 0) { /* SKIP */ 443 if (mb_type == 0) { /* SKIP */
444 svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0); 444 svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0);
445 445
446 cbp = 0; 446 cbp = 0;
447 mb_type = MB_TYPE_SKIP; 447 mb_type = MB_TYPE_SKIP;
448 } else if (mb_type < 8) { /* INTER */ 448 } else if (mb_type < 8) { /* INTER */
449 if (h->thirdpel_flag && h->halfpel_flag == !get_bits (&s->gb, 1)) { 449 if (h->thirdpel_flag && h->halfpel_flag == !get_bits (&s->gb, 1)) {
450 mode = 3; /* thirdpel */ 450 mode = THIRDPEL_MODE;
451 } else if (h->halfpel_flag && h->thirdpel_flag == !get_bits (&s->gb, 1)) { 451 } else if (h->halfpel_flag && h->thirdpel_flag == !get_bits (&s->gb, 1)) {
452 mode = 2; /* halfpel */ 452 mode = HALFPEL_MODE;
453 } else { 453 } else {
454 mode = 1; /* fullpel */ 454 mode = FULLPEL_MODE;
455 } 455 }
456 456
457 /* fill caches */ 457 /* fill caches */
458 /* note ref_cache[0] should contain here: 458 /* note ref_cache[0] should contain here:
459 ???????? 459 ????????
494 part_width = ((mb_type & 5) == 5) ? 4 : 8 << (mb_type & 1); 494 part_width = ((mb_type & 5) == 5) ? 4 : 8 << (mb_type & 1);
495 part_height = 16 >> ((unsigned) mb_type / 3); 495 part_height = 16 >> ((unsigned) mb_type / 3);
496 496
497 for (i=0; i < 16; i+=part_height) { 497 for (i=0; i < 16; i+=part_height) {
498 for (j=0; j < 16; j+=part_width) { 498 for (j=0; j < 16; j+=part_width) {
499 int dxy;
499 x = 16*s->mb_x + j; 500 x = 16*s->mb_x + j;
500 y = 16*s->mb_y + i; 501 y = 16*s->mb_y + i;
501 k = ((j>>2)&1) + ((i>>1)&2) + ((j>>1)&4) + (i&8); 502 k = ((j>>2)&1) + ((i>>1)&2) + ((j>>1)&4) + (i&8);
502 503
503 pred_motion (h, k, (part_width >> 2), 0, 1, &mx, &my); 504 pred_motion (h, k, (part_width >> 2), 0, 1, &mx, &my);
511 dx = svq3_get_se_golomb (&s->gb); 512 dx = svq3_get_se_golomb (&s->gb);
512 513
513 if (dx == INVALID_VLC || dy == INVALID_VLC) { 514 if (dx == INVALID_VLC || dy == INVALID_VLC) {
514 return -1; 515 return -1;
515 } 516 }
516
517 /* compute motion vector */ 517 /* compute motion vector */
518 if (mode == 3) { 518 if (mode == THIRDPEL_MODE) {
519 mx = ((mx + 1) & ~0x1) + 2*dx; 519 int fx, fy;
520 my = ((my + 1) & ~0x1) + 2*dy; 520 mx = ((mx + 1)>>1) + dx;
521 } else if (mode == 2) { 521 my = ((my + 1)>>1) + dy;
522 mx = (mx + 1) - ((unsigned) (0x7FFFFFFF + mx) % 3) + 3*dx; 522 fx= ((unsigned)(mx + 0x3000))/3 - 0x1000;
523 my = (my + 1) - ((unsigned) (0x7FFFFFFF + my) % 3) + 3*dy; 523 fy= ((unsigned)(my + 0x3000))/3 - 0x1000;
524 } else if (mode == 1) { 524 dxy= 2*(mx - 3*fx) + 2*6*(my - 3*fy);
525 mx = (mx + 3) - ((unsigned) (0x7FFFFFFB + mx) % 6) + 6*dx; 525
526 my = (my + 3) - ((unsigned) (0x7FFFFFFB + my) % 6) + 6*dy; 526 svq3_mc_dir_part (s, x, y, part_width, part_height, fx, fy, dxy);
527 mx += mx;
528 my += my;
529 } else if (mode == HALFPEL_MODE) {
530 mx = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000;
531 my = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000;
532 dxy= 3*(mx&1) + 6*3*(my&1);
533
534 svq3_mc_dir_part (s, x, y, part_width, part_height, mx>>1, my>>1, dxy);
535 mx *= 3;
536 my *= 3;
537 } else {
538 assert(mode == FULLPEL_MODE);
539 mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000;
540 my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000;
541
542 svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my, 0);
543 mx *= 6;
544 my *= 6;
527 } 545 }
528 546
529 /* update mv_cache */ 547 /* update mv_cache */
530 fill_rectangle(h->mv_cache[0][scan8[k]], part_width>>2, part_height>>2, 8, (mx&0xFFFF)+(my<<16), 4); 548 fill_rectangle(h->mv_cache[0][scan8[k]], part_width>>2, part_height>>2, 8, (mx&0xFFFF)+(my<<16), 4);
531
532 svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my);
533 } 549 }
534 } 550 }
535 551
536 for (i=0; i < 4; i++) { 552 for (i=0; i < 4; i++) {
537 memcpy (s->current_picture.motion_val[0][b_xy + i*h->b_stride], h->mv_cache[0][scan8[0] + 8*i], 4*2*sizeof(int16_t)); 553 memcpy (s->current_picture.motion_val[0][b_xy + i*h->b_stride], h->mv_cache[0][scan8[0] + 8*i], 4*2*sizeof(int16_t));