Mercurial > libavcodec.hg
comparison vp3.c @ 11133:cd2956d08cc1 libavcodec
Don't pre-calculate first_pixel
3.6% faster on Elephants_Dream_HD-q7-aq7.ogg on my penryn
author | conrad |
---|---|
date | Fri, 12 Feb 2010 22:01:38 +0000 |
parents | 449c12b6c3a0 |
children | 68e16ac22032 |
comparison
equal
deleted
inserted
replaced
11132:449c12b6c3a0 | 11133:cd2956d08cc1 |
---|---|
51 } Coeff; | 51 } Coeff; |
52 | 52 |
53 //FIXME split things out into their own arrays | 53 //FIXME split things out into their own arrays |
54 typedef struct Vp3Fragment { | 54 typedef struct Vp3Fragment { |
55 Coeff *next_coeff; | 55 Coeff *next_coeff; |
56 /* address of first pixel taking into account which plane the fragment | |
57 * lives on as well as the plane stride */ | |
58 int first_pixel; | |
59 /* this is the macroblock that the fragment belongs to */ | 56 /* this is the macroblock that the fragment belongs to */ |
60 uint16_t macroblock; | 57 uint16_t macroblock; |
61 uint8_t coding_method; | 58 uint8_t coding_method; |
62 int8_t motion_x; | 59 int8_t motion_x; |
63 int8_t motion_y; | 60 int8_t motion_y; |
161 Vp3Fragment *all_fragments; | 158 Vp3Fragment *all_fragments; |
162 uint8_t *coeff_counts; | 159 uint8_t *coeff_counts; |
163 Coeff *coeffs; | 160 Coeff *coeffs; |
164 Coeff *next_coeff; | 161 Coeff *next_coeff; |
165 int fragment_start[3]; | 162 int fragment_start[3]; |
163 int data_offset[3]; | |
166 | 164 |
167 ScanTable scantable; | 165 ScanTable scantable; |
168 | 166 |
169 /* tables */ | 167 /* tables */ |
170 uint16_t coded_dc_scale_factor[64]; | 168 uint16_t coded_dc_scale_factor[64]; |
176 | 174 |
177 /* this is a list of indexes into the all_fragments array indicating | 175 /* this is a list of indexes into the all_fragments array indicating |
178 * which of the fragments are coded */ | 176 * which of the fragments are coded */ |
179 int *coded_fragment_list; | 177 int *coded_fragment_list; |
180 int coded_fragment_list_index; | 178 int coded_fragment_list_index; |
181 int pixel_addresses_initialized; | |
182 | 179 |
183 /* track which fragments have already been decoded; called 'fast' | 180 /* track which fragments have already been decoded; called 'fast' |
184 * because this data structure avoids having to iterate through every | 181 * because this data structure avoids having to iterate through every |
185 * fragment in coded_fragment_list; once a fragment has been fully | 182 * fragment in coded_fragment_list; once a fragment has been fully |
186 * decoded, it is removed from this list */ | 183 * decoded, it is removed from this list */ |
1399 int height = s->fragment_height >> !!plane; | 1396 int height = s->fragment_height >> !!plane; |
1400 int fragment = s->fragment_start [plane] + ystart * width; | 1397 int fragment = s->fragment_start [plane] + ystart * width; |
1401 int stride = s->current_frame.linesize[plane]; | 1398 int stride = s->current_frame.linesize[plane]; |
1402 uint8_t *plane_data = s->current_frame.data [plane]; | 1399 uint8_t *plane_data = s->current_frame.data [plane]; |
1403 if (!s->flipped_image) stride = -stride; | 1400 if (!s->flipped_image) stride = -stride; |
1401 plane_data += s->data_offset[plane] + 8*ystart*stride; | |
1404 | 1402 |
1405 for (y = ystart; y < yend; y++) { | 1403 for (y = ystart; y < yend; y++) { |
1406 | 1404 |
1407 for (x = 0; x < width; x++) { | 1405 for (x = 0; x < width; x++) { |
1408 /* This code basically just deblocks on the edges of coded blocks. | 1406 /* This code basically just deblocks on the edges of coded blocks. |
1412 if( s->all_fragments[fragment].coding_method != MODE_COPY ) | 1410 if( s->all_fragments[fragment].coding_method != MODE_COPY ) |
1413 { | 1411 { |
1414 /* do not perform left edge filter for left columns frags */ | 1412 /* do not perform left edge filter for left columns frags */ |
1415 if (x > 0) { | 1413 if (x > 0) { |
1416 s->dsp.vp3_h_loop_filter( | 1414 s->dsp.vp3_h_loop_filter( |
1417 plane_data + s->all_fragments[fragment].first_pixel, | 1415 plane_data + 8*x, |
1418 stride, bounding_values); | 1416 stride, bounding_values); |
1419 } | 1417 } |
1420 | 1418 |
1421 /* do not perform top edge filter for top row fragments */ | 1419 /* do not perform top edge filter for top row fragments */ |
1422 if (y > 0) { | 1420 if (y > 0) { |
1423 s->dsp.vp3_v_loop_filter( | 1421 s->dsp.vp3_v_loop_filter( |
1424 plane_data + s->all_fragments[fragment].first_pixel, | 1422 plane_data + 8*x, |
1425 stride, bounding_values); | 1423 stride, bounding_values); |
1426 } | 1424 } |
1427 | 1425 |
1428 /* do not perform right edge filter for right column | 1426 /* do not perform right edge filter for right column |
1429 * fragments or if right fragment neighbor is also coded | 1427 * fragments or if right fragment neighbor is also coded |
1430 * in this frame (it will be filtered in next iteration) */ | 1428 * in this frame (it will be filtered in next iteration) */ |
1431 if ((x < width - 1) && | 1429 if ((x < width - 1) && |
1432 (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) { | 1430 (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) { |
1433 s->dsp.vp3_h_loop_filter( | 1431 s->dsp.vp3_h_loop_filter( |
1434 plane_data + s->all_fragments[fragment + 1].first_pixel, | 1432 plane_data + 8*x + 8, |
1435 stride, bounding_values); | 1433 stride, bounding_values); |
1436 } | 1434 } |
1437 | 1435 |
1438 /* do not perform bottom edge filter for bottom row | 1436 /* do not perform bottom edge filter for bottom row |
1439 * fragments or if bottom fragment neighbor is also coded | 1437 * fragments or if bottom fragment neighbor is also coded |
1440 * in this frame (it will be filtered in the next row) */ | 1438 * in this frame (it will be filtered in the next row) */ |
1441 if ((y < height - 1) && | 1439 if ((y < height - 1) && |
1442 (s->all_fragments[fragment + width].coding_method == MODE_COPY)) { | 1440 (s->all_fragments[fragment + width].coding_method == MODE_COPY)) { |
1443 s->dsp.vp3_v_loop_filter( | 1441 s->dsp.vp3_v_loop_filter( |
1444 plane_data + s->all_fragments[fragment + width].first_pixel, | 1442 plane_data + 8*x + 8*stride, |
1445 stride, bounding_values); | 1443 stride, bounding_values); |
1446 } | 1444 } |
1447 } | 1445 } |
1448 | 1446 |
1449 fragment++; | 1447 fragment++; |
1450 } | 1448 } |
1449 plane_data += 8*stride; | |
1451 } | 1450 } |
1452 } | 1451 } |
1453 | 1452 |
1454 /** | 1453 /** |
1455 * called when all pixels up to row y are complete | 1454 * called when all pixels up to row y are complete |
1499 | 1498 |
1500 if (slice >= s->macroblock_height) | 1499 if (slice >= s->macroblock_height) |
1501 return; | 1500 return; |
1502 | 1501 |
1503 for (plane = 0; plane < 3; plane++) { | 1502 for (plane = 0; plane < 3; plane++) { |
1504 uint8_t *output_plane = s->current_frame.data [plane]; | 1503 uint8_t *output_plane = s->current_frame.data [plane] + s->data_offset[plane]; |
1505 uint8_t * last_plane = s-> last_frame.data [plane]; | 1504 uint8_t * last_plane = s-> last_frame.data [plane] + s->data_offset[plane]; |
1506 uint8_t *golden_plane = s-> golden_frame.data [plane]; | 1505 uint8_t *golden_plane = s-> golden_frame.data [plane] + s->data_offset[plane]; |
1507 int stride = s->current_frame.linesize[plane]; | 1506 int stride = s->current_frame.linesize[plane]; |
1508 int plane_width = s->width >> !!plane; | 1507 int plane_width = s->width >> !!plane; |
1509 int plane_height = s->height >> !!plane; | 1508 int plane_height = s->height >> !!plane; |
1510 int y = slice * FRAGMENT_PIXELS << !plane ; | 1509 int y = slice * FRAGMENT_PIXELS << !plane ; |
1511 int slice_height = y + (FRAGMENT_PIXELS << !plane); | 1510 int slice_height = y + (FRAGMENT_PIXELS << !plane); |
1520 /* for each fragment row in the slice (both of them)... */ | 1519 /* for each fragment row in the slice (both of them)... */ |
1521 for (; y < slice_height; y += 8) { | 1520 for (; y < slice_height; y += 8) { |
1522 | 1521 |
1523 /* for each fragment in a row... */ | 1522 /* for each fragment in a row... */ |
1524 for (x = 0; x < plane_width; x += 8, i++) { | 1523 for (x = 0; x < plane_width; x += 8, i++) { |
1524 int first_pixel = y*stride + x; | |
1525 | 1525 |
1526 if ((i < 0) || (i >= s->fragment_count)) { | 1526 if ((i < 0) || (i >= s->fragment_count)) { |
1527 av_log(s->avctx, AV_LOG_ERROR, " vp3:render_slice(): bad fragment number (%d)\n", i); | 1527 av_log(s->avctx, AV_LOG_ERROR, " vp3:render_slice(): bad fragment number (%d)\n", i); |
1528 return; | 1528 return; |
1529 } | 1529 } |
1536 (s->all_fragments[i].coding_method == MODE_GOLDEN_MV)) | 1536 (s->all_fragments[i].coding_method == MODE_GOLDEN_MV)) |
1537 motion_source= golden_plane; | 1537 motion_source= golden_plane; |
1538 else | 1538 else |
1539 motion_source= last_plane; | 1539 motion_source= last_plane; |
1540 | 1540 |
1541 motion_source += s->all_fragments[i].first_pixel; | 1541 motion_source += first_pixel; |
1542 motion_halfpel_index = 0; | 1542 motion_halfpel_index = 0; |
1543 | 1543 |
1544 /* sort out the motion vector if this fragment is coded | 1544 /* sort out the motion vector if this fragment is coded |
1545 * using a motion vector method */ | 1545 * using a motion vector method */ |
1546 if ((s->all_fragments[i].coding_method > MODE_INTRA) && | 1546 if ((s->all_fragments[i].coding_method > MODE_INTRA) && |
1582 put_no_rnd_pixels_l2 which would look more like the | 1582 put_no_rnd_pixels_l2 which would look more like the |
1583 VP3 source but this would be slower as | 1583 VP3 source but this would be slower as |
1584 put_no_rnd_pixels_tab is better optimized */ | 1584 put_no_rnd_pixels_tab is better optimized */ |
1585 if(motion_halfpel_index != 3){ | 1585 if(motion_halfpel_index != 3){ |
1586 s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index]( | 1586 s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index]( |
1587 output_plane + s->all_fragments[i].first_pixel, | 1587 output_plane + first_pixel, |
1588 motion_source, stride, 8); | 1588 motion_source, stride, 8); |
1589 }else{ | 1589 }else{ |
1590 int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1 | 1590 int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1 |
1591 s->dsp.put_no_rnd_pixels_l2[1]( | 1591 s->dsp.put_no_rnd_pixels_l2[1]( |
1592 output_plane + s->all_fragments[i].first_pixel, | 1592 output_plane + first_pixel, |
1593 motion_source - d, | 1593 motion_source - d, |
1594 motion_source + stride + 1 + d, | 1594 motion_source + stride + 1 + d, |
1595 stride, 8); | 1595 stride, 8); |
1596 } | 1596 } |
1597 dequantizer = s->qmat[s->all_fragments[i].qpi][1][plane]; | 1597 dequantizer = s->qmat[s->all_fragments[i].qpi][1][plane]; |
1620 | 1620 |
1621 if (s->all_fragments[i].coding_method == MODE_INTRA) { | 1621 if (s->all_fragments[i].coding_method == MODE_INTRA) { |
1622 if(s->avctx->idct_algo!=FF_IDCT_VP3) | 1622 if(s->avctx->idct_algo!=FF_IDCT_VP3) |
1623 block[0] += 128<<3; | 1623 block[0] += 128<<3; |
1624 s->dsp.idct_put( | 1624 s->dsp.idct_put( |
1625 output_plane + s->all_fragments[i].first_pixel, | 1625 output_plane + first_pixel, |
1626 stride, | 1626 stride, |
1627 block); | 1627 block); |
1628 } else { | 1628 } else { |
1629 s->dsp.idct_add( | 1629 s->dsp.idct_add( |
1630 output_plane + s->all_fragments[i].first_pixel, | 1630 output_plane + first_pixel, |
1631 stride, | 1631 stride, |
1632 block); | 1632 block); |
1633 } | 1633 } |
1634 } else { | 1634 } else { |
1635 | 1635 |
1636 /* copy directly from the previous frame */ | 1636 /* copy directly from the previous frame */ |
1637 s->dsp.put_pixels_tab[1][0]( | 1637 s->dsp.put_pixels_tab[1][0]( |
1638 output_plane + s->all_fragments[i].first_pixel, | 1638 output_plane + first_pixel, |
1639 last_plane + s->all_fragments[i].first_pixel, | 1639 last_plane + first_pixel, |
1640 stride, 8); | 1640 stride, 8); |
1641 | 1641 |
1642 } | 1642 } |
1643 } | 1643 } |
1644 // Filter the previous block row. We can't filter the current row yet | 1644 // Filter the previous block row. We can't filter the current row yet |
1657 */ | 1657 */ |
1658 | 1658 |
1659 // now that we've filtered the last rows, they're safe to display | 1659 // now that we've filtered the last rows, they're safe to display |
1660 if (slice) | 1660 if (slice) |
1661 vp3_draw_horiz_band(s, 16*slice); | 1661 vp3_draw_horiz_band(s, 16*slice); |
1662 } | |
1663 | |
1664 /* | |
1665 * This function computes the first pixel addresses for each fragment. | |
1666 * This function needs to be invoked after the first frame is allocated | |
1667 * so that it has access to the plane strides. | |
1668 */ | |
1669 static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s) | |
1670 { | |
1671 #define Y_INITIAL(chroma_shift) s->flipped_image ? 1 : s->fragment_height >> chroma_shift | |
1672 #define Y_FINISHED(chroma_shift) s->flipped_image ? y <= s->fragment_height >> chroma_shift : y > 0 | |
1673 | |
1674 int i, x, y; | |
1675 const int y_inc = s->flipped_image ? 1 : -1; | |
1676 | |
1677 /* figure out the first pixel addresses for each of the fragments */ | |
1678 /* Y plane */ | |
1679 i = 0; | |
1680 for (y = Y_INITIAL(0); Y_FINISHED(0); y += y_inc) { | |
1681 for (x = 0; x < s->fragment_width; x++) { | |
1682 s->all_fragments[i++].first_pixel = | |
1683 s->golden_frame.linesize[0] * y * FRAGMENT_PIXELS - | |
1684 s->golden_frame.linesize[0] + | |
1685 x * FRAGMENT_PIXELS; | |
1686 } | |
1687 } | |
1688 | |
1689 /* U plane */ | |
1690 i = s->fragment_start[1]; | |
1691 for (y = Y_INITIAL(1); Y_FINISHED(1); y += y_inc) { | |
1692 for (x = 0; x < s->fragment_width / 2; x++) { | |
1693 s->all_fragments[i++].first_pixel = | |
1694 s->golden_frame.linesize[1] * y * FRAGMENT_PIXELS - | |
1695 s->golden_frame.linesize[1] + | |
1696 x * FRAGMENT_PIXELS; | |
1697 } | |
1698 } | |
1699 | |
1700 /* V plane */ | |
1701 i = s->fragment_start[2]; | |
1702 for (y = Y_INITIAL(1); Y_FINISHED(1); y += y_inc) { | |
1703 for (x = 0; x < s->fragment_width / 2; x++) { | |
1704 s->all_fragments[i++].first_pixel = | |
1705 s->golden_frame.linesize[2] * y * FRAGMENT_PIXELS - | |
1706 s->golden_frame.linesize[2] + | |
1707 x * FRAGMENT_PIXELS; | |
1708 } | |
1709 } | |
1710 } | 1662 } |
1711 | 1663 |
1712 /* | 1664 /* |
1713 * This is the ffmpeg/libavcodec API init function. | 1665 * This is the ffmpeg/libavcodec API init function. |
1714 */ | 1666 */ |
1773 s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment)); | 1725 s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment)); |
1774 s->coeff_counts = av_malloc(s->fragment_count * sizeof(*s->coeff_counts)); | 1726 s->coeff_counts = av_malloc(s->fragment_count * sizeof(*s->coeff_counts)); |
1775 s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65); | 1727 s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65); |
1776 s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int)); | 1728 s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int)); |
1777 s->fast_fragment_list = av_malloc(s->fragment_count * sizeof(int)); | 1729 s->fast_fragment_list = av_malloc(s->fragment_count * sizeof(int)); |
1778 s->pixel_addresses_initialized = 0; | |
1779 if (!s->superblock_coding || !s->all_fragments || !s->coeff_counts || | 1730 if (!s->superblock_coding || !s->all_fragments || !s->coeff_counts || |
1780 !s->coeffs || !s->coded_fragment_list || !s->fast_fragment_list) { | 1731 !s->coeffs || !s->coded_fragment_list || !s->fast_fragment_list) { |
1781 vp3_decode_end(avctx); | 1732 vp3_decode_end(avctx); |
1782 return -1; | 1733 return -1; |
1783 } | 1734 } |
1994 return -1; | 1945 return -1; |
1995 } | 1946 } |
1996 | 1947 |
1997 /* golden frame is also the current frame */ | 1948 /* golden frame is also the current frame */ |
1998 s->current_frame= s->golden_frame; | 1949 s->current_frame= s->golden_frame; |
1999 | |
2000 /* time to figure out pixel addresses? */ | |
2001 if (!s->pixel_addresses_initialized) | |
2002 { | |
2003 vp3_calculate_pixel_addresses(s); | |
2004 s->pixel_addresses_initialized = 1; | |
2005 } | |
2006 } else { | 1950 } else { |
2007 /* allocate a new current frame */ | 1951 /* allocate a new current frame */ |
2008 s->current_frame.reference = 3; | 1952 s->current_frame.reference = 3; |
2009 if (!s->pixel_addresses_initialized) { | 1953 if (!s->golden_frame.data[0]) { |
2010 av_log(s->avctx, AV_LOG_ERROR, "vp3: first frame not a keyframe\n"); | 1954 av_log(s->avctx, AV_LOG_ERROR, "vp3: first frame not a keyframe\n"); |
2011 return -1; | 1955 return -1; |
2012 } | 1956 } |
2013 if(avctx->get_buffer(avctx, &s->current_frame) < 0) { | 1957 if(avctx->get_buffer(avctx, &s->current_frame) < 0) { |
2014 av_log(s->avctx, AV_LOG_ERROR, "vp3: get_buffer() failed\n"); | 1958 av_log(s->avctx, AV_LOG_ERROR, "vp3: get_buffer() failed\n"); |
2038 return -1; | 1982 return -1; |
2039 } | 1983 } |
2040 if (unpack_dct_coeffs(s, &gb)){ | 1984 if (unpack_dct_coeffs(s, &gb)){ |
2041 av_log(s->avctx, AV_LOG_ERROR, "error in unpack_dct_coeffs\n"); | 1985 av_log(s->avctx, AV_LOG_ERROR, "error in unpack_dct_coeffs\n"); |
2042 return -1; | 1986 return -1; |
1987 } | |
1988 | |
1989 for (i = 0; i < 3; i++) { | |
1990 if (s->flipped_image) | |
1991 s->data_offset[i] = 0; | |
1992 else | |
1993 s->data_offset[i] = ((s->height>>!!i)-1) * s->current_frame.linesize[i]; | |
2043 } | 1994 } |
2044 | 1995 |
2045 s->last_slice_end = 0; | 1996 s->last_slice_end = 0; |
2046 for (i = 0; i < s->macroblock_height; i++) | 1997 for (i = 0; i < s->macroblock_height; i++) |
2047 render_slice(s, i); | 1998 render_slice(s, i); |