changeset 8522:f8c091bb5779 libavcodec

Add VDPAU hardware accelerated decoding for H264 which can be used by video players. Original patch by NVIDIA corporation.
author cehoyos
date Sun, 04 Jan 2009 23:55:27 +0000
parents 42179b2cfd09
children 2f94b161f9f7
files Makefile allcodecs.c avcodec.h h264.c h264_parser.c imgconvert.c mpegvideo.c vdpau_internal.h vdpau_render.h vdpauvideo.c
diffstat 10 files changed, 351 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Sun Jan 04 18:58:49 2009 +0000
+++ b/Makefile	Sun Jan 04 23:55:27 2009 +0000
@@ -100,6 +100,7 @@
 OBJS-$(CONFIG_H263P_ENCODER)           += mpegvideo_enc.o motion_est.o ratecontrol.o h263.o mpeg12data.o mpegvideo.o error_resilience.o
 OBJS-$(CONFIG_H264_DECODER)            += h264.o h264idct.o h264pred.o h264_parser.o cabac.o mpegvideo.o error_resilience.o
 OBJS-$(CONFIG_H264_ENCODER)            += h264enc.o h264dspenc.o
+OBJS-$(CONFIG_H264_VDPAU_DECODER)      += vdpauvideo.o
 OBJS-$(CONFIG_HUFFYUV_DECODER)         += huffyuv.o
 OBJS-$(CONFIG_HUFFYUV_ENCODER)         += huffyuv.o
 OBJS-$(CONFIG_IDCIN_DECODER)           += idcinvideo.o
--- a/allcodecs.c	Sun Jan 04 18:58:49 2009 +0000
+++ b/allcodecs.c	Sun Jan 04 23:55:27 2009 +0000
@@ -88,6 +88,7 @@
     REGISTER_DECODER (H263I, h263i);
     REGISTER_ENCODER (H263P, h263p);
     REGISTER_DECODER (H264, h264);
+    REGISTER_DECODER (H264_VDPAU, h264_vdpau);
     REGISTER_ENCDEC  (HUFFYUV, huffyuv);
     REGISTER_DECODER (IDCIN, idcin);
     REGISTER_DECODER (INDEO2, indeo2);
--- a/avcodec.h	Sun Jan 04 18:58:49 2009 +0000
+++ b/avcodec.h	Sun Jan 04 23:55:27 2009 +0000
@@ -191,6 +191,9 @@
     CODEC_ID_TGV,
     CODEC_ID_TGQ,
 
+    /* "codecs" for HW decoding with VDPAU */
+    CODEC_ID_H264_VDPAU= 0x9000,
+
     /* various PCM "codecs" */
     CODEC_ID_PCM_S16LE= 0x10000,
     CODEC_ID_PCM_S16BE,
@@ -527,6 +530,10 @@
  * This can be used to prevent truncation of the last audio samples.
  */
 #define CODEC_CAP_SMALL_LAST_FRAME 0x0040
+/**
+ * Codec can export data for HW decoding (VDPAU).
+ */
+#define CODEC_CAP_HWACCEL_VDPAU    0x0080
 
 //The following defines may change, don't expect compatibility if you use them.
 #define MB_TYPE_INTRA4x4   0x0001
--- a/h264.c	Sun Jan 04 18:58:49 2009 +0000
+++ b/h264.c	Sun Jan 04 23:55:27 2009 +0000
@@ -33,6 +33,7 @@
 #include "h264_parser.h"
 #include "golomb.h"
 #include "rectangle.h"
+#include "vdpau_internal.h"
 
 #include "cabac.h"
 #ifdef ARCH_X86
@@ -2188,6 +2189,8 @@
 
     if(avctx->codec_id == CODEC_ID_SVQ3)
         avctx->pix_fmt= PIX_FMT_YUVJ420P;
+    else if(avctx->codec_id == CODEC_ID_H264_VDPAU)
+        avctx->pix_fmt= PIX_FMT_VDPAU_H264;
     else
         avctx->pix_fmt= PIX_FMT_YUV420P;
 
@@ -7289,6 +7292,8 @@
     H264Context *hx;
     int i;
 
+    if(avctx->codec_id == CODEC_ID_H264_VDPAU)
+        return;
     if(context_count == 1) {
         decode_slice(avctx, &h);
     } else {
@@ -7416,8 +7421,14 @@
                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
-               && avctx->skip_frame < AVDISCARD_ALL)
+               && avctx->skip_frame < AVDISCARD_ALL){
+                if(ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU){
+                    static const uint8_t start_code[] = {0x00, 0x00, 0x01};
+                    ff_VDPAU_h264_add_data_chunk(h, start_code, sizeof(start_code));
+                    ff_VDPAU_h264_add_data_chunk(h, &buf[buf_index - consumed], consumed );
+                }else
                 context_count++;
+            }
             break;
         case NAL_DPA:
             init_get_bits(&hx->s.gb, ptr, bit_length);
@@ -7620,6 +7631,9 @@
         h->prev_frame_num_offset= h->frame_num_offset;
         h->prev_frame_num= h->frame_num;
 
+        if (ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU)
+            ff_VDPAU_h264_picture_complete(h);
+
         /*
          * FIXME: Error handling code does not seem to support interlaced
          * when slices span multiple rows
@@ -7632,7 +7646,7 @@
          * past end by one (callers fault) and resync_mb_y != 0
          * causes problems for the first MB line, too.
          */
-        if (!FIELD_PICTURE)
+        if (!avctx->codec_id == CODEC_ID_H264_VDPAU && !FIELD_PICTURE)
             ff_er_frame_end(s);
 
         MPV_frame_end(s);
@@ -8005,4 +8019,20 @@
     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
 };
 
+#ifdef CONFIG_H264_VDPAU_DECODER
+AVCodec h264_vdpau_decoder = {
+    "h264_vdpau",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H264_VDPAU,
+    sizeof(H264Context),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    .flush= flush_dpb,
+    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
+};
+#endif
+
 #include "svq3.c"
--- a/h264_parser.c	Sun Jan 04 18:58:49 2009 +0000
+++ b/h264_parser.c	Sun Jan 04 23:55:27 2009 +0000
@@ -161,7 +161,8 @@
 
 
 AVCodecParser h264_parser = {
-    { CODEC_ID_H264 },
+    { CODEC_ID_H264,
+      CODEC_ID_H264_VDPAU },
     sizeof(H264Context),
     NULL,
     h264_parse,
--- a/imgconvert.c	Sun Jan 04 18:58:49 2009 +0000
+++ b/imgconvert.c	Sun Jan 04 23:55:27 2009 +0000
@@ -267,6 +267,9 @@
     [PIX_FMT_XVMC_MPEG2_IDCT] = {
         .name = "xvmcidct",
     },
+    [PIX_FMT_VDPAU_H264] = {
+        .name = "vdpau_h264",
+    },
     [PIX_FMT_UYYVYY411] = {
         .name = "uyyvyy411",
         .nb_channels = 1,
--- a/mpegvideo.c	Sun Jan 04 18:58:49 2009 +0000
+++ b/mpegvideo.c	Sun Jan 04 23:55:27 2009 +0000
@@ -957,7 +957,11 @@
         XVMC_field_end(s);
     }else
 #endif
-    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
+    if(!(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+       && s->unrestricted_mv
+       && s->current_picture.reference
+       && !s->intra_only
+       && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
             s->dsp.draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
             s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
             s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vdpau_internal.h	Sun Jan 04 23:55:27 2009 +0000
@@ -0,0 +1,31 @@
+/*
+ * Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (C) 2008 NVIDIA.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VDPAU_INTERNAL_H
+#define AVCODEC_VDPAU_INTERNAL_H
+
+void ff_VDPAU_h264_add_data_chunk(H264Context *h, const uint8_t *buf,
+                                  int buf_size);
+void ff_VDPAU_h264_picture_complete(H264Context *h);
+
+#endif /* AVCODEC_VDPAU_INTERNAL_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vdpau_render.h	Sun Jan 04 23:55:27 2009 +0000
@@ -0,0 +1,86 @@
+/*
+ * Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (C) 2008 NVIDIA.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VDPAU_RENDER_H
+#define AVCODEC_VDPAU_RENDER_H
+
+/**
+ * \defgroup Decoder VDPAU Decoder and Renderer
+ *
+ * VDPAU HW acceleration has two modules
+ * - VDPAU Decoding
+ * - VDPAU Presentation
+ *
+ * VDPAU decoding module parses all headers using FFmpeg
+ * parsing mechanism and uses VDPAU for the actual decoding.
+ *
+ * As per the current implementation, the actual decoding
+ * and rendering (API calls) are done as part of VDPAU
+ * presentation (vo_vdpau.c) module.
+ *
+ * @{
+ * \defgroup  VDPAU_Decoding VDPAU Decoding
+ * \ingroup Decoder
+ * @{
+ */
+
+#include "vdpau/vdpau.h"
+#include "vdpau/vdpau_x11.h"
+
+/**
+ * \brief The videoSurface is used for render.
+ */
+#define FF_VDPAU_STATE_USED_FOR_RENDER 1
+
+/**
+ * \brief The videoSurface is needed for reference/prediction,
+ * codec manipulates this.
+ */
+#define FF_VDPAU_STATE_USED_FOR_REFERENCE 2
+
+/**
+ * \brief This structure is used as a CALL-BACK between the ffmpeg
+ * decoder (vd_) and presentation (vo_) module.
+ * This is used for defining a video-frame containing surface,
+ * picture-parameter, bitstream informations etc which are passed
+ * between ffmpeg decoder and its clients.
+ */
+struct vdpau_render_state
+    VdpVideoSurface surface; ///< used as rendered surface, never changed.
+
+    int state; ///< Holds FF_VDPAU_STATE_* values
+
+    /** Picture Parameter information for all supported codecs */
+    union _VdpPictureInfo {
+        VdpPictureInfoH264     h264;
+    } info;
+
+    /** Describe size/location of the compressed video data */
+    int bitstreamBuffersAlloced;
+    int bitstreamBuffersUsed;
+    VdpBitstreamBuffer *bitstreamBuffers;
+};
+
+/* @}*/
+
+#endif /* AVCODEC_VDPAU_RENDER_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vdpauvideo.c	Sun Jan 04 23:55:27 2009 +0000
@@ -0,0 +1,183 @@
+/*
+ * Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (c) 2008 NVIDIA.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include "avcodec.h"
+#include "h264.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+#include "vdpau_render.h"
+#include "vdpau_internal.h"
+
+/**
+ * \addtogroup VDPAU_Decoding
+ *
+ * @{
+ */
+
+static void VDPAU_h264_set_reference_frames(H264Context *h)
+{
+    MpegEncContext * s = &h->s;
+    struct vdpau_render_state * render, * render_ref;
+    VdpReferenceFrameH264 * rf, * rf2;
+    Picture * pic;
+    int i, list, pic_frame_idx;
+
+    render = (struct vdpau_render_state*)s->current_picture_ptr->data[0];
+    assert(render);
+
+    rf = &render->info.h264.referenceFrames[0];
+#define H264_RF_COUNT FF_ARRAY_ELEMS(render->info.h264.referenceFrames)
+
+    for (list = 0; list < 2; ++list) {
+        Picture **lp = list ? h->long_ref : h->short_ref;
+        int ls = list ? h->long_ref_count : h->short_ref_count;
+
+        for (i = 0; i < ls; ++i) {
+            pic = lp[i];
+            if (!pic || !pic->reference)
+                continue;
+            pic_frame_idx = pic->long_ref ? pic->pic_id : pic->frame_num;
+
+            render_ref = (struct vdpau_render_state*)pic->data[0];
+            assert(render_ref);
+
+            rf2 = &render->info.h264.referenceFrames[0];
+            while (rf2 != rf) {
+                if (
+                    (rf2->surface == render_ref->surface)
+                    && (rf2->is_long_term == pic->long_ref)
+                    && (rf2->frame_idx == pic_frame_idx)
+                )
+                    break;
+                ++rf2;
+            }
+            if (rf2 != rf) {
+                rf2->top_is_reference    |= (pic->reference & PICT_TOP_FIELD)    ? VDP_TRUE : VDP_FALSE;
+                rf2->bottom_is_reference |= (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
+                continue;
+            }
+
+            if (rf >= &render->info.h264.referenceFrames[H264_RF_COUNT])
+                continue;
+
+            rf->surface             = render_ref->surface;
+            rf->is_long_term        = pic->long_ref;
+            rf->top_is_reference    = (pic->reference & PICT_TOP_FIELD)    ? VDP_TRUE : VDP_FALSE;
+            rf->bottom_is_reference = (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
+            rf->field_order_cnt[0]  = pic->field_poc[0];
+            rf->field_order_cnt[1]  = pic->field_poc[1];
+            rf->frame_idx           = pic_frame_idx;
+
+            ++rf;
+        }
+    }
+
+    for (; rf < &render->info.h264.referenceFrames[H264_RF_COUNT]; ++rf) {
+        rf->surface             = VDP_INVALID_HANDLE;
+        rf->is_long_term        = 0;
+        rf->top_is_reference    = 0;
+        rf->bottom_is_reference = 0;
+        rf->field_order_cnt[0]  = 0;
+        rf->field_order_cnt[1]  = 0;
+        rf->frame_idx           = 0;
+    }
+}
+
+void ff_VDPAU_h264_add_data_chunk(H264Context *h, const uint8_t *buf, int buf_size)
+{
+    MpegEncContext * s = &h->s;
+    struct vdpau_render_state * render;
+
+    render = (struct vdpau_render_state*)s->current_picture_ptr->data[0];
+    assert(render);
+
+    if (!render->bitstreamBuffersUsed)
+        VDPAU_h264_set_reference_frames(h);
+
+    render->bitstreamBuffers= av_fast_realloc(
+        render->bitstreamBuffers,
+        &render->bitstreamBuffersAlloced,
+        sizeof(*render->bitstreamBuffers)*(render->bitstreamBuffersUsed + 1)
+    );
+
+    render->bitstreamBuffers[render->bitstreamBuffersUsed].struct_version  = VDP_BITSTREAM_BUFFER_VERSION;
+    render->bitstreamBuffers[render->bitstreamBuffersUsed].bitstream       = buf;
+    render->bitstreamBuffers[render->bitstreamBuffersUsed].bitstream_bytes = buf_size;
+    render->bitstreamBuffersUsed++;
+}
+
+void ff_VDPAU_h264_picture_complete(H264Context *h)
+{
+    MpegEncContext * s = &h->s;
+    struct vdpau_render_state * render;
+
+    render = (struct vdpau_render_state*)s->current_picture_ptr->data[0];
+    assert(render);
+
+    render->info.h264.slice_count = h->slice_num;
+    if (render->info.h264.slice_count < 1)
+        return;
+
+    for (int i = 0; i < 2; ++i) {
+        int foc = s->current_picture_ptr->field_poc[i];
+        if (foc == INT_MAX)
+            foc = 0;
+        render->info.h264.field_order_cnt[i] = foc;
+    }
+
+    render->info.h264.is_reference                           = s->current_picture_ptr->reference ? VDP_TRUE : VDP_FALSE;
+    render->info.h264.frame_num                              = h->frame_num;
+    render->info.h264.field_pic_flag                         = s->picture_structure != PICT_FRAME;
+    render->info.h264.bottom_field_flag                      = s->picture_structure == PICT_BOTTOM_FIELD;
+    render->info.h264.num_ref_frames                         = h->sps.ref_frame_count;
+    render->info.h264.mb_adaptive_frame_field_flag           = h->sps.mb_aff;
+    render->info.h264.constrained_intra_pred_flag            = h->pps.constrained_intra_pred;
+    render->info.h264.weighted_pred_flag                     = h->pps.weighted_pred;
+    render->info.h264.weighted_bipred_idc                    = h->pps.weighted_bipred_idc;
+    render->info.h264.frame_mbs_only_flag                    = h->sps.frame_mbs_only_flag;
+    render->info.h264.transform_8x8_mode_flag                = h->pps.transform_8x8_mode;
+    render->info.h264.chroma_qp_index_offset                 = h->pps.chroma_qp_index_offset[0];
+    render->info.h264.second_chroma_qp_index_offset          = h->pps.chroma_qp_index_offset[1];
+    render->info.h264.pic_init_qp_minus26                    = h->pps.init_qp - 26;
+    render->info.h264.num_ref_idx_l0_active_minus1           = h->pps.ref_count[0] - 1;
+    render->info.h264.num_ref_idx_l1_active_minus1           = h->pps.ref_count[1] - 1;
+    render->info.h264.log2_max_frame_num_minus4              = h->sps.log2_max_frame_num - 4;
+    render->info.h264.pic_order_cnt_type                     = h->sps.poc_type;
+    render->info.h264.log2_max_pic_order_cnt_lsb_minus4      = h->sps.log2_max_poc_lsb - 4;
+    render->info.h264.delta_pic_order_always_zero_flag       = h->sps.delta_pic_order_always_zero_flag;
+    render->info.h264.direct_8x8_inference_flag              = h->sps.direct_8x8_inference_flag;
+    render->info.h264.entropy_coding_mode_flag               = h->pps.cabac;
+    render->info.h264.pic_order_present_flag                 = h->pps.pic_order_present;
+    render->info.h264.deblocking_filter_control_present_flag = h->pps.deblocking_filter_parameters_present;
+    render->info.h264.redundant_pic_cnt_present_flag         = h->pps.redundant_pic_cnt_present;
+    memcpy(render->info.h264.scaling_lists_4x4, h->pps.scaling_matrix4, sizeof(render->info.h264.scaling_lists_4x4));
+    memcpy(render->info.h264.scaling_lists_8x8, h->pps.scaling_matrix8, sizeof(render->info.h264.scaling_lists_8x8));
+
+    ff_draw_horiz_band(s, 0, s->avctx->height);
+    render->bitstreamBuffersUsed = 0;
+}
+
+/* @}*/