Mercurial > libavcodec.hg
annotate vaapi_h264.c @ 11032:01bd040f8607 libavcodec
Unroll main loop so the edge==0 case is separate.
This allows many things to be simplified away.
The h264 decoder is overall 1% faster with an mbaff sample and
0.1% slower with the cathedral sample, probably because the slow loop
filter code must be loaded into the code cache for each first MB of each
row but isn't used for the following MBs.
author | michael |
---|---|
date | Thu, 28 Jan 2010 01:24:25 +0000 |
parents | 776dba50775c |
children | f5ebc14d90f0 |
rev | line source |
---|---|
10732 | 1 /* |
2 * H.264 HW decode acceleration through VA API | |
3 * | |
4 * Copyright (C) 2008-2009 Splitted-Desktop Systems | |
5 * | |
6 * This file is part of FFmpeg. | |
7 * | |
8 * FFmpeg is free software; you can redistribute it and/or | |
9 * modify it under the terms of the GNU Lesser General Public | |
10 * License as published by the Free Software Foundation; either | |
11 * version 2.1 of the License, or (at your option) any later version. | |
12 * | |
13 * FFmpeg is distributed in the hope that it will be useful, | |
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 * Lesser General Public License for more details. | |
17 * | |
18 * You should have received a copy of the GNU Lesser General Public | |
19 * License along with FFmpeg; if not, write to the Free Software | |
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 */ | |
22 | |
23 #include "vaapi_internal.h" | |
24 #include "h264.h" | |
25 | |
26 /** @file | |
27 * This file implements the glue code between FFmpeg's and VA API's | |
28 * structures for H.264 decoding. | |
29 */ | |
30 | |
31 /** | |
32 * Initializes an empty VA API picture. | |
33 * | |
34 * VA API requires a fixed-size reference picture array. | |
35 */ | |
36 static void init_vaapi_pic(VAPictureH264 *va_pic) | |
37 { | |
38 va_pic->picture_id = VA_INVALID_ID; | |
39 va_pic->flags = VA_PICTURE_H264_INVALID; | |
40 va_pic->TopFieldOrderCnt = 0; | |
41 va_pic->BottomFieldOrderCnt = 0; | |
42 } | |
43 | |
44 /** | |
45 * Translates an FFmpeg Picture into its VA API form. | |
46 * | |
47 * @param[out] va_pic A pointer to VA API's own picture struct | |
48 * @param[in] pic A pointer to the FFmpeg picture struct to convert | |
49 * @param[in] pic_structure The picture field type (as defined in mpegvideo.h), | |
50 * supersedes pic's field type if nonzero. | |
51 */ | |
52 static void fill_vaapi_pic(VAPictureH264 *va_pic, | |
53 Picture *pic, | |
54 int pic_structure) | |
55 { | |
56 if (pic_structure == 0) | |
57 pic_structure = pic->reference; | |
58 | |
59 va_pic->picture_id = ff_vaapi_get_surface_id(pic); | |
60 va_pic->frame_idx = pic->long_ref ? pic->pic_id : pic->frame_num; | |
61 | |
62 va_pic->flags = 0; | |
63 if (pic_structure != PICT_FRAME) | |
64 va_pic->flags |= (pic_structure & PICT_TOP_FIELD) ? VA_PICTURE_H264_TOP_FIELD : VA_PICTURE_H264_BOTTOM_FIELD; | |
65 if (pic->reference) | |
66 va_pic->flags |= pic->long_ref ? VA_PICTURE_H264_LONG_TERM_REFERENCE : VA_PICTURE_H264_SHORT_TERM_REFERENCE; | |
67 | |
68 va_pic->TopFieldOrderCnt = 0; | |
69 if (pic->field_poc[0] != INT_MAX) | |
70 va_pic->TopFieldOrderCnt = pic->field_poc[0]; | |
71 | |
72 va_pic->BottomFieldOrderCnt = 0; | |
73 if (pic->field_poc[1] != INT_MAX) | |
74 va_pic->BottomFieldOrderCnt = pic->field_poc[1]; | |
75 } | |
76 | |
77 /** Decoded Picture Buffer (DPB). */ | |
78 typedef struct DPB { | |
79 int size; ///< Current number of reference frames in the DPB | |
80 int max_size; ///< Max number of reference frames. This is FF_ARRAY_ELEMS(VAPictureParameterBufferH264.ReferenceFrames) | |
81 VAPictureH264 *va_pics; ///< Pointer to VAPictureParameterBufferH264.ReferenceFrames array | |
82 } DPB; | |
83 | |
84 /** | |
85 * Appends picture to the decoded picture buffer, in a VA API form that | |
86 * merges the second field picture attributes with the first, if | |
87 * available. The decoded picture buffer's size must be large enough | |
88 * to receive the new VA API picture object. | |
89 */ | |
90 static int dpb_add(DPB *dpb, Picture *pic) | |
91 { | |
92 int i; | |
93 | |
94 if (dpb->size >= dpb->max_size) | |
95 return -1; | |
96 | |
97 for (i = 0; i < dpb->size; i++) { | |
98 VAPictureH264 * const va_pic = &dpb->va_pics[i]; | |
99 if (va_pic->picture_id == ff_vaapi_get_surface_id(pic)) { | |
100 VAPictureH264 temp_va_pic; | |
101 fill_vaapi_pic(&temp_va_pic, pic, 0); | |
102 | |
103 if ((temp_va_pic.flags ^ va_pic->flags) & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD)) { | |
104 va_pic->flags |= temp_va_pic.flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD); | |
105 /* Merge second field */ | |
106 if (temp_va_pic.flags & VA_PICTURE_H264_TOP_FIELD) { | |
107 va_pic->TopFieldOrderCnt = temp_va_pic.TopFieldOrderCnt; | |
108 } else { | |
109 va_pic->BottomFieldOrderCnt = temp_va_pic.BottomFieldOrderCnt; | |
110 } | |
111 } | |
112 return 0; | |
113 } | |
114 } | |
115 | |
116 fill_vaapi_pic(&dpb->va_pics[dpb->size++], pic, 0); | |
117 return 0; | |
118 } | |
119 | |
120 /** Fills in VA API reference frames array. */ | |
121 static int fill_vaapi_ReferenceFrames(VAPictureParameterBufferH264 *pic_param, | |
122 H264Context *h) | |
123 { | |
124 DPB dpb; | |
125 int i; | |
126 | |
127 dpb.size = 0; | |
128 dpb.max_size = FF_ARRAY_ELEMS(pic_param->ReferenceFrames); | |
129 dpb.va_pics = pic_param->ReferenceFrames; | |
130 for (i = 0; i < dpb.max_size; i++) | |
131 init_vaapi_pic(&dpb.va_pics[i]); | |
132 | |
133 for (i = 0; i < h->short_ref_count; i++) { | |
134 Picture * const pic = h->short_ref[i]; | |
135 if (pic && pic->reference && dpb_add(&dpb, pic) < 0) | |
136 return -1; | |
137 } | |
138 | |
139 for (i = 0; i < h->long_ref_count; i++) { | |
140 Picture * const pic = h->long_ref[i]; | |
141 if (pic && pic->reference && dpb_add(&dpb, pic) < 0) | |
142 return -1; | |
143 } | |
144 return 0; | |
145 } | |
146 | |
147 /** | |
148 * Fills in VA API reference picture lists from the FFmpeg reference | |
149 * picture list. | |
150 * | |
151 * @param[out] RefPicList VA API internal reference picture list | |
152 * @param[in] ref_list A pointer to the FFmpeg reference list | |
153 * @param[in] ref_count The number of reference pictures in ref_list | |
154 */ | |
155 static void fill_vaapi_RefPicList(VAPictureH264 RefPicList[32], | |
156 Picture *ref_list, | |
157 unsigned int ref_count) | |
158 { | |
159 unsigned int i, n = 0; | |
160 for (i = 0; i < ref_count; i++) | |
161 if (ref_list[i].reference) | |
162 fill_vaapi_pic(&RefPicList[n++], &ref_list[i], 0); | |
163 | |
164 for (; n < 32; n++) | |
165 init_vaapi_pic(&RefPicList[n]); | |
166 } | |
167 | |
168 /** | |
169 * Fills in prediction weight table. | |
170 * | |
171 * VA API requires a plain prediction weight table as it does not infer | |
172 * any value. | |
173 * | |
174 * @param[in] h A pointer to the current H.264 context | |
175 * @param[in] list The reference frame list index to use | |
176 * @param[out] luma_weight_flag VA API plain luma weight flag | |
177 * @param[out] luma_weight VA API plain luma weight table | |
178 * @param[out] luma_offset VA API plain luma offset table | |
179 * @param[out] chroma_weight_flag VA API plain chroma weight flag | |
180 * @param[out] chroma_weight VA API plain chroma weight table | |
181 * @param[out] chroma_offset VA API plain chroma offset table | |
182 */ | |
183 static void fill_vaapi_plain_pred_weight_table(H264Context *h, | |
184 int list, | |
185 unsigned char *luma_weight_flag, | |
186 short luma_weight[32], | |
187 short luma_offset[32], | |
188 unsigned char *chroma_weight_flag, | |
189 short chroma_weight[32][2], | |
190 short chroma_offset[32][2]) | |
191 { | |
192 unsigned int i, j; | |
193 | |
194 *luma_weight_flag = h->luma_weight_flag[list]; | |
195 *chroma_weight_flag = h->chroma_weight_flag[list]; | |
196 | |
197 for (i = 0; i < h->ref_count[list]; i++) { | |
198 /* VA API also wants the inferred (default) values, not | |
199 only what is available in the bitstream (7.4.3.2). */ | |
200 if (h->luma_weight_flag[list]) { | |
201 luma_weight[i] = h->luma_weight[list][i]; | |
202 luma_offset[i] = h->luma_offset[list][i]; | |
203 } else { | |
204 luma_weight[i] = 1 << h->luma_log2_weight_denom; | |
205 luma_offset[i] = 0; | |
206 } | |
207 for (j = 0; j < 2; j++) { | |
208 if (h->chroma_weight_flag[list]) { | |
209 chroma_weight[i][j] = h->chroma_weight[list][i][j]; | |
210 chroma_offset[i][j] = h->chroma_offset[list][i][j]; | |
211 } else { | |
212 chroma_weight[i][j] = 1 << h->chroma_log2_weight_denom; | |
213 chroma_offset[i][j] = 0; | |
214 } | |
215 } | |
216 } | |
217 } | |
218 | |
219 /** Initializes and starts decoding a frame with VA API. */ | |
220 static int start_frame(AVCodecContext *avctx, | |
221 av_unused const uint8_t *buffer, | |
222 av_unused uint32_t size) | |
223 { | |
224 H264Context * const h = avctx->priv_data; | |
225 MpegEncContext * const s = &h->s; | |
226 struct vaapi_context * const vactx = avctx->hwaccel_context; | |
227 VAPictureParameterBufferH264 *pic_param; | |
228 VAIQMatrixBufferH264 *iq_matrix; | |
229 | |
230 dprintf(avctx, "start_frame()\n"); | |
231 | |
232 vactx->slice_param_size = sizeof(VASliceParameterBufferH264); | |
233 | |
234 /* Fill in VAPictureParameterBufferH264. */ | |
235 pic_param = ff_vaapi_alloc_pic_param(vactx, sizeof(VAPictureParameterBufferH264)); | |
236 if (!pic_param) | |
237 return -1; | |
238 fill_vaapi_pic(&pic_param->CurrPic, s->current_picture_ptr, s->picture_structure); | |
239 if (fill_vaapi_ReferenceFrames(pic_param, h) < 0) | |
240 return -1; | |
241 pic_param->picture_width_in_mbs_minus1 = s->mb_width - 1; | |
242 pic_param->picture_height_in_mbs_minus1 = s->mb_height - 1; | |
243 pic_param->bit_depth_luma_minus8 = h->sps.bit_depth_luma - 8; | |
244 pic_param->bit_depth_chroma_minus8 = h->sps.bit_depth_chroma - 8; | |
245 pic_param->num_ref_frames = h->sps.ref_frame_count; | |
246 pic_param->seq_fields.value = 0; /* reset all bits */ | |
247 pic_param->seq_fields.bits.chroma_format_idc = h->sps.chroma_format_idc; | |
248 pic_param->seq_fields.bits.residual_colour_transform_flag = h->sps.residual_color_transform_flag; /* XXX: only for 4:4:4 high profile? */ | |
249 pic_param->seq_fields.bits.gaps_in_frame_num_value_allowed_flag = h->sps.gaps_in_frame_num_allowed_flag; | |
250 pic_param->seq_fields.bits.frame_mbs_only_flag = h->sps.frame_mbs_only_flag; | |
251 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag = h->sps.mb_aff; | |
252 pic_param->seq_fields.bits.direct_8x8_inference_flag = h->sps.direct_8x8_inference_flag; | |
253 pic_param->seq_fields.bits.MinLumaBiPredSize8x8 = h->sps.level_idc >= 31; /* A.3.3.2 */ | |
254 pic_param->seq_fields.bits.log2_max_frame_num_minus4 = h->sps.log2_max_frame_num - 4; | |
255 pic_param->seq_fields.bits.pic_order_cnt_type = h->sps.poc_type; | |
256 pic_param->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 = h->sps.log2_max_poc_lsb - 4; | |
257 pic_param->seq_fields.bits.delta_pic_order_always_zero_flag = h->sps.delta_pic_order_always_zero_flag; | |
258 pic_param->num_slice_groups_minus1 = h->pps.slice_group_count - 1; | |
259 pic_param->slice_group_map_type = h->pps.mb_slice_group_map_type; | |
260 pic_param->slice_group_change_rate_minus1 = 0; /* XXX: unimplemented in FFmpeg */ | |
261 pic_param->pic_init_qp_minus26 = h->pps.init_qp - 26; | |
262 pic_param->pic_init_qs_minus26 = h->pps.init_qs - 26; | |
263 pic_param->chroma_qp_index_offset = h->pps.chroma_qp_index_offset[0]; | |
264 pic_param->second_chroma_qp_index_offset = h->pps.chroma_qp_index_offset[1]; | |
265 pic_param->pic_fields.value = 0; /* reset all bits */ | |
266 pic_param->pic_fields.bits.entropy_coding_mode_flag = h->pps.cabac; | |
267 pic_param->pic_fields.bits.weighted_pred_flag = h->pps.weighted_pred; | |
268 pic_param->pic_fields.bits.weighted_bipred_idc = h->pps.weighted_bipred_idc; | |
269 pic_param->pic_fields.bits.transform_8x8_mode_flag = h->pps.transform_8x8_mode; | |
270 pic_param->pic_fields.bits.field_pic_flag = s->picture_structure != PICT_FRAME; | |
271 pic_param->pic_fields.bits.constrained_intra_pred_flag = h->pps.constrained_intra_pred; | |
272 pic_param->pic_fields.bits.pic_order_present_flag = h->pps.pic_order_present; | |
273 pic_param->pic_fields.bits.deblocking_filter_control_present_flag = h->pps.deblocking_filter_parameters_present; | |
274 pic_param->pic_fields.bits.redundant_pic_cnt_present_flag = h->pps.redundant_pic_cnt_present; | |
275 pic_param->pic_fields.bits.reference_pic_flag = h->nal_ref_idc != 0; | |
276 pic_param->frame_num = h->frame_num; | |
277 | |
278 /* Fill in VAIQMatrixBufferH264. */ | |
279 iq_matrix = ff_vaapi_alloc_iq_matrix(vactx, sizeof(VAIQMatrixBufferH264)); | |
280 if (!iq_matrix) | |
281 return -1; | |
282 memcpy(iq_matrix->ScalingList4x4, h->pps.scaling_matrix4, sizeof(iq_matrix->ScalingList4x4)); | |
283 memcpy(iq_matrix->ScalingList8x8, h->pps.scaling_matrix8, sizeof(iq_matrix->ScalingList8x8)); | |
284 return 0; | |
285 } | |
286 | |
287 /** Ends a hardware decoding based frame. */ | |
288 static int end_frame(AVCodecContext *avctx) | |
289 { | |
290 H264Context * const h = avctx->priv_data; | |
291 | |
292 dprintf(avctx, "end_frame()\n"); | |
293 return ff_vaapi_common_end_frame(&h->s); | |
294 } | |
295 | |
296 /** Decodes the given H.264 slice with VA API. */ | |
297 static int decode_slice(AVCodecContext *avctx, | |
298 const uint8_t *buffer, | |
299 uint32_t size) | |
300 { | |
301 H264Context * const h = avctx->priv_data; | |
302 MpegEncContext * const s = &h->s; | |
303 VASliceParameterBufferH264 *slice_param; | |
304 | |
305 dprintf(avctx, "decode_slice(): buffer %p, size %d\n", buffer, size); | |
306 | |
307 /* Fill in VASliceParameterBufferH264. */ | |
308 slice_param = (VASliceParameterBufferH264 *)ff_vaapi_alloc_slice(avctx->hwaccel_context, buffer, size); | |
309 if (!slice_param) | |
310 return -1; | |
311 slice_param->slice_data_bit_offset = get_bits_count(&h->s.gb) + 8; /* bit buffer started beyond nal_unit_type */ | |
312 slice_param->first_mb_in_slice = (s->mb_y >> FIELD_OR_MBAFF_PICTURE) * s->mb_width + s->mb_x; | |
10829
46a247e54d6e
Export get_slice_type() as ff_h264_get_slice_type().
ramiro
parents:
10732
diff
changeset
|
313 slice_param->slice_type = ff_h264_get_slice_type(h); |
10732 | 314 slice_param->direct_spatial_mv_pred_flag = h->slice_type == FF_B_TYPE ? h->direct_spatial_mv_pred : 0; |
315 slice_param->num_ref_idx_l0_active_minus1 = h->list_count > 0 ? h->ref_count[0] - 1 : 0; | |
316 slice_param->num_ref_idx_l1_active_minus1 = h->list_count > 1 ? h->ref_count[1] - 1 : 0; | |
317 slice_param->cabac_init_idc = h->cabac_init_idc; | |
318 slice_param->slice_qp_delta = s->qscale - h->pps.init_qp; | |
319 slice_param->disable_deblocking_filter_idc = h->deblocking_filter < 2 ? !h->deblocking_filter : h->deblocking_filter; | |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10829
diff
changeset
|
320 slice_param->slice_alpha_c0_offset_div2 = h->slice_alpha_c0_offset / 2 - 26; |
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10829
diff
changeset
|
321 slice_param->slice_beta_offset_div2 = h->slice_beta_offset / 2 - 26; |
10732 | 322 slice_param->luma_log2_weight_denom = h->luma_log2_weight_denom; |
323 slice_param->chroma_log2_weight_denom = h->chroma_log2_weight_denom; | |
324 | |
325 fill_vaapi_RefPicList(slice_param->RefPicList0, h->ref_list[0], h->list_count > 0 ? h->ref_count[0] : 0); | |
326 fill_vaapi_RefPicList(slice_param->RefPicList1, h->ref_list[1], h->list_count > 1 ? h->ref_count[1] : 0); | |
327 | |
328 fill_vaapi_plain_pred_weight_table(h, 0, | |
329 &slice_param->luma_weight_l0_flag, slice_param->luma_weight_l0, slice_param->luma_offset_l0, | |
330 &slice_param->chroma_weight_l0_flag, slice_param->chroma_weight_l0, slice_param->chroma_offset_l0); | |
331 fill_vaapi_plain_pred_weight_table(h, 1, | |
332 &slice_param->luma_weight_l1_flag, slice_param->luma_weight_l1, slice_param->luma_offset_l1, | |
333 &slice_param->chroma_weight_l1_flag, slice_param->chroma_weight_l1, slice_param->chroma_offset_l1); | |
334 return 0; | |
335 } | |
336 | |
337 AVHWAccel h264_vaapi_hwaccel = { | |
338 .name = "h264_vaapi", | |
339 .type = CODEC_TYPE_VIDEO, | |
340 .id = CODEC_ID_H264, | |
341 .pix_fmt = PIX_FMT_VAAPI_VLD, | |
342 .capabilities = 0, | |
343 .start_frame = start_frame, | |
344 .end_frame = end_frame, | |
345 .decode_slice = decode_slice, | |
346 .priv_data_size = 0, | |
347 }; |