Mercurial > libavcodec.hg
annotate h264.h @ 11285:613370892df2 libavcodec
Store intra4x4_pred_mode per row only.
about 5 cpu cycles slower in the local code but should be overall faster
due to reduced cache use. (my sample though has too few intra4x4 blocks
for this to be measurable easily either way)
author | michael |
---|---|
date | Thu, 25 Feb 2010 14:02:39 +0000 |
parents | aaca4b58880f |
children | db94c9bc5694 |
rev | line source |
---|---|
4975 | 1 /* |
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder | |
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 /** | |
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
8596
diff
changeset
|
23 * @file libavcodec/h264.h |
4975 | 24 * H.264 / AVC / MPEG4 part10 codec. |
25 * @author Michael Niedermayer <michaelni@gmx.at> | |
26 */ | |
27 | |
7760 | 28 #ifndef AVCODEC_H264_H |
29 #define AVCODEC_H264_H | |
4975 | 30 |
11203 | 31 #include "libavutil/intreadwrite.h" |
4975 | 32 #include "dsputil.h" |
33 #include "cabac.h" | |
34 #include "mpegvideo.h" | |
5638
4a26dc4ca11d
Move H.264 intra prediction functions into their own context
kostya
parents:
5231
diff
changeset
|
35 #include "h264pred.h" |
10866 | 36 #include "rectangle.h" |
4975 | 37 |
38 #define interlaced_dct interlaced_dct_is_a_bad_name | |
5129 | 39 #define mb_intra mb_intra_is_not_initialized_see_mb_type |
4975 | 40 |
41 #define LUMA_DC_BLOCK_INDEX 25 | |
42 #define CHROMA_DC_BLOCK_INDEX 26 | |
43 | |
44 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 | |
45 #define COEFF_TOKEN_VLC_BITS 8 | |
46 #define TOTAL_ZEROS_VLC_BITS 9 | |
47 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 | |
48 #define RUN_VLC_BITS 3 | |
49 #define RUN7_VLC_BITS 6 | |
50 | |
51 #define MAX_SPS_COUNT 32 | |
52 #define MAX_PPS_COUNT 256 | |
53 | |
54 #define MAX_MMCO_COUNT 66 | |
55 | |
7283
39e81fdd8d23
Move the size of the H264Context delayed_pic array into a #define.
astrange
parents:
7178
diff
changeset
|
56 #define MAX_DELAYED_PIC_COUNT 16 |
39e81fdd8d23
Move the size of the H264Context delayed_pic array into a #define.
astrange
parents:
7178
diff
changeset
|
57 |
4975 | 58 /* Compiling in interlaced support reduces the speed |
59 * of progressive decoding by about 2%. */ | |
60 #define ALLOW_INTERLACE | |
61 | |
7407 | 62 #define ALLOW_NOCHROMA |
63 | |
11282 | 64 #define FMO 0 |
65 | |
8079
2d3c7cd7d143
Replace literally hardcoded max slice number by named constant.
michael
parents:
8025
diff
changeset
|
66 /** |
2d3c7cd7d143
Replace literally hardcoded max slice number by named constant.
michael
parents:
8025
diff
changeset
|
67 * The maximum number of slices supported by the decoder. |
2d3c7cd7d143
Replace literally hardcoded max slice number by named constant.
michael
parents:
8025
diff
changeset
|
68 * Must be a power of 2. |
2d3c7cd7d143
Replace literally hardcoded max slice number by named constant.
michael
parents:
8025
diff
changeset
|
69 */ |
2d3c7cd7d143
Replace literally hardcoded max slice number by named constant.
michael
parents:
8025
diff
changeset
|
70 #define MAX_SLICES 16 |
2d3c7cd7d143
Replace literally hardcoded max slice number by named constant.
michael
parents:
8025
diff
changeset
|
71 |
4975 | 72 #ifdef ALLOW_INTERLACE |
73 #define MB_MBAFF h->mb_mbaff | |
74 #define MB_FIELD h->mb_field_decoding_flag | |
75 #define FRAME_MBAFF h->mb_aff_frame | |
5801 | 76 #define FIELD_PICTURE (s->picture_structure != PICT_FRAME) |
4975 | 77 #else |
78 #define MB_MBAFF 0 | |
79 #define MB_FIELD 0 | |
80 #define FRAME_MBAFF 0 | |
5767 | 81 #define FIELD_PICTURE 0 |
4975 | 82 #undef IS_INTERLACED |
83 #define IS_INTERLACED(mb_type) 0 | |
84 #endif | |
5781
0b3aa6f4c313
Modifies macroblock addressing and current macroblock y-position for field decoding.
andoma
parents:
5772
diff
changeset
|
85 #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE) |
4975 | 86 |
7407 | 87 #ifdef ALLOW_NOCHROMA |
88 #define CHROMA h->sps.chroma_format_idc | |
89 #else | |
90 #define CHROMA 1 | |
91 #endif | |
92 | |
10893
2aafcafbe1f0
Replace cabac checks in inline functions from h264.h with constants.
michael
parents:
10883
diff
changeset
|
93 #ifndef CABAC |
2aafcafbe1f0
Replace cabac checks in inline functions from h264.h with constants.
michael
parents:
10883
diff
changeset
|
94 #define CABAC h->pps.cabac |
2aafcafbe1f0
Replace cabac checks in inline functions from h264.h with constants.
michael
parents:
10883
diff
changeset
|
95 #endif |
2aafcafbe1f0
Replace cabac checks in inline functions from h264.h with constants.
michael
parents:
10883
diff
changeset
|
96 |
7988
a7dfe657968d
Move nal unit types enum and EXTENDED_SAR #define from h264data.h to h264.h.
michael
parents:
7906
diff
changeset
|
97 #define EXTENDED_SAR 255 |
a7dfe657968d
Move nal unit types enum and EXTENDED_SAR #define from h264data.h to h264.h.
michael
parents:
7906
diff
changeset
|
98 |
8207
2f35f9781c31
Move #defines that are mostly used in h264.c out of h264data.h and into h264.h.
diego
parents:
8107
diff
changeset
|
99 #define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16 bit |
2f35f9781c31
Move #defines that are mostly used in h264.c out of h264data.h and into h264.h.
diego
parents:
8107
diff
changeset
|
100 #define MB_TYPE_8x8DCT 0x01000000 |
2f35f9781c31
Move #defines that are mostly used in h264.c out of h264data.h and into h264.h.
diego
parents:
8107
diff
changeset
|
101 #define IS_REF0(a) ((a) & MB_TYPE_REF0) |
2f35f9781c31
Move #defines that are mostly used in h264.c out of h264data.h and into h264.h.
diego
parents:
8107
diff
changeset
|
102 #define IS_8x8DCT(a) ((a) & MB_TYPE_8x8DCT) |
2f35f9781c31
Move #defines that are mostly used in h264.c out of h264data.h and into h264.h.
diego
parents:
8107
diff
changeset
|
103 |
10862
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
104 /** |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
105 * Value of Picture.reference when Picture is not a reference picture, but |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
106 * is held for delayed output. |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
107 */ |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
108 #define DELAYED_PIC_REF 4 |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
109 |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
110 |
7988
a7dfe657968d
Move nal unit types enum and EXTENDED_SAR #define from h264data.h to h264.h.
michael
parents:
7906
diff
changeset
|
111 /* NAL unit types */ |
a7dfe657968d
Move nal unit types enum and EXTENDED_SAR #define from h264data.h to h264.h.
michael
parents:
7906
diff
changeset
|
112 enum { |
7990 | 113 NAL_SLICE=1, |
114 NAL_DPA, | |
115 NAL_DPB, | |
116 NAL_DPC, | |
117 NAL_IDR_SLICE, | |
118 NAL_SEI, | |
119 NAL_SPS, | |
120 NAL_PPS, | |
121 NAL_AUD, | |
122 NAL_END_SEQUENCE, | |
123 NAL_END_STREAM, | |
124 NAL_FILLER_DATA, | |
125 NAL_SPS_EXT, | |
126 NAL_AUXILIARY_SLICE=19 | |
7988
a7dfe657968d
Move nal unit types enum and EXTENDED_SAR #define from h264data.h to h264.h.
michael
parents:
7906
diff
changeset
|
127 }; |
a7dfe657968d
Move nal unit types enum and EXTENDED_SAR #define from h264data.h to h264.h.
michael
parents:
7906
diff
changeset
|
128 |
4975 | 129 /** |
8762
4f1567ce75c4
Replace hard-coded SEI type constants with symbolic names
superdump
parents:
8740
diff
changeset
|
130 * SEI message types |
4f1567ce75c4
Replace hard-coded SEI type constants with symbolic names
superdump
parents:
8740
diff
changeset
|
131 */ |
4f1567ce75c4
Replace hard-coded SEI type constants with symbolic names
superdump
parents:
8740
diff
changeset
|
132 typedef enum { |
8972 | 133 SEI_BUFFERING_PERIOD = 0, ///< buffering period (H.264, D.1.1) |
8762
4f1567ce75c4
Replace hard-coded SEI type constants with symbolic names
superdump
parents:
8740
diff
changeset
|
134 SEI_TYPE_PIC_TIMING = 1, ///< picture timing |
4f1567ce75c4
Replace hard-coded SEI type constants with symbolic names
superdump
parents:
8740
diff
changeset
|
135 SEI_TYPE_USER_DATA_UNREGISTERED = 5, ///< unregistered user data |
4f1567ce75c4
Replace hard-coded SEI type constants with symbolic names
superdump
parents:
8740
diff
changeset
|
136 SEI_TYPE_RECOVERY_POINT = 6 ///< recovery point (frame # to decoder sync) |
4f1567ce75c4
Replace hard-coded SEI type constants with symbolic names
superdump
parents:
8740
diff
changeset
|
137 } SEI_Type; |
4f1567ce75c4
Replace hard-coded SEI type constants with symbolic names
superdump
parents:
8740
diff
changeset
|
138 |
4f1567ce75c4
Replace hard-coded SEI type constants with symbolic names
superdump
parents:
8740
diff
changeset
|
139 /** |
8107
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
140 * pic_struct in picture timing SEI message |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
141 */ |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
142 typedef enum { |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
143 SEI_PIC_STRUCT_FRAME = 0, ///< 0: %frame |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
144 SEI_PIC_STRUCT_TOP_FIELD = 1, ///< 1: top field |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
145 SEI_PIC_STRUCT_BOTTOM_FIELD = 2, ///< 2: bottom field |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
146 SEI_PIC_STRUCT_TOP_BOTTOM = 3, ///< 3: top field, bottom field, in that order |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
147 SEI_PIC_STRUCT_BOTTOM_TOP = 4, ///< 4: bottom field, top field, in that order |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
148 SEI_PIC_STRUCT_TOP_BOTTOM_TOP = 5, ///< 5: top field, bottom field, top field repeated, in that order |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
149 SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM = 6, ///< 6: bottom field, top field, bottom field repeated, in that order |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
150 SEI_PIC_STRUCT_FRAME_DOUBLING = 7, ///< 7: %frame doubling |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
151 SEI_PIC_STRUCT_FRAME_TRIPLING = 8 ///< 8: %frame tripling |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
152 } SEI_PicStructType; |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
153 |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
154 /** |
4975 | 155 * Sequence parameter set |
156 */ | |
157 typedef struct SPS{ | |
158 | |
159 int profile_idc; | |
160 int level_idc; | |
7407 | 161 int chroma_format_idc; |
4975 | 162 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag |
163 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4 | |
164 int poc_type; ///< pic_order_cnt_type | |
165 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4 | |
166 int delta_pic_order_always_zero_flag; | |
167 int offset_for_non_ref_pic; | |
168 int offset_for_top_to_bottom_field; | |
169 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle | |
170 int ref_frame_count; ///< num_ref_frames | |
171 int gaps_in_frame_num_allowed_flag; | |
5755
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
172 int mb_width; ///< pic_width_in_mbs_minus1 + 1 |
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
173 int mb_height; ///< pic_height_in_map_units_minus1 + 1 |
4975 | 174 int frame_mbs_only_flag; |
175 int mb_aff; ///< mb_adaptive_frame_field_flag | |
176 int direct_8x8_inference_flag; | |
177 int crop; ///< frame_cropping_flag | |
6582
7d40da88f0c7
Crop parameters are unsigned, having them negative could be bad and lead
michael
parents:
6488
diff
changeset
|
178 unsigned int crop_left; ///< frame_cropping_rect_left_offset |
7d40da88f0c7
Crop parameters are unsigned, having them negative could be bad and lead
michael
parents:
6488
diff
changeset
|
179 unsigned int crop_right; ///< frame_cropping_rect_right_offset |
7d40da88f0c7
Crop parameters are unsigned, having them negative could be bad and lead
michael
parents:
6488
diff
changeset
|
180 unsigned int crop_top; ///< frame_cropping_rect_top_offset |
7d40da88f0c7
Crop parameters are unsigned, having them negative could be bad and lead
michael
parents:
6488
diff
changeset
|
181 unsigned int crop_bottom; ///< frame_cropping_rect_bottom_offset |
4975 | 182 int vui_parameters_present_flag; |
183 AVRational sar; | |
10837
e5905bfa625d
Export fullrange flag and color information for h.264
conrad
parents:
10829
diff
changeset
|
184 int video_signal_type_present_flag; |
e5905bfa625d
Export fullrange flag and color information for h.264
conrad
parents:
10829
diff
changeset
|
185 int full_range; |
e5905bfa625d
Export fullrange flag and color information for h.264
conrad
parents:
10829
diff
changeset
|
186 int colour_description_present_flag; |
e5905bfa625d
Export fullrange flag and color information for h.264
conrad
parents:
10829
diff
changeset
|
187 enum AVColorPrimaries color_primaries; |
e5905bfa625d
Export fullrange flag and color information for h.264
conrad
parents:
10829
diff
changeset
|
188 enum AVColorTransferCharacteristic color_trc; |
e5905bfa625d
Export fullrange flag and color information for h.264
conrad
parents:
10829
diff
changeset
|
189 enum AVColorSpace colorspace; |
4975 | 190 int timing_info_present_flag; |
191 uint32_t num_units_in_tick; | |
192 uint32_t time_scale; | |
193 int fixed_frame_rate_flag; | |
194 short offset_for_ref_frame[256]; //FIXME dyn alloc? | |
195 int bitstream_restriction_flag; | |
196 int num_reorder_frames; | |
197 int scaling_matrix_present; | |
198 uint8_t scaling_matrix4[6][16]; | |
199 uint8_t scaling_matrix8[2][64]; | |
8107
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
200 int nal_hrd_parameters_present_flag; |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
201 int vcl_hrd_parameters_present_flag; |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
202 int pic_struct_present_flag; |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
203 int time_offset_length; |
8970 | 204 int cpb_cnt; ///< See H.264 E.1.2 |
8965 | 205 int initial_cpb_removal_delay_length; ///< initial_cpb_removal_delay_length_minus1 + 1 |
8107
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
206 int cpb_removal_delay_length; ///< cpb_removal_delay_length_minus1 + 1 |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
207 int dpb_output_delay_length; ///< dpb_output_delay_length_minus1 + 1 |
8735
5d7ebbb7e91b
Add fields to H264Context and SPS for upcoming VA API support.
cehoyos
parents:
8718
diff
changeset
|
208 int bit_depth_luma; ///< bit_depth_luma_minus8 + 8 |
5d7ebbb7e91b
Add fields to H264Context and SPS for upcoming VA API support.
cehoyos
parents:
8718
diff
changeset
|
209 int bit_depth_chroma; ///< bit_depth_chroma_minus8 + 8 |
5d7ebbb7e91b
Add fields to H264Context and SPS for upcoming VA API support.
cehoyos
parents:
8718
diff
changeset
|
210 int residual_color_transform_flag; ///< residual_colour_transform_flag |
4975 | 211 }SPS; |
212 | |
213 /** | |
214 * Picture parameter set | |
215 */ | |
216 typedef struct PPS{ | |
217 unsigned int sps_id; | |
218 int cabac; ///< entropy_coding_mode_flag | |
219 int pic_order_present; ///< pic_order_present_flag | |
220 int slice_group_count; ///< num_slice_groups_minus1 + 1 | |
221 int mb_slice_group_map_type; | |
222 unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1 | |
223 int weighted_pred; ///< weighted_pred_flag | |
224 int weighted_bipred_idc; | |
225 int init_qp; ///< pic_init_qp_minus26 + 26 | |
226 int init_qs; ///< pic_init_qs_minus26 + 26 | |
5231
07a97575d0c4
Add support for streams with different chroma_qp_index_offset
gpoirier
parents:
5226
diff
changeset
|
227 int chroma_qp_index_offset[2]; |
4975 | 228 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag |
229 int constrained_intra_pred; ///< constrained_intra_pred_flag | |
230 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag | |
231 int transform_8x8_mode; ///< transform_8x8_mode_flag | |
232 uint8_t scaling_matrix4[6][16]; | |
233 uint8_t scaling_matrix8[2][64]; | |
7347
612a78c3b128
qscale has a range of 0..51 we thus do not need a 256 entry table and neither need
michael
parents:
7343
diff
changeset
|
234 uint8_t chroma_qp_table[2][64]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table |
5231
07a97575d0c4
Add support for streams with different chroma_qp_index_offset
gpoirier
parents:
5226
diff
changeset
|
235 int chroma_qp_diff; |
4975 | 236 }PPS; |
237 | |
238 /** | |
239 * Memory management control operation opcode. | |
240 */ | |
241 typedef enum MMCOOpcode{ | |
242 MMCO_END=0, | |
243 MMCO_SHORT2UNUSED, | |
244 MMCO_LONG2UNUSED, | |
245 MMCO_SHORT2LONG, | |
246 MMCO_SET_MAX_LONG, | |
247 MMCO_RESET, | |
248 MMCO_LONG, | |
249 } MMCOOpcode; | |
250 | |
251 /** | |
252 * Memory management control operation. | |
253 */ | |
254 typedef struct MMCO{ | |
255 MMCOOpcode opcode; | |
5756
db5a041fd77c
Rename MMCO stuff to prepare for h264/PAFF implementation.
andoma
parents:
5755
diff
changeset
|
256 int short_pic_num; ///< pic_num without wrapping (pic_num & max_pic_num) |
db5a041fd77c
Rename MMCO stuff to prepare for h264/PAFF implementation.
andoma
parents:
5755
diff
changeset
|
257 int long_arg; ///< index, pic_num, or num long refs depending on opcode |
4975 | 258 } MMCO; |
259 | |
260 /** | |
261 * H264Context | |
262 */ | |
263 typedef struct H264Context{ | |
264 MpegEncContext s; | |
265 int nal_ref_idc; | |
266 int nal_unit_type; | |
5174 | 267 uint8_t *rbsp_buffer[2]; |
268 unsigned int rbsp_buffer_size[2]; | |
4975 | 269 |
270 /** | |
271 * Used to parse AVC variant of h264 | |
272 */ | |
273 int is_avc; ///< this flag is != 0 if codec is avc1 | |
274 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) | |
275 | |
5231
07a97575d0c4
Add support for streams with different chroma_qp_index_offset
gpoirier
parents:
5226
diff
changeset
|
276 int chroma_qp[2]; //QPc |
4975 | 277 |
10896
f112b4d030fa
Avoid wasting 4 cpu cycles per MB in redundantly calculating qp_thresh.
michael
parents:
10893
diff
changeset
|
278 int qp_thresh; ///< QP threshold to skip loopfilter |
f112b4d030fa
Avoid wasting 4 cpu cycles per MB in redundantly calculating qp_thresh.
michael
parents:
10893
diff
changeset
|
279 |
4975 | 280 int prev_mb_skipped; |
281 int next_mb_skipped; | |
282 | |
283 //prediction stuff | |
284 int chroma_pred_mode; | |
285 int intra16x16_pred_mode; | |
286 | |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
287 int topleft_mb_xy; |
4975 | 288 int top_mb_xy; |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
289 int topright_mb_xy; |
4975 | 290 int left_mb_xy[2]; |
291 | |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
292 int topleft_type; |
11012
b2fd83b26dd9
Set top & left types for deblock in fill_caches().
michael
parents:
11010
diff
changeset
|
293 int top_type; |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
294 int topright_type; |
11012
b2fd83b26dd9
Set top & left types for deblock in fill_caches().
michael
parents:
11010
diff
changeset
|
295 int left_type[2]; |
b2fd83b26dd9
Set top & left types for deblock in fill_caches().
michael
parents:
11010
diff
changeset
|
296 |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
297 const uint8_t * left_block; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
298 int topleft_partition; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
299 |
4975 | 300 int8_t intra4x4_pred_mode_cache[5*8]; |
11285 | 301 int8_t (*intra4x4_pred_mode); |
5638
4a26dc4ca11d
Move H.264 intra prediction functions into their own context
kostya
parents:
5231
diff
changeset
|
302 H264PredContext hpc; |
4975 | 303 unsigned int topleft_samples_available; |
304 unsigned int top_samples_available; | |
305 unsigned int topright_samples_available; | |
306 unsigned int left_samples_available; | |
307 uint8_t (*top_borders[2])[16+2*8]; | |
308 uint8_t left_border[2*(17+2*9)]; | |
309 | |
310 /** | |
311 * non zero coeff count cache. | |
312 * is 64 if not available. | |
313 */ | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10934
diff
changeset
|
314 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache)[6*8]; |
10909
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
315 |
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
316 /* |
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
317 .UU.YYYY |
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
318 .UU.YYYY |
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
319 .vv.YYYY |
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
320 .VV.YYYY |
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
321 */ |
10906 | 322 uint8_t (*non_zero_count)[32]; |
4975 | 323 |
324 /** | |
325 * Motion vector cache. | |
326 */ | |
11002
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
327 DECLARE_ALIGNED_16(int16_t, mv_cache)[2][5*8][2]; |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10934
diff
changeset
|
328 DECLARE_ALIGNED_8(int8_t, ref_cache)[2][5*8]; |
4975 | 329 #define LIST_NOT_USED -1 //FIXME rename? |
330 #define PART_NOT_AVAILABLE -2 | |
331 | |
332 /** | |
333 * is 1 if the specific list MV&references are set to 0,0,-2. | |
334 */ | |
335 int mv_cache_clean[2]; | |
336 | |
337 /** | |
338 * number of neighbors (top and/or left) that used 8x8 dct | |
339 */ | |
340 int neighbor_transform_size; | |
341 | |
342 /** | |
343 * block_offset[ 0..23] for frame macroblocks | |
344 * block_offset[24..47] for field macroblocks | |
345 */ | |
346 int block_offset[2*(16+8)]; | |
347 | |
348 uint32_t *mb2b_xy; //FIXME are these 4 a good idea? | |
11282 | 349 uint32_t *mb2br_xy; |
4975 | 350 uint32_t *mb2b8_xy; |
351 int b_stride; //FIXME use s->b4_stride | |
352 int b8_stride; | |
353 | |
354 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff | |
355 int mb_uvlinesize; | |
356 | |
357 int emu_edge_width; | |
358 int emu_edge_height; | |
359 | |
360 int halfpel_flag; | |
361 int thirdpel_flag; | |
362 | |
363 int unknown_svq3_flag; | |
364 int next_slice_index; | |
365 | |
5079 | 366 SPS *sps_buffers[MAX_SPS_COUNT]; |
4975 | 367 SPS sps; ///< current sps |
368 | |
5079 | 369 PPS *pps_buffers[MAX_PPS_COUNT]; |
4975 | 370 /** |
371 * current pps | |
372 */ | |
373 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? | |
374 | |
375 uint32_t dequant4_buffer[6][52][16]; | |
376 uint32_t dequant8_buffer[2][52][64]; | |
377 uint32_t (*dequant4_coeff[6])[16]; | |
378 uint32_t (*dequant8_coeff[2])[64]; | |
379 int dequant_coeff_pps; ///< reinit tables when pps changes | |
380 | |
381 int slice_num; | |
8079
2d3c7cd7d143
Replace literally hardcoded max slice number by named constant.
michael
parents:
8025
diff
changeset
|
382 uint16_t *slice_table_base; |
2d3c7cd7d143
Replace literally hardcoded max slice number by named constant.
michael
parents:
8025
diff
changeset
|
383 uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 |
4975 | 384 int slice_type; |
7338 | 385 int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) |
4975 | 386 int slice_type_fixed; |
387 | |
388 //interlacing specific flags | |
389 int mb_aff_frame; | |
390 int mb_field_decoding_flag; | |
391 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag | |
392 | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10934
diff
changeset
|
393 DECLARE_ALIGNED_8(uint16_t, sub_mb_type)[4]; |
4975 | 394 |
395 //POC stuff | |
396 int poc_lsb; | |
397 int poc_msb; | |
398 int delta_poc_bottom; | |
399 int delta_poc[2]; | |
400 int frame_num; | |
401 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 | |
402 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 | |
403 int frame_num_offset; ///< for POC type 2 | |
404 int prev_frame_num_offset; ///< for POC type 2 | |
405 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 | |
406 | |
407 /** | |
5772
65b71bd21a4d
Fix h->curr_pic_num for field pictures. Necessary for proper PAFF support.
andoma
parents:
5767
diff
changeset
|
408 * frame_num for frames or 2*frame_num+1 for field pics. |
4975 | 409 */ |
410 int curr_pic_num; | |
411 | |
412 /** | |
413 * max_frame_num or 2*max_frame_num for field pics. | |
414 */ | |
415 int max_pic_num; | |
416 | |
417 //Weighted pred stuff | |
418 int use_weight; | |
419 int use_weight_chroma; | |
420 int luma_log2_weight_denom; | |
421 int chroma_log2_weight_denom; | |
422 int luma_weight[2][48]; | |
423 int luma_offset[2][48]; | |
424 int chroma_weight[2][48][2]; | |
425 int chroma_offset[2][48][2]; | |
426 int implicit_weight[48][48]; | |
427 | |
428 //deblock | |
429 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 | |
430 int slice_alpha_c0_offset; | |
431 int slice_beta_offset; | |
432 | |
433 int redundant_pic_count; | |
434 | |
435 int direct_spatial_mv_pred; | |
11096 | 436 int col_parity; |
437 int col_fieldoff; | |
4975 | 438 int dist_scale_factor[16]; |
7898
a33287a39a55
Make MBAFF temporal direct mode closer to the spec.
michael
parents:
7760
diff
changeset
|
439 int dist_scale_factor_field[2][32]; |
7906
5be944626072
Another try to fix temporal direct mode references.
michael
parents:
7898
diff
changeset
|
440 int map_col_to_list0[2][16+32]; |
5be944626072
Another try to fix temporal direct mode references.
michael
parents:
7898
diff
changeset
|
441 int map_col_to_list0_field[2][2][16+32]; |
4975 | 442 |
443 /** | |
444 * num_ref_idx_l0/1_active_minus1 + 1 | |
445 */ | |
446 unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode | |
447 unsigned int list_count; | |
10906 | 448 uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type |
4975 | 449 Picture *short_ref[32]; |
450 Picture *long_ref[32]; | |
5755
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
451 Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture |
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
452 Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. |
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
453 Reordered version of default_ref_list |
b45894d869da
Cosmetic preparations for h264/PAFF implementation.
andoma
parents:
5642
diff
changeset
|
454 according to picture reordering in slice header */ |
8079
2d3c7cd7d143
Replace literally hardcoded max slice number by named constant.
michael
parents:
8025
diff
changeset
|
455 int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 |
7283
39e81fdd8d23
Move the size of the H264Context delayed_pic array into a #define.
astrange
parents:
7178
diff
changeset
|
456 Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? |
7310
033d10927d45
Remove delayed_output_pic, I do not understand what this variable was good for.
michael
parents:
7283
diff
changeset
|
457 int outputed_poc; |
4975 | 458 |
459 /** | |
460 * memory management control operations buffer. | |
461 */ | |
462 MMCO mmco[MAX_MMCO_COUNT]; | |
463 int mmco_index; | |
464 | |
465 int long_ref_count; ///< number of actual long term references | |
466 int short_ref_count; ///< number of actual short term references | |
467 | |
468 //data partitioning | |
469 GetBitContext intra_gb; | |
470 GetBitContext inter_gb; | |
471 GetBitContext *intra_gb_ptr; | |
472 GetBitContext *inter_gb_ptr; | |
473 | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10934
diff
changeset
|
474 DECLARE_ALIGNED_16(DCTELEM, mb)[16*24]; |
6488 | 475 DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb |
4975 | 476 |
477 /** | |
478 * Cabac | |
479 */ | |
480 CABACContext cabac; | |
481 uint8_t cabac_state[460]; | |
482 int cabac_init_idc; | |
483 | |
484 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ | |
485 uint16_t *cbp_table; | |
486 int cbp; | |
487 int top_cbp; | |
488 int left_cbp; | |
489 /* chroma_pred_mode for i4x4 or i16x16, else 0 */ | |
490 uint8_t *chroma_pred_mode_table; | |
491 int last_qscale_diff; | |
11277
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
492 uint8_t (*mvd_table[2])[2]; |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
493 DECLARE_ALIGNED_16(uint8_t, mvd_cache)[2][5*8][2]; |
4975 | 494 uint8_t *direct_table; |
495 uint8_t direct_cache[5*8]; | |
496 | |
497 uint8_t zigzag_scan[16]; | |
498 uint8_t zigzag_scan8x8[64]; | |
499 uint8_t zigzag_scan8x8_cavlc[64]; | |
500 uint8_t field_scan[16]; | |
501 uint8_t field_scan8x8[64]; | |
502 uint8_t field_scan8x8_cavlc[64]; | |
503 const uint8_t *zigzag_scan_q0; | |
504 const uint8_t *zigzag_scan8x8_q0; | |
505 const uint8_t *zigzag_scan8x8_cavlc_q0; | |
506 const uint8_t *field_scan_q0; | |
507 const uint8_t *field_scan8x8_q0; | |
508 const uint8_t *field_scan8x8_cavlc_q0; | |
509 | |
510 int x264_build; | |
5642 | 511 |
512 /** | |
513 * @defgroup multithreading Members for slice based multithreading | |
514 * @{ | |
515 */ | |
516 struct H264Context *thread_context[MAX_THREADS]; | |
517 | |
518 /** | |
519 * current slice number, used to initialize slice_num of each thread/context |
520 */ | |
521 int current_slice; | |
522 | |
523 /** | |
524 * Max number of threads / contexts. | |
525 * This is equal to AVCodecContext.thread_count unless | |
526 * multithreaded decoding is impossible, in which case it is | |
527 * reduced to 1. | |
528 */ | |
529 int max_contexts; | |
530 | |
531 /** | |
532 * 1 if the single thread fallback warning has already been | |
533 * displayed, 0 otherwise. | |
534 */ | |
535 int single_decode_warning; | |
536 | |
537 int last_slice_type; | |
538 /** @} */ | |
539 | |
6783
df0893f4fd86
Store mb_xy in H264Context and only calculate it once per MB.
astrange
parents:
6582
diff
changeset
|
540 int mb_xy; |
df0893f4fd86
Store mb_xy in H264Context and only calculate it once per MB.
astrange
parents:
6582
diff
changeset
|
541 |
8025 | 542 uint32_t svq3_watermark_key; |
8107
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
543 |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
544 /** |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
545 * pic_struct in picture timing SEI message |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
546 */ |
e61f76efc9f3
h264: Implement decoding of picture timing SEI message.
andoma
parents:
8079
diff
changeset
|
547 SEI_PicStructType sei_pic_struct; |
8353
add1a0d65370
Do not recalculate constant part of is_complex for every MB.
michael
parents:
8207
diff
changeset
|
548 |
8783
e91ea98d868a
Add SEI recovery point frame counter to H264Context and use it when
cehoyos
parents:
8762
diff
changeset
|
549 /** |
9831
febe1855b64a
Add field prev_interlaced_frame to H264Context to be able to flag soft telecine
cehoyos
parents:
9384
diff
changeset
|
550 * Complement sei_pic_struct |
febe1855b64a
Add field prev_interlaced_frame to H264Context to be able to flag soft telecine
cehoyos
parents:
9384
diff
changeset
|
551 * SEI_PIC_STRUCT_TOP_BOTTOM and SEI_PIC_STRUCT_BOTTOM_TOP indicate interlaced frames. |
febe1855b64a
Add field prev_interlaced_frame to H264Context to be able to flag soft telecine
cehoyos
parents:
9384
diff
changeset
|
552 * However, soft telecined frames may have these values. |
febe1855b64a
Add field prev_interlaced_frame to H264Context to be able to flag soft telecine
cehoyos
parents:
9384
diff
changeset
|
553 * This is used in an attempt to flag soft telecine progressive. |
febe1855b64a
Add field prev_interlaced_frame to H264Context to be able to flag soft telecine
cehoyos
parents:
9384
diff
changeset
|
554 */ |
febe1855b64a
Add field prev_interlaced_frame to H264Context to be able to flag soft telecine
cehoyos
parents:
9384
diff
changeset
|
555 int prev_interlaced_frame; |
febe1855b64a
Add field prev_interlaced_frame to H264Context to be able to flag soft telecine
cehoyos
parents:
9384
diff
changeset
|
556 |
febe1855b64a
Add field prev_interlaced_frame to H264Context to be able to flag soft telecine
cehoyos
parents:
9384
diff
changeset
|
557 /** |
9128
793cf8c68c4f
Add support for ct_type to correctly detect interlaced flag
schreter
parents:
9071
diff
changeset
|
558 * Bit set of clock types for fields/frames in picture timing SEI message. |
793cf8c68c4f
Add support for ct_type to correctly detect interlaced flag
schreter
parents:
9071
diff
changeset
|
559 * For each found ct_type, appropriate bit is set (e.g., bit 1 for |
793cf8c68c4f
Add support for ct_type to correctly detect interlaced flag
schreter
parents:
9071
diff
changeset
|
560 * interlaced). |
793cf8c68c4f
Add support for ct_type to correctly detect interlaced flag
schreter
parents:
9071
diff
changeset
|
561 */ |
793cf8c68c4f
Add support for ct_type to correctly detect interlaced flag
schreter
parents:
9071
diff
changeset
|
562 int sei_ct_type; |
793cf8c68c4f
Add support for ct_type to correctly detect interlaced flag
schreter
parents:
9071
diff
changeset
|
563 |
793cf8c68c4f
Add support for ct_type to correctly detect interlaced flag
schreter
parents:
9071
diff
changeset
|
564 /** |
8966 | 565 * dpb_output_delay in picture timing SEI message, see H.264 C.2.2 |
566 */ | |
567 int sei_dpb_output_delay; | |
568 | |
569 /** | |
8967 | 570 * cpb_removal_delay in picture timing SEI message, see H.264 C.1.2 |
571 */ | |
572 int sei_cpb_removal_delay; | |
573 | |
574 /** | |
8783
e91ea98d868a
Add SEI recovery point frame counter to H264Context and use it when
cehoyos
parents:
8762
diff
changeset
|
575 * recovery_frame_cnt from SEI message |
e91ea98d868a
Add SEI recovery point frame counter to H264Context and use it when
cehoyos
parents:
8762
diff
changeset
|
576 * |
e91ea98d868a
Add SEI recovery point frame counter to H264Context and use it when
cehoyos
parents:
8762
diff
changeset
|
577 * Set to -1 if no recovery point SEI message found or to number of frames |
e91ea98d868a
Add SEI recovery point frame counter to H264Context and use it when
cehoyos
parents:
8762
diff
changeset
|
578 * before playback synchronizes. Frames having recovery point are key |
e91ea98d868a
Add SEI recovery point frame counter to H264Context and use it when
cehoyos
parents:
8762
diff
changeset
|
579 * frames. |
e91ea98d868a
Add SEI recovery point frame counter to H264Context and use it when
cehoyos
parents:
8762
diff
changeset
|
580 */ |
e91ea98d868a
Add SEI recovery point frame counter to H264Context and use it when
cehoyos
parents:
8762
diff
changeset
|
581 int sei_recovery_frame_cnt; |
e91ea98d868a
Add SEI recovery point frame counter to H264Context and use it when
cehoyos
parents:
8762
diff
changeset
|
582 |
8353
add1a0d65370
Do not recalculate constant part of is_complex for every MB.
michael
parents:
8207
diff
changeset
|
583 int is_complex; |
8735
5d7ebbb7e91b
Add fields to H264Context and SPS for upcoming VA API support.
cehoyos
parents:
8718
diff
changeset
|
584 |
5d7ebbb7e91b
Add fields to H264Context and SPS for upcoming VA API support.
cehoyos
parents:
8718
diff
changeset
|
585 int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag |
5d7ebbb7e91b
Add fields to H264Context and SPS for upcoming VA API support.
cehoyos
parents:
8718
diff
changeset
|
586 int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag |
8972 | 587 |
588 // Timestamp stuff | |
589 int sei_buffering_period_present; ///< Buffering period SEI flag | |
590 int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs | |
4975 | 591 }H264Context; |
592 | |
10859
762e6bb0ba40
Split (picture|seq) parameter set decoding out of h264.c.
michael
parents:
10858
diff
changeset
|
593 |
762e6bb0ba40
Split (picture|seq) parameter set decoding out of h264.c.
michael
parents:
10858
diff
changeset
|
594 extern const uint8_t ff_h264_chroma_qp[52]; |
762e6bb0ba40
Split (picture|seq) parameter set decoding out of h264.c.
michael
parents:
10858
diff
changeset
|
595 |
11031 | 596 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); |
597 | |
598 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); | |
10859
762e6bb0ba40
Split (picture|seq) parameter set decoding out of h264.c.
michael
parents:
10858
diff
changeset
|
599 |
8996
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
600 /** |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
601 * Decode SEI |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
602 */ |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
603 int ff_h264_decode_sei(H264Context *h); |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
604 |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
605 /** |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
606 * Decode SPS |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
607 */ |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
608 int ff_h264_decode_seq_parameter_set(H264Context *h); |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
609 |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
610 /** |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
611 * Decode PPS |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
612 */ |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
613 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length); |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
614 |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
615 /** |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
616 * Decodes a network abstraction layer unit. |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
617 * @param consumed is the number of bytes used as input |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
618 * @param length is the length of the array |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
619 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing? |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
620 * @returns decoded bytes, might be src+1 if no escapes |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
621 */ |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
622 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length); |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
623 |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
624 /** |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
625 * identifies the exact end of the bitstream |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
626 * @return the length of the RBSP trailing bits, or 0 if damaged |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
627 */ |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
628 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src); |
e65778184ded
Make the following H264 functions available to the parser:
cehoyos
parents:
8972
diff
changeset
|
629 |
9380
54e650136c87
Add a ff_h264_free_context function and call it from the H.264 parser.
reimar
parents:
9128
diff
changeset
|
630 /** |
54e650136c87
Add a ff_h264_free_context function and call it from the H.264 parser.
reimar
parents:
9128
diff
changeset
|
631 * frees any data that may have been allocated in the H264 context like SPS, PPS etc. |
54e650136c87
Add a ff_h264_free_context function and call it from the H.264 parser.
reimar
parents:
9128
diff
changeset
|
632 */ |
9384
828d15d322ed
Add forgotten void return type to ff_h264_free_context
reimar
parents:
9380
diff
changeset
|
633 av_cold void ff_h264_free_context(H264Context *h); |
9380
54e650136c87
Add a ff_h264_free_context function and call it from the H.264 parser.
reimar
parents:
9128
diff
changeset
|
634 |
10829
46a247e54d6e
Export get_slice_type() as ff_h264_get_slice_type().
ramiro
parents:
9831
diff
changeset
|
635 /** |
46a247e54d6e
Export get_slice_type() as ff_h264_get_slice_type().
ramiro
parents:
9831
diff
changeset
|
636 * reconstructs bitstream slice_type. |
46a247e54d6e
Export get_slice_type() as ff_h264_get_slice_type().
ramiro
parents:
9831
diff
changeset
|
637 */ |
10987
85de0c8a19b7
Added a missing const to ff_h264_get_slice_type().
fenrir
parents:
10985
diff
changeset
|
638 int ff_h264_get_slice_type(const H264Context *h); |
10829
46a247e54d6e
Export get_slice_type() as ff_h264_get_slice_type().
ramiro
parents:
9831
diff
changeset
|
639 |
10852 | 640 /** |
641 * allocates tables. | |
642 * needs width/height | |
643 */ | |
644 int ff_h264_alloc_tables(H264Context *h); | |
645 | |
646 /** | |
10862
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
647 * fills the default_ref_list. |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
648 */ |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
649 int ff_h264_fill_default_ref_list(H264Context *h); |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
650 |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
651 int ff_h264_decode_ref_pic_list_reordering(H264Context *h); |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
652 void ff_h264_fill_mbaff_ref_list(H264Context *h); |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
653 void ff_h264_remove_all_refs(H264Context *h); |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
654 |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
655 /** |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
656 * Executes the reference picture marking (memory management control operations). |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
657 */ |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
658 int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count); |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
659 |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
660 int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb); |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
661 |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
662 |
d9c084a0c22b
Split all the reference picture handling off h264.c.
michael
parents:
10859
diff
changeset
|
663 /** |
10852 | 664 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. |
665 */ | |
10863
974ac220c93a
Move check_intra4x4_pred_mode() back from h264.h to h264.c, the function is just
michael
parents:
10862
diff
changeset
|
666 int ff_h264_check_intra4x4_pred_mode(H264Context *h); |
974ac220c93a
Move check_intra4x4_pred_mode() back from h264.h to h264.c, the function is just
michael
parents:
10862
diff
changeset
|
667 |
974ac220c93a
Move check_intra4x4_pred_mode() back from h264.h to h264.c, the function is just
michael
parents:
10862
diff
changeset
|
668 /** |
974ac220c93a
Move check_intra4x4_pred_mode() back from h264.h to h264.c, the function is just
michael
parents:
10862
diff
changeset
|
669 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. |
974ac220c93a
Move check_intra4x4_pred_mode() back from h264.h to h264.c, the function is just
michael
parents:
10862
diff
changeset
|
670 */ |
10852 | 671 int ff_h264_check_intra_pred_mode(H264Context *h, int mode); |
672 | |
673 void ff_h264_write_back_intra_pred_mode(H264Context *h); | |
674 void ff_h264_hl_decode_mb(H264Context *h); | |
675 int ff_h264_frame_start(H264Context *h); | |
676 av_cold int ff_h264_decode_init(AVCodecContext *avctx); | |
677 av_cold int ff_h264_decode_end(AVCodecContext *avctx); | |
10866 | 678 av_cold void ff_h264_decode_init_vlc(void); |
679 | |
680 /** | |
681 * decodes a CAVLC coded macroblock |
682 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed | |
683 */ | |
684 int ff_h264_decode_mb_cavlc(H264Context *h); | |
10852 | 685 |
10869 | 686 /** |
687 * decodes a CABAC coded macroblock | |
688 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed | |
689 */ | |
690 int ff_h264_decode_mb_cabac(H264Context *h); | |
691 | |
692 void ff_h264_init_cabac_states(H264Context *h); | |
693 | |
10857
b20434143fd5
Split direct mode (macro)block decoding off h264.c.
michael
parents:
10854
diff
changeset
|
694 void ff_h264_direct_dist_scale_factor(H264Context * const h); |
b20434143fd5
Split direct mode (macro)block decoding off h264.c.
michael
parents:
10854
diff
changeset
|
695 void ff_h264_direct_ref_list_init(H264Context * const h); |
b20434143fd5
Split direct mode (macro)block decoding off h264.c.
michael
parents:
10854
diff
changeset
|
696 void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type); |
b20434143fd5
Split direct mode (macro)block decoding off h264.c.
michael
parents:
10854
diff
changeset
|
697 |
10854 | 698 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); |
699 void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); | |
700 | |
10858 | 701 /** |
702 * Reset SEI values at the beginning of the frame. | |
703 * | |
704 * @param h H.264 context. | |
705 */ | |
706 void ff_h264_reset_sei(H264Context *h); | |
707 | |
708 | |
10852 | 709 /* |
710 o-o o-o | |
711 / / / | |
712 o-o o-o | |
713 ,---' | |
714 o-o o-o | |
715 / / / | |
716 o-o o-o | |
717 */ | |
718 //This table must be here because scan8[constant] must be known at compile time |
719 static const uint8_t scan8[16 + 2*4]={ | |
720 4+1*8, 5+1*8, 4+2*8, 5+2*8, | |
721 6+1*8, 7+1*8, 6+2*8, 7+2*8, | |
722 4+3*8, 5+3*8, 4+4*8, 5+4*8, | |
723 6+3*8, 7+3*8, 6+4*8, 7+4*8, | |
724 1+1*8, 2+1*8, | |
725 1+2*8, 2+2*8, | |
726 1+4*8, 2+4*8, | |
727 1+5*8, 2+5*8, | |
728 }; | |
729 | |
730 static av_always_inline uint32_t pack16to32(int a, int b){ | |
731 #if HAVE_BIGENDIAN | |
732 return (b&0xFFFF) + (a<<16); | |
733 #else | |
734 return (a&0xFFFF) + (b<<16); | |
735 #endif | |
736 } | |
737 | |
11277
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
738 static av_always_inline uint16_t pack8to16(int a, int b){ |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
739 #if HAVE_BIGENDIAN |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
740 return (b&0xFF) + (a<<8); |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
741 #else |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
742 return (a&0xFF) + (b<<8); |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
743 #endif |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
744 } |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
745 |
10852 | 746 /** |
10854 | 747 * gets the chroma qp. |
748 */ | |
749 static inline int get_chroma_qp(H264Context *h, int t, int qscale){ | |
750 return h->pps.chroma_qp_table[t][qscale]; | |
751 } | |
752 | |
10866 | 753 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my); |
754 | |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
755 static void fill_decode_neighbors(H264Context *h, int mb_type){ |
10866 | 756 MpegEncContext * const s = &h->s; |
757 const int mb_xy= h->mb_xy; | |
758 int topleft_xy, top_xy, topright_xy, left_xy[2]; | |
10909
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
759 static const uint8_t left_block_options[4][16]={ |
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
760 {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8}, |
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
761 {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8}, |
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
762 {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}, |
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
763 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} |
10866 | 764 }; |
765 | |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
766 h->topleft_partition= -1; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
767 |
11008
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
768 top_xy = mb_xy - (s->mb_stride << MB_FIELD); |
10866 | 769 |
770 /* Wow, what a mess, why didn't they simplify the interlacing & intra | |
771 * stuff, I can't imagine that these complex rules are worth it. */ | |
772 | |
773 topleft_xy = top_xy - 1; | |
774 topright_xy= top_xy + 1; | |
775 left_xy[1] = left_xy[0] = mb_xy-1; | |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
776 h->left_block = left_block_options[0]; |
10866 | 777 if(FRAME_MBAFF){ |
11008
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
778 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); |
10866 | 779 const int curr_mb_field_flag = IS_INTERLACED(mb_type); |
11008
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
780 if(s->mb_y&1){ |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
781 if (left_mb_field_flag != curr_mb_field_flag) { |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
782 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1; |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
783 if (curr_mb_field_flag) { |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
784 left_xy[1] += s->mb_stride; |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
785 h->left_block = left_block_options[3]; |
11008
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
786 } else { |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
787 topleft_xy += s->mb_stride; |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
788 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
789 h->topleft_partition = 0; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
790 h->left_block = left_block_options[1]; |
11008
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
791 } |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
792 } |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
793 }else{ |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
794 if(curr_mb_field_flag){ |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
795 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy - 1]>>7)&1)-1); |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
796 topright_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy + 1]>>7)&1)-1); |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
797 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
798 } |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
799 if (left_mb_field_flag != curr_mb_field_flag) { |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
800 left_xy[1] = left_xy[0] = mb_xy - 1; |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
801 if (curr_mb_field_flag) { |
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
802 left_xy[1] += s->mb_stride; |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
803 h->left_block = left_block_options[3]; |
11008
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
804 } else { |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
805 h->left_block = left_block_options[2]; |
11008
ec192d9ebac5
Optimize mb neighbor initialization for MBAFF in fill_caches().
michael
parents:
11002
diff
changeset
|
806 } |
10866 | 807 } |
808 } | |
809 } | |
810 | |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
811 h->topleft_mb_xy = topleft_xy; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
812 h->top_mb_xy = top_xy; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
813 h->topright_mb_xy= topright_xy; |
10866 | 814 h->left_mb_xy[0] = left_xy[0]; |
815 h->left_mb_xy[1] = left_xy[1]; | |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
816 //FIXME do we need all in the context? |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
817 h->topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
818 h->top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
819 h->topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
820 h->left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
821 h->left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
822 } |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
823 |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
/**
 * Fills the per-macroblock decode caches from already-stored neighbor data.
 *
 * Reads the neighbor macroblock indices and types held in the context
 * (topleft / top / topright / left) and, depending on mb_type, sets up:
 *  - the *_samples_available masks and intra4x4_pred_mode_cache (intra MBs),
 *  - the top/left entries of non_zero_count_cache,
 *  - top_cbp / left_cbp (only when CABAC is set),
 *  - mv_cache / ref_cache, plus mvd_cache and direct_cache under CABAC
 *    (inter MBs and spatial-direct MBs),
 *  - neighbor_transform_size.
 * Skipped MBs bypass the intra, non-zero-count and cbp setup.
 *
 * NOTE(review): the h->*_mb_xy / h->*_type fields are presumably filled in by
 * a separate neighbor-setup step before this is called -- confirm at call sites.
 *
 * @param h       decoder context; neighbor info is read from it and its caches are written
 * @param mb_type type flags of the current macroblock
 */
824 static void fill_decode_caches(H264Context *h, int mb_type){ |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
825 MpegEncContext * const s = &h->s; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
826 int topleft_xy, top_xy, topright_xy, left_xy[2]; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
827 int topleft_type, top_type, topright_type, left_type[2]; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
828 const uint8_t * left_block= h->left_block; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
829 int i; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
830 |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
/* Copy the neighbor MB indices and types kept in the context into locals. */
831 topleft_xy = h->topleft_mb_xy ; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
832 top_xy = h->top_mb_xy ; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
833 topright_xy = h->topright_mb_xy; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
834 left_xy[0] = h->left_mb_xy[0] ; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
835 left_xy[1] = h->left_mb_xy[1] ; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
836 topleft_type = h->topleft_type ; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
837 top_type = h->top_type ; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
838 topright_type= h->topright_type ; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
839 left_type[0] = h->left_type[0] ; |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
840 left_type[1] = h->left_type[1] ; |
10866 | 841 |
11158
81c3f88f460f
Skiped MBs dont need the cbp stuff so skip initing that.
michael
parents:
11156
diff
changeset
|
/* Everything up to the matching close brace (intra availability, nnz, cbp) is skipped for skipped MBs. */
842 if(!IS_SKIP(mb_type)){ |
10928 | 843 if(IS_INTRA(mb_type)){ |
/* With constrained_intra_pred a neighbor only counts as available if its type
 * intersects IS_INTRA(-1) (presumably the set of intra type bits -- confirm
 * against the macro); otherwise any non-zero neighbor type counts. */
844 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; | |
/* Start from the "neighbors present" masks, then clear bits for each missing neighbor below. */
845 h->topleft_samples_available= | |
846 h->top_samples_available= | |
847 h->left_samples_available= 0xFFFF; | |
848 h->topright_samples_available= 0xEEEA; | |
10866 | 849 |
10928 | 850 if(!(top_type & type_mask)){ |
851 h->topleft_samples_available= 0xB3FF; | |
852 h->top_samples_available= 0x33FF; | |
853 h->topright_samples_available= 0x26EA; | |
854 } | |
/* When the current MB and left_type[0] differ in the interlaced flag, the two left halves are checked separately. */
855 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){ | |
856 if(IS_INTERLACED(mb_type)){ | |
857 if(!(left_type[0] & type_mask)){ | |
858 h->topleft_samples_available&= 0xDFFF; | |
859 h->left_samples_available&= 0x5FFF; | |
860 } | |
861 if(!(left_type[1] & type_mask)){ | |
862 h->topleft_samples_available&= 0xFF5F; | |
863 h->left_samples_available&= 0xFF5F; | |
864 } | |
865 }else{ | |
/* Also look at the MB one row (mb_stride) below left_xy[0], subject to the same slice check; both must pass the mask. */
866 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num | |
867 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0; | |
868 assert(left_xy[0] == left_xy[1]); | |
869 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){ | |
870 h->topleft_samples_available&= 0xDF5F; | |
871 h->left_samples_available&= 0x5F5F; | |
872 } | |
10866 | 873 } |
874 }else{ | |
10928 | 875 if(!(left_type[0] & type_mask)){ |
10866 | 876 h->topleft_samples_available&= 0xDF5F; |
877 h->left_samples_available&= 0x5F5F; | |
878 } | |
879 } | |
880 | |
10928 | 881 if(!(topleft_type & type_mask)) |
882 h->topleft_samples_available&= 0x7FFF; | |
10866 | 883 |
10928 | 884 if(!(topright_type & type_mask)) |
885 h->topright_samples_available&= 0xFBFF; | |
10866 | 886 |
/* Intra4x4: load the top row and left column of intra4x4_pred_mode_cache from the
 * neighbors; a non-intra4x4 neighbor yields -1 if it fails the mask, else 2. */
10928 | 887 if(IS_INTRA4x4(mb_type)){ |
888 if(IS_INTRA4x4(top_type)){ | |
11285 | 889 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[top_xy]; |
890 h->intra4x4_pred_mode_cache[4+8*0]= mode[4]; | |
891 h->intra4x4_pred_mode_cache[5+8*0]= mode[5]; | |
892 h->intra4x4_pred_mode_cache[6+8*0]= mode[6]; | |
893 h->intra4x4_pred_mode_cache[7+8*0]= mode[3]; | |
10866 | 894 }else{ |
895 int pred; | |
10928 | 896 if(!(top_type & type_mask)) |
10866 | 897 pred= -1; |
898 else{ | |
899 pred= 2; | |
900 } | |
10928 | 901 h->intra4x4_pred_mode_cache[4+8*0]= |
902 h->intra4x4_pred_mode_cache[5+8*0]= | |
903 h->intra4x4_pred_mode_cache[6+8*0]= | |
904 h->intra4x4_pred_mode_cache[7+8*0]= pred; | |
905 } | |
906 for(i=0; i<2; i++){ | |
907 if(IS_INTRA4x4(left_type[i])){ | |
11285 | 908 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]]; |
909 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[left_block[0+2*i]]; | |
910 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[left_block[1+2*i]]; | |
10928 | 911 }else{ |
912 int pred; | |
913 if(!(left_type[i] & type_mask)) | |
914 pred= -1; | |
915 else{ | |
916 pred= 2; | |
917 } | |
918 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= | |
919 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred; | |
920 } | |
10866 | 921 } |
922 } | |
923 } | |
924 | |
925 | |
926 /* | |
927 0 . T T. T T T T | |
928 1 L . .L . . . . | |
929 2 L . .L . . . . | |
930 3 . T TL . . . . | |
931 4 L . .L . . . . | |
932 5 L . .. . . . . | |
933 */ | |
934 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) | |
/* Top entries of non_zero_count_cache: copied from the top MB when top_type is
 * non-zero, otherwise set to 0 (CABAC and non-intra current MB) or 0x40 per byte. */
935 if(top_type){ | |
11203 | 936 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); |
10928 | 937 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; |
938 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; | |
10866 | 939 |
10928 | 940 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; |
941 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; | |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
942 }else { |
10928 | 943 h->non_zero_count_cache[1+8*0]= |
944 h->non_zero_count_cache[2+8*0]= | |
10866 | 945 |
10928 | 946 h->non_zero_count_cache[1+8*3]= |
947 h->non_zero_count_cache[2+8*3]= | |
11203 | 948 AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040); |
10866 | 949 } |
950 | |
/* Left entries of non_zero_count_cache, one pass per left neighbor; same fallback values (0 or 64). */
951 for (i=0; i<2; i++) { | |
952 if(left_type[i]){ | |
10909
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
953 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; |
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10908
diff
changeset
|
954 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]]; |
10928 | 955 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]]; |
956 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]]; | |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
957 }else{ |
10928 | 958 h->non_zero_count_cache[3+8*1 + 2*8*i]= |
959 h->non_zero_count_cache[3+8*2 + 2*8*i]= | |
960 h->non_zero_count_cache[0+8*1 + 8*i]= | |
961 h->non_zero_count_cache[0+8*4 + 8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; | |
10866 | 962 } |
963 } | |
964 | |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
/* CABAC only: top_cbp / left_cbp come from cbp_table when the neighbor type is
 * non-zero, else default to 0x1CF (intra current MB) or 0x00F. */
965 if( CABAC ) { |
10866 | 966 // top_cbp |
967 if(top_type) { | |
968 h->top_cbp = h->cbp_table[top_xy]; | |
969 } else if(IS_INTRA(mb_type)) { | |
11166
5bd834bd759b
Remove slice_table checks from decode_cabac_mb_cbp_luma() and set left/top_cbp so
michael
parents:
11165
diff
changeset
|
970 h->top_cbp = 0x1CF; |
10866 | 971 } else { |
11166
5bd834bd759b
Remove slice_table checks from decode_cabac_mb_cbp_luma() and set left/top_cbp so
michael
parents:
11165
diff
changeset
|
972 h->top_cbp = 0x00F; |
10866 | 973 } |
974 // left_cbp | |
975 if (left_type[0]) { | |
976 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0; | |
977 } else if(IS_INTRA(mb_type)) { | |
11166
5bd834bd759b
Remove slice_table checks from decode_cabac_mb_cbp_luma() and set left/top_cbp so
michael
parents:
11165
diff
changeset
|
978 h->left_cbp = 0x1CF; |
10866 | 979 } else { |
11166
5bd834bd759b
Remove slice_table checks from decode_cabac_mb_cbp_luma() and set left/top_cbp so
michael
parents:
11165
diff
changeset
|
980 h->left_cbp = 0x00F; |
10866 | 981 } |
/* OR in one bit per left neighbor (into bit 1 and bit 3), picked from that neighbor's cbp via left_block[]. */
982 if (left_type[0]) { | |
983 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1; | |
984 } | |
985 if (left_type[1]) { | |
986 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3; | |
987 } | |
988 } | |
11158
81c3f88f460f
Skiped MBs dont need the cbp stuff so skip initing that.
michael
parents:
11156
diff
changeset
|
989 } |
10866 | 990 |
991 #if 1 | |
11144
0f7dcfca7b2e
Dont calculate any surrounding MVs for temporal MBs
michael
parents:
11102
diff
changeset
|
/* Motion data: only for inter MBs, or direct MBs when direct_spatial_mv_pred is set. */
992 if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ |
10866 | 993 int list; |
994 for(list=0; list<h->list_count; list++){ | |
11145
8805efb4c3fd
Move setting MB_TYPE_L0L1 for direct MBs up, this is simpler.
michael
parents:
11144
diff
changeset
|
995 if(!USES_LIST(mb_type, list)){ |
10866 | 996 /*if(!h->mv_cache_clean[list]){ |
997 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? | |
998 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); | |
999 h->mv_cache_clean[list]= 1; | |
1000 }*/ | |
1001 continue; | |
1002 } | |
11144
0f7dcfca7b2e
Dont calculate any surrounding MVs for temporal MBs
michael
parents:
11102
diff
changeset
|
1003 assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)); |
0f7dcfca7b2e
Dont calculate any surrounding MVs for temporal MBs
michael
parents:
11102
diff
changeset
|
1004 |
10866 | 1005 h->mv_cache_clean[list]= 0; |
1006 | |
/* Top neighbor: copy its bottom row of MVs and refs if it uses this list; otherwise zero
 * the MVs and mark refs LIST_NOT_USED (neighbor type non-zero) or PART_NOT_AVAILABLE. */
1007 if(USES_LIST(top_type, list)){ | |
1008 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | |
1009 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; | |
11002
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1010 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); |
10928 | 1011 h->ref_cache[list][scan8[0] + 0 - 1*8]= |
1012 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; | |
1013 h->ref_cache[list][scan8[0] + 2 - 1*8]= | |
1014 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; | |
10866 | 1015 }else{ |
11002
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1016 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); |
11203 | 1017 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101); |
10866 | 1018 } |
1019 | |
/* Left neighbors: two cache rows per left MB, rows chosen through left_block[]. */
1020 for(i=0; i<2; i++){ | |
1021 int cache_idx = scan8[0] - 1 + i*2*8; | |
1022 if(USES_LIST(left_type[i], list)){ | |
1023 const int b_xy= h->mb2b_xy[left_xy[i]] + 3; | |
1024 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; | |
11203 | 1025 AV_COPY32(h->mv_cache[list][cache_idx ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]); |
1026 AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]); | |
10928 | 1027 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]; |
1028 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]; | |
10866 | 1029 }else{ |
11203 | 1030 AV_ZERO32(h->mv_cache [list][cache_idx ]); |
1031 AV_ZERO32(h->mv_cache [list][cache_idx+8]); | |
10866 | 1032 h->ref_cache[list][cache_idx ]= |
11019 | 1033 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
10866 | 1034 } |
1035 } | |
1036 | |
/* Top-left corner; h->topleft_partition is used as a bit mask to select an extra row offset. */
1037 if(USES_LIST(topleft_type, list)){ | |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
1038 const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride); |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
1039 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (h->topleft_partition & h->b8_stride); |
11203 | 1040 AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]); |
10866 | 1041 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
1042 }else{ | |
11203 | 1043 AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]); |
10866 | 1044 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
1045 } | |
1046 | |
/* Top-right corner. */
1047 if(USES_LIST(topright_type, list)){ | |
1048 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; | |
1049 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; | |
11203 | 1050 AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]); |
10866 | 1051 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
1052 }else{ | |
11203 | 1053 AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]); |
10866 | 1054 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
1055 } | |
1056 | |
/* Skip/direct MBs are done with this list unless FRAME_MBAFF, in which case only the remapping at the end of the loop body still runs. */
11159 | 1057 if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF) |
10866 | 1058 continue; |
1059 | |
11156
81ef4df2c774
Also skip direct/mvd_cache init for skiped blocks.
michael
parents:
11155
diff
changeset
|
1060 if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) { |
10866 | 1061 h->ref_cache[list][scan8[5 ]+1] = |
1062 h->ref_cache[list][scan8[7 ]+1] = | |
1063 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else) | |
1064 h->ref_cache[list][scan8[4 ]] = | |
1065 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; | |
11203 | 1066 AV_ZERO32(h->mv_cache [list][scan8[5 ]+1]); |
1067 AV_ZERO32(h->mv_cache [list][scan8[7 ]+1]); | |
1068 AV_ZERO32(h->mv_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else) | |
1069 AV_ZERO32(h->mv_cache [list][scan8[4 ]]); | |
1070 AV_ZERO32(h->mv_cache [list][scan8[12]]); | |
10866 | 1071 |
11155 | 1072 if( CABAC ) { |
10866 | 1073 /* XXX beurk, Load mvd */ |
1074 if(USES_LIST(top_type, list)){ | |
11283
853e93a50fe5
Cut the size of mvd_table by yet another factor of 2.
michael
parents:
11282
diff
changeset
|
1075 const int b_xy= h->mb2br_xy[top_xy]; |
11277
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1076 AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]); |
10866 | 1077 }else{ |
11277
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1078 AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]); |
10866 | 1079 } |
1080 if(USES_LIST(left_type[0], list)){ | |
11283
853e93a50fe5
Cut the size of mvd_table by yet another factor of 2.
michael
parents:
11282
diff
changeset
|
1081 const int b_xy= h->mb2br_xy[left_xy[0]] + 6; |
853e93a50fe5
Cut the size of mvd_table by yet another factor of 2.
michael
parents:
11282
diff
changeset
|
1082 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy - left_block[0]]); |
853e93a50fe5
Cut the size of mvd_table by yet another factor of 2.
michael
parents:
11282
diff
changeset
|
1083 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy - left_block[1]]); |
10866 | 1084 }else{ |
11277
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1085 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]); |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1086 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]); |
10866 | 1087 } |
1088 if(USES_LIST(left_type[1], list)){ | |
11283
853e93a50fe5
Cut the size of mvd_table by yet another factor of 2.
michael
parents:
11282
diff
changeset
|
1089 const int b_xy= h->mb2br_xy[left_xy[1]] + 6; |
853e93a50fe5
Cut the size of mvd_table by yet another factor of 2.
michael
parents:
11282
diff
changeset
|
1090 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy - left_block[2]]); |
853e93a50fe5
Cut the size of mvd_table by yet another factor of 2.
michael
parents:
11282
diff
changeset
|
1091 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy - left_block[3]]); |
10866 | 1092 }else{ |
11277
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1093 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]); |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1094 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]); |
10866 | 1095 } |
11277
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1096 AV_ZERO16(h->mvd_cache [list][scan8[5 ]+1]); |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1097 AV_ZERO16(h->mvd_cache [list][scan8[7 ]+1]); |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1098 AV_ZERO16(h->mvd_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else) |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1099 AV_ZERO16(h->mvd_cache [list][scan8[4 ]]); |
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1100 AV_ZERO16(h->mvd_cache [list][scan8[12]]); |
/* B slices: preset direct_cache for the current MB, then fill its top row and left column from the neighbors. */
10866 | 1101 if(h->slice_type_nos == FF_B_TYPE){ |
11167 | 1102 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1); |
10866 | 1103 |
1104 if(IS_DIRECT(top_type)){ | |
11203 | 1105 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_DIRECT2>>1)); |
10866 | 1106 }else if(IS_8X8(top_type)){ |
1107 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; | |
1108 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; | |
1109 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; | |
1110 }else{ | |
11203 | 1111 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1)); |
10866 | 1112 } |
1113 | |
1114 if(IS_DIRECT(left_type[0])) | |
11167 | 1115 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1; |
10866 | 1116 else if(IS_8X8(left_type[0])) |
1117 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)]; | |
1118 else | |
11167 | 1119 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1; |
10866 | 1120 |
1121 if(IS_DIRECT(left_type[1])) | |
11167 | 1122 h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1; |
10866 | 1123 else if(IS_8X8(left_type[1])) |
1124 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)]; | |
1125 else | |
11167 | 1126 h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1; |
10866 | 1127 } |
1128 } | |
11155 | 1129 } |
/* MBAFF: for each cached neighbor entry with a ref index >= 0 whose interlaced flag differs from
 * the current MB (MB_FIELD), rescale the ref index and the vertical mv/mvd components:
 * ref doubled and vertical halved for a field MB, the inverse for a frame MB. */
10866 | 1130 if(FRAME_MBAFF){ |
1131 #define MAP_MVS\ | |
1132 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ | |
1133 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ | |
1134 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ | |
1135 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ | |
1136 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ | |
1137 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ | |
1138 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\ | |
1139 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\ | |
1140 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\ | |
1141 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1]) | |
1142 if(MB_FIELD){ | |
1143 #define MAP_F2F(idx, mb_type)\ | |
1144 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ | |
1145 h->ref_cache[list][idx] <<= 1;\ | |
1146 h->mv_cache[list][idx][1] /= 2;\ | |
11268
e817a3c2ec2e
Replace /2 by faster >>1 as the mvd values are now all positive.
michael
parents:
11267
diff
changeset
|
1147 h->mvd_cache[list][idx][1] >>=1;\ |
10866 | 1148 } |
1149 MAP_MVS | |
1150 #undef MAP_F2F | |
1151 }else{ | |
1152 #define MAP_F2F(idx, mb_type)\ | |
1153 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ | |
1154 h->ref_cache[list][idx] >>= 1;\ | |
1155 h->mv_cache[list][idx][1] <<= 1;\ | |
1156 h->mvd_cache[list][idx][1] <<= 1;\ | |
1157 } | |
1158 MAP_MVS | |
1159 #undef MAP_F2F | |
1160 } | |
1161 } | |
1162 } | |
1163 } | |
1164 #endif | |
1165 | |
/* Number (0..2) of the top and first-left neighbors that use the 8x8 transform. */
10928 | 1166 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); |
10907 | 1167 } |
1168 | |
10910 | 1169 /** |
1170 * | |
1171 * @returns non-zero if the loop filter can be skipped | |
1172 */ | |
1173 static int fill_filter_caches(H264Context *h, int mb_type){ | |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1174 MpegEncContext * const s = &h->s; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1175 const int mb_xy= h->mb_xy; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1176 int top_xy, left_xy[2]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1177 int top_type, left_type[2]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1178 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1179 top_xy = mb_xy - (s->mb_stride << MB_FIELD); |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1180 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1181 //FIXME deblocking could skip the intra and nnz parts. |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1182 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1183 /* Wow, what a mess, why didn't they simplify the interlacing & intra |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1184 * stuff, I can't imagine that these complex rules are worth it. */ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1185 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1186 left_xy[1] = left_xy[0] = mb_xy-1; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1187 if(FRAME_MBAFF){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1188 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1189 const int curr_mb_field_flag = IS_INTERLACED(mb_type); |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1190 if(s->mb_y&1){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1191 if (left_mb_field_flag != curr_mb_field_flag) { |
11023 | 1192 left_xy[0] -= s->mb_stride; |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1193 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1194 }else{ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1195 if(curr_mb_field_flag){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1196 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1197 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1198 if (left_mb_field_flag != curr_mb_field_flag) { |
11023 | 1199 left_xy[1] += s->mb_stride; |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1200 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1201 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1202 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1203 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1204 h->top_mb_xy = top_xy; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1205 h->left_mb_xy[0] = left_xy[0]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1206 h->left_mb_xy[1] = left_xy[1]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1207 { |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1208 //for sufficiently low qp, filtering wouldn't do anything |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1209 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1210 int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1211 int qp = s->current_picture.qscale_table[mb_xy]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1212 if(qp <= qp_thresh |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1213 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh) |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1214 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1215 if(!FRAME_MBAFF) |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1216 return 1; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1217 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh) |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1218 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh)) |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1219 return 1; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1220 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1221 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1222 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1223 if(h->deblocking_filter == 2){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1224 h->top_type = top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1225 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1226 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1227 }else{ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1228 h->top_type = top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1229 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1230 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1231 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1232 if(IS_INTRA(mb_type)) |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1233 return 0; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1234 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1235 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1236 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); |
11203 | 1237 AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]); |
1238 AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]); | |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1239 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1240 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1241 h->cbp= h->cbp_table[mb_xy]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1242 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1243 { |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1244 int list; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1245 for(list=0; list<h->list_count; list++){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1246 int8_t *ref; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1247 int y, b_stride; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1248 int16_t (*mv_dst)[2]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1249 int16_t (*mv_src)[2]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1250 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1251 if(!USES_LIST(mb_type, list)){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1252 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); |
11203 | 1253 AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
1254 AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u); | |
1255 AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u); | |
1256 AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u); | |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1257 continue; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1258 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1259 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1260 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1261 { |
11165 | 1262 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); |
11203 | 1263 AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
1264 AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); | |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1265 ref += h->b8_stride; |
11203 | 1266 AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
1267 AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); | |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1268 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1269 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1270 b_stride = h->b_stride; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1271 mv_dst = &h->mv_cache[list][scan8[0]]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1272 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1273 for(y=0; y<4; y++){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1274 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride); |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1275 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1276 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1277 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1278 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1279 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1280 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1281 /* |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1282 0 . T T. T T T T |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1283 1 L . .L . . . . |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1284 2 L . .L . . . . |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1285 3 . T TL . . . . |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1286 4 L . .L . . . . |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1287 5 L . .. . . . . |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1288 */ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1289 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1290 if(top_type){ |
11203 | 1291 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1292 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1293 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1294 if(left_type[0]){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1295 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1296 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1297 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1298 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1299 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1300 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1301 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1302 if(!CABAC && h->pps.transform_8x8_mode){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1303 if(IS_8x8DCT(top_type)){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1304 h->non_zero_count_cache[4+8*0]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1305 h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1306 h->non_zero_count_cache[6+8*0]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1307 h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1308 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1309 if(IS_8x8DCT(left_type[0])){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1310 h->non_zero_count_cache[3+8*1]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1311 h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1312 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1313 if(IS_8x8DCT(left_type[1])){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1314 h->non_zero_count_cache[3+8*3]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1315 h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1316 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1317 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1318 if(IS_8x8DCT(mb_type)){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1319 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1320 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1321 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1322 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1323 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1324 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1325 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1326 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1327 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1328 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1329 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1330 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1331 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1332 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1333 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1334 int list; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1335 for(list=0; list<h->list_count; list++){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1336 if(USES_LIST(top_type, list)){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1337 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1338 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; |
11165 | 1339 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1340 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1341 h->ref_cache[list][scan8[0] + 0 - 1*8]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1342 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1343 h->ref_cache[list][scan8[0] + 2 - 1*8]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1344 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1345 }else{ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1346 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); |
11203 | 1347 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1348 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1349 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1350 if(!IS_INTERLACED(mb_type^left_type[0])){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1351 if(USES_LIST(left_type[0], list)){ |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1352 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1353 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; |
11165 | 1354 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); |
11203 | 1355 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]); |
1356 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]); | |
1357 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]); | |
1358 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]); | |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1359 h->ref_cache[list][scan8[0] - 1 + 0 ]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1360 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1361 h->ref_cache[list][scan8[0] - 1 +16 ]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1362 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]]; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1363 }else{ |
11203 | 1364 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]); |
1365 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]); | |
1366 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]); | |
1367 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]); | |
11022
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1368 h->ref_cache[list][scan8[0] - 1 + 0 ]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1369 h->ref_cache[list][scan8[0] - 1 + 8 ]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1370 h->ref_cache[list][scan8[0] - 1 + 16 ]= |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1371 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1372 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1373 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1374 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1375 } |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1376 |
44529d4292ce
Split fill_caches() between loopfilter & decode, the 2 no longer where common
michael
parents:
11020
diff
changeset
|
1377 return 0; |
10907 | 1378 } |
1379 | |
10866 | 1380 /** |
1381 * gets the predicted intra4x4 prediction mode. | |
1382 */ | |
1383 static inline int pred_intra_mode(H264Context *h, int n){ | |
1384 const int index8= scan8[n]; | |
1385 const int left= h->intra4x4_pred_mode_cache[index8 - 1]; | |
1386 const int top = h->intra4x4_pred_mode_cache[index8 - 8]; | |
1387 const int min= FFMIN(left, top); | |
1388 | |
1389 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min); | |
1390 | |
1391 if(min<0) return DC_PRED; | |
1392 else return min; | |
1393 } | |
1394 | |
1395 static inline void write_back_non_zero_count(H264Context *h){ | |
1396 const int mb_xy= h->mb_xy; | |
1397 | |
11002
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1398 AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]); |
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1399 AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]); |
11203 | 1400 AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]); |
1401 AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]); | |
11002
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1402 AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]); |
10866 | 1403 } |
1404 | |
1405 static inline void write_back_motion(H264Context *h, int mb_type){ | |
1406 MpegEncContext * const s = &h->s; | |
1407 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; | |
1408 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; | |
1409 int list; | |
1410 | |
1411 if(!USES_LIST(mb_type, 0)) | |
1412 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1); | |
1413 | |
1414 for(list=0; list<h->list_count; list++){ | |
11002
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1415 int y, b_stride; |
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1416 int16_t (*mv_dst)[2]; |
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1417 int16_t (*mv_src)[2]; |
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1418 |
10866 | 1419 if(!USES_LIST(mb_type, list)) |
1420 continue; | |
1421 | |
11002
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1422 b_stride = h->b_stride; |
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1423 mv_dst = &s->current_picture.motion_val[list][b_xy]; |
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1424 mv_src = &h->mv_cache[list][scan8[0]]; |
10866 | 1425 for(y=0; y<4; y++){ |
11002
1c8892d7a090
H.264: Use 64-/128-bit write-combining macros for copies
astrange
parents:
10987
diff
changeset
|
1426 AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y); |
10866 | 1427 } |
10893
2aafcafbe1f0
Replace cabac checks in inline functions from h264.h with constants.
michael
parents:
10883
diff
changeset
|
1428 if( CABAC ) { |
11283
853e93a50fe5
Cut the size of mvd_table by yet another factor of 2.
michael
parents:
11282
diff
changeset
|
1429 uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]]; |
11277
c12d6c6c027e
Change mvd_cache & mvd_table to 8bit, this is overall a bit faster
michael
parents:
11268
diff
changeset
|
1430 uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; |
10866 | 1431 if(IS_SKIP(mb_type)) |
11283
853e93a50fe5
Cut the size of mvd_table by yet another factor of 2.
michael
parents:
11282
diff
changeset
|
1432 AV_ZERO128(mvd_dst); |
853e93a50fe5
Cut the size of mvd_table by yet another factor of 2.
michael
parents:
11282
diff
changeset
|
1433 else{ |
853e93a50fe5
Cut the size of mvd_table by yet another factor of 2.
michael
parents:
11282
diff
changeset
|
1434 AV_COPY64(mvd_dst, mvd_src + 8*3); |
11284
aaca4b58880f
unroll tiny and trivial loop. Same speed but clearer.
michael
parents:
11283
diff
changeset
|
1435 AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0); |
aaca4b58880f
unroll tiny and trivial loop. Same speed but clearer.
michael
parents:
11283
diff
changeset
|
1436 AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1); |
aaca4b58880f
unroll tiny and trivial loop. Same speed but clearer.
michael
parents:
11283
diff
changeset
|
1437 AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2); |
10866 | 1438 } |
1439 } | |
1440 | |
1441 { | |
1442 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; | |
1443 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; | |
1444 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; | |
1445 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; | |
1446 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]]; | |
1447 } | |
1448 } | |
1449 | |
10893
2aafcafbe1f0
Replace cabac checks in inline functions from h264.h with constants.
michael
parents:
10883
diff
changeset
|
1450 if(h->slice_type_nos == FF_B_TYPE && CABAC){ |
10866 | 1451 if(IS_8X8(mb_type)){ |
1452 uint8_t *direct_table = &h->direct_table[b8_xy]; | |
11167 | 1453 direct_table[1+0*h->b8_stride] = h->sub_mb_type[1]>>1; |
1454 direct_table[0+1*h->b8_stride] = h->sub_mb_type[2]>>1; | |
1455 direct_table[1+1*h->b8_stride] = h->sub_mb_type[3]>>1; | |
10866 | 1456 } |
1457 } | |
1458 } | |
1459 | |
1460 static inline int get_dct8x8_allowed(H264Context *h){ | |
1461 if(h->sps.direct_8x8_inference_flag) | |
11203 | 1462 return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); |
10866 | 1463 else |
11203 | 1464 return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); |
10866 | 1465 } |
1466 | |
1467 /** | |
1468 * decodes a P_SKIP or B_SKIP macroblock | |
1469 */ | |
1470 static void decode_mb_skip(H264Context *h){ | |
1471 MpegEncContext * const s = &h->s; | |
1472 const int mb_xy= h->mb_xy; | |
1473 int mb_type=0; | |
1474 | |
10906 | 1475 memset(h->non_zero_count[mb_xy], 0, 32); |
10866 | 1476 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui |
1477 | |
1478 if(MB_FIELD) | |
1479 mb_type|= MB_TYPE_INTERLACED; | |
1480 | |
1481 if( h->slice_type_nos == FF_B_TYPE ) | |
1482 { | |
1483 // just for fill_caches. pred_direct_motion will set the real mb_type | |
11145
8805efb4c3fd
Move setting MB_TYPE_L0L1 for direct MBs up, this is simpler.
michael
parents:
11144
diff
changeset
|
1484 mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
1485 if(h->direct_spatial_mv_pred){ |
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
1486 fill_decode_neighbors(h, mb_type); |
10907 | 1487 fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
1488 } |
10866 | 1489 ff_h264_pred_direct_motion(h, &mb_type); |
1490 mb_type|= MB_TYPE_SKIP; | |
1491 } | |
1492 else | |
1493 { | |
1494 int mx, my; | |
1495 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; | |
1496 | |
11183
d1a855cb0a0c
Split setting neighboring MBs from fill_decode_caches()
michael
parents:
11167
diff
changeset
|
1497 fill_decode_neighbors(h, mb_type); |
10907 | 1498 fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... |
10866 | 1499 pred_pskip_motion(h, &mx, &my); |
1500 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); | |
1501 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); | |
1502 } | |
1503 | |
1504 write_back_motion(h, mb_type); | |
1505 s->current_picture.mb_type[mb_xy]= mb_type; | |
1506 s->current_picture.qscale_table[mb_xy]= s->qscale; | |
1507 h->slice_table[ mb_xy ]= h->slice_num; | |
1508 h->prev_mb_skipped= 1; | |
1509 } | |
1510 | |
10883
4701b9355d93
Add forgotten include of h264_mvpred.h to h264.h.
michael
parents:
10869
diff
changeset
|
1511 #include "h264_mvpred.h" //For pred_pskip_motion() |
4701b9355d93
Add forgotten include of h264_mvpred.h to h264.h.
michael
parents:
10869
diff
changeset
|
1512 |
7760 | 1513 #endif /* AVCODEC_H264_H */ |