Mercurial > libavcodec.hg
annotate h264_loopfilter.c @ 10960:10759fd39860 libavcodec
Gcc idiocy fixes related to filter_mb_edge*.
Change the order of operands, as gcc seems to use a hardcoded register per operand
even for static functions,
thus reducing unneeded moves (functions now try to pass the same argument in
the same spot).
Change signed int to unsigned int for array indexes, as signed requires sign
extension while unsigned is free.
Move the +52 up and merge it where it will end up as a lea instruction; otherwise gcc always
splits the 52 out there, turning the free +52 into an expensive one.
The changed code becomes a little faster.
author | michael |
---|---|
date | Fri, 22 Jan 2010 01:59:17 +0000 |
parents | 304db572a69a |
children | 34a65026fa06 |
rev | line source |
---|---|
10854 | 1 /* |
2 * H.26L/H.264/AVC/JVT/14496-10/... loop filter | |
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 /** | |
23 * @file libavcodec/h264_loopfilter.c | |
24 * H.264 / AVC / MPEG4 part10 loop filter. | |
25 * @author Michael Niedermayer <michaelni@gmx.at> | |
26 */ | |
27 | |
28 #include "internal.h" | |
29 #include "dsputil.h" | |
30 #include "avcodec.h" | |
31 #include "mpegvideo.h" | |
32 #include "h264.h" | |
33 #include "mathops.h" | |
34 #include "rectangle.h" | |
35 | |
36 //#undef NDEBUG | |
37 #include <assert.h> | |
38 | |
39 /* Deblocking filter (p153) */ | |
/*
 * Alpha threshold lookup table, 52*3 entries.
 * Layout as written below: 52 entries of 0, then the 52 listed threshold
 * values, then 52 entries of 255.
 * Users in this file read it as alpha_table[52 + qp + offset] or
 * (alpha_table+52)[qp + offset], so the first and last thirds serve as
 * padding around the middle third (presumably so that qp + offset needs no
 * clamping -- the range of the offset is not visible here).
 */
40 static const uint8_t alpha_table[52*3] = { | |
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
46 0, 0, 0, 0, 0, 0, 4, 4, 5, 6, | |
47 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, | |
48 25, 28, 32, 36, 40, 45, 50, 56, 63, 71, | |
49 80, 90,101,113,127,144,162,182,203,226, | |
50 255,255, | |
51 255,255,255,255,255,255,255,255,255,255,255,255,255, | |
52 255,255,255,255,255,255,255,255,255,255,255,255,255, | |
53 255,255,255,255,255,255,255,255,255,255,255,255,255, | |
54 255,255,255,255,255,255,255,255,255,255,255,255,255, | |
55 }; | |
/*
 * Beta threshold lookup table, 52*3 entries.
 * Layout as written below: 52 entries of 0, then the 52 listed threshold
 * values, then 52 entries of 18 (the last listed value repeated).
 * Users in this file read it as (beta_table+52)[qp + offset], i.e. the
 * middle third is the real table and the outer thirds are padding
 * (presumably so that qp + offset needs no clamping -- the range of the
 * offset is not visible here).
 */
56 static const uint8_t beta_table[52*3] = { | |
57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
58 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
61 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
62 0, 0, 0, 0, 0, 0, 2, 2, 2, 3, | |
63 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, | |
64 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, | |
65 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, | |
66 18, 18, | |
67 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, | |
68 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, | |
69 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, | |
70 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, | |
71 }; | |
/*
 * tc0 clipping lookup table: 52*3 rows of 4 values.
 * The first index is the same biased index used for alpha_table
 * (52 + qp + offset); the second index is a bS value as used by the edge
 * filters in this file (tc0_table[index_a][bS[n]]).
 * Layout as written below: 52 rows of {-1,0,0,0}, then the 52 listed rows,
 * then 52 rows of {-1,13,17,25} (the last listed row repeated).
 * Column 0 is written as -1; since the element type is uint8_t the stored
 * value is 255. The non-MBAFF edge filters copy entries into int8_t tc[]
 * arrays (adding 1 for chroma).
 */
72 static const uint8_t tc0_table[52*3][4] = { | |
73 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, | |
74 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, | |
75 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, | |
76 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, | |
77 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, | |
78 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, | |
79 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, | |
80 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, | |
81 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, | |
82 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, | |
83 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, | |
84 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 }, | |
85 {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 }, | |
86 {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, | |
87 {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 }, | |
88 {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 }, | |
89 {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 }, | |
90 {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 }, | |
91 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, | |
92 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, | |
93 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, | |
94 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, | |
95 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, | |
96 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, | |
97 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, | |
98 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, | |
99 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, | |
100 }; | |
101 | |
10960 | 102 static void av_noinline filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h) { |
103 const unsigned int index_a = 52 + qp + h->slice_alpha_c0_offset; | |
104 const int alpha = alpha_table[index_a]; | |
10854 | 105 const int beta = (beta_table+52)[qp + h->slice_beta_offset]; |
106 if (alpha ==0 || beta == 0) return; | |
107 | |
108 if( bS[0] < 4 ) { | |
109 int8_t tc[4]; | |
10960 | 110 tc[0] = tc0_table[index_a][bS[0]]; |
111 tc[1] = tc0_table[index_a][bS[1]]; | |
112 tc[2] = tc0_table[index_a][bS[2]]; | |
113 tc[3] = tc0_table[index_a][bS[3]]; | |
10854 | 114 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); |
115 } else { | |
116 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); | |
117 } | |
118 } | |
10960 | 119 static void av_noinline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
120 const unsigned int index_a = 52 + qp + h->slice_alpha_c0_offset; | |
121 const int alpha = alpha_table[index_a]; | |
10854 | 122 const int beta = (beta_table+52)[qp + h->slice_beta_offset]; |
123 if (alpha ==0 || beta == 0) return; | |
124 | |
125 if( bS[0] < 4 ) { | |
126 int8_t tc[4]; | |
10960 | 127 tc[0] = tc0_table[index_a][bS[0]]+1; |
128 tc[1] = tc0_table[index_a][bS[1]]+1; | |
129 tc[2] = tc0_table[index_a][bS[2]]+1; | |
130 tc[3] = tc0_table[index_a][bS[3]]+1; | |
10854 | 131 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); |
132 } else { | |
133 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); | |
134 } | |
135 } | |
136 | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
137 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { |
10854 | 138 int i; |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
139 for( i = 0; i < 8; i++, pix += stride) { |
10854 | 140 int index_a; |
141 int alpha; | |
142 int beta; | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
143 const int bS_index = (i >> 1) * bsi; |
10854 | 144 |
145 if( bS[bS_index] == 0 ) { | |
146 continue; | |
147 } | |
148 | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
149 index_a = qp + h->slice_alpha_c0_offset; |
10854 | 150 alpha = (alpha_table+52)[index_a]; |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
151 beta = (beta_table+52)[qp + h->slice_beta_offset]; |
10854 | 152 |
153 if( bS[bS_index] < 4 ) { | |
154 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]]; | |
155 const int p0 = pix[-1]; | |
156 const int p1 = pix[-2]; | |
157 const int p2 = pix[-3]; | |
158 const int q0 = pix[0]; | |
159 const int q1 = pix[1]; | |
160 const int q2 = pix[2]; | |
161 | |
162 if( FFABS( p0 - q0 ) < alpha && | |
163 FFABS( p1 - p0 ) < beta && | |
164 FFABS( q1 - q0 ) < beta ) { | |
165 int tc = tc0; | |
166 int i_delta; | |
167 | |
168 if( FFABS( p2 - p0 ) < beta ) { | |
169 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); | |
170 tc++; | |
171 } | |
172 if( FFABS( q2 - q0 ) < beta ) { | |
173 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); | |
174 tc++; | |
175 } | |
176 | |
177 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); | |
178 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ | |
179 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ | |
180 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); | |
181 } | |
182 }else{ | |
183 const int p0 = pix[-1]; | |
184 const int p1 = pix[-2]; | |
185 const int p2 = pix[-3]; | |
186 | |
187 const int q0 = pix[0]; | |
188 const int q1 = pix[1]; | |
189 const int q2 = pix[2]; | |
190 | |
191 if( FFABS( p0 - q0 ) < alpha && | |
192 FFABS( p1 - p0 ) < beta && | |
193 FFABS( q1 - q0 ) < beta ) { | |
194 | |
195 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ | |
196 if( FFABS( p2 - p0 ) < beta) | |
197 { | |
198 const int p3 = pix[-4]; | |
199 /* p0', p1', p2' */ | |
200 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; | |
201 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; | |
202 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; | |
203 } else { | |
204 /* p0' */ | |
205 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | |
206 } | |
207 if( FFABS( q2 - q0 ) < beta) | |
208 { | |
209 const int q3 = pix[3]; | |
210 /* q0', q1', q2' */ | |
211 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; | |
212 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; | |
213 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; | |
214 } else { | |
215 /* q0' */ | |
216 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | |
217 } | |
218 }else{ | |
219 /* p0', q0' */ | |
220 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | |
221 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | |
222 } | |
223 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); | |
224 } | |
225 } | |
226 } | |
227 } | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
228 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { |
10854 | 229 int i; |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
230 for( i = 0; i < 4; i++, pix += stride) { |
10854 | 231 int index_a; |
232 int alpha; | |
233 int beta; | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
234 const int bS_index = i*bsi; |
10854 | 235 |
236 if( bS[bS_index] == 0 ) { | |
237 continue; | |
238 } | |
239 | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
240 index_a = qp + h->slice_alpha_c0_offset; |
10854 | 241 alpha = (alpha_table+52)[index_a]; |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
242 beta = (beta_table+52)[qp + h->slice_beta_offset]; |
10854 | 243 |
244 if( bS[bS_index] < 4 ) { | |
245 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1; | |
246 const int p0 = pix[-1]; | |
247 const int p1 = pix[-2]; | |
248 const int q0 = pix[0]; | |
249 const int q1 = pix[1]; | |
250 | |
251 if( FFABS( p0 - q0 ) < alpha && | |
252 FFABS( p1 - p0 ) < beta && | |
253 FFABS( q1 - q0 ) < beta ) { | |
254 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); | |
255 | |
256 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ | |
257 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ | |
258 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); | |
259 } | |
260 }else{ | |
261 const int p0 = pix[-1]; | |
262 const int p1 = pix[-2]; | |
263 const int q0 = pix[0]; | |
264 const int q1 = pix[1]; | |
265 | |
266 if( FFABS( p0 - q0 ) < alpha && | |
267 FFABS( p1 - p0 ) < beta && | |
268 FFABS( q1 - q0 ) < beta ) { | |
269 | |
270 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ | |
271 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ | |
272 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); | |
273 } | |
274 } | |
275 } | |
276 } | |
277 | |
10960 | 278 static void av_noinline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
279 const unsigned int index_a = 52 + qp + h->slice_alpha_c0_offset; | |
280 const int alpha = alpha_table[index_a]; | |
10854 | 281 const int beta = (beta_table+52)[qp + h->slice_beta_offset]; |
282 if (alpha ==0 || beta == 0) return; | |
283 | |
284 if( bS[0] < 4 ) { | |
285 int8_t tc[4]; | |
10960 | 286 tc[0] = tc0_table[index_a][bS[0]]; |
287 tc[1] = tc0_table[index_a][bS[1]]; | |
288 tc[2] = tc0_table[index_a][bS[2]]; | |
289 tc[3] = tc0_table[index_a][bS[3]]; | |
10854 | 290 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); |
291 } else { | |
292 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta); | |
293 } | |
294 } | |
295 | |
10960 | 296 static void av_noinline filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
297 const unsigned int index_a = 52 + qp + h->slice_alpha_c0_offset; | |
298 const int alpha = alpha_table[index_a]; | |
10854 | 299 const int beta = (beta_table+52)[qp + h->slice_beta_offset]; |
300 if (alpha ==0 || beta == 0) return; | |
301 | |
302 if( bS[0] < 4 ) { | |
303 int8_t tc[4]; | |
10960 | 304 tc[0] = tc0_table[index_a][bS[0]]+1; |
305 tc[1] = tc0_table[index_a][bS[1]]+1; | |
306 tc[2] = tc0_table[index_a][bS[2]]+1; | |
307 tc[3] = tc0_table[index_a][bS[3]]+1; | |
10854 | 308 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); |
309 } else { | |
310 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); | |
311 } | |
312 } | |
313 | |
/**
 * Fast-path deblocking of one macroblock.
 *
 * Hands the macroblock to ff_h264_filter_mb() instead when any of these
 * hold: mb_x is 0, mb_y is the first row (row 1 for a bottom field picture),
 * the dsp h264_loop_filter_strength hook is missing, pps.chroma_qp_diff is
 * set, or deblocking_filter is 2 and the top or left neighbour belongs to a
 * different slice.
 *
 * Otherwise the left/top and internal edges of the luma plane (img_y) and
 * both chroma planes (img_cb, img_cr) are filtered via filter_mb_edge{v,h}
 * and filter_mb_edgec{v,h}.
 */
314 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { | |
315 MpegEncContext * const s = &h->s; | |
316 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD; | |
317 int mb_xy, mb_type; | |
318 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; | |
319 | |
320 mb_xy = h->mb_xy; | |
321 | |
/* Cases this fast path does not handle: defer to the general filter. */
322 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff || | |
10922 | 323 (h->deblocking_filter == 2 && (h->slice_num != h->slice_table[h->top_mb_xy] || |
324 h->slice_num != h->slice_table[mb_xy - 1]))) { | |
10854 | 325 ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); |
326 return; | |
327 } | |
328 assert(!FRAME_MBAFF); | |
329 | |
/* qp/qpc: this macroblock; qp0/qpc0: left neighbour; qp1/qpc1: top
 * neighbour. The neighbour values are then replaced by the rounded average
 * with the current macroblock's value. */
330 mb_type = s->current_picture.mb_type[mb_xy]; | |
331 qp = s->current_picture.qscale_table[mb_xy]; | |
332 qp0 = s->current_picture.qscale_table[mb_xy-1]; | |
333 qp1 = s->current_picture.qscale_table[h->top_mb_xy]; | |
334 qpc = get_chroma_qp( h, 0, qp ); | |
335 qpc0 = get_chroma_qp( h, 0, qp0 ); | |
336 qpc1 = get_chroma_qp( h, 0, qp1 ); | |
337 qp0 = (qp + qp0 + 1) >> 1; | |
338 qp1 = (qp + qp1 + 1) >> 1; | |
339 qpc0 = (qpc + qpc0 + 1) >> 1; | |
340 qpc1 = (qpc + qpc1 + 1) >> 1; | |
/* alpha_table is 0 for qp + slice_alpha_c0_offset <= 15 and the edge
 * filters return early on alpha == 0, so if every qp is at or below the
 * threshold nothing would be filtered anyway. */
341 qp_thresh = 15 - h->slice_alpha_c0_offset; | |
342 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh && | |
343 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh) | |
344 return; | |
345 | |
/* Intra macroblock: fixed strengths. Left edge uses 4, internal edges use 3,
 * the top edge uses 3 for field pictures and 4 otherwise. With 8x8DCT only
 * edges 0 and 2 are filtered for luma; chroma always filters edges 0 and 2. */
346 if( IS_INTRA(mb_type) ) { | |
347 int16_t bS4[4] = {4,4,4,4}; | |
348 int16_t bS3[4] = {3,3,3,3}; | |
349 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4; | |
350 if( IS_8x8DCT(mb_type) ) { | |
10960 | 351 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); |
352 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); | |
353 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); | |
354 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); | |
10854 | 355 } else { |
10960 | 356 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); |
357 filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h); | |
358 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); | |
359 filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h); | |
360 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); | |
361 filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h); | |
362 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); | |
363 filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h); | |
10854 | 364 } |
10960 | 365 filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h); |
366 filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); | |
367 filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h); | |
368 filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); | |
369 filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); | |
370 filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); | |
371 filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); | |
372 filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); | |
10854 | 373 return; |
374 } else { | |
/* Non-intra macroblock: bS[dir][edge][0..3] holds the strengths; bSv views
 * each 4-entry row as one uint64_t so a whole edge can be set or tested
 * at once. */
375 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]); | |
376 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS; | |
377 int edges; | |
/* 8x8DCT with (cbp&7)==7: edges 0 and 2 in both directions get strength 2;
 * otherwise the dsp hook computes the strengths. */
378 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) { | |
379 edges = 4; | |
380 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL; | |
381 } else { | |
382 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : | |
383 (mb_type & MB_TYPE_16x8) ? 1 : 0; | |
384 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) | |
385 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16)) | |
386 ? 3 : 0; | |
387 int step = IS_8x8DCT(mb_type) ? 2 : 1; | |
388 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; | |
389 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, | |
390 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE); | |
391 } | |
/* An intra neighbour overrides the strengths of the shared (edge 0) boundary:
 * 4 on the left; on the top 3 for field pictures, 4 otherwise. */
392 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) ) | |
393 bSv[0][0] = 0x0004000400040004ULL; | |
394 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) ) | |
395 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL; | |
396 | |
/* FILTER(hv,dir,edge): if any strength of bS[dir][edge] is non-zero, filter
 * that luma edge with filter_mb_edge<hv>, and for even edges both chroma
 * planes with filter_mb_edgec<hv>. Edge 0 uses the neighbour-averaged
 * quantizer selected by token pasting (qp0/qpc0 for dir 0, qp1/qpc1 for
 * dir 1); other edges use qp/qpc. The macro relies on those local names. */
397 #define FILTER(hv,dir,edge)\ | |
398 if(bSv[dir][edge]) {\ | |
10960 | 399 filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\ |
10854 | 400 if(!(edge&1)) {\ |
10960 | 401 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ |
402 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ | |
10854 | 403 }\ |
404 } | |
405 if( edges == 1 ) { | |
406 FILTER(v,0,0); | |
407 FILTER(h,1,0); | |
408 } else if( IS_8x8DCT(mb_type) ) { | |
409 FILTER(v,0,0); | |
410 FILTER(v,0,2); | |
411 FILTER(h,1,0); | |
412 FILTER(h,1,2); | |
413 } else { | |
414 FILTER(v,0,0); | |
415 FILTER(v,0,1); | |
416 FILTER(v,0,2); | |
417 FILTER(v,0,3); | |
418 FILTER(h,1,0); | |
419 FILTER(h,1,1); | |
420 FILTER(h,1,2); | |
421 FILTER(h,1,3); | |
422 } | |
423 #undef FILTER | |
424 } | |
425 } | |
426 | |
427 | |
428 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) { | |
429 MpegEncContext * const s = &h->s; | |
430 int edge; | |
431 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; | |
432 const int mbm_type = s->current_picture.mb_type[mbm_xy]; | |
433 | |
434 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP)) | |
435 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4; | |
436 // how often to recheck mv-based bS when iterating between edges | |
10958
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
437 static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1}, |
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
438 {0,3,1,1,3,3,3,3}}; |
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
439 const int mask_edge = mask_edge_tab[dir][(mb_type>>3)&7]; |
10854 | 440 // how often to recheck mv-based bS when iterating along each edge |
441 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)); | |
10942 | 442 int start = h->slice_table[mbm_xy] == 0xFFFF |
443 || first_vertical_edge_done | |
444 || (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_num); | |
10854 | 445 |
446 | |
447 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0 | |
10946 | 448 && IS_INTERLACED(mbm_type&~mb_type) |
10854 | 449 ) { |
450 // This is a special case in the norm where the filtering must | |
451 // be done twice (one each of the field) even if we are in a | |
452 // frame macroblock. | |
453 // | |
454 unsigned int tmp_linesize = 2 * linesize; | |
455 unsigned int tmp_uvlinesize = 2 * uvlinesize; | |
456 int mbn_xy = mb_xy - 2 * s->mb_stride; | |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
457 int j; |
10854 | 458 |
459 for(j=0; j<2; j++, mbn_xy += s->mb_stride){ | |
10950
4776a56132e1
H.264: Declare bS with DECLARE_ALIGNED_8 for uint64_t casts.
astrange
parents:
10949
diff
changeset
|
460 DECLARE_ALIGNED_8(int16_t, bS[4]); |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
461 int qp; |
10946 | 462 if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) { |
10947 | 463 *(uint64_t*)bS= 0x0003000300030003ULL; |
10854 | 464 } else { |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
465 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 4+3*8; |
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
466 int i; |
10854 | 467 for( i = 0; i < 4; i++ ) { |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
468 bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]); |
10854 | 469 } |
470 } | |
471 // Do not use s->qscale as luma quantizer because it has not the same | |
472 // value in IPCM macroblocks. | |
473 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; | |
474 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); | |
475 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } | |
10960 | 476 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h ); |
477 filter_mb_edgech( &img_cb[j*uvlinesize], tmp_uvlinesize, bS, | |
478 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); | |
479 filter_mb_edgech( &img_cr[j*uvlinesize], tmp_uvlinesize, bS, | |
480 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); | |
10854 | 481 } |
482 | |
483 start = 1; | |
484 } | |
485 | |
486 /* Calculate bS */ | |
487 for( edge = start; edge < edges; edge++ ) { | |
488 /* mbn_xy: neighbor macroblock */ | |
489 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; | |
490 const int mbn_type = s->current_picture.mb_type[mbn_xy]; | |
10950
4776a56132e1
H.264: Declare bS with DECLARE_ALIGNED_8 for uint64_t casts.
astrange
parents:
10949
diff
changeset
|
491 DECLARE_ALIGNED_8(int16_t, bS[4]); |
10854 | 492 int qp; |
493 | |
494 if( (edge&1) && IS_8x8DCT(mb_type) ) | |
495 continue; | |
496 | |
10945
0d93bbc17950
Simplify and optimize intra code in h264_loopfilter.c
michael
parents:
10942
diff
changeset
|
497 if( IS_INTRA(mb_type|mbn_type)) { |
0d93bbc17950
Simplify and optimize intra code in h264_loopfilter.c
michael
parents:
10942
diff
changeset
|
498 *(uint64_t*)bS= 0x0003000300030003ULL; |
10854 | 499 if (edge == 0) { |
10945
0d93bbc17950
Simplify and optimize intra code in h264_loopfilter.c
michael
parents:
10942
diff
changeset
|
500 if ( (!IS_INTERLACED(mb_type|mbm_type)) |
10854 | 501 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0)) |
10945
0d93bbc17950
Simplify and optimize intra code in h264_loopfilter.c
michael
parents:
10942
diff
changeset
|
502 ) |
0d93bbc17950
Simplify and optimize intra code in h264_loopfilter.c
michael
parents:
10942
diff
changeset
|
503 *(uint64_t*)bS= 0x0004000400040004ULL; |
10854 | 504 } |
505 } else { | |
506 int i, l; | |
507 int mv_done; | |
508 | |
509 if( edge & mask_edge ) { | |
10947 | 510 *(uint64_t*)bS= 0; |
10854 | 511 mv_done = 1; |
512 } | |
513 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) { | |
10947 | 514 *(uint64_t*)bS= 0x0001000100010001ULL; |
10854 | 515 mv_done = 1; |
516 } | |
517 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { | |
518 int b_idx= 8 + 4 + edge * (dir ? 8:1); | |
519 int bn_idx= b_idx - (dir ? 8:1); | |
520 int v = 0; | |
521 | |
522 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) { | |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
523 v |= h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | |
10901
2a5c3d89201d
Another microopt, 4 cpu cycles for avoidance of FFABS().
michael
parents:
10899
diff
changeset
|
524 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | |
10854 | 525 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit; |
526 } | |
527 | |
528 if(h->slice_type_nos == FF_B_TYPE && v){ | |
529 v=0; | |
530 for( l = 0; !v && l < 2; l++ ) { | |
531 int ln= 1-l; | |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
532 v |= h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | |
10901
2a5c3d89201d
Another microopt, 4 cpu cycles for avoidance of FFABS().
michael
parents:
10899
diff
changeset
|
533 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | |
10854 | 534 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit; |
535 } | |
536 } | |
537 | |
538 bS[0] = bS[1] = bS[2] = bS[3] = v; | |
539 mv_done = 1; | |
540 } | |
541 else | |
542 mv_done = 0; | |
543 | |
544 for( i = 0; i < 4; i++ ) { | |
545 int x = dir == 0 ? edge : i; | |
546 int y = dir == 0 ? i : edge; | |
547 int b_idx= 8 + 4 + x + 8*y; | |
548 int bn_idx= b_idx - (dir ? 8:1); | |
549 | |
550 if( h->non_zero_count_cache[b_idx] | | |
551 h->non_zero_count_cache[bn_idx] ) { | |
552 bS[i] = 2; | |
553 } | |
554 else if(!mv_done) | |
555 { | |
556 bS[i] = 0; | |
557 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) { | |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
558 if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | |
10902
1e41e6ab9a18
Apply last 2 optimizations to similar code i forgot.
michael
parents:
10901
diff
changeset
|
559 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | |
10854 | 560 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) { |
561 bS[i] = 1; | |
562 break; | |
563 } | |
564 } | |
565 | |
566 if(h->slice_type_nos == FF_B_TYPE && bS[i]){ | |
567 bS[i] = 0; | |
568 for( l = 0; l < 2; l++ ) { | |
569 int ln= 1-l; | |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
570 if( h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | |
10902
1e41e6ab9a18
Apply last 2 optimizations to similar code i forgot.
michael
parents:
10901
diff
changeset
|
571 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | |
10854 | 572 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) { |
573 bS[i] = 1; | |
574 break; | |
575 } | |
576 } | |
577 } | |
578 } | |
579 } | |
580 | |
581 if(bS[0]+bS[1]+bS[2]+bS[3] == 0) | |
582 continue; | |
583 } | |
584 | |
585 /* Filter edge */ | |
586 // Do not use s->qscale as luma quantizer because it has not the same | |
587 // value in IPCM macroblocks. | |
588 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; | |
10906 | 589 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); |
10854 | 590 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); |
10904 | 591 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } |
10854 | 592 if( dir == 0 ) { |
10960 | 593 filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, h ); |
10854 | 594 if( (edge&1) == 0 ) { |
10948
c80f0dfbf47d
Only calculate the second chroma qp if it differs from the firstin the main
michael
parents:
10947
diff
changeset
|
595 int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; |
10960 | 596 filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, qp, h); |
10948
c80f0dfbf47d
Only calculate the second chroma qp if it differs from the firstin the main
michael
parents:
10947
diff
changeset
|
597 if(h->pps.chroma_qp_diff) |
c80f0dfbf47d
Only calculate the second chroma qp if it differs from the firstin the main
michael
parents:
10947
diff
changeset
|
598 qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; |
10960 | 599 filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, qp, h); |
10854 | 600 } |
601 } else { | |
10960 | 602 filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h ); |
10854 | 603 if( (edge&1) == 0 ) { |
10948
c80f0dfbf47d
Only calculate the second chroma qp if it differs from the firstin the main
michael
parents:
10947
diff
changeset
|
604 int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; |
10960 | 605 filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, qp, h); |
10948
c80f0dfbf47d
Only calculate the second chroma qp if it differs from the firstin the main
michael
parents:
10947
diff
changeset
|
606 if(h->pps.chroma_qp_diff) |
c80f0dfbf47d
Only calculate the second chroma qp if it differs from the firstin the main
michael
parents:
10947
diff
changeset
|
607 qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; |
10960 | 608 filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, qp, h); |
10854 | 609 } |
610 } | |
611 } | |
612 } | |
613 | |
614 void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { | |
615 MpegEncContext * const s = &h->s; | |
616 const int mb_xy= mb_x + mb_y*s->mb_stride; | |
617 const int mb_type = s->current_picture.mb_type[mb_xy]; | |
618 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4; | |
619 int first_vertical_edge_done = 0; | |
620 av_unused int dir; | |
10906 | 621 int list; |
10854 | 622 |
623 if (FRAME_MBAFF | |
624 // left mb is in picture | |
625 && h->slice_table[mb_xy-1] != 0xFFFF | |
626 // and current and left pair do not have the same interlaced type | |
10946 | 627 && IS_INTERLACED(mb_type^s->current_picture.mb_type[mb_xy-1]) |
10854 | 628 // and left mb is in the same slice if deblocking_filter == 2 |
10922 | 629 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_num)) { |
10854 | 630 /* First vertical edge is different in MBAFF frames |
631 * There are 8 different bS to compute and 2 different Qp | |
632 */ | |
633 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride; | |
634 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride }; | |
10950
4776a56132e1
H.264: Declare bS with DECLARE_ALIGNED_8 for uint64_t casts.
astrange
parents:
10949
diff
changeset
|
635 DECLARE_ALIGNED_8(int16_t, bS[8]); |
10854 | 636 int qp[2]; |
637 int bqp[2]; | |
638 int rqp[2]; | |
639 int mb_qp, mbn0_qp, mbn1_qp; | |
640 int i; | |
641 first_vertical_edge_done = 1; | |
642 | |
643 if( IS_INTRA(mb_type) ) | |
10947 | 644 *(uint64_t*)&bS[0]= |
645 *(uint64_t*)&bS[4]= 0x0004000400040004ULL; | |
10854 | 646 else { |
647 for( i = 0; i < 8; i++ ) { | |
648 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1]; | |
649 | |
650 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) | |
651 bS[i] = 4; | |
652 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 || | |
653 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ? | |
654 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2)) | |
655 : | |
10909
f4cf3960b8c6
Reorganize how values are stored in h->non_zero_count.
michael
parents:
10906
diff
changeset
|
656 h->non_zero_count[mbn_xy][7+(MB_FIELD ? (i&3) : (i>>2)+(mb_y&1)*2)*8])) |
10854 | 657 bS[i] = 2; |
658 else | |
659 bS[i] = 1; | |
660 } | |
661 } | |
662 | |
663 mb_qp = s->current_picture.qscale_table[mb_xy]; | |
664 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]]; | |
665 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]]; | |
666 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1; | |
667 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) + | |
668 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1; | |
669 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) + | |
670 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1; | |
671 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1; | |
672 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) + | |
673 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1; | |
674 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) + | |
675 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1; | |
676 | |
677 /* Filter edge */ | |
678 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize); | |
679 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
680 if(MB_FIELD){ |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
681 filter_mb_mbaff_edgev ( h, img_y , linesize, bS , 1, qp [0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
682 filter_mb_mbaff_edgev ( h, img_y + 8* linesize, linesize, bS+4, 1, qp [1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
683 filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
684 filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
685 filter_mb_mbaff_edgecv( h, img_cr, uvlinesize, bS , 1, rqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
686 filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
687 }else{ |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
688 filter_mb_mbaff_edgev ( h, img_y , 2* linesize, bS , 2, qp [0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
689 filter_mb_mbaff_edgev ( h, img_y + linesize, 2* linesize, bS+1, 2, qp [1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
690 filter_mb_mbaff_edgecv( h, img_cb, 2*uvlinesize, bS , 2, bqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
691 filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
692 filter_mb_mbaff_edgecv( h, img_cr, 2*uvlinesize, bS , 2, rqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
693 filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
694 } |
10854 | 695 } |
696 | |
697 #if CONFIG_SMALL | |
698 for( dir = 0; dir < 2; dir++ ) | |
699 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir); | |
700 #else | |
701 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0); | |
702 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1); | |
703 #endif | |
704 } |