Mercurial > libavcodec.hg
annotate h264_loopfilter.c @ 11032:01bd040f8607 libavcodec
Unroll main loop so the edge==0 case is separate.
This allows many things to be simplified away.
The h264 decoder is overall 1% faster with an MBAFF sample and
0.1% slower with the cathedral sample, probably because the slow loop
filter code must be loaded into the code cache for the first MB of each
row but isn't used for the following MBs.
author | michael |
---|---|
date | Thu, 28 Jan 2010 01:24:25 +0000 |
parents | f5678fb91140 |
children | b5577677b97d |
rev | line source |
---|---|
10854 | 1 /* |
2 * H.26L/H.264/AVC/JVT/14496-10/... loop filter | |
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 /** | |
23 * @file libavcodec/h264_loopfilter.c | |
24 * H.264 / AVC / MPEG4 part10 loop filter. | |
25 * @author Michael Niedermayer <michaelni@gmx.at> | |
26 */ | |
27 | |
28 #include "internal.h" | |
29 #include "dsputil.h" | |
30 #include "avcodec.h" | |
31 #include "mpegvideo.h" | |
32 #include "h264.h" | |
33 #include "mathops.h" | |
34 #include "rectangle.h" | |
35 | |
36 //#undef NDEBUG | |
37 #include <assert.h> | |
38 | |
39 /* Deblocking filter (p153) */ | |
/**
 * Alpha threshold lookup used by the edge filters below.
 *
 * The table has 3*52 entries: the middle 52 entries hold the actual
 * thresholds, preceded by 52 copies of the lowest value (0) and followed by
 * 52 copies of the highest value (255). The filters index it with
 * qp + h->slice_alpha_c0_offset; the padding presumably keeps that index
 * in range without clipping (the offset is expected to already carry a
 * +52 bias -- confirm where slice_alpha_c0_offset is set).
 */
static const uint8_t alpha_table[52*3] = {
    /* lower padding: 52 x 0 */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* the 52 real entries */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
     7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
    25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
    80, 90,101,113,127,144,162,182,203,226,
   255,255,
    /* upper padding: 52 x 255 */
   255,255,255,255,255,255,255,255,255,255,255,255,255,
   255,255,255,255,255,255,255,255,255,255,255,255,255,
   255,255,255,255,255,255,255,255,255,255,255,255,255,
   255,255,255,255,255,255,255,255,255,255,255,255,255,
};
/**
 * Beta threshold lookup used by the edge filters below.
 *
 * Same layout as the alpha table: 52 real entries in the middle, padded
 * with 52 copies of the lowest value (0) in front and 52 copies of the
 * highest value (18) behind. The filters index it with
 * qp + h->slice_beta_offset.
 */
static const uint8_t beta_table[52*3] = {
    /* lower padding: 52 x 0 */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* the 52 real entries */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
     3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
     8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
    13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
    18, 18,
    /* upper padding: 52 x 18 */
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
};
/**
 * tc0 clipping lookup, indexed as tc0_table[index_a][bS].
 *
 * The first index uses the same 3*52 layout as the alpha table (52 real
 * rows in the middle, padded in front with copies of the first row and
 * behind with copies of the last row). The second index is the boundary
 * strength; column 0 holds -1, which is stored as 255 in the uint8_t
 * element. The luma callers copy the entry into an int8_t (giving -1);
 * the chroma callers add 1 first and then store into int8_t (giving 0 on
 * the usual wrap-around conversion).
 */
static const uint8_t tc0_table[52*3][4] = {
    /* lower padding: 52 rows */
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    /* the 52 real rows */
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
    {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
    {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
    {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
    {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    /* upper padding: 52 rows */
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
};
101 | |
10960 | 102 static void av_noinline filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h) { |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
103 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
10960 | 104 const int alpha = alpha_table[index_a]; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
105 const int beta = beta_table[qp + h->slice_beta_offset]; |
10854 | 106 if (alpha ==0 || beta == 0) return; |
107 | |
108 if( bS[0] < 4 ) { | |
109 int8_t tc[4]; | |
10960 | 110 tc[0] = tc0_table[index_a][bS[0]]; |
111 tc[1] = tc0_table[index_a][bS[1]]; | |
112 tc[2] = tc0_table[index_a][bS[2]]; | |
113 tc[3] = tc0_table[index_a][bS[3]]; | |
10854 | 114 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); |
115 } else { | |
116 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); | |
117 } | |
118 } | |
10960 | 119 static void av_noinline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
120 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
10960 | 121 const int alpha = alpha_table[index_a]; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
122 const int beta = beta_table[qp + h->slice_beta_offset]; |
10854 | 123 if (alpha ==0 || beta == 0) return; |
124 | |
125 if( bS[0] < 4 ) { | |
126 int8_t tc[4]; | |
10960 | 127 tc[0] = tc0_table[index_a][bS[0]]+1; |
128 tc[1] = tc0_table[index_a][bS[1]]+1; | |
129 tc[2] = tc0_table[index_a][bS[2]]+1; | |
130 tc[3] = tc0_table[index_a][bS[3]]+1; | |
10854 | 131 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); |
132 } else { | |
133 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); | |
134 } | |
135 } | |
136 | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
137 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { |
10854 | 138 int i; |
10970 | 139 int index_a = qp + h->slice_alpha_c0_offset; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
140 int alpha = alpha_table[index_a]; |
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
141 int beta = beta_table[qp + h->slice_beta_offset]; |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
142 for( i = 0; i < 8; i++, pix += stride) { |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
143 const int bS_index = (i >> 1) * bsi; |
10854 | 144 |
145 if( bS[bS_index] == 0 ) { | |
146 continue; | |
147 } | |
148 | |
149 if( bS[bS_index] < 4 ) { | |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
150 const int tc0 = tc0_table[index_a][bS[bS_index]]; |
10854 | 151 const int p0 = pix[-1]; |
152 const int p1 = pix[-2]; | |
153 const int p2 = pix[-3]; | |
154 const int q0 = pix[0]; | |
155 const int q1 = pix[1]; | |
156 const int q2 = pix[2]; | |
157 | |
158 if( FFABS( p0 - q0 ) < alpha && | |
159 FFABS( p1 - p0 ) < beta && | |
160 FFABS( q1 - q0 ) < beta ) { | |
161 int tc = tc0; | |
162 int i_delta; | |
163 | |
164 if( FFABS( p2 - p0 ) < beta ) { | |
10970 | 165 if(tc0) |
10854 | 166 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); |
167 tc++; | |
168 } | |
169 if( FFABS( q2 - q0 ) < beta ) { | |
10970 | 170 if(tc0) |
10854 | 171 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); |
172 tc++; | |
173 } | |
174 | |
175 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); | |
176 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ | |
177 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ | |
178 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); | |
179 } | |
180 }else{ | |
181 const int p0 = pix[-1]; | |
182 const int p1 = pix[-2]; | |
183 const int p2 = pix[-3]; | |
184 | |
185 const int q0 = pix[0]; | |
186 const int q1 = pix[1]; | |
187 const int q2 = pix[2]; | |
188 | |
189 if( FFABS( p0 - q0 ) < alpha && | |
190 FFABS( p1 - p0 ) < beta && | |
191 FFABS( q1 - q0 ) < beta ) { | |
192 | |
193 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ | |
194 if( FFABS( p2 - p0 ) < beta) | |
195 { | |
196 const int p3 = pix[-4]; | |
197 /* p0', p1', p2' */ | |
198 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; | |
199 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; | |
200 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; | |
201 } else { | |
202 /* p0' */ | |
203 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | |
204 } | |
205 if( FFABS( q2 - q0 ) < beta) | |
206 { | |
207 const int q3 = pix[3]; | |
208 /* q0', q1', q2' */ | |
209 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; | |
210 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; | |
211 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; | |
212 } else { | |
213 /* q0' */ | |
214 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | |
215 } | |
216 }else{ | |
217 /* p0', q0' */ | |
218 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | |
219 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | |
220 } | |
221 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); | |
222 } | |
223 } | |
224 } | |
225 } | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
226 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { |
10854 | 227 int i; |
10970 | 228 int index_a = qp + h->slice_alpha_c0_offset; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
229 int alpha = alpha_table[index_a]; |
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
230 int beta = beta_table[qp + h->slice_beta_offset]; |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
231 for( i = 0; i < 4; i++, pix += stride) { |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
232 const int bS_index = i*bsi; |
10854 | 233 |
234 if( bS[bS_index] == 0 ) { | |
235 continue; | |
236 } | |
237 | |
238 if( bS[bS_index] < 4 ) { | |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
239 const int tc = tc0_table[index_a][bS[bS_index]] + 1; |
10854 | 240 const int p0 = pix[-1]; |
241 const int p1 = pix[-2]; | |
242 const int q0 = pix[0]; | |
243 const int q1 = pix[1]; | |
244 | |
245 if( FFABS( p0 - q0 ) < alpha && | |
246 FFABS( p1 - p0 ) < beta && | |
247 FFABS( q1 - q0 ) < beta ) { | |
248 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); | |
249 | |
250 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ | |
251 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ | |
252 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); | |
253 } | |
254 }else{ | |
255 const int p0 = pix[-1]; | |
256 const int p1 = pix[-2]; | |
257 const int q0 = pix[0]; | |
258 const int q1 = pix[1]; | |
259 | |
260 if( FFABS( p0 - q0 ) < alpha && | |
261 FFABS( p1 - p0 ) < beta && | |
262 FFABS( q1 - q0 ) < beta ) { | |
263 | |
264 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ | |
265 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ | |
266 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); | |
267 } | |
268 } | |
269 } | |
270 } | |
271 | |
10960 | 272 static void av_noinline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
273 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
10960 | 274 const int alpha = alpha_table[index_a]; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
275 const int beta = beta_table[qp + h->slice_beta_offset]; |
10854 | 276 if (alpha ==0 || beta == 0) return; |
277 | |
278 if( bS[0] < 4 ) { | |
279 int8_t tc[4]; | |
10960 | 280 tc[0] = tc0_table[index_a][bS[0]]; |
281 tc[1] = tc0_table[index_a][bS[1]]; | |
282 tc[2] = tc0_table[index_a][bS[2]]; | |
283 tc[3] = tc0_table[index_a][bS[3]]; | |
10854 | 284 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); |
285 } else { | |
286 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta); | |
287 } | |
288 } | |
289 | |
10960 | 290 static void av_noinline filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
291 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
10960 | 292 const int alpha = alpha_table[index_a]; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
293 const int beta = beta_table[qp + h->slice_beta_offset]; |
10854 | 294 if (alpha ==0 || beta == 0) return; |
295 | |
296 if( bS[0] < 4 ) { | |
297 int8_t tc[4]; | |
10960 | 298 tc[0] = tc0_table[index_a][bS[0]]+1; |
299 tc[1] = tc0_table[index_a][bS[1]]+1; | |
300 tc[2] = tc0_table[index_a][bS[2]]+1; | |
301 tc[3] = tc0_table[index_a][bS[3]]+1; | |
10854 | 302 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); |
303 } else { | |
304 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); | |
305 } | |
306 } | |
307 | |
308 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { | |
309 MpegEncContext * const s = &h->s; | |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
310 int mb_xy; |
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
311 int mb_type; |
10854 | 312 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; |
313 | |
314 mb_xy = h->mb_xy; | |
315 | |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
316 if(!h->top_type || !h->left_type[0] || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) { |
10854 | 317 ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); |
318 return; | |
319 } | |
320 assert(!FRAME_MBAFF); | |
321 | |
322 mb_type = s->current_picture.mb_type[mb_xy]; | |
323 qp = s->current_picture.qscale_table[mb_xy]; | |
324 qp0 = s->current_picture.qscale_table[mb_xy-1]; | |
325 qp1 = s->current_picture.qscale_table[h->top_mb_xy]; | |
326 qpc = get_chroma_qp( h, 0, qp ); | |
327 qpc0 = get_chroma_qp( h, 0, qp0 ); | |
328 qpc1 = get_chroma_qp( h, 0, qp1 ); | |
329 qp0 = (qp + qp0 + 1) >> 1; | |
330 qp1 = (qp + qp1 + 1) >> 1; | |
331 qpc0 = (qpc + qpc0 + 1) >> 1; | |
332 qpc1 = (qpc + qpc1 + 1) >> 1; | |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
333 qp_thresh = 15+52 - h->slice_alpha_c0_offset; |
10854 | 334 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh && |
335 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh) | |
336 return; | |
337 | |
338 if( IS_INTRA(mb_type) ) { | |
339 int16_t bS4[4] = {4,4,4,4}; | |
340 int16_t bS3[4] = {3,3,3,3}; | |
341 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4; | |
342 if( IS_8x8DCT(mb_type) ) { | |
10960 | 343 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); |
344 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); | |
345 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); | |
346 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); | |
10854 | 347 } else { |
10960 | 348 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); |
349 filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h); | |
350 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); | |
351 filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h); | |
352 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); | |
353 filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h); | |
354 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); | |
355 filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h); | |
10854 | 356 } |
10960 | 357 filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h); |
358 filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); | |
359 filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h); | |
360 filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); | |
361 filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); | |
362 filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); | |
363 filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); | |
364 filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); | |
10854 | 365 return; |
366 } else { | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
367 DECLARE_ALIGNED_8(int16_t, bS)[2][4][4]; |
10854 | 368 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS; |
369 int edges; | |
370 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) { | |
371 edges = 4; | |
372 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL; | |
373 } else { | |
374 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : | |
375 (mb_type & MB_TYPE_16x8) ? 1 : 0; | |
376 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) | |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
377 && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) |
10854 | 378 ? 3 : 0; |
379 int step = IS_8x8DCT(mb_type) ? 2 : 1; | |
380 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; | |
381 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, | |
10984
541acd292c48
Remove all uses of slice_type* from the loop filter, also remove its
michael
parents:
10979
diff
changeset
|
382 h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE); |
10854 | 383 } |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
384 if( IS_INTRA(h->left_type[0]) ) |
10854 | 385 bSv[0][0] = 0x0004000400040004ULL; |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
386 if( IS_INTRA(h->top_type) ) |
10854 | 387 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL; |
388 | |
389 #define FILTER(hv,dir,edge)\ | |
390 if(bSv[dir][edge]) {\ | |
10960 | 391 filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\ |
10854 | 392 if(!(edge&1)) {\ |
10960 | 393 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ |
394 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ | |
10854 | 395 }\ |
396 } | |
397 if( edges == 1 ) { | |
398 FILTER(v,0,0); | |
399 FILTER(h,1,0); | |
400 } else if( IS_8x8DCT(mb_type) ) { | |
401 FILTER(v,0,0); | |
402 FILTER(v,0,2); | |
403 FILTER(h,1,0); | |
404 FILTER(h,1,2); | |
405 } else { | |
406 FILTER(v,0,0); | |
407 FILTER(v,0,1); | |
408 FILTER(v,0,2); | |
409 FILTER(v,0,3); | |
410 FILTER(h,1,0); | |
411 FILTER(h,1,1); | |
412 FILTER(h,1,2); | |
413 FILTER(h,1,3); | |
414 } | |
415 #undef FILTER | |
416 } | |
417 } | |
418 | |
419 | |
420 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) { | |
421 MpegEncContext * const s = &h->s; | |
422 int edge; | |
423 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; | |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
424 const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type; |
10854 | 425 |
426 // how often to recheck mv-based bS when iterating between edges | |
10958
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
427 static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1}, |
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
428 {0,3,1,1,3,3,3,3}}; |
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
429 const int mask_edge = mask_edge_tab[dir][(mb_type>>3)&7]; |
10973
214adf5e303b
Set edges based on cbp and mv partitioning, not just skiped MBs.
michael
parents:
10970
diff
changeset
|
430 const int edges = mask_edge== 3 && !(h->cbp&15) ? 1 : 4; |
214adf5e303b
Set edges based on cbp and mv partitioning, not just skiped MBs.
michael
parents:
10970
diff
changeset
|
431 |
10854 | 432 // how often to recheck mv-based bS when iterating along each edge |
433 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)); | |
10942 | 434 int start = h->slice_table[mbm_xy] == 0xFFFF |
435 || first_vertical_edge_done | |
436 || (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_num); | |
10854 | 437 |
438 | |
439 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0 | |
10946 | 440 && IS_INTERLACED(mbm_type&~mb_type) |
10854 | 441 ) { |
442 // This is a special case in the norm where the filtering must | |
443 // be done twice (one each of the field) even if we are in a | |
444 // frame macroblock. | |
445 // | |
446 unsigned int tmp_linesize = 2 * linesize; | |
447 unsigned int tmp_uvlinesize = 2 * uvlinesize; | |
448 int mbn_xy = mb_xy - 2 * s->mb_stride; | |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
449 int j; |
10854 | 450 |
451 for(j=0; j<2; j++, mbn_xy += s->mb_stride){ | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
452 DECLARE_ALIGNED_8(int16_t, bS)[4]; |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
453 int qp; |
10946 | 454 if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) { |
10947 | 455 *(uint64_t*)bS= 0x0003000300030003ULL; |
10854 | 456 } else { |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
457 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 4+3*8; |
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
458 int i; |
10854 | 459 for( i = 0; i < 4; i++ ) { |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
460 bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]); |
10854 | 461 } |
462 } | |
463 // Do not use s->qscale as luma quantizer because it has not the same | |
464 // value in IPCM macroblocks. | |
465 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; | |
466 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); | |
467 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } | |
10960 | 468 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h ); |
469 filter_mb_edgech( &img_cb[j*uvlinesize], tmp_uvlinesize, bS, | |
470 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); | |
471 filter_mb_edgech( &img_cr[j*uvlinesize], tmp_uvlinesize, bS, | |
472 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); | |
10854 | 473 } |
474 | |
475 start = 1; | |
476 } | |
477 | |
478 /* Calculate bS */ | |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
479 if(start==0) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
480 DECLARE_ALIGNED_8(int16_t, bS)[4]; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
481 int qp; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
482 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
483 if( IS_INTRA(mb_type|mbm_type)) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
484 *(uint64_t*)bS= 0x0003000300030003ULL; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
485 if ( (!IS_INTERLACED(mb_type|mbm_type)) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
486 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0)) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
487 ) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
488 *(uint64_t*)bS= 0x0004000400040004ULL; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
489 } else { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
490 int i, l; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
491 int mv_done; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
492 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
493 if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbm_type)) { //FIXME not posible left |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
494 *(uint64_t*)bS= 0x0001000100010001ULL; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
495 mv_done = 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
496 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
497 else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
498 int b_idx= 8 + 4; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
499 int bn_idx= b_idx - (dir ? 8:1); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
500 int v = 0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
501 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
502 for( l = 0; !v && l < h->list_count; l++ ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
503 v |= h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
504 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
505 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
506 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
507 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
508 if(h->list_count==2 && v){ |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
509 v=0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
510 for( l = 0; !v && l < 2; l++ ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
511 int ln= 1-l; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
512 v |= h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
513 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
514 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
515 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
516 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
517 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
518 bS[0] = bS[1] = bS[2] = bS[3] = v; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
519 mv_done = 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
520 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
521 else |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
522 mv_done = 0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
523 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
524 for( i = 0; i < 4; i++ ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
525 int x = dir == 0 ? 0 : i; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
526 int y = dir == 0 ? i : 0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
527 int b_idx= 8 + 4 + x + 8*y; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
528 int bn_idx= b_idx - (dir ? 8:1); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
529 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
530 if( h->non_zero_count_cache[b_idx] | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
531 h->non_zero_count_cache[bn_idx] ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
532 bS[i] = 2; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
533 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
534 else if(!mv_done) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
535 { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
536 bS[i] = 0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
537 for( l = 0; l < h->list_count; l++ ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
538 if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
539 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
540 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
541 bS[i] = 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
542 break; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
543 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
544 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
545 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
546 if(h->list_count == 2 && bS[i]){ |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
547 bS[i] = 0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
548 for( l = 0; l < 2; l++ ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
549 int ln= 1-l; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
550 if( h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
551 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
552 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
553 bS[i] = 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
554 break; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
555 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
556 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
557 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
558 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
559 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
560 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
561 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
562 /* Filter edge */ |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
563 // Do not use s->qscale as luma quantizer because it has not the same |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
564 // value in IPCM macroblocks. |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
565 if(bS[0]+bS[1]+bS[2]+bS[3]){ |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
566 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbm_xy] + 1 ) >> 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
567 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
568 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
569 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
570 if( dir == 0 ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
571 filter_mb_edgev( &img_y[0], linesize, bS, qp, h ); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
572 { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
573 int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
574 filter_mb_edgecv( &img_cb[0], uvlinesize, bS, qp, h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
575 if(h->pps.chroma_qp_diff) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
576 qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
577 filter_mb_edgecv( &img_cr[0], uvlinesize, bS, qp, h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
578 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
579 } else { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
580 filter_mb_edgeh( &img_y[0], linesize, bS, qp, h ); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
581 { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
582 int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
583 filter_mb_edgech( &img_cb[0], uvlinesize, bS, qp, h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
584 if(h->pps.chroma_qp_diff) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
585 qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
586 filter_mb_edgech( &img_cr[0], uvlinesize, bS, qp, h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
587 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
588 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
589 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
590 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
591 /* Calculate bS */ |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
592 for( edge = 1; edge < edges; edge++ ) { |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
593 DECLARE_ALIGNED_8(int16_t, bS)[4]; |
10854 | 594 int qp; |
595 | |
10969
2e8fbfc278d5
Optmize 8x8dct check used to skip some borders in the loop filter.
michael
parents:
10961
diff
changeset
|
596 if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type) |
10854 | 597 continue; |
598 | |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
599 if( IS_INTRA(mb_type)) { |
10945
0d93bbc17950
Simplify and optimize intra code in h264_loopfilter.c
michael
parents:
10942
diff
changeset
|
600 *(uint64_t*)bS= 0x0003000300030003ULL; |
10854 | 601 } else { |
602 int i, l; | |
603 int mv_done; | |
604 | |
605 if( edge & mask_edge ) { | |
10947 | 606 *(uint64_t*)bS= 0; |
10854 | 607 mv_done = 1; |
608 } | |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
609 else if( mask_par0 ) { |
10854 | 610 int b_idx= 8 + 4 + edge * (dir ? 8:1); |
611 int bn_idx= b_idx - (dir ? 8:1); | |
612 int v = 0; | |
613 | |
10984
541acd292c48
Remove all uses of slice_type* from the loop filter, also remove its
michael
parents:
10979
diff
changeset
|
614 for( l = 0; !v && l < h->list_count; l++ ) { |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
615 v |= h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | |
10901
2a5c3d89201d
Another microopt, 4 cpu cycles for avoidance of FFABS().
michael
parents:
10899
diff
changeset
|
616 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | |
10854 | 617 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit; |
618 } | |
619 | |
10984
541acd292c48
Remove all uses of slice_type* from the loop filter, also remove its
michael
parents:
10979
diff
changeset
|
620 if(h->list_count==2 && v){ |
10854 | 621 v=0; |
622 for( l = 0; !v && l < 2; l++ ) { | |
623 int ln= 1-l; | |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
624 v |= h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | |
10901
2a5c3d89201d
Another microopt, 4 cpu cycles for avoidance of FFABS().
michael
parents:
10899
diff
changeset
|
625 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | |
10854 | 626 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit; |
627 } | |
628 } | |
629 | |
630 bS[0] = bS[1] = bS[2] = bS[3] = v; | |
631 mv_done = 1; | |
632 } | |
633 else | |
634 mv_done = 0; | |
635 | |
636 for( i = 0; i < 4; i++ ) { | |
637 int x = dir == 0 ? edge : i; | |
638 int y = dir == 0 ? i : edge; | |
639 int b_idx= 8 + 4 + x + 8*y; | |
640 int bn_idx= b_idx - (dir ? 8:1); | |
641 | |
642 if( h->non_zero_count_cache[b_idx] | | |
643 h->non_zero_count_cache[bn_idx] ) { | |
644 bS[i] = 2; | |
645 } | |
646 else if(!mv_done) | |
647 { | |
648 bS[i] = 0; | |
10984
541acd292c48
Remove all uses of slice_type* from the loop filter, also remove its
michael
parents:
10979
diff
changeset
|
649 for( l = 0; l < h->list_count; l++ ) { |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
650 if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | |
10902
1e41e6ab9a18
Apply last 2 optimizations to similar code i forgot.
michael
parents:
10901
diff
changeset
|
651 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | |
10854 | 652 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) { |
653 bS[i] = 1; | |
654 break; | |
655 } | |
656 } | |
657 | |
10984
541acd292c48
Remove all uses of slice_type* from the loop filter, also remove its
michael
parents:
10979
diff
changeset
|
658 if(h->list_count == 2 && bS[i]){ |
10854 | 659 bS[i] = 0; |
660 for( l = 0; l < 2; l++ ) { | |
661 int ln= 1-l; | |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
662 if( h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | |
10902
1e41e6ab9a18
Apply last 2 optimizations to similar code i forgot.
michael
parents:
10901
diff
changeset
|
663 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | |
10854 | 664 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) { |
665 bS[i] = 1; | |
666 break; | |
667 } | |
668 } | |
669 } | |
670 } | |
671 } | |
672 | |
673 if(bS[0]+bS[1]+bS[2]+bS[3] == 0) | |
674 continue; | |
675 } | |
676 | |
677 /* Filter edge */ | |
678 // Do not use s->qscale as luma quantizer because it has not the same | |
679 // value in IPCM macroblocks. | |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
680 qp = s->current_picture.qscale_table[mb_xy]; |
10906 | 681 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); |
10854 | 682 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); |
10904 | 683 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } |
10854 | 684 if( dir == 0 ) { |
10960 | 685 filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, h ); |
10854 | 686 if( (edge&1) == 0 ) { |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
687 filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, h->chroma_qp[0], h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
688 filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, h->chroma_qp[1], h); |
10854 | 689 } |
690 } else { | |
10960 | 691 filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h ); |
10854 | 692 if( (edge&1) == 0 ) { |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
693 filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
694 filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h); |
10854 | 695 } |
696 } | |
697 } | |
698 } | |
699 | |
700 void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { | |
701 MpegEncContext * const s = &h->s; | |
702 const int mb_xy= mb_x + mb_y*s->mb_stride; | |
703 const int mb_type = s->current_picture.mb_type[mb_xy]; | |
704 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4; | |
705 int first_vertical_edge_done = 0; | |
706 av_unused int dir; | |
10906 | 707 int list; |
10854 | 708 |
709 if (FRAME_MBAFF | |
710 // and current and left pair do not have the same interlaced type | |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
711 && IS_INTERLACED(mb_type^h->left_type[0]) |
11026 | 712 // and left mb is in available to us |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
713 && h->left_type[0]) { |
10854 | 714 /* First vertical edge is different in MBAFF frames |
715 * There are 8 different bS to compute and 2 different Qp | |
716 */ | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
717 DECLARE_ALIGNED_8(int16_t, bS)[8]; |
10854 | 718 int qp[2]; |
719 int bqp[2]; | |
720 int rqp[2]; | |
721 int mb_qp, mbn0_qp, mbn1_qp; | |
722 int i; | |
723 first_vertical_edge_done = 1; | |
724 | |
725 if( IS_INTRA(mb_type) ) | |
10947 | 726 *(uint64_t*)&bS[0]= |
727 *(uint64_t*)&bS[4]= 0x0004000400040004ULL; | |
10854 | 728 else { |
11025
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
729 static const uint8_t offset[2][2][8]={ |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
730 { |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
731 {7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1}, |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
732 {7+8*2, 7+8*2, 7+8*2, 7+8*2, 7+8*3, 7+8*3, 7+8*3, 7+8*3}, |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
733 },{ |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
734 {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
735 {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
736 } |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
737 }; |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
738 const uint8_t *off= offset[MB_FIELD][mb_y&1]; |
10854 | 739 for( i = 0; i < 8; i++ ) { |
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
740 int j= MB_FIELD ? i>>2 : i&1; |
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
741 int mbn_xy = h->left_mb_xy[j]; |
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
742 int mbn_type= h->left_type[j]; |
10854 | 743 |
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
744 if( IS_INTRA( mbn_type ) ) |
10854 | 745 bS[i] = 4; |
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
746 else{ |
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
747 bS[i] = 1 + !!(h->non_zero_count_cache[12+8*(i>>1)] | |
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
748 ((!h->pps.cabac && IS_8x8DCT(mbn_type)) ? |
10854 | 749 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2)) |
750 : | |
11025
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
751 h->non_zero_count[mbn_xy][ off[i] ])); |
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
752 } |
10854 | 753 } |
754 } | |
755 | |
756 mb_qp = s->current_picture.qscale_table[mb_xy]; | |
11015
d844c58b985a
Use left_mb_xy from fill_caches instead of recalculating it.
michael
parents:
11013
diff
changeset
|
757 mbn0_qp = s->current_picture.qscale_table[h->left_mb_xy[0]]; |
d844c58b985a
Use left_mb_xy from fill_caches instead of recalculating it.
michael
parents:
11013
diff
changeset
|
758 mbn1_qp = s->current_picture.qscale_table[h->left_mb_xy[1]]; |
10854 | 759 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1; |
760 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) + | |
761 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1; | |
762 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) + | |
763 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1; | |
764 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1; | |
765 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) + | |
766 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1; | |
767 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) + | |
768 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1; | |
769 | |
770 /* Filter edge */ | |
771 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize); | |
772 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
773 if(MB_FIELD){ |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
774 filter_mb_mbaff_edgev ( h, img_y , linesize, bS , 1, qp [0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
775 filter_mb_mbaff_edgev ( h, img_y + 8* linesize, linesize, bS+4, 1, qp [1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
776 filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
777 filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
778 filter_mb_mbaff_edgecv( h, img_cr, uvlinesize, bS , 1, rqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
779 filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
780 }else{ |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
781 filter_mb_mbaff_edgev ( h, img_y , 2* linesize, bS , 2, qp [0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
782 filter_mb_mbaff_edgev ( h, img_y + linesize, 2* linesize, bS+1, 2, qp [1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
783 filter_mb_mbaff_edgecv( h, img_cb, 2*uvlinesize, bS , 2, bqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
784 filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
785 filter_mb_mbaff_edgecv( h, img_cr, 2*uvlinesize, bS , 2, rqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
786 filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
787 } |
10854 | 788 } |
789 | |
790 #if CONFIG_SMALL | |
791 for( dir = 0; dir < 2; dir++ ) | |
792 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir); | |
793 #else | |
794 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0); | |
795 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1); | |
796 #endif | |
797 } |