Mercurial > libavcodec.hg
annotate h264_loopfilter.c @ 11034:fd5921186064 libavcodec
Make the fast loop filter path work with unavailable left MBs.
This prevents the issue with having to switch between slow and
fast code paths in each row.
0.5% faster loopfilter for cathedral
author | michael |
---|---|
date | Thu, 28 Jan 2010 02:15:25 +0000 |
parents | b5577677b97d |
children | 4debec8a15fa |
rev | line source |
---|---|
10854 | 1 /* |
2 * H.26L/H.264/AVC/JVT/14496-10/... loop filter | |
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 /** | |
23 * @file libavcodec/h264_loopfilter.c | |
24 * H.264 / AVC / MPEG4 part10 loop filter. | |
25 * @author Michael Niedermayer <michaelni@gmx.at> | |
26 */ | |
27 | |
28 #include "internal.h" | |
29 #include "dsputil.h" | |
30 #include "avcodec.h" | |
31 #include "mpegvideo.h" | |
32 #include "h264.h" | |
33 #include "mathops.h" | |
34 #include "rectangle.h" | |
35 | |
36 //#undef NDEBUG | |
37 #include <assert.h> | |
38 | |
39 /* Deblocking filter (p153) */ | |
/**
 * Alpha threshold lookup for the deblocking filter.
 *
 * The filter functions in this file index it with
 * qp + h->slice_alpha_c0_offset. The table holds three runs of 52 entries:
 * a leading run of zeros, a middle run with the varying threshold values,
 * and a trailing run that repeats the largest value (255).
 * NOTE(review): the padding runs presumably let that sum be used as an index
 * without clamping -- confirm against where the offset is set up.
 */
static const uint8_t alpha_table[52*3] = {
    /* entries 0..51: all zero */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,

    /* entries 52..103: varying thresholds */
      0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,
      4,  4,  5,  6,  7,  8,  9, 10,
     12, 13, 15, 17, 20, 22, 25, 28,
     32, 36, 40, 45, 50, 56, 63, 71,
     80, 90,101,113,127,144,162,182,
    203,226,255,255,

    /* entries 104..155: saturated at 255 */
    255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,
};
/**
 * Beta threshold lookup for the deblocking filter.
 *
 * The filter functions in this file index it with
 * qp + h->slice_beta_offset. The layout mirrors alpha_table: 52 zeros,
 * 52 varying threshold values, then 52 copies of the largest value (18).
 */
static const uint8_t beta_table[52*3] = {
    /* entries 0..51: all zero */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,

    /* entries 52..103: varying thresholds */
     0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,
     2,  2,  2,  3,  3,  3,  3,  4,
     4,  4,  6,  6,  7,  7,  8,  8,
     9,  9, 10, 10, 11, 11, 12, 12,
    13, 13, 14, 14, 15, 15, 16, 16,
    17, 17, 18, 18,

    /* entries 104..155: saturated at 18 */
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
};
/**
 * tc0 clipping values for the deblocking filter.
 *
 * The first index is qp + h->slice_alpha_c0_offset (the same index used for
 * alpha_table); the second index is the boundary strength bS (0..3).
 * As with alpha_table there are three runs of 52 rows: minimum values,
 * the varying values, and the maximum values repeated.
 *
 * Column 0 holds -1 in every row.
 * NOTE(review): presumably a "do not filter" marker for the DSP routines
 * that receive these values in an int8_t tc[4] array -- confirm in dsputil.
 *
 * The element type is int8_t so that the -1 entries are stored as -1.
 * With an unsigned element type they would be stored as 255 and only turn
 * back into -1 (or, after the chroma "+1", into 0) through an
 * implementation-defined narrowing conversion when copied into int8_t.
 */
static const int8_t tc0_table[52*3][4] = {
    /* rows 0..51 */
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    /* rows 52..103 */
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
    {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
    {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
    {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
    {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    /* rows 104..155 */
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
};
101 | |
10960 | 102 static void av_noinline filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h) { |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
103 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
10960 | 104 const int alpha = alpha_table[index_a]; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
105 const int beta = beta_table[qp + h->slice_beta_offset]; |
10854 | 106 if (alpha ==0 || beta == 0) return; |
107 | |
108 if( bS[0] < 4 ) { | |
109 int8_t tc[4]; | |
10960 | 110 tc[0] = tc0_table[index_a][bS[0]]; |
111 tc[1] = tc0_table[index_a][bS[1]]; | |
112 tc[2] = tc0_table[index_a][bS[2]]; | |
113 tc[3] = tc0_table[index_a][bS[3]]; | |
10854 | 114 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); |
115 } else { | |
116 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); | |
117 } | |
118 } | |
10960 | 119 static void av_noinline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
120 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
10960 | 121 const int alpha = alpha_table[index_a]; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
122 const int beta = beta_table[qp + h->slice_beta_offset]; |
10854 | 123 if (alpha ==0 || beta == 0) return; |
124 | |
125 if( bS[0] < 4 ) { | |
126 int8_t tc[4]; | |
10960 | 127 tc[0] = tc0_table[index_a][bS[0]]+1; |
128 tc[1] = tc0_table[index_a][bS[1]]+1; | |
129 tc[2] = tc0_table[index_a][bS[2]]+1; | |
130 tc[3] = tc0_table[index_a][bS[3]]+1; | |
10854 | 131 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); |
132 } else { | |
133 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); | |
134 } | |
135 } | |
136 | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
137 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { |
10854 | 138 int i; |
10970 | 139 int index_a = qp + h->slice_alpha_c0_offset; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
140 int alpha = alpha_table[index_a]; |
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
141 int beta = beta_table[qp + h->slice_beta_offset]; |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
142 for( i = 0; i < 8; i++, pix += stride) { |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
143 const int bS_index = (i >> 1) * bsi; |
10854 | 144 |
145 if( bS[bS_index] == 0 ) { | |
146 continue; | |
147 } | |
148 | |
149 if( bS[bS_index] < 4 ) { | |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
150 const int tc0 = tc0_table[index_a][bS[bS_index]]; |
10854 | 151 const int p0 = pix[-1]; |
152 const int p1 = pix[-2]; | |
153 const int p2 = pix[-3]; | |
154 const int q0 = pix[0]; | |
155 const int q1 = pix[1]; | |
156 const int q2 = pix[2]; | |
157 | |
158 if( FFABS( p0 - q0 ) < alpha && | |
159 FFABS( p1 - p0 ) < beta && | |
160 FFABS( q1 - q0 ) < beta ) { | |
161 int tc = tc0; | |
162 int i_delta; | |
163 | |
164 if( FFABS( p2 - p0 ) < beta ) { | |
10970 | 165 if(tc0) |
10854 | 166 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); |
167 tc++; | |
168 } | |
169 if( FFABS( q2 - q0 ) < beta ) { | |
10970 | 170 if(tc0) |
10854 | 171 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); |
172 tc++; | |
173 } | |
174 | |
175 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); | |
176 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ | |
177 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ | |
178 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); | |
179 } | |
180 }else{ | |
181 const int p0 = pix[-1]; | |
182 const int p1 = pix[-2]; | |
183 const int p2 = pix[-3]; | |
184 | |
185 const int q0 = pix[0]; | |
186 const int q1 = pix[1]; | |
187 const int q2 = pix[2]; | |
188 | |
189 if( FFABS( p0 - q0 ) < alpha && | |
190 FFABS( p1 - p0 ) < beta && | |
191 FFABS( q1 - q0 ) < beta ) { | |
192 | |
193 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ | |
194 if( FFABS( p2 - p0 ) < beta) | |
195 { | |
196 const int p3 = pix[-4]; | |
197 /* p0', p1', p2' */ | |
198 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; | |
199 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; | |
200 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; | |
201 } else { | |
202 /* p0' */ | |
203 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | |
204 } | |
205 if( FFABS( q2 - q0 ) < beta) | |
206 { | |
207 const int q3 = pix[3]; | |
208 /* q0', q1', q2' */ | |
209 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; | |
210 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; | |
211 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; | |
212 } else { | |
213 /* q0' */ | |
214 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | |
215 } | |
216 }else{ | |
217 /* p0', q0' */ | |
218 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | |
219 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | |
220 } | |
221 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); | |
222 } | |
223 } | |
224 } | |
225 } | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
226 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { |
10854 | 227 int i; |
10970 | 228 int index_a = qp + h->slice_alpha_c0_offset; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
229 int alpha = alpha_table[index_a]; |
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
230 int beta = beta_table[qp + h->slice_beta_offset]; |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
231 for( i = 0; i < 4; i++, pix += stride) { |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
232 const int bS_index = i*bsi; |
10854 | 233 |
234 if( bS[bS_index] == 0 ) { | |
235 continue; | |
236 } | |
237 | |
238 if( bS[bS_index] < 4 ) { | |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
239 const int tc = tc0_table[index_a][bS[bS_index]] + 1; |
10854 | 240 const int p0 = pix[-1]; |
241 const int p1 = pix[-2]; | |
242 const int q0 = pix[0]; | |
243 const int q1 = pix[1]; | |
244 | |
245 if( FFABS( p0 - q0 ) < alpha && | |
246 FFABS( p1 - p0 ) < beta && | |
247 FFABS( q1 - q0 ) < beta ) { | |
248 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); | |
249 | |
250 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ | |
251 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ | |
252 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); | |
253 } | |
254 }else{ | |
255 const int p0 = pix[-1]; | |
256 const int p1 = pix[-2]; | |
257 const int q0 = pix[0]; | |
258 const int q1 = pix[1]; | |
259 | |
260 if( FFABS( p0 - q0 ) < alpha && | |
261 FFABS( p1 - p0 ) < beta && | |
262 FFABS( q1 - q0 ) < beta ) { | |
263 | |
264 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ | |
265 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ | |
266 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); | |
267 } | |
268 } | |
269 } | |
270 } | |
271 | |
10960 | 272 static void av_noinline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
273 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
10960 | 274 const int alpha = alpha_table[index_a]; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
275 const int beta = beta_table[qp + h->slice_beta_offset]; |
10854 | 276 if (alpha ==0 || beta == 0) return; |
277 | |
278 if( bS[0] < 4 ) { | |
279 int8_t tc[4]; | |
10960 | 280 tc[0] = tc0_table[index_a][bS[0]]; |
281 tc[1] = tc0_table[index_a][bS[1]]; | |
282 tc[2] = tc0_table[index_a][bS[2]]; | |
283 tc[3] = tc0_table[index_a][bS[3]]; | |
10854 | 284 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); |
285 } else { | |
286 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta); | |
287 } | |
288 } | |
289 | |
10960 | 290 static void av_noinline filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
291 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
10960 | 292 const int alpha = alpha_table[index_a]; |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
293 const int beta = beta_table[qp + h->slice_beta_offset]; |
10854 | 294 if (alpha ==0 || beta == 0) return; |
295 | |
296 if( bS[0] < 4 ) { | |
297 int8_t tc[4]; | |
10960 | 298 tc[0] = tc0_table[index_a][bS[0]]+1; |
299 tc[1] = tc0_table[index_a][bS[1]]+1; | |
300 tc[2] = tc0_table[index_a][bS[2]]+1; | |
301 tc[3] = tc0_table[index_a][bS[3]]+1; | |
10854 | 302 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); |
303 } else { | |
304 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); | |
305 } | |
306 } | |
307 | |
308 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { | |
309 MpegEncContext * const s = &h->s; | |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
310 int mb_xy; |
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
311 int mb_type, left_type; |
10854 | 312 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; |
313 | |
314 mb_xy = h->mb_xy; | |
315 | |
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
316 if(!h->top_type || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) { |
10854 | 317 ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); |
318 return; | |
319 } | |
320 assert(!FRAME_MBAFF); | |
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
321 left_type= h->left_type[0]; |
10854 | 322 |
323 mb_type = s->current_picture.mb_type[mb_xy]; | |
324 qp = s->current_picture.qscale_table[mb_xy]; | |
325 qp0 = s->current_picture.qscale_table[mb_xy-1]; | |
326 qp1 = s->current_picture.qscale_table[h->top_mb_xy]; | |
327 qpc = get_chroma_qp( h, 0, qp ); | |
328 qpc0 = get_chroma_qp( h, 0, qp0 ); | |
329 qpc1 = get_chroma_qp( h, 0, qp1 ); | |
330 qp0 = (qp + qp0 + 1) >> 1; | |
331 qp1 = (qp + qp1 + 1) >> 1; | |
332 qpc0 = (qpc + qpc0 + 1) >> 1; | |
333 qpc1 = (qpc + qpc1 + 1) >> 1; | |
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
334 qp_thresh = 15+52 - h->slice_alpha_c0_offset; |
10854 | 335 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh && |
336 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh) | |
337 return; | |
338 | |
339 if( IS_INTRA(mb_type) ) { | |
340 int16_t bS4[4] = {4,4,4,4}; | |
341 int16_t bS3[4] = {3,3,3,3}; | |
342 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4; | |
343 if( IS_8x8DCT(mb_type) ) { | |
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
344 if(left_type) |
10960 | 345 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); |
346 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); | |
347 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); | |
348 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); | |
10854 | 349 } else { |
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
350 if(left_type) |
10960 | 351 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); |
352 filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h); | |
353 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); | |
354 filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h); | |
355 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); | |
356 filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h); | |
357 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); | |
358 filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h); | |
10854 | 359 } |
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
360 if(left_type){ |
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
361 filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h); |
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
362 filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h); |
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
363 } |
10960 | 364 filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); |
365 filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); | |
366 filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); | |
367 filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); | |
368 filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); | |
369 filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); | |
10854 | 370 return; |
371 } else { | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
372 DECLARE_ALIGNED_8(int16_t, bS)[2][4][4]; |
10854 | 373 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS; |
374 int edges; | |
375 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) { | |
376 edges = 4; | |
377 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL; | |
378 } else { | |
379 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : | |
380 (mb_type & MB_TYPE_16x8) ? 1 : 0; | |
381 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) | |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
382 && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) |
10854 | 383 ? 3 : 0; |
384 int step = IS_8x8DCT(mb_type) ? 2 : 1; | |
385 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; | |
386 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, | |
10984
541acd292c48
Remove all uses of slice_type* from the loop filter, also remove its
michael
parents:
10979
diff
changeset
|
387 h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE); |
10854 | 388 } |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
389 if( IS_INTRA(h->left_type[0]) ) |
10854 | 390 bSv[0][0] = 0x0004000400040004ULL; |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
391 if( IS_INTRA(h->top_type) ) |
10854 | 392 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL; |
393 | |
394 #define FILTER(hv,dir,edge)\ | |
395 if(bSv[dir][edge]) {\ | |
10960 | 396 filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\ |
10854 | 397 if(!(edge&1)) {\ |
10960 | 398 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ |
399 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ | |
10854 | 400 }\ |
401 } | |
402 if( edges == 1 ) { | |
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
403 if(left_type) |
10854 | 404 FILTER(v,0,0); |
405 FILTER(h,1,0); | |
406 } else if( IS_8x8DCT(mb_type) ) { | |
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
407 if(left_type) |
10854 | 408 FILTER(v,0,0); |
409 FILTER(v,0,2); | |
410 FILTER(h,1,0); | |
411 FILTER(h,1,2); | |
412 } else { | |
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
413 if(left_type) |
10854 | 414 FILTER(v,0,0); |
415 FILTER(v,0,1); | |
416 FILTER(v,0,2); | |
417 FILTER(v,0,3); | |
418 FILTER(h,1,0); | |
419 FILTER(h,1,1); | |
420 FILTER(h,1,2); | |
421 FILTER(h,1,3); | |
422 } | |
423 #undef FILTER | |
424 } | |
425 } | |
426 | |
427 | |
428 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) { | |
429 MpegEncContext * const s = &h->s; | |
430 int edge; | |
431 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; | |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
432 const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type; |
10854 | 433 |
434 // how often to recheck mv-based bS when iterating between edges | |
10958
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
435 static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1}, |
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
436 {0,3,1,1,3,3,3,3}}; |
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
437 const int mask_edge = mask_edge_tab[dir][(mb_type>>3)&7]; |
10973
214adf5e303b
Set edges based on cbp and mv partitioning, not just skiped MBs.
michael
parents:
10970
diff
changeset
|
438 const int edges = mask_edge== 3 && !(h->cbp&15) ? 1 : 4; |
214adf5e303b
Set edges based on cbp and mv partitioning, not just skiped MBs.
michael
parents:
10970
diff
changeset
|
439 |
10854 | 440 // how often to recheck mv-based bS when iterating along each edge |
441 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)); | |
11033 | 442 |
443 if(!(h->slice_table[mbm_xy] == 0xFFFF | |
10942 | 444 || first_vertical_edge_done |
11033 | 445 || (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_num))){ |
10854 | 446 |
11033 | 447 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) |
10946 | 448 && IS_INTERLACED(mbm_type&~mb_type) |
10854 | 449 ) { |
450 // This is a special case in the norm where the filtering must | |
451 // be done twice (one each of the field) even if we are in a | |
452 // frame macroblock. | |
453 // | |
454 unsigned int tmp_linesize = 2 * linesize; | |
455 unsigned int tmp_uvlinesize = 2 * uvlinesize; | |
456 int mbn_xy = mb_xy - 2 * s->mb_stride; | |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
457 int j; |
10854 | 458 |
459 for(j=0; j<2; j++, mbn_xy += s->mb_stride){ | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
460 DECLARE_ALIGNED_8(int16_t, bS)[4]; |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
461 int qp; |
10946 | 462 if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) { |
10947 | 463 *(uint64_t*)bS= 0x0003000300030003ULL; |
10854 | 464 } else { |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
465 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 4+3*8; |
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
466 int i; |
10854 | 467 for( i = 0; i < 4; i++ ) { |
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
468 bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]); |
10854 | 469 } |
470 } | |
471 // Do not use s->qscale as luma quantizer because it has not the same | |
472 // value in IPCM macroblocks. | |
473 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; | |
474 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); | |
475 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } | |
10960 | 476 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h ); |
477 filter_mb_edgech( &img_cb[j*uvlinesize], tmp_uvlinesize, bS, | |
478 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); | |
479 filter_mb_edgech( &img_cr[j*uvlinesize], tmp_uvlinesize, bS, | |
480 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); | |
10854 | 481 } |
11033 | 482 }else{ |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
483 DECLARE_ALIGNED_8(int16_t, bS)[4]; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
484 int qp; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
485 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
486 if( IS_INTRA(mb_type|mbm_type)) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
487 *(uint64_t*)bS= 0x0003000300030003ULL; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
488 if ( (!IS_INTERLACED(mb_type|mbm_type)) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
489 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0)) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
490 ) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
491 *(uint64_t*)bS= 0x0004000400040004ULL; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
492 } else { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
493 int i, l; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
494 int mv_done; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
495 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
496 if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbm_type)) { //FIXME not posible left |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
497 *(uint64_t*)bS= 0x0001000100010001ULL; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
498 mv_done = 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
499 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
500 else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
501 int b_idx= 8 + 4; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
502 int bn_idx= b_idx - (dir ? 8:1); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
503 int v = 0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
504 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
505 for( l = 0; !v && l < h->list_count; l++ ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
506 v |= h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
507 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
508 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
509 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
510 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
511 if(h->list_count==2 && v){ |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
512 v=0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
513 for( l = 0; !v && l < 2; l++ ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
514 int ln= 1-l; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
515 v |= h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
516 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
517 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
518 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
519 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
520 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
521 bS[0] = bS[1] = bS[2] = bS[3] = v; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
522 mv_done = 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
523 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
524 else |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
525 mv_done = 0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
526 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
527 for( i = 0; i < 4; i++ ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
528 int x = dir == 0 ? 0 : i; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
529 int y = dir == 0 ? i : 0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
530 int b_idx= 8 + 4 + x + 8*y; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
531 int bn_idx= b_idx - (dir ? 8:1); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
532 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
533 if( h->non_zero_count_cache[b_idx] | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
534 h->non_zero_count_cache[bn_idx] ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
535 bS[i] = 2; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
536 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
537 else if(!mv_done) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
538 { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
539 bS[i] = 0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
540 for( l = 0; l < h->list_count; l++ ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
541 if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
542 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
543 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
544 bS[i] = 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
545 break; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
546 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
547 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
548 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
549 if(h->list_count == 2 && bS[i]){ |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
550 bS[i] = 0; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
551 for( l = 0; l < 2; l++ ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
552 int ln= 1-l; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
553 if( h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
554 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
555 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
556 bS[i] = 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
557 break; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
558 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
559 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
560 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
561 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
562 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
563 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
564 |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
565 /* Filter edge */ |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
566 // Do not use s->qscale as luma quantizer because it has not the same |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
567 // value in IPCM macroblocks. |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
568 if(bS[0]+bS[1]+bS[2]+bS[3]){ |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
569 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbm_xy] + 1 ) >> 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
570 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
571 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
572 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
573 if( dir == 0 ) { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
574 filter_mb_edgev( &img_y[0], linesize, bS, qp, h ); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
575 { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
576 int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
577 filter_mb_edgecv( &img_cb[0], uvlinesize, bS, qp, h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
578 if(h->pps.chroma_qp_diff) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
579 qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
580 filter_mb_edgecv( &img_cr[0], uvlinesize, bS, qp, h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
581 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
582 } else { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
583 filter_mb_edgeh( &img_y[0], linesize, bS, qp, h ); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
584 { |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
585 int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
586 filter_mb_edgech( &img_cb[0], uvlinesize, bS, qp, h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
587 if(h->pps.chroma_qp_diff) |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
588 qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
589 filter_mb_edgech( &img_cr[0], uvlinesize, bS, qp, h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
590 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
591 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
592 } |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
593 } |
11033 | 594 } |
595 | |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
596 /* Calculate bS */ |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
597 for( edge = 1; edge < edges; edge++ ) { |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
598 DECLARE_ALIGNED_8(int16_t, bS)[4]; |
10854 | 599 int qp; |
600 | |
10969
2e8fbfc278d5
Optmize 8x8dct check used to skip some borders in the loop filter.
michael
parents:
10961
diff
changeset
|
601 if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type) |
10854 | 602 continue; |
603 | |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
604 if( IS_INTRA(mb_type)) { |
10945
0d93bbc17950
Simplify and optimize intra code in h264_loopfilter.c
michael
parents:
10942
diff
changeset
|
605 *(uint64_t*)bS= 0x0003000300030003ULL; |
10854 | 606 } else { |
607 int i, l; | |
608 int mv_done; | |
609 | |
610 if( edge & mask_edge ) { | |
10947 | 611 *(uint64_t*)bS= 0; |
10854 | 612 mv_done = 1; |
613 } | |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
614 else if( mask_par0 ) { |
10854 | 615 int b_idx= 8 + 4 + edge * (dir ? 8:1); |
616 int bn_idx= b_idx - (dir ? 8:1); | |
617 int v = 0; | |
618 | |
10984
541acd292c48
Remove all uses of slice_type* from the loop filter, also remove its
michael
parents:
10979
diff
changeset
|
619 for( l = 0; !v && l < h->list_count; l++ ) { |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
620 v |= h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | |
10901
2a5c3d89201d
Another microopt, 4 cpu cycles for avoidance of FFABS().
michael
parents:
10899
diff
changeset
|
621 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | |
10854 | 622 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit; |
623 } | |
624 | |
10984
541acd292c48
Remove all uses of slice_type* from the loop filter, also remove its
michael
parents:
10979
diff
changeset
|
625 if(h->list_count==2 && v){ |
10854 | 626 v=0; |
627 for( l = 0; !v && l < 2; l++ ) { | |
628 int ln= 1-l; | |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
629 v |= h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | |
10901
2a5c3d89201d
Another microopt, 4 cpu cycles for avoidance of FFABS().
michael
parents:
10899
diff
changeset
|
630 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | |
10854 | 631 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit; |
632 } | |
633 } | |
634 | |
635 bS[0] = bS[1] = bS[2] = bS[3] = v; | |
636 mv_done = 1; | |
637 } | |
638 else | |
639 mv_done = 0; | |
640 | |
641 for( i = 0; i < 4; i++ ) { | |
642 int x = dir == 0 ? edge : i; | |
643 int y = dir == 0 ? i : edge; | |
644 int b_idx= 8 + 4 + x + 8*y; | |
645 int bn_idx= b_idx - (dir ? 8:1); | |
646 | |
647 if( h->non_zero_count_cache[b_idx] | | |
648 h->non_zero_count_cache[bn_idx] ) { | |
649 bS[i] = 2; | |
650 } | |
651 else if(!mv_done) | |
652 { | |
653 bS[i] = 0; | |
10984
541acd292c48
Remove all uses of slice_type* from the loop filter, also remove its
michael
parents:
10979
diff
changeset
|
654 for( l = 0; l < h->list_count; l++ ) { |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
655 if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | |
10902
1e41e6ab9a18
Apply last 2 optimizations to similar code i forgot.
michael
parents:
10901
diff
changeset
|
656 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | |
10854 | 657 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) { |
658 bS[i] = 1; | |
659 break; | |
660 } | |
661 } | |
662 | |
10984
541acd292c48
Remove all uses of slice_type* from the loop filter, also remove its
michael
parents:
10979
diff
changeset
|
663 if(h->list_count == 2 && bS[i]){ |
10854 | 664 bS[i] = 0; |
665 for( l = 0; l < 2; l++ ) { | |
666 int ln= 1-l; | |
10913
497929e9d912
Perform reference remapping at fill_cache() time instead of in the
michael
parents:
10910
diff
changeset
|
667 if( h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | |
10902
1e41e6ab9a18
Apply last 2 optimizations to similar code i forgot.
michael
parents:
10901
diff
changeset
|
668 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | |
10854 | 669 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) { |
670 bS[i] = 1; | |
671 break; | |
672 } | |
673 } | |
674 } | |
675 } | |
676 } | |
677 | |
678 if(bS[0]+bS[1]+bS[2]+bS[3] == 0) | |
679 continue; | |
680 } | |
681 | |
682 /* Filter edge */ | |
683 // Do not use s->qscale as luma quantizer because it has not the same | |
684 // value in IPCM macroblocks. | |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
685 qp = s->current_picture.qscale_table[mb_xy]; |
10906 | 686 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); |
10854 | 687 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); |
10904 | 688 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } |
10854 | 689 if( dir == 0 ) { |
10960 | 690 filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, h ); |
10854 | 691 if( (edge&1) == 0 ) { |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
692 filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, h->chroma_qp[0], h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
693 filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, h->chroma_qp[1], h); |
10854 | 694 } |
695 } else { | |
10960 | 696 filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h ); |
10854 | 697 if( (edge&1) == 0 ) { |
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
698 filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h); |
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
699 filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h); |
10854 | 700 } |
701 } | |
702 } | |
703 } | |
704 | |
705 void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { | |
706 MpegEncContext * const s = &h->s; | |
707 const int mb_xy= mb_x + mb_y*s->mb_stride; | |
708 const int mb_type = s->current_picture.mb_type[mb_xy]; | |
709 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4; | |
710 int first_vertical_edge_done = 0; | |
711 av_unused int dir; | |
10906 | 712 int list; |
10854 | 713 |
714 if (FRAME_MBAFF | |
715 // and current and left pair do not have the same interlaced type | |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
716 && IS_INTERLACED(mb_type^h->left_type[0]) |
11026 | 717 // and left mb is in available to us |
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
718 && h->left_type[0]) { |
10854 | 719 /* First vertical edge is different in MBAFF frames |
720 * There are 8 different bS to compute and 2 different Qp | |
721 */ | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
722 DECLARE_ALIGNED_8(int16_t, bS)[8]; |
10854 | 723 int qp[2]; |
724 int bqp[2]; | |
725 int rqp[2]; | |
726 int mb_qp, mbn0_qp, mbn1_qp; | |
727 int i; | |
728 first_vertical_edge_done = 1; | |
729 | |
730 if( IS_INTRA(mb_type) ) | |
10947 | 731 *(uint64_t*)&bS[0]= |
732 *(uint64_t*)&bS[4]= 0x0004000400040004ULL; | |
10854 | 733 else { |
11025
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
734 static const uint8_t offset[2][2][8]={ |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
735 { |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
736 {7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1}, |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
737 {7+8*2, 7+8*2, 7+8*2, 7+8*2, 7+8*3, 7+8*3, 7+8*3, 7+8*3}, |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
738 },{ |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
739 {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
740 {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
741 } |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
742 }; |
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
743 const uint8_t *off= offset[MB_FIELD][mb_y&1]; |
10854 | 744 for( i = 0; i < 8; i++ ) { |
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
745 int j= MB_FIELD ? i>>2 : i&1; |
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
746 int mbn_xy = h->left_mb_xy[j]; |
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
747 int mbn_type= h->left_type[j]; |
10854 | 748 |
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
749 if( IS_INTRA( mbn_type ) ) |
10854 | 750 bS[i] = 4; |
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
751 else{ |
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
752 bS[i] = 1 + !!(h->non_zero_count_cache[12+8*(i>>1)] | |
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
753 ((!h->pps.cabac && IS_8x8DCT(mbn_type)) ? |
10854 | 754 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2)) |
755 : | |
11025
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
756 h->non_zero_count[mbn_xy][ off[i] ])); |
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
757 } |
10854 | 758 } |
759 } | |
760 | |
761 mb_qp = s->current_picture.qscale_table[mb_xy]; | |
11015
d844c58b985a
Use left_mb_xy from fill_caches instead of recalculating it.
michael
parents:
11013
diff
changeset
|
762 mbn0_qp = s->current_picture.qscale_table[h->left_mb_xy[0]]; |
d844c58b985a
Use left_mb_xy from fill_caches instead of recalculating it.
michael
parents:
11013
diff
changeset
|
763 mbn1_qp = s->current_picture.qscale_table[h->left_mb_xy[1]]; |
10854 | 764 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1; |
765 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) + | |
766 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1; | |
767 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) + | |
768 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1; | |
769 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1; | |
770 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) + | |
771 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1; | |
772 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) + | |
773 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1; | |
774 | |
775 /* Filter edge */ | |
776 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize); | |
777 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } | |
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
778 if(MB_FIELD){ |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
779 filter_mb_mbaff_edgev ( h, img_y , linesize, bS , 1, qp [0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
780 filter_mb_mbaff_edgev ( h, img_y + 8* linesize, linesize, bS+4, 1, qp [1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
781 filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
782 filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
783 filter_mb_mbaff_edgecv( h, img_cr, uvlinesize, bS , 1, rqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
784 filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
785 }else{ |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
786 filter_mb_mbaff_edgev ( h, img_y , 2* linesize, bS , 2, qp [0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
787 filter_mb_mbaff_edgev ( h, img_y + linesize, 2* linesize, bS+1, 2, qp [1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
788 filter_mb_mbaff_edgecv( h, img_cb, 2*uvlinesize, bS , 2, bqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
789 filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
790 filter_mb_mbaff_edgecv( h, img_cr, 2*uvlinesize, bS , 2, rqp[0] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
791 filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] ); |
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
792 } |
10854 | 793 } |
794 | |
795 #if CONFIG_SMALL | |
796 for( dir = 0; dir < 2; dir++ ) | |
797 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir); | |
798 #else | |
799 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0); | |
800 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1); | |
801 #endif | |
802 } |