Mercurial > libavcodec.hg
annotate h264_mvpred.h @ 12197:fbf4d5b1b664 libavcodec
Remove FF_MM_SSE2/3 flags for CPUs where this is generally not faster than
regular MMX code. Examples of this are the Core1 CPU. Instead, set a new flag,
FF_MM_SSE2/3SLOW, which can be checked for particular SSE2/3 functions that
have been checked specifically on such CPUs and are actually faster than
their MMX counterparts.
In addition, use this flag to enable particular VP8 and LPC SSE2 functions
that are faster than their MMX counterparts.
Based on a patch by Loren Merritt <lorenm AT u washington edu>.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 22:38:23 +0000 |
parents | 7dd2a45249a9 |
children |
rev | line source |
---|---|
1168 | 1 /* |
10864 | 2 * H.26L/H.264/AVC/JVT/14496-10/... motion vector prediction |
1168 | 3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
4 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
1168 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
1168 | 11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
1168 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3029
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
1168 | 20 */ |
2967 | 21 |
1168 | 22 /** |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11293
diff
changeset
|
23 * @file |
10864 | 24 * H.264 / AVC / MPEG4 part10 motion vector prediction. |
1168 | 25 * @author Michael Niedermayer <michaelni@gmx.at> |
26 */ | |
27 | |
10882
5d34ab807e91
Add forgotton multiple inclusion guards to h264_mvpred.h.
michael
parents:
10864
diff
changeset
|
28 #ifndef AVCODEC_H264_MVPRED_H |
5d34ab807e91
Add forgotton multiple inclusion guards to h264_mvpred.h.
michael
parents:
10864
diff
changeset
|
29 #define AVCODEC_H264_MVPRED_H |
5d34ab807e91
Add forgotton multiple inclusion guards to h264_mvpred.h.
michael
parents:
10864
diff
changeset
|
30 |
9012
15a3df8c01fd
More approved hunks for VAAPI & our new and cleaner hwaccel API.
michael
parents:
9004
diff
changeset
|
31 #include "internal.h" |
1168 | 32 #include "avcodec.h" |
4975 | 33 #include "h264.h" |
1908
e20fd60b215c
h264 - progressive I frame CABAC support patch by (Laurent Aimar <fenrir at via dot ecp dot fr>)
michael
parents:
1899
diff
changeset
|
34 |
3284
a224d9752912
don't force asserts in release builds. 2% faster h264.
lorenm
parents:
3219
diff
changeset
|
35 //#undef NDEBUG |
1168 | 36 #include <assert.h> |
37 | |
10864 | 38 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){ |
39 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ]; | |
40 MpegEncContext *s = &h->s; | |
7481 | 41 |
10864 | 42 /* there is no consistent mapping of mvs to neighboring locations that will |
43 * make mbaff happy, so we can't move all this logic to fill_caches */ | |
44 if(FRAME_MBAFF){ | |
8443 | 45 |
11292
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
46 #define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)\ |
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
47 const int xy = XY, y4 = Y4;\ |
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
48 const int mb_type = mb_types[xy+(y4>>2)*s->mb_stride];\ |
10864 | 49 if(!USES_LIST(mb_type,list))\ |
50 return LIST_NOT_USED;\ | |
11292
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
51 mv = s->current_picture_ptr->motion_val[list][h->mb2b_xy[xy]+3 + y4*h->b_stride];\ |
10864 | 52 h->mv_cache[list][scan8[0]-2][0] = mv[0];\ |
53 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\ | |
11292
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
54 return s->current_picture_ptr->ref_index[list][4*xy+1 + (y4&~1)] REF_OP; |
2967 | 55 |
10864 | 56 if(topright_ref == PART_NOT_AVAILABLE |
10933
3a7fa91fa168
Remove 2 checks from fetch_diagonal_mv() that apparently serve no purpose.
michael
parents:
10932
diff
changeset
|
57 && i >= scan8[0]+8 && (i&7)==4 |
10864 | 58 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){ |
10932
5f0ef177bbac
Move a few things into a deeper if() in fetch_diagonal_mv().
michael
parents:
10931
diff
changeset
|
59 const uint32_t *mb_types = s->current_picture_ptr->mb_type; |
5f0ef177bbac
Move a few things into a deeper if() in fetch_diagonal_mv().
michael
parents:
10931
diff
changeset
|
60 const int16_t *mv; |
11203 | 61 AV_ZERO32(h->mv_cache[list][scan8[0]-2]); |
10932
5f0ef177bbac
Move a few things into a deeper if() in fetch_diagonal_mv().
michael
parents:
10931
diff
changeset
|
62 *C = h->mv_cache[list][scan8[0]-2]; |
5f0ef177bbac
Move a few things into a deeper if() in fetch_diagonal_mv().
michael
parents:
10931
diff
changeset
|
63 |
10864 | 64 if(!MB_FIELD |
11293 | 65 && IS_INTERLACED(h->left_type[0])){ |
66 SET_DIAG_MV(*2, >>1, h->left_mb_xy[0]+s->mb_stride, (s->mb_y&1)*2+(i>>5)); | |
11292
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
67 assert(h->left_mb_xy[0] == h->left_mb_xy[1]); |
10864 | 68 } |
69 if(MB_FIELD | |
11293 | 70 && !IS_INTERLACED(h->left_type[0])){ |
10864 | 71 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK. |
11293 | 72 SET_DIAG_MV(/2, <<1, h->left_mb_xy[i>=36], ((i>>2))&3); |
2551
615995277bc5
MBAFF I slice no deblocking patch by (Loic >>lll+ffmpeg m4x org<<)
michael
parents:
2548
diff
changeset
|
73 } |
615995277bc5
MBAFF I slice no deblocking patch by (Loic >>lll+ffmpeg m4x org<<)
michael
parents:
2548
diff
changeset
|
74 } |
10864 | 75 #undef SET_DIAG_MV |
1168 | 76 } |
77 | |
10864 | 78 if(topright_ref != PART_NOT_AVAILABLE){ |
79 *C= h->mv_cache[list][ i - 8 + part_width ]; | |
80 return topright_ref; | |
81 }else{ | |
82 tprintf(s->avctx, "topright MV not available\n"); | |
83 | |
84 *C= h->mv_cache[list][ i - 8 - 1 ]; | |
85 return h->ref_cache[list][ i - 8 - 1 ]; | |
86 } | |
87 } | |
88 | |
89 /** | |
90 * gets the predicted MV. | |
91 * @param n the block index | |
92 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4) | |
93 * @param mx the x component of the predicted motion vector | |
94 * @param my the y component of the predicted motion vector | |
95 */ | |
96 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){ | |
97 const int index8= scan8[n]; | |
98 const int top_ref= h->ref_cache[list][ index8 - 8 ]; | |
99 const int left_ref= h->ref_cache[list][ index8 - 1 ]; | |
100 const int16_t * const A= h->mv_cache[list][ index8 - 1 ]; | |
101 const int16_t * const B= h->mv_cache[list][ index8 - 8 ]; | |
102 const int16_t * C; | |
103 int diagonal_ref, match_count; | |
3316 | 104 |
10864 | 105 assert(part_width==1 || part_width==2 || part_width==4); |
106 | |
107 /* mv_cache | |
108 B . . A T T T T | |
109 U . . L . . , . | |
110 U . . L . . . . | |
111 U . . L . . , . | |
112 . . . L . . . . | |
113 */ | |
114 | |
115 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width); | |
116 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref); | |
117 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count); | |
118 if(match_count > 1){ //most common | |
119 *mx= mid_pred(A[0], B[0], C[0]); | |
120 *my= mid_pred(A[1], B[1], C[1]); | |
121 }else if(match_count==1){ | |
122 if(left_ref==ref){ | |
123 *mx= A[0]; | |
124 *my= A[1]; | |
125 }else if(top_ref==ref){ | |
126 *mx= B[0]; | |
127 *my= B[1]; | |
128 }else{ | |
129 *mx= C[0]; | |
130 *my= C[1]; | |
3316 | 131 } |
2449 | 132 }else{ |
10864 | 133 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){ |
134 *mx= A[0]; | |
135 *my= A[1]; | |
7532 | 136 }else{ |
10864 | 137 *mx= mid_pred(A[0], B[0], C[0]); |
138 *my= mid_pred(A[1], B[1], C[1]); | |
2551
615995277bc5
MBAFF I slice no deblocking patch by (Loic >>lll+ffmpeg m4x org<<)
michael
parents:
2548
diff
changeset
|
139 } |
615995277bc5
MBAFF I slice no deblocking patch by (Loic >>lll+ffmpeg m4x org<<)
michael
parents:
2548
diff
changeset
|
140 } |
615995277bc5
MBAFF I slice no deblocking patch by (Loic >>lll+ffmpeg m4x org<<)
michael
parents:
2548
diff
changeset
|
141 |
10864 | 142 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); |
1168 | 143 } |
144 | |
145 /** | |
146 * gets the directionally predicted 16x8 MV. | |
147 * @param n the block index | |
148 * @param mx the x component of the predicted motion vector | |
149 * @param my the y component of the predicted motion vector | |
150 */ | |
151 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ | |
152 if(n==0){ | |
153 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ]; | |
154 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; | |
155 | |
4600 | 156 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); |
2967 | 157 |
1168 | 158 if(top_ref == ref){ |
159 *mx= B[0]; | |
160 *my= B[1]; | |
161 return; | |
162 } | |
163 }else{ | |
164 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ]; | |
165 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ]; | |
2967 | 166 |
4600 | 167 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); |
1168 | 168 |
169 if(left_ref == ref){ | |
170 *mx= A[0]; | |
171 *my= A[1]; | |
172 return; | |
173 } | |
174 } | |
175 | |
176 //RARE | |
177 pred_motion(h, n, 4, list, ref, mx, my); | |
178 } | |
179 | |
180 /** | |
181 * gets the directionally predicted 8x16 MV. | |
182 * @param n the block index | |
183 * @param mx the x component of the predicted motion vector | |
184 * @param my the y component of the predicted motion vector | |
185 */ | |
186 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ | |
187 if(n==0){ | |
188 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ]; | |
189 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; | |
2967 | 190 |
4600 | 191 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); |
1168 | 192 |
193 if(left_ref == ref){ | |
194 *mx= A[0]; | |
195 *my= A[1]; | |
196 return; | |
197 } | |
198 }else{ | |
1169 | 199 const int16_t * C; |
200 int diagonal_ref; | |
201 | |
202 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2); | |
2967 | 203 |
4600 | 204 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); |
1168 | 205 |
2967 | 206 if(diagonal_ref == ref){ |
1168 | 207 *mx= C[0]; |
208 *my= C[1]; | |
209 return; | |
210 } | |
211 } | |
212 | |
213 //RARE | |
214 pred_motion(h, n, 2, list, ref, mx, my); | |
215 } | |
216 | |
217 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){ | |
218 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ]; | |
219 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ]; | |
220 | |
4600 | 221 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y); |
1168 | 222 |
223 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE | |
11203 | 224 || !( top_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 8 ])) |
225 || !(left_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 1 ]))){ | |
2967 | 226 |
1168 | 227 *mx = *my = 0; |
228 return; | |
229 } | |
2967 | 230 |
1168 | 231 pred_motion(h, 0, 4, 0, 0, mx, my); |
232 | |
233 return; | |
234 } | |
10882
5d34ab807e91
Add forgotton multiple inclusion guards to h264_mvpred.h.
michael
parents:
10864
diff
changeset
|
235 |
5d34ab807e91
Add forgotton multiple inclusion guards to h264_mvpred.h.
michael
parents:
10864
diff
changeset
|
236 #endif /* AVCODEC_H264_MVPRED_H */ |