Mercurial > libavcodec.hg
annotate h264dsp.c @ 12266:48d6738904a9 libavcodec
Fix SPLATB_REG mess. It used to be an if/elseif/elseif/elseif spaghetti, so this
splits it into small optimization-specific macros which are selected for each
DSP function. The advantage of this approach is that the sse4 functions now
use the ssse3 codepath also without needing an explicit sse4 codepath.
author | rbultje |
---|---|
date | Sat, 24 Jul 2010 19:33:05 +0000 |
parents | 7dd2a45249a9 |
children |
rev | line source |
---|---|
0 | 1 /* |
11499 | 2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder |
3 * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at> | |
5214 | 4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
429 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
0 | 11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
0 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 * Lesser General Public License for more details. | |
0 | 16 * |
429 | 17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3029
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
0 | 20 */ |
2967 | 21 |
1106 | 22 /** |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11506
diff
changeset
|
23 * @file |
11499 | 24 * H.264 / AVC / MPEG4 part10 DSP functions. |
25 * @author Michael Niedermayer <michaelni@gmx.at> | |
1106 | 26 */ |
2967 | 27 |
11499 | 28 #include <stdint.h> |
0 | 29 #include "avcodec.h" |
11499 | 30 #include "h264dsp.h" |
1168 | 31 |
/*
 * Weight one sample in place: multiply by `weight`, add the pre-scaled
 * offset/rounding term, shift down by log2_denom and clip via av_clip_uint8().
 */
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
/*
 * Blend one sample of `src` into `dst`: weighted sum of both samples plus the
 * pre-scaled offset term, shifted down by log2_denom+1 and clipped via
 * av_clip_uint8().
 */
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))

/**
 * Generate the C weighted-prediction functions for a W x H block:
 *
 *   weight_h264_pixelsWxH_c(block, stride, log2_denom, weight, offset)
 *       weights `block` in place, row by row (rows are `stride` bytes apart).
 *   biweight_h264_pixelsWxH_c(dst, src, stride, log2_denom, weightd, weights, offset)
 *       blends `src` into `dst`; both are walked with the same `stride`.
 *
 * W must be one of 2, 4, 8 or 16: each row is fully unrolled and the
 * compile-time constant tests on W cut the row short for narrower blocks.
 *
 * `offset` may be negative.  Left-shifting a negative signed integer is
 * undefined behaviour in C, so the pre-scaling by log2_denom is done on the
 * unsigned representation and converted back; on two's complement targets
 * this yields the same bits the signed shift was expected to produce.
 */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    offset = (unsigned)offset << log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int y; \
    offset = (unsigned)((offset + 1) | 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
101 | |
10941
28edcc8c54c0
Mark the h264 c loop filter as av_always_inline av_flatten to make sure its
michael
parents:
10940
diff
changeset
|
102 static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0) |
2633 | 103 { |
104 int i, d; | |
105 for( i = 0; i < 4; i++ ) { | |
106 if( tc0[i] < 0 ) { | |
107 pix += 4*ystride; | |
108 continue; | |
109 } | |
110 for( d = 0; d < 4; d++ ) { | |
111 const int p0 = pix[-1*xstride]; | |
112 const int p1 = pix[-2*xstride]; | |
113 const int p2 = pix[-3*xstride]; | |
114 const int q0 = pix[0]; | |
115 const int q1 = pix[1*xstride]; | |
116 const int q2 = pix[2*xstride]; | |
2967 | 117 |
4001 | 118 if( FFABS( p0 - q0 ) < alpha && |
119 FFABS( p1 - p0 ) < beta && | |
120 FFABS( q1 - q0 ) < beta ) { | |
2967 | 121 |
2633 | 122 int tc = tc0[i]; |
123 int i_delta; | |
2967 | 124 |
4001 | 125 if( FFABS( p2 - p0 ) < beta ) { |
10940
563cb9b1a9b7
skip outer pixels if possible in h264_loop_filter_luma_c().
michael
parents:
10878
diff
changeset
|
126 if(tc0[i]) |
4594 | 127 pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] ); |
2633 | 128 tc++; |
129 } | |
4001 | 130 if( FFABS( q2 - q0 ) < beta ) { |
10940
563cb9b1a9b7
skip outer pixels if possible in h264_loop_filter_luma_c().
michael
parents:
10878
diff
changeset
|
131 if(tc0[i]) |
4594 | 132 pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] ); |
2633 | 133 tc++; |
134 } | |
2967 | 135 |
4594 | 136 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); |
137 pix[-xstride] = av_clip_uint8( p0 + i_delta ); /* p0' */ | |
138 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ | |
2633 | 139 } |
140 pix += ystride; | |
141 } | |
142 } | |
143 } | |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
144 static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) |
2633 | 145 { |
146 h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0); | |
147 } | |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
148 static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) |
2633 | 149 { |
150 h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0); | |
151 } | |
152 | |
10941
28edcc8c54c0
Mark the h264 c loop filter as av_always_inline av_flatten to make sure its
michael
parents:
10940
diff
changeset
|
153 static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta) |
8395
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
154 { |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
155 int d; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
156 for( d = 0; d < 16; d++ ) { |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
157 const int p2 = pix[-3*xstride]; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
158 const int p1 = pix[-2*xstride]; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
159 const int p0 = pix[-1*xstride]; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
160 |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
161 const int q0 = pix[ 0*xstride]; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
162 const int q1 = pix[ 1*xstride]; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
163 const int q2 = pix[ 2*xstride]; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
164 |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
165 if( FFABS( p0 - q0 ) < alpha && |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
166 FFABS( p1 - p0 ) < beta && |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
167 FFABS( q1 - q0 ) < beta ) { |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
168 |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
169 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
170 if( FFABS( p2 - p0 ) < beta) |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
171 { |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
172 const int p3 = pix[-4*xstride]; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
173 /* p0', p1', p2' */ |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
174 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
175 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
176 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
177 } else { |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
178 /* p0' */ |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
179 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
180 } |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
181 if( FFABS( q2 - q0 ) < beta) |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
182 { |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
183 const int q3 = pix[3*xstride]; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
184 /* q0', q1', q2' */ |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
185 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
186 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
187 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
188 } else { |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
189 /* q0' */ |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
190 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
191 } |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
192 }else{ |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
193 /* p0', q0' */ |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
194 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
195 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
196 } |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
197 } |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
198 pix += ystride; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
199 } |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
200 } |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
201 static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta) |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
202 { |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
203 h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta); |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
204 } |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
205 static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta) |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
206 { |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
207 h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta); |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
208 } |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
209 |
10941
28edcc8c54c0
Mark the h264 c loop filter as av_always_inline av_flatten to make sure its
michael
parents:
10940
diff
changeset
|
210 static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0) |
2633 | 211 { |
212 int i, d; | |
213 for( i = 0; i < 4; i++ ) { | |
214 const int tc = tc0[i]; | |
215 if( tc <= 0 ) { | |
216 pix += 2*ystride; | |
217 continue; | |
218 } | |
219 for( d = 0; d < 2; d++ ) { | |
220 const int p0 = pix[-1*xstride]; | |
221 const int p1 = pix[-2*xstride]; | |
222 const int q0 = pix[0]; | |
223 const int q1 = pix[1*xstride]; | |
224 | |
4001 | 225 if( FFABS( p0 - q0 ) < alpha && |
226 FFABS( p1 - p0 ) < beta && | |
227 FFABS( q1 - q0 ) < beta ) { | |
2633 | 228 |
4594 | 229 int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); |
230 | |
231 pix[-xstride] = av_clip_uint8( p0 + delta ); /* p0' */ | |
232 pix[0] = av_clip_uint8( q0 - delta ); /* q0' */ | |
2633 | 233 } |
234 pix += ystride; | |
235 } | |
236 } | |
237 } | |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
238 static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) |
2633 | 239 { |
240 h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0); | |
241 } | |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
242 static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) |
2633 | 243 { |
244 h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0); | |
245 } | |
246 | |
10941
28edcc8c54c0
Mark the h264 c loop filter as av_always_inline av_flatten to make sure its
michael
parents:
10940
diff
changeset
|
247 static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta) |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
248 { |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
249 int d; |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
250 for( d = 0; d < 8; d++ ) { |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
251 const int p0 = pix[-1*xstride]; |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
252 const int p1 = pix[-2*xstride]; |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
253 const int q0 = pix[0]; |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
254 const int q1 = pix[1*xstride]; |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
255 |
4001 | 256 if( FFABS( p0 - q0 ) < alpha && |
257 FFABS( p1 - p0 ) < beta && | |
258 FFABS( q1 - q0 ) < beta ) { | |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
259 |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
260 pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
261 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
262 } |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
263 pix += ystride; |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
264 } |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
265 } |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
266 static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta) |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
267 { |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
268 h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta); |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
269 } |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
270 static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta) |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
271 { |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
272 h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta); |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
273 } |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
274 |
11499 | 275 void ff_h264dsp_init(H264DSPContext *c) |
296 | 276 { |
11499 | 277 c->h264_idct_add= ff_h264_idct_add_c; |
278 c->h264_idct8_add= ff_h264_idct8_add_c; | |
279 c->h264_idct_dc_add= ff_h264_idct_dc_add_c; | |
280 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; | |
281 c->h264_idct_add16 = ff_h264_idct_add16_c; | |
282 c->h264_idct8_add4 = ff_h264_idct8_add4_c; | |
283 c->h264_idct_add8 = ff_h264_idct_add8_c; | |
284 c->h264_idct_add16intra= ff_h264_idct_add16intra_c; | |
857 | 285 |
2415 | 286 c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; |
287 c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c; | |
288 c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c; | |
289 c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c; | |
290 c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c; | |
291 c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c; | |
292 c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c; | |
293 c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c; | |
294 c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c; | |
295 c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c; | |
296 c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c; | |
297 c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c; | |
298 c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c; | |
299 c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c; | |
300 c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c; | |
301 c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c; | |
302 c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c; | |
303 c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c; | |
304 c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; | |
305 c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; | |
306 | |
2633 | 307 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; |
308 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; | |
8395
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
309 c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
310 c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c; |
2633 | 311 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; |
312 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; | |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
313 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c; |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
314 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c; |
3645
47821be55b6c
mmx implementation of deblocking strength decision.
lorenm
parents:
3568
diff
changeset
|
315 c->h264_loop_filter_strength= NULL; |
2967 | 316 |
11499 | 317 if (ARCH_ARM) ff_h264dsp_init_arm(c); |
11506 | 318 if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c); |
11499 | 319 if (HAVE_MMX) ff_h264dsp_init_x86(c); |
0 | 320 } |