Mercurial > libavcodec.hg
annotate vc1dsp.c @ 9859:7a116de63777 libavcodec
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
Includes mmx2 asm for the various functions.
Note that the actual idct still does not have an x86 SIMD implementation.
For wmv3 files using regular idct, the decoder just falls back to simple_idct,
since simple_idct_dc doesn't exist (yet).
author | darkshikari |
---|---|
date | Tue, 16 Jun 2009 09:00:55 +0000 |
parents | 3970fe47fea3 |
children | bf309c7ce615 |
rev | line source |
---|---|
3526 | 1 /* |
2 * VC-1 and WMV3 decoder - DSP functions | |
3 * Copyright (c) 2006 Konstantin Shishkov | |
4 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
3526 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
3526 | 11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
3526 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3526 | 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 */ | |
21 | |
22 /** | |
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
8686
diff
changeset
|
23 * @file libavcodec/vc1dsp.c |
3526 | 24 * VC-1 and WMV3 decoder |
25 * | |
26 */ | |
27 | |
28 #include "dsputil.h" | |
29 | |
30 | |
/**
 * Apply overlap transform to horizontal edge.
 *
 * Handles 8 consecutive columns starting at src. In every column the four
 * samples src[-2*stride], src[-stride], src[0] and src[stride] are read and
 * rewritten in place.
 *
 * @param src    pointer to the first sample past the edge (two samples before
 *               it and one after it are accessed as well)
 * @param stride offset between the samples of one column
 */
static void vc1_v_overlap_c(uint8_t* src, int stride)
{
    int col;

    for (col = 0; col < 8; col++) {
        uint8_t *px = src + col;
        /* rounding term alternates 1, 0, 1, 0, ... starting with 1 */
        const int rnd = !(col & 1);
        const int a = px[-2 * stride];
        const int b = px[-stride];
        const int c = px[0];
        const int d = px[stride];
        const int outer_delta = (a - d + 3 + rnd) >> 3;
        const int inner_delta = (a - d + b - c + 4 - rnd) >> 3;

        /* only the two samples next to the edge are explicitly clipped */
        px[-2 * stride] = a - outer_delta;
        px[-stride]     = av_clip_uint8(b - inner_delta);
        px[0]           = av_clip_uint8(c + inner_delta);
        px[stride]      = d + outer_delta;
    }
}
55 | |
/**
 * Apply overlap transform to vertical edge.
 *
 * Handles 8 consecutive rows starting at src. In every row the four samples
 * src[-2], src[-1], src[0] and src[1] are read and rewritten in place.
 *
 * @param src    pointer to the first sample past the edge (two samples before
 *               it and one after it are accessed as well)
 * @param stride offset between successive rows
 */
static void vc1_h_overlap_c(uint8_t* src, int stride)
{
    int row;

    for (row = 0; row < 8; row++, src += stride) {
        /* rounding term alternates 1, 0, 1, 0, ... starting with 1 */
        const int rnd = !(row & 1);
        const int a = src[-2];
        const int b = src[-1];
        const int c = src[0];
        const int d = src[1];
        const int outer_delta = (a - d + 3 + rnd) >> 3;
        const int inner_delta = (a - d + b - c + 4 - rnd) >> 3;

        /* only the two samples next to the edge are explicitly clipped */
        src[-2] = a - outer_delta;
        src[-1] = av_clip_uint8(b - inner_delta);
        src[0]  = av_clip_uint8(c + inner_delta);
        src[1]  = d + outer_delta;
    }
}
80 | |
9442 | 81 /** |
82 * VC-1 in-loop deblocking filter for one line | |
83 * @param src source block type | |
84 * @param stride block stride | |
85 * @param pq block quantizer | |
86 * @return whether other 3 pairs should be filtered or not | |
87 * @see 8.6 | |
88 */ | |
89 static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){ | |
90 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |
91 | |
92 int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3; | |
93 int a0_sign = a0 >> 31; /* Store sign */ | |
94 a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */ | |
95 if(a0 < pq){ | |
96 int a1 = FFABS((2*(src[-4*stride] - src[-1*stride]) - 5*(src[-3*stride] - src[-2*stride]) + 4) >> 3); | |
97 int a2 = FFABS((2*(src[ 0*stride] - src[ 3*stride]) - 5*(src[ 1*stride] - src[ 2*stride]) + 4) >> 3); | |
98 if(a1 < a0 || a2 < a0){ | |
99 int clip = src[-1*stride] - src[ 0*stride]; | |
100 int clip_sign = clip >> 31; | |
101 clip = ((clip ^ clip_sign) - clip_sign)>>1; | |
102 if(clip){ | |
103 int a3 = FFMIN(a1, a2); | |
104 int d = 5 * (a3 - a0); | |
105 int d_sign = (d >> 31); | |
106 d = ((d ^ d_sign) - d_sign) >> 3; | |
107 d_sign ^= a0_sign; | |
108 | |
109 if( d_sign ^ clip_sign ) | |
110 d = 0; | |
111 else{ | |
112 d = FFMIN(d, clip); | |
113 d = (d ^ d_sign) - d_sign; /* Restore sign */ | |
114 src[-1*stride] = cm[src[-1*stride] - d]; | |
115 src[ 0*stride] = cm[src[ 0*stride] + d]; | |
116 } | |
117 return 1; | |
118 } | |
119 } | |
120 } | |
121 return 0; | |
122 } | |
123 | |
/**
 * VC-1 in-loop deblocking filter
 *
 * Lines are processed in groups of 4. The third line of each group is
 * filtered first; the remaining three lines of the group are filtered only
 * if vc1_filter_line() returned nonzero for it.
 *
 * @param src    pointer to the first sample past the edge on the first line
 * @param step   offset between successive lines along the edge
 * @param stride offset between successive samples across the edge (handed to
 *               vc1_filter_line() unchanged)
 * @param len    edge length to filter, processed 4 lines at a time
 * @param pq     block quantizer
 * @see 8.6
 */
static inline void vc1_loop_filter(uint8_t* src, int step, int stride, int len, int pq)
{
    int done;

    for (done = 0; done < len; done += 4, src += 4 * step) {
        /* line 2 of the group decides for lines 0, 1 and 3 */
        if (!vc1_filter_line(src + 2*step, stride, pq))
            continue;

        vc1_filter_line(src + 0*step, stride, pq);
        vc1_filter_line(src + 1*step, stride, pq);
        vc1_filter_line(src + 3*step, stride, pq);
    }
}
3526 | 148 |
9443
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
/**
 * Loop-filter 4 lines: successive lines are adjacent bytes and the samples of
 * one line are stride apart.
 *
 * @param src    pointer to the first sample past the edge
 * @param stride offset between successive samples across the edge
 * @param pq     block quantizer
 */
static void vc1_v_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    const int line_step = 1;
    const int edge_len  = 4;

    vc1_loop_filter(src, line_step, stride, edge_len, pq);
}
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
153 |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
/**
 * Loop-filter 4 lines: successive lines are stride apart and the samples of
 * one line are adjacent bytes.
 *
 * @param src    pointer to the first sample past the edge
 * @param stride offset between successive lines along the edge
 * @param pq     block quantizer
 */
static void vc1_h_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    const int sample_step = 1;
    const int edge_len    = 4;

    vc1_loop_filter(src, stride, sample_step, edge_len, pq);
}
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
158 |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
/**
 * Loop-filter 8 lines: successive lines are adjacent bytes and the samples of
 * one line are stride apart.
 *
 * @param src    pointer to the first sample past the edge
 * @param stride offset between successive samples across the edge
 * @param pq     block quantizer
 */
static void vc1_v_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    const int line_step = 1;
    const int edge_len  = 8;

    vc1_loop_filter(src, line_step, stride, edge_len, pq);
}
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
163 |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
/**
 * Loop-filter 8 lines: successive lines are stride apart and the samples of
 * one line are adjacent bytes.
 *
 * @param src    pointer to the first sample past the edge
 * @param stride offset between successive lines along the edge
 * @param pq     block quantizer
 */
static void vc1_h_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    const int sample_step = 1;
    const int edge_len    = 8;

    vc1_loop_filter(src, stride, sample_step, edge_len, pq);
}
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
168 |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
/**
 * Loop-filter 16 lines: successive lines are adjacent bytes and the samples
 * of one line are stride apart.
 *
 * @param src    pointer to the first sample past the edge
 * @param stride offset between successive samples across the edge
 * @param pq     block quantizer
 */
static void vc1_v_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    const int line_step = 1;
    const int edge_len  = 16;

    vc1_loop_filter(src, line_step, stride, edge_len, pq);
}
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
173 |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
/**
 * Loop-filter 16 lines: successive lines are stride apart and the samples of
 * one line are adjacent bytes.
 *
 * @param src    pointer to the first sample past the edge
 * @param stride offset between successive lines along the edge
 * @param pq     block quantizer
 */
static void vc1_h_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    const int sample_step = 1;
    const int edge_len    = 16;

    vc1_loop_filter(src, stride, sample_step, edge_len, pq);
}
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
178 |
3526 | 179 /** Do inverse transform on 8x8 block |
180 */ | |
9859
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
181 static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block) |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
182 { |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
183 int i; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
184 int dc = block[0]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
185 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
186 dc = (3 * dc + 1) >> 1; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
187 dc = (3 * dc + 16) >> 5; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
188 for(i = 0; i < 8; i++){ |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
189 dest[0] = cm[dest[0]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
190 dest[1] = cm[dest[1]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
191 dest[2] = cm[dest[2]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
192 dest[3] = cm[dest[3]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
193 dest[4] = cm[dest[4]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
194 dest[5] = cm[dest[5]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
195 dest[6] = cm[dest[6]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
196 dest[7] = cm[dest[7]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
197 dest += linesize; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
198 } |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
199 } |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
200 |
3526 | 201 static void vc1_inv_trans_8x8_c(DCTELEM block[64]) |
202 { | |
203 int i; | |
204 register int t1,t2,t3,t4,t5,t6,t7,t8; | |
205 DCTELEM *src, *dst; | |
206 | |
207 src = block; | |
208 dst = block; | |
209 for(i = 0; i < 8; i++){ | |
6157 | 210 t1 = 12 * (src[0] + src[4]) + 4; |
211 t2 = 12 * (src[0] - src[4]) + 4; | |
3526 | 212 t3 = 16 * src[2] + 6 * src[6]; |
213 t4 = 6 * src[2] - 16 * src[6]; | |
214 | |
215 t5 = t1 + t3; | |
216 t6 = t2 + t4; | |
217 t7 = t2 - t4; | |
218 t8 = t1 - t3; | |
219 | |
220 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; | |
221 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; | |
222 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; | |
223 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; | |
224 | |
6157 | 225 dst[0] = (t5 + t1) >> 3; |
226 dst[1] = (t6 + t2) >> 3; | |
227 dst[2] = (t7 + t3) >> 3; | |
228 dst[3] = (t8 + t4) >> 3; | |
229 dst[4] = (t8 - t4) >> 3; | |
230 dst[5] = (t7 - t3) >> 3; | |
231 dst[6] = (t6 - t2) >> 3; | |
232 dst[7] = (t5 - t1) >> 3; | |
3526 | 233 |
234 src += 8; | |
235 dst += 8; | |
236 } | |
237 | |
238 src = block; | |
239 dst = block; | |
240 for(i = 0; i < 8; i++){ | |
6157 | 241 t1 = 12 * (src[ 0] + src[32]) + 64; |
242 t2 = 12 * (src[ 0] - src[32]) + 64; | |
3526 | 243 t3 = 16 * src[16] + 6 * src[48]; |
244 t4 = 6 * src[16] - 16 * src[48]; | |
245 | |
246 t5 = t1 + t3; | |
247 t6 = t2 + t4; | |
248 t7 = t2 - t4; | |
249 t8 = t1 - t3; | |
250 | |
251 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; | |
252 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; | |
253 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | |
254 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | |
255 | |
6157 | 256 dst[ 0] = (t5 + t1) >> 7; |
257 dst[ 8] = (t6 + t2) >> 7; | |
258 dst[16] = (t7 + t3) >> 7; | |
259 dst[24] = (t8 + t4) >> 7; | |
260 dst[32] = (t8 - t4 + 1) >> 7; | |
261 dst[40] = (t7 - t3 + 1) >> 7; | |
262 dst[48] = (t6 - t2 + 1) >> 7; | |
263 dst[56] = (t5 - t1 + 1) >> 7; | |
3526 | 264 |
265 src++; | |
266 dst++; | |
267 } | |
268 } | |
269 | |
270 /** Do inverse transform on 8x4 part of block | |
271 */ | |
9859
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
272 static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
273 { |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
274 int i; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
275 int dc = block[0]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
276 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
277 dc = ( 3 * dc + 1) >> 1; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
278 dc = (17 * dc + 64) >> 7; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
279 for(i = 0; i < 4; i++){ |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
280 dest[0] = cm[dest[0]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
281 dest[1] = cm[dest[1]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
282 dest[2] = cm[dest[2]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
283 dest[3] = cm[dest[3]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
284 dest[4] = cm[dest[4]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
285 dest[5] = cm[dest[5]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
286 dest[6] = cm[dest[6]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
287 dest[7] = cm[dest[7]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
288 dest += linesize; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
289 } |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
290 } |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
291 |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
292 static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block) |
3526 | 293 { |
294 int i; | |
295 register int t1,t2,t3,t4,t5,t6,t7,t8; | |
296 DCTELEM *src, *dst; | |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
297 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
3526 | 298 |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
299 src = block; |
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
300 dst = block; |
3526 | 301 for(i = 0; i < 4; i++){ |
6157 | 302 t1 = 12 * (src[0] + src[4]) + 4; |
303 t2 = 12 * (src[0] - src[4]) + 4; | |
3526 | 304 t3 = 16 * src[2] + 6 * src[6]; |
305 t4 = 6 * src[2] - 16 * src[6]; | |
306 | |
307 t5 = t1 + t3; | |
308 t6 = t2 + t4; | |
309 t7 = t2 - t4; | |
310 t8 = t1 - t3; | |
311 | |
312 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; | |
313 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; | |
314 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; | |
315 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; | |
316 | |
6157 | 317 dst[0] = (t5 + t1) >> 3; |
318 dst[1] = (t6 + t2) >> 3; | |
319 dst[2] = (t7 + t3) >> 3; | |
320 dst[3] = (t8 + t4) >> 3; | |
321 dst[4] = (t8 - t4) >> 3; | |
322 dst[5] = (t7 - t3) >> 3; | |
323 dst[6] = (t6 - t2) >> 3; | |
324 dst[7] = (t5 - t1) >> 3; | |
3526 | 325 |
326 src += 8; | |
327 dst += 8; | |
328 } | |
329 | |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
330 src = block; |
3526 | 331 for(i = 0; i < 8; i++){ |
6157 | 332 t1 = 17 * (src[ 0] + src[16]) + 64; |
333 t2 = 17 * (src[ 0] - src[16]) + 64; | |
6158 | 334 t3 = 22 * src[ 8] + 10 * src[24]; |
335 t4 = 22 * src[24] - 10 * src[ 8]; | |
3526 | 336 |
6158 | 337 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)]; |
338 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)]; | |
339 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)]; | |
340 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)]; | |
3526 | 341 |
342 src ++; | |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
343 dest++; |
3526 | 344 } |
345 } | |
346 | |
347 /** Do inverse transform on 4x8 parts of block | |
348 */ | |
9859
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
349 static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block) |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
350 { |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
351 int i; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
352 int dc = block[0]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
353 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
354 dc = (17 * dc + 4) >> 3; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
355 dc = (12 * dc + 64) >> 7; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
356 for(i = 0; i < 8; i++){ |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
357 dest[0] = cm[dest[0]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
358 dest[1] = cm[dest[1]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
359 dest[2] = cm[dest[2]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
360 dest[3] = cm[dest[3]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
361 dest += linesize; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
362 } |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
363 } |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
364 |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
365 static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block) |
3526 | 366 { |
367 int i; | |
368 register int t1,t2,t3,t4,t5,t6,t7,t8; | |
369 DCTELEM *src, *dst; | |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
370 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
3526 | 371 |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
372 src = block; |
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
373 dst = block; |
3526 | 374 for(i = 0; i < 8; i++){ |
6157 | 375 t1 = 17 * (src[0] + src[2]) + 4; |
376 t2 = 17 * (src[0] - src[2]) + 4; | |
6158 | 377 t3 = 22 * src[1] + 10 * src[3]; |
378 t4 = 22 * src[3] - 10 * src[1]; | |
3526 | 379 |
6158 | 380 dst[0] = (t1 + t3) >> 3; |
381 dst[1] = (t2 - t4) >> 3; | |
382 dst[2] = (t2 + t4) >> 3; | |
383 dst[3] = (t1 - t3) >> 3; | |
3526 | 384 |
385 src += 8; | |
386 dst += 8; | |
387 } | |
388 | |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
389 src = block; |
3526 | 390 for(i = 0; i < 4; i++){ |
6157 | 391 t1 = 12 * (src[ 0] + src[32]) + 64; |
392 t2 = 12 * (src[ 0] - src[32]) + 64; | |
3526 | 393 t3 = 16 * src[16] + 6 * src[48]; |
394 t4 = 6 * src[16] - 16 * src[48]; | |
395 | |
396 t5 = t1 + t3; | |
397 t6 = t2 + t4; | |
398 t7 = t2 - t4; | |
399 t8 = t1 - t3; | |
400 | |
401 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; | |
402 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; | |
403 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | |
404 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | |
405 | |
6157 | 406 dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1) >> 7)]; |
407 dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2) >> 7)]; | |
408 dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3) >> 7)]; | |
409 dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4) >> 7)]; | |
410 dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 1) >> 7)]; | |
411 dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 1) >> 7)]; | |
412 dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 1) >> 7)]; | |
413 dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 1) >> 7)]; | |
3526 | 414 |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
415 src ++; |
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
416 dest++; |
3526 | 417 } |
418 } | |
419 | |
420 /** Do inverse transform on 4x4 part of block | |
421 */ | |
9859
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
422 static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
423 { |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
424 int i; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
425 int dc = block[0]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
426 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
427 dc = (17 * dc + 4) >> 3; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
428 dc = (17 * dc + 64) >> 7; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
429 for(i = 0; i < 4; i++){ |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
430 dest[0] = cm[dest[0]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
431 dest[1] = cm[dest[1]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
432 dest[2] = cm[dest[2]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
433 dest[3] = cm[dest[3]+dc]; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
434 dest += linesize; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
435 } |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
436 } |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
437 |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
438 static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block) |
3526 | 439 { |
440 int i; | |
6158 | 441 register int t1,t2,t3,t4; |
3526 | 442 DCTELEM *src, *dst; |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
443 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
3526 | 444 |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
445 src = block; |
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
446 dst = block; |
3526 | 447 for(i = 0; i < 4; i++){ |
6157 | 448 t1 = 17 * (src[0] + src[2]) + 4; |
449 t2 = 17 * (src[0] - src[2]) + 4; | |
6158 | 450 t3 = 22 * src[1] + 10 * src[3]; |
451 t4 = 22 * src[3] - 10 * src[1]; | |
3526 | 452 |
6158 | 453 dst[0] = (t1 + t3) >> 3; |
454 dst[1] = (t2 - t4) >> 3; | |
455 dst[2] = (t2 + t4) >> 3; | |
456 dst[3] = (t1 - t3) >> 3; | |
3526 | 457 |
458 src += 8; | |
459 dst += 8; | |
460 } | |
461 | |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
462 src = block; |
3526 | 463 for(i = 0; i < 4; i++){ |
6157 | 464 t1 = 17 * (src[ 0] + src[16]) + 64; |
465 t2 = 17 * (src[ 0] - src[16]) + 64; | |
6158 | 466 t3 = 22 * src[ 8] + 10 * src[24]; |
467 t4 = 22 * src[24] - 10 * src[ 8]; | |
3526 | 468 |
6158 | 469 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)]; |
470 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)]; | |
471 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)]; | |
472 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)]; | |
3526 | 473 |
474 src ++; | |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5416
diff
changeset
|
475 dest++; |
3526 | 476 } |
477 } | |
478 | |
479 /* motion compensation functions */ | |
5416 | 480 /** Filter in case of 2 filters */ |
481 #define VC1_MSPEL_FILTER_16B(DIR, TYPE) \ | |
482 static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, int stride, int mode) \ | |
483 { \ | |
484 switch(mode){ \ | |
485 case 0: /* no shift - should not occur */ \ | |
486 return 0; \ | |
487 case 1: /* 1/4 shift */ \ | |
488 return -4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2]; \ | |
489 case 2: /* 1/2 shift */ \ | |
490 return -src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2]; \ | |
491 case 3: /* 3/4 shift */ \ | |
492 return -3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2]; \ | |
493 } \ | |
494 return 0; /* should not occur */ \ | |
495 } | |
496 | |
497 VC1_MSPEL_FILTER_16B(ver, uint8_t); | |
498 VC1_MSPEL_FILTER_16B(hor, int16_t); | |
499 | |
3526 | 500 |
501 /** Filter used to interpolate fractional pel values | |
502 */ | |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4239
diff
changeset
|
503 static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r) |
3526 | 504 { |
505 switch(mode){ | |
506 case 0: //no shift | |
507 return src[0]; | |
508 case 1: // 1/4 shift | |
509 return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6; | |
510 case 2: // 1/2 shift | |
511 return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4; | |
512 case 3: // 3/4 shift | |
513 return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6; | |
514 } | |
515 return 0; //should not occur | |
516 } | |
517 | |
/**
 * Generate <OPNAME>vc1_mspel_mc(): motion compensation of an 8x8 block with
 * bicubic interpolation.
 *
 * @param OP      store operation, used as OP(destination_pixel, value)
 * @param OPNAME  prefix of the generated function name
 *
 * In the generated function hmode and vmode (0..3) select the horizontal and
 * vertical filter mode, rnd feeds the rounding terms, and dst and src are
 * both advanced by stride per line.
 */
#define VC1_MSPEL_MC(OP, OPNAME)\
static void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\
{\
    int x, y;\
\
    if (!vmode) { /* horizontal filter only; with hmode 0 the filter just returns src[0] */\
        for(y = 0; y < 8; y++) {\
            for(x = 0; x < 8; x++)\
                OP(dst[x], vc1_mspel_filter(src + x, 1, hmode, rnd));\
            dst += stride;\
            src += stride;\
        }\
        return;\
    }\
\
    if (!hmode) { /* vertical filter only, 8 lines written straight to dst */\
        const int r = 1-rnd;\
\
        for(y = 0; y < 8; y++) {\
            for(x = 0; x < 8; x++)\
                OP(dst[x], vc1_mspel_filter(src + x, stride, vmode, r));\
            src += stride;\
            dst += stride;\
        }\
        return;\
    }\
\
    /* both filters: vertical pass into a 16-bit buffer, then horizontal pass */\
    {\
        static const int shift_value[] = { 0, 5, 1, 5 };\
        const int shift = (shift_value[hmode]+shift_value[vmode])>>1;\
        const int ver_r = (1<<(shift-1)) + rnd-1;\
        const int hor_r = 64-rnd;\
        int16_t inter[8*11];\
\
        /* 11 columns per line: the horizontal 4-tap filter reads one sample */\
        /* to the left and two to the right of each of the 8 output columns  */\
        for(y = 0; y < 8; y++)\
            for(x = 0; x < 11; x++)\
                inter[y*11 + x] = (vc1_mspel_ver_filter_16bits(src + y*stride + x - 1, stride, vmode)+ver_r)>>shift;\
\
        for(y = 0; y < 8; y++) {\
            for(x = 0; x < 8; x++)\
                OP(dst[x], (vc1_mspel_hor_filter_16bits(inter + y*11 + x + 1, 1, hmode)+hor_r)>>7);\
            dst += stride;\
        }\
    }\
}
3526 | 575 |
9437 | 576 #define op_put(a, b) a = av_clip_uint8(b) |
577 #define op_avg(a, b) a = (a + av_clip_uint8(b) + 1) >> 1 | |
5416 | 578 |
9437 | 579 VC1_MSPEL_MC(op_put, put_) |
580 VC1_MSPEL_MC(op_avg, avg_) | |
3526 | 581 |
/* Pixel functions: these are thin entry points into put_/avg_vc1_mspel_mc()
 * with the horizontal and vertical mode fixed per function. */

/* The mc00 entries are only declared in this file; the original note places
 * the definition in dsputil.c (written for the put variant - presumably the
 * avg variant lives there too, TODO confirm). */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);

/**
 * Define put_vc1_mspel_mc<HMODE><VMODE>_c() and avg_vc1_mspel_mc<HMODE><VMODE>_c(),
 * which forward to put_vc1_mspel_mc() / avg_vc1_mspel_mc() with
 * hmode = HMODE and vmode = VMODE.
 */
#define PUT_VC1_MSPEL(HMODE, VMODE)\
static void put_vc1_mspel_mc ## HMODE ## VMODE ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
    put_vc1_mspel_mc(dst, src, stride, HMODE, VMODE, rnd); \
}\
static void avg_vc1_mspel_mc ## HMODE ## VMODE ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
    avg_vc1_mspel_mc(dst, src, stride, HMODE, VMODE, rnd); \
}

/* vmode 0 (hmode 0 is the separately declared mc00) */
PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

/* vmode 1 */
PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

/* vmode 2 */
PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

/* vmode 3 */
PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
3526 | 614 |
615 void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) { | |
616 dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c; | |
617 dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c; | |
618 dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c; | |
619 dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c; | |
9859
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
620 dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_c; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
621 dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_c; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
622 dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_c; |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9443
diff
changeset
|
623 dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c; |
3526 | 624 dsp->vc1_h_overlap = vc1_h_overlap_c; |
625 dsp->vc1_v_overlap = vc1_v_overlap_c; | |
9443
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
626 dsp->vc1_v_loop_filter4 = vc1_v_loop_filter4_c; |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
627 dsp->vc1_h_loop_filter4 = vc1_h_loop_filter4_c; |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
628 dsp->vc1_v_loop_filter8 = vc1_v_loop_filter8_c; |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
629 dsp->vc1_h_loop_filter8 = vc1_h_loop_filter8_c; |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
630 dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_c; |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
631 dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_c; |
3526 | 632 |
633 dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c; | |
5251 | 634 dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_c; |
635 dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_c; | |
636 dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_c; | |
637 dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_c; | |
638 dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_c; | |
639 dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_c; | |
640 dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_c; | |
641 dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_c; | |
642 dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_c; | |
643 dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_c; | |
644 dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_c; | |
645 dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_c; | |
646 dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c; | |
647 dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c; | |
648 dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c; | |
9437 | 649 |
650 dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_c; | |
651 dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_c; | |
652 dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_c; | |
653 dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_c; | |
654 dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_c; | |
655 dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_c; | |
656 dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_c; | |
657 dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_c; | |
658 dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_c; | |
659 dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_c; | |
660 dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_c; | |
661 dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_c; | |
662 dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_c; | |
663 dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c; | |
664 dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c; | |
665 dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c; | |
3526 | 666 } |