Mercurial > libavcodec.hg
annotate vc1dsp.c @ 4066:a3f06c7a0bff libavcodec
16-bit grayscale support
author | kostya |
---|---|
date | Tue, 24 Oct 2006 04:57:43 +0000 |
parents | c8c591fe26f8 |
children | 6f6fe05712e4 |
rev | line source |
---|---|
3526 | 1 /* |
2 * VC-1 and WMV3 decoder - DSP functions | |
3 * Copyright (c) 2006 Konstantin Shishkov | |
4 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
3526 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
3526 | 11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
3526 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3665
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3526 | 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 * | |
21 */ | |
22 | |
23 /** | |
24 * @file vc1dsp.c | |
25 * VC-1 and WMV3 decoder | |
26 * | |
27 */ | |
28 | |
29 #include "dsputil.h" | |
30 | |
31 | |
/**
 * Apply the overlap transform in the vertical direction.
 *
 * For each of 8 consecutive columns, the four vertically adjacent pixels at
 * rows -2, -1, 0 and +1 (relative to src) are read and then overwritten with
 * weighted sums of one another. Each sum is biased, shifted right by 3 and
 * passed through clip_uint8() before being stored.
 *
 * @param src    pointer to the first pixel of row 0; rows -2 .. +1 are accessed
 * @param stride distance between vertically adjacent pixels
 * @param rnd    rounding control: subtracted from the bias of rows -2 and 0,
 *               added to the bias of rows -1 and +1
 */
static void vc1_v_overlap_c(uint8_t* src, int stride, int rnd)
{
    uint8_t *const up2   = src - 2 * stride;
    uint8_t *const up1   = src - stride;
    uint8_t *const down1 = src + stride;
    int x;

    for (x = 0; x < 8; x++) {
        /* fetch all four taps first: every output depends on the old values */
        const int p0 = up2[x];
        const int p1 = up1[x];
        const int p2 = src[x];
        const int p3 = down1[x];

        up2[x]   = clip_uint8((7*p0 + p3 + 4 - rnd) >> 3);
        up1[x]   = clip_uint8((-p0 + 7*p1 + p2 + p3 + 3 + rnd) >> 3);
        src[x]   = clip_uint8((p0 + p1 + 7*p2 - p3 + 4 - rnd) >> 3);
        down1[x] = clip_uint8((p0 + 7*p3 + 3 + rnd) >> 3);
    }
}
51 | |
/**
 * Apply the overlap transform in the horizontal direction.
 *
 * For each of 8 consecutive rows, the four horizontally adjacent pixels at
 * columns -2, -1, 0 and +1 (relative to src) are read and then overwritten
 * with weighted sums of one another. Each sum is biased, shifted right by 3
 * and passed through clip_uint8() before being stored.
 *
 * @param src    pointer to the pixel at column 0 of the first row
 * @param stride distance between the starts of consecutive rows
 * @param rnd    rounding control: subtracted from the bias of columns -2 and 0,
 *               added to the bias of columns -1 and +1
 */
static void vc1_h_overlap_c(uint8_t* src, int stride, int rnd)
{
    int y;

    for (y = 0; y < 8; y++, src += stride) {
        /* fetch all four taps first: every output depends on the old values */
        const int left2  = src[-2];
        const int left1  = src[-1];
        const int right0 = src[0];
        const int right1 = src[1];

        src[-2] = clip_uint8((7*left2 + right1 + 4 - rnd) >> 3);
        src[-1] = clip_uint8((-left2 + 7*left1 + right0 + right1 + 3 + rnd) >> 3);
        src[0]  = clip_uint8((left2 + left1 + 7*right0 - right1 + 4 - rnd) >> 3);
        src[1]  = clip_uint8((left2 + 7*right1 + 3 + rnd) >> 3);
    }
}
71 | |
72 | |
73 /** Do inverse transform on 8x8 block | |
74 */ | |
75 static void vc1_inv_trans_8x8_c(DCTELEM block[64]) | |
76 { | |
77 int i; | |
78 register int t1,t2,t3,t4,t5,t6,t7,t8; | |
79 DCTELEM *src, *dst; | |
80 | |
81 src = block; | |
82 dst = block; | |
83 for(i = 0; i < 8; i++){ | |
84 t1 = 12 * (src[0] + src[4]); | |
85 t2 = 12 * (src[0] - src[4]); | |
86 t3 = 16 * src[2] + 6 * src[6]; | |
87 t4 = 6 * src[2] - 16 * src[6]; | |
88 | |
89 t5 = t1 + t3; | |
90 t6 = t2 + t4; | |
91 t7 = t2 - t4; | |
92 t8 = t1 - t3; | |
93 | |
94 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; | |
95 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; | |
96 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; | |
97 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; | |
98 | |
99 dst[0] = (t5 + t1 + 4) >> 3; | |
100 dst[1] = (t6 + t2 + 4) >> 3; | |
101 dst[2] = (t7 + t3 + 4) >> 3; | |
102 dst[3] = (t8 + t4 + 4) >> 3; | |
103 dst[4] = (t8 - t4 + 4) >> 3; | |
104 dst[5] = (t7 - t3 + 4) >> 3; | |
105 dst[6] = (t6 - t2 + 4) >> 3; | |
106 dst[7] = (t5 - t1 + 4) >> 3; | |
107 | |
108 src += 8; | |
109 dst += 8; | |
110 } | |
111 | |
112 src = block; | |
113 dst = block; | |
114 for(i = 0; i < 8; i++){ | |
115 t1 = 12 * (src[ 0] + src[32]); | |
116 t2 = 12 * (src[ 0] - src[32]); | |
117 t3 = 16 * src[16] + 6 * src[48]; | |
118 t4 = 6 * src[16] - 16 * src[48]; | |
119 | |
120 t5 = t1 + t3; | |
121 t6 = t2 + t4; | |
122 t7 = t2 - t4; | |
123 t8 = t1 - t3; | |
124 | |
125 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; | |
126 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; | |
127 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | |
128 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | |
129 | |
130 dst[ 0] = (t5 + t1 + 64) >> 7; | |
131 dst[ 8] = (t6 + t2 + 64) >> 7; | |
132 dst[16] = (t7 + t3 + 64) >> 7; | |
133 dst[24] = (t8 + t4 + 64) >> 7; | |
134 dst[32] = (t8 - t4 + 64 + 1) >> 7; | |
135 dst[40] = (t7 - t3 + 64 + 1) >> 7; | |
136 dst[48] = (t6 - t2 + 64 + 1) >> 7; | |
137 dst[56] = (t5 - t1 + 64 + 1) >> 7; | |
138 | |
139 src++; | |
140 dst++; | |
141 } | |
142 } | |
143 | |
144 /** Do inverse transform on 8x4 part of block | |
145 */ | |
146 static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n) | |
147 { | |
148 int i; | |
149 register int t1,t2,t3,t4,t5,t6,t7,t8; | |
150 DCTELEM *src, *dst; | |
151 int off; | |
152 | |
153 off = n * 32; | |
154 src = block + off; | |
155 dst = block + off; | |
156 for(i = 0; i < 4; i++){ | |
157 t1 = 12 * (src[0] + src[4]); | |
158 t2 = 12 * (src[0] - src[4]); | |
159 t3 = 16 * src[2] + 6 * src[6]; | |
160 t4 = 6 * src[2] - 16 * src[6]; | |
161 | |
162 t5 = t1 + t3; | |
163 t6 = t2 + t4; | |
164 t7 = t2 - t4; | |
165 t8 = t1 - t3; | |
166 | |
167 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; | |
168 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; | |
169 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; | |
170 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; | |
171 | |
172 dst[0] = (t5 + t1 + 4) >> 3; | |
173 dst[1] = (t6 + t2 + 4) >> 3; | |
174 dst[2] = (t7 + t3 + 4) >> 3; | |
175 dst[3] = (t8 + t4 + 4) >> 3; | |
176 dst[4] = (t8 - t4 + 4) >> 3; | |
177 dst[5] = (t7 - t3 + 4) >> 3; | |
178 dst[6] = (t6 - t2 + 4) >> 3; | |
179 dst[7] = (t5 - t1 + 4) >> 3; | |
180 | |
181 src += 8; | |
182 dst += 8; | |
183 } | |
184 | |
185 src = block + off; | |
186 dst = block + off; | |
187 for(i = 0; i < 8; i++){ | |
188 t1 = 17 * (src[ 0] + src[16]); | |
189 t2 = 17 * (src[ 0] - src[16]); | |
190 t3 = 22 * src[ 8]; | |
191 t4 = 22 * src[24]; | |
192 t5 = 10 * src[ 8]; | |
193 t6 = 10 * src[24]; | |
194 | |
195 dst[ 0] = (t1 + t3 + t6 + 64) >> 7; | |
196 dst[ 8] = (t2 - t4 + t5 + 64) >> 7; | |
197 dst[16] = (t2 + t4 - t5 + 64) >> 7; | |
198 dst[24] = (t1 - t3 - t6 + 64) >> 7; | |
199 | |
200 src ++; | |
201 dst ++; | |
202 } | |
203 } | |
204 | |
205 /** Do inverse transform on 4x8 parts of block | |
206 */ | |
207 static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n) | |
208 { | |
209 int i; | |
210 register int t1,t2,t3,t4,t5,t6,t7,t8; | |
211 DCTELEM *src, *dst; | |
212 int off; | |
213 | |
214 off = n * 4; | |
215 src = block + off; | |
216 dst = block + off; | |
217 for(i = 0; i < 8; i++){ | |
218 t1 = 17 * (src[0] + src[2]); | |
219 t2 = 17 * (src[0] - src[2]); | |
220 t3 = 22 * src[1]; | |
221 t4 = 22 * src[3]; | |
222 t5 = 10 * src[1]; | |
223 t6 = 10 * src[3]; | |
224 | |
225 dst[0] = (t1 + t3 + t6 + 4) >> 3; | |
226 dst[1] = (t2 - t4 + t5 + 4) >> 3; | |
227 dst[2] = (t2 + t4 - t5 + 4) >> 3; | |
228 dst[3] = (t1 - t3 - t6 + 4) >> 3; | |
229 | |
230 src += 8; | |
231 dst += 8; | |
232 } | |
233 | |
234 src = block + off; | |
235 dst = block + off; | |
236 for(i = 0; i < 4; i++){ | |
237 t1 = 12 * (src[ 0] + src[32]); | |
238 t2 = 12 * (src[ 0] - src[32]); | |
239 t3 = 16 * src[16] + 6 * src[48]; | |
240 t4 = 6 * src[16] - 16 * src[48]; | |
241 | |
242 t5 = t1 + t3; | |
243 t6 = t2 + t4; | |
244 t7 = t2 - t4; | |
245 t8 = t1 - t3; | |
246 | |
247 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; | |
248 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; | |
249 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | |
250 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | |
251 | |
252 dst[ 0] = (t5 + t1 + 64) >> 7; | |
253 dst[ 8] = (t6 + t2 + 64) >> 7; | |
254 dst[16] = (t7 + t3 + 64) >> 7; | |
255 dst[24] = (t8 + t4 + 64) >> 7; | |
256 dst[32] = (t8 - t4 + 64 + 1) >> 7; | |
257 dst[40] = (t7 - t3 + 64 + 1) >> 7; | |
258 dst[48] = (t6 - t2 + 64 + 1) >> 7; | |
259 dst[56] = (t5 - t1 + 64 + 1) >> 7; | |
260 | |
261 src++; | |
262 dst++; | |
263 } | |
264 } | |
265 | |
266 /** Do inverse transform on 4x4 part of block | |
267 */ | |
268 static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n) | |
269 { | |
270 int i; | |
271 register int t1,t2,t3,t4,t5,t6; | |
272 DCTELEM *src, *dst; | |
273 int off; | |
274 | |
275 off = (n&1) * 4 + (n&2) * 16; | |
276 src = block + off; | |
277 dst = block + off; | |
278 for(i = 0; i < 4; i++){ | |
279 t1 = 17 * (src[0] + src[2]); | |
280 t2 = 17 * (src[0] - src[2]); | |
281 t3 = 22 * src[1]; | |
282 t4 = 22 * src[3]; | |
283 t5 = 10 * src[1]; | |
284 t6 = 10 * src[3]; | |
285 | |
286 dst[0] = (t1 + t3 + t6 + 4) >> 3; | |
287 dst[1] = (t2 - t4 + t5 + 4) >> 3; | |
288 dst[2] = (t2 + t4 - t5 + 4) >> 3; | |
289 dst[3] = (t1 - t3 - t6 + 4) >> 3; | |
290 | |
291 src += 8; | |
292 dst += 8; | |
293 } | |
294 | |
295 src = block + off; | |
296 dst = block + off; | |
297 for(i = 0; i < 4; i++){ | |
298 t1 = 17 * (src[ 0] + src[16]); | |
299 t2 = 17 * (src[ 0] - src[16]); | |
300 t3 = 22 * src[ 8]; | |
301 t4 = 22 * src[24]; | |
302 t5 = 10 * src[ 8]; | |
303 t6 = 10 * src[24]; | |
304 | |
305 dst[ 0] = (t1 + t3 + t6 + 64) >> 7; | |
306 dst[ 8] = (t2 - t4 + t5 + 64) >> 7; | |
307 dst[16] = (t2 + t4 - t5 + 64) >> 7; | |
308 dst[24] = (t1 - t3 - t6 + 64) >> 7; | |
309 | |
310 src ++; | |
311 dst ++; | |
312 } | |
313 } | |
314 | |
315 /* motion compensation functions */ | |
316 | |
317 /** Filter used to interpolate fractional pel values | |
318 */ | |
3529 | 319 static always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r) |
3526 | 320 { |
321 switch(mode){ | |
322 case 0: //no shift | |
323 return src[0]; | |
324 case 1: // 1/4 shift | |
325 return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6; | |
326 case 2: // 1/2 shift | |
327 return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4; | |
328 case 3: // 3/4 shift | |
329 return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6; | |
330 } | |
331 return 0; //should not occur | |
332 } | |
333 | |
/**
 * Do motion compensation with bicubic interpolation for an 8x8 block.
 *
 * The work is split into two passes over an 8-wide intermediate buffer:
 *  - pass 1 filters along each row (tap distance 1) using the mode in bits
 *    0-1 of 'mode' and 'rnd' as rounding value; it covers 11 source rows,
 *    starting one row above src, because pass 2 needs one row above and two
 *    rows below every output row;
 *  - pass 2 filters down each column of the intermediate buffer (tap
 *    distance 8) using the mode in bits 2-3 of 'mode' and '1 - rnd' as
 *    rounding value, and writes the 8x8 result to dst.
 * The output of both passes goes through clip_uint8().
 *
 * @param dst    destination, 8 rows of 8 pixels, 'stride' apart
 * @param src    source; pixels from one row above to nine rows below src and
 *               from one column left to two columns right of the 8 columns
 *               may be read, depending on the modes
 * @param stride distance between rows, used for both src and dst
 * @param mode   (second-pass mode << 2) | first-pass mode
 * @param rnd    rounding control
 */
static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int mode, int rnd)
{
    uint8_t inter[8*11];
    const int h_mode = mode & 3;
    const int v_mode = (mode >> 2) & 3;
    const int v_rnd  = 1 - rnd;
    const uint8_t *in = src - stride;
    uint8_t *out = inter;
    int x, y;

    /* pass 1: along the rows, into the intermediate buffer */
    for (y = 0; y < 11; y++) {
        for (x = 0; x < 8; x++)
            out[x] = clip_uint8(vc1_mspel_filter(in + x, 1, h_mode, rnd));
        in  += stride;
        out += 8;
    }

    /* pass 2: down the columns; output row y is centred on buffer row y + 1 */
    for (y = 0; y < 8; y++) {
        const uint8_t *mid = inter + 8 * (y + 1);

        for (x = 0; x < 8; x++)
            dst[x] = clip_uint8(vc1_mspel_filter(mid + x, 8, v_mode, v_rnd));
        dst += stride;
    }
}
363 | |
/* pixel functions - these are just thin entry points into vc1_mspel_mc */
365 | |
366 /* this one is defined in dsputil.c */ | |
367 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd); | |
368 | |
/** Entry point to vc1_mspel_mc: first-pass mode 1 (1/4 shift), second-pass mode 0 (no shift). */
static void ff_put_vc1_mspel_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (0 << 2) | 1, rnd);
}
372 | |
/** Entry point to vc1_mspel_mc: first-pass mode 2 (1/2 shift), second-pass mode 0 (no shift). */
static void ff_put_vc1_mspel_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (0 << 2) | 2, rnd);
}
376 | |
/** Entry point to vc1_mspel_mc: first-pass mode 3 (3/4 shift), second-pass mode 0 (no shift). */
static void ff_put_vc1_mspel_mc30_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (0 << 2) | 3, rnd);
}
380 | |
/** Entry point to vc1_mspel_mc: first-pass mode 0 (no shift), second-pass mode 1 (1/4 shift). */
static void ff_put_vc1_mspel_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (1 << 2) | 0, rnd);
}
384 | |
/** Entry point to vc1_mspel_mc: first-pass mode 1 (1/4 shift), second-pass mode 1 (1/4 shift). */
static void ff_put_vc1_mspel_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (1 << 2) | 1, rnd);
}
388 | |
/** Entry point to vc1_mspel_mc: first-pass mode 2 (1/2 shift), second-pass mode 1 (1/4 shift). */
static void ff_put_vc1_mspel_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (1 << 2) | 2, rnd);
}
392 | |
/** Entry point to vc1_mspel_mc: first-pass mode 3 (3/4 shift), second-pass mode 1 (1/4 shift). */
static void ff_put_vc1_mspel_mc31_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (1 << 2) | 3, rnd);
}
396 | |
/** Entry point to vc1_mspel_mc: first-pass mode 0 (no shift), second-pass mode 2 (1/2 shift). */
static void ff_put_vc1_mspel_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (2 << 2) | 0, rnd);
}
400 | |
/** Entry point to vc1_mspel_mc: first-pass mode 1 (1/4 shift), second-pass mode 2 (1/2 shift). */
static void ff_put_vc1_mspel_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (2 << 2) | 1, rnd);
}
404 | |
/** Entry point to vc1_mspel_mc: first-pass mode 2 (1/2 shift), second-pass mode 2 (1/2 shift). */
static void ff_put_vc1_mspel_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (2 << 2) | 2, rnd);
}
408 | |
/** Entry point to vc1_mspel_mc: first-pass mode 3 (3/4 shift), second-pass mode 2 (1/2 shift). */
static void ff_put_vc1_mspel_mc32_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (2 << 2) | 3, rnd);
}
412 | |
/** Entry point to vc1_mspel_mc: first-pass mode 0 (no shift), second-pass mode 3 (3/4 shift). */
static void ff_put_vc1_mspel_mc03_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (3 << 2) | 0, rnd);
}
416 | |
/** Entry point to vc1_mspel_mc: first-pass mode 1 (1/4 shift), second-pass mode 3 (3/4 shift). */
static void ff_put_vc1_mspel_mc13_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (3 << 2) | 1, rnd);
}
420 | |
/** Entry point to vc1_mspel_mc: first-pass mode 2 (1/2 shift), second-pass mode 3 (3/4 shift). */
static void ff_put_vc1_mspel_mc23_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (3 << 2) | 2, rnd);
}
424 | |
/** Entry point to vc1_mspel_mc: first-pass mode 3 (3/4 shift), second-pass mode 3 (3/4 shift). */
static void ff_put_vc1_mspel_mc33_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    vc1_mspel_mc(dst, src, stride, (3 << 2) | 3, rnd);
}
428 | |
429 void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) { | |
430 dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c; | |
431 dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c; | |
432 dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c; | |
433 dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c; | |
434 dsp->vc1_h_overlap = vc1_h_overlap_c; | |
435 dsp->vc1_v_overlap = vc1_v_overlap_c; | |
436 | |
437 dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c; | |
438 dsp->put_vc1_mspel_pixels_tab[ 1] = ff_put_vc1_mspel_mc10_c; | |
439 dsp->put_vc1_mspel_pixels_tab[ 2] = ff_put_vc1_mspel_mc20_c; | |
440 dsp->put_vc1_mspel_pixels_tab[ 3] = ff_put_vc1_mspel_mc30_c; | |
441 dsp->put_vc1_mspel_pixels_tab[ 4] = ff_put_vc1_mspel_mc01_c; | |
442 dsp->put_vc1_mspel_pixels_tab[ 5] = ff_put_vc1_mspel_mc11_c; | |
443 dsp->put_vc1_mspel_pixels_tab[ 6] = ff_put_vc1_mspel_mc21_c; | |
444 dsp->put_vc1_mspel_pixels_tab[ 7] = ff_put_vc1_mspel_mc31_c; | |
445 dsp->put_vc1_mspel_pixels_tab[ 8] = ff_put_vc1_mspel_mc02_c; | |
446 dsp->put_vc1_mspel_pixels_tab[ 9] = ff_put_vc1_mspel_mc12_c; | |
447 dsp->put_vc1_mspel_pixels_tab[10] = ff_put_vc1_mspel_mc22_c; | |
448 dsp->put_vc1_mspel_pixels_tab[11] = ff_put_vc1_mspel_mc32_c; | |
449 dsp->put_vc1_mspel_pixels_tab[12] = ff_put_vc1_mspel_mc03_c; | |
450 dsp->put_vc1_mspel_pixels_tab[13] = ff_put_vc1_mspel_mc13_c; | |
451 dsp->put_vc1_mspel_pixels_tab[14] = ff_put_vc1_mspel_mc23_c; | |
452 dsp->put_vc1_mspel_pixels_tab[15] = ff_put_vc1_mspel_mc33_c; | |
453 } |