Mercurial > libavcodec.hg
annotate 4xm.c @ 3198:6b9f0c4fbdbe libavcodec
First part of a series of speed-enhancing patches.
This one sets up a snow.h and makes snow use the dsputil function pointer
framework to access the three functions that will be implemented in asm
in the other parts of the patchset.
Patch by Robert Edele < yartrebo AH earthlink POIS net>
Original thread:
Subject: [Ffmpeg-devel] [PATCH] Snow mmx+sse2 asm optimizations
Date: Sun, 05 Feb 2006 12:47:14 -0500
author | gpoirier |
---|---|
date | Thu, 16 Mar 2006 19:18:18 +0000 |
parents | 072dbc669253 |
children | 68721b62a528 |
rev | line source |
---|---|
1293 | 1 /* |
2 * 4XM codec | |
3 * Copyright (c) 2003 Michael Niedermayer | |
4 * | |
5 * This library is free software; you can redistribute it and/or | |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
9 * | |
10 * This library is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 * Lesser General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU Lesser General Public | |
16 * License along with this library; if not, write to the Free Software | |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
1293 | 18 */ |
2967 | 19 |
1293 | 20 /** |
21 * @file 4xm.c | |
22 * 4XM codec. | |
23 */ | |
2967 | 24 |
1293 | 25 #include "avcodec.h" |
26 #include "dsputil.h" | |
27 #include "mpegvideo.h" | |
28 | |
29 //#undef NDEBUG | |
30 //#include <assert.h> | |
31 | |
32 #define BLOCK_TYPE_VLC_BITS 5 | |
33 #define ACDC_VLC_BITS 9 | |
34 | |
35 #define CFRAME_BUFFER_COUNT 100 | |
36 | |
/*
 * Block-type VLC descriptions, one row per block-size class.
 * init_vlcs() feeds element [1] of every pair to init_vlc() as the first
 * (bit-length) array and element [0] as the second (code) array, so each
 * pair reads {code, length}.  A length of 0 marks a block type that cannot
 * occur for that size class.  Only the first 7 pairs of a row are used;
 * the 8th is zero padding.
 */
static const uint8_t block_type_tab[4][8][2]={
    {   /* {8,4,2}x{8,4,2} */
        { 0,1}, { 2,2}, { 6,3}, {14,4}, {30,5}, {31,5}, { 0,0}, { 0,0}
    },
    {   /* {8,4}x1 */
        { 0,1}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}, { 0,0}, { 0,0}
    },
    {   /* 1x{8,4} */
        { 0,1}, { 2,2}, { 0,0}, { 6,3}, {14,4}, {15,4}, { 0,0}, { 0,0}
    },
    {   /* 1x2, 2x1 */
        { 0,1}, { 0,0}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}, { 0,0}
    },
};
48 | |
/*
 * Maps [log2(height)][log2(width)] of a P-frame block to the row of
 * block_type_tab / block_type_vlc used to read its block type
 * (see decode_p_block()).  The [0][0] slot (a 1x1 block) holds
 * (uint8_t)-1, i.e. 255, which is not a valid row number.
 */
static const uint8_t size2index[4][4]={
    /* log2w:  0     1  2  3 */
    {       0xFF,    3, 1, 1 },   /* log2h = 0 */
    {          3,    0, 0, 0 },   /* log2h = 1 */
    {          2,    0, 0, 0 },   /* log2h = 2 */
    {          2,    0, 0, 0 },   /* log2h = 3 */
};
55 | |
/*
 * Motion vector table, indexed by the byte read from the P-frame bytestream.
 * init_mv() combines each pair as  [0] + [1]*linesize/2, so element [0] is
 * the horizontal and element [1] the vertical displacement.
 */
static const int8_t mv[256][2]={
    {  0,  0}, {  0, -1}, { -1,  0}, {  1,  0}, {  0,  1}, { -1, -1}, {  1, -1}, { -1,  1},
    {  1,  1}, {  0, -2}, { -2,  0}, {  2,  0}, {  0,  2}, { -1, -2}, {  1, -2}, { -2, -1},
    {  2, -1}, { -2,  1}, {  2,  1}, { -1,  2}, {  1,  2}, { -2, -2}, {  2, -2}, { -2,  2},
    {  2,  2}, {  0, -3}, { -3,  0}, {  3,  0}, {  0,  3}, { -1, -3}, {  1, -3}, { -3, -1},
    {  3, -1}, { -3,  1}, {  3,  1}, { -1,  3}, {  1,  3}, { -2, -3}, {  2, -3}, { -3, -2},
    {  3, -2}, { -3,  2}, {  3,  2}, { -2,  3}, {  2,  3}, {  0, -4}, { -4,  0}, {  4,  0},
    {  0,  4}, { -1, -4}, {  1, -4}, { -4, -1}, {  4, -1}, {  4,  1}, { -1,  4}, {  1,  4},
    { -3, -3}, { -3,  3}, {  3,  3}, { -2, -4}, { -4, -2}, {  4, -2}, { -4,  2}, { -2,  4},
    {  2,  4}, { -3, -4}, {  3, -4}, {  4, -3}, { -5,  0}, { -4,  3}, { -3,  4}, {  3,  4},
    { -1, -5}, { -5, -1}, { -5,  1}, { -1,  5}, { -2, -5}, {  2, -5}, {  5, -2}, {  5,  2},
    { -4, -4}, { -4,  4}, { -3, -5}, { -5, -3}, { -5,  3}, {  3,  5}, { -6,  0}, {  0,  6},
    { -6, -1}, { -6,  1}, {  1,  6}, {  2, -6}, { -6,  2}, {  2,  6}, { -5, -4}, {  5,  4},
    {  4,  5}, { -6, -3}, {  6,  3}, { -7,  0}, { -1, -7}, {  5, -5}, { -7,  1}, { -1,  7},
    {  4, -6}, {  6,  4}, { -2, -7}, { -7,  2}, { -3, -7}, {  7, -3}, {  3,  7}, {  6, -5},
    {  0, -8}, { -1, -8}, { -7, -4}, { -8,  1}, {  4,  7}, {  2, -8}, { -2,  8}, {  6,  6},
    { -8,  3}, {  5, -7}, { -5,  7}, {  8, -4}, {  0, -9}, { -9, -1}, {  1,  9}, {  7, -6},
    { -7,  6}, { -5, -8}, { -5,  8}, { -9,  3}, {  9, -4}, {  7, -7}, {  8, -6}, {  6,  8},
    { 10,  1}, {-10,  2}, {  9, -5}, { 10, -3}, { -8, -7}, {-10, -4}, {  6, -9}, {-11,  0},
    { 11,  1}, {-11, -2}, { -2, 11}, {  7, -9}, { -7,  9}, { 10,  6}, { -4, 11}, {  8, -9},
    {  8,  9}, {  5, 11}, {  7,-10}, { 12, -3}, { 11,  6}, { -9, -9}, {  8, 10}, {  5, 12},
    {-11,  7}, { 13,  2}, {  6,-12}, { 10,  9}, {-11,  8}, { -7, 12}, {  0, 14}, { 14, -2},
    { -9, 11}, { -6, 13}, {-14, -4}, { -5,-14}, {  5, 14}, {-15, -1}, {-14, -6}, {  3,-15},
    { 11,-11}, { -7, 14}, { -5, 15}, {  8,-14}, { 15,  6}, {  3, 16}, {  7,-15}, {-16,  5},
    {  0, 17}, {-16, -6}, {-10, 14}, {-16,  7}, { 12, 13}, {-16,  8}, {-17,  6}, {-18,  3},
    { -7, 17}, { 15, 11}, { 16, 10}, {  2,-19}, {  3,-19}, {-11,-16}, {-18,  8}, {-19, -6},
    {  2,-20}, {-17,-11}, {-10,-18}, {  8, 19}, {-21, -1}, {-20,  7}, { -4, 21}, { 21,  5},
    { 15, 16}, {  2,-22}, {-10,-20}, {-22,  5}, { 20,-11}, { -7,-22}, {-12, 20}, { 23, -5},
    { 13,-20}, { 24, -2}, {-15, 19}, {-11, 22}, { 16, 19}, { 23,-10}, {-18,-18}, { -9,-24},
    { 24,-10}, { -3, 26}, {-23, 13}, {-18,-20}, { 17, 21}, { -4, 27}, { 27,  6}, {  1,-28},
    {-11, 26}, {-17,-23}, {  7, 28}, { 11,-27}, { 29,  5}, {-23,-19}, {-28,-11}, {-21, 22},
    {-30,  7}, {-17, 26}, {-27, 16}, { 13, 29}, { 19,-26}, { 10,-31}, {-14,-30}, { 20,-27},
    {-29, 18}, {-16,-31}, {-28,-22}, { 21,-30}, {-25, 28}, { 26,-29}, { 25,-32}, {-32,-32}
};
90 | |
/*
 * Dequantization factors for the 64 coefficients of an intra block.
 * This is simply the scaled-down elementwise product of the standard JPEG
 * quantizer table and the AAN premultiplication table.
 */
static const uint8_t dequant_table[64]={
    16, 15, 13, 19, 24, 31, 28, 17,
    17, 23, 25, 31, 36, 63, 45, 21,
    18, 24, 27, 37, 52, 59, 49, 20,
    16, 28, 34, 40, 60, 80, 51, 20,
    18, 31, 48, 66, 68, 86, 56, 21,
    19, 38, 56, 59, 64, 64, 48, 20,
    27, 48, 55, 55, 56, 51, 35, 15,
    20, 35, 34, 32, 31, 22, 15,  8,
};
102 | |
/* One VLC per block-size class: filled by init_vlcs() from block_type_tab and read in decode_p_block() using the row chosen by size2index. */
103 static VLC block_type_vlc[4]; | |
104 | |
105 | |
/**
 * Reassembly buffer for one "cfrm" frame that arrives split over several
 * packets (see decode_frame()).
 */
typedef struct CFrameBuffer{
    unsigned int allocated_size; ///< capacity of data; unsigned because its address is handed to av_fast_realloc()
    int size;                    ///< number of bytes collected in data so far
    int id;                      ///< frame id taken from the chunk header; decode_frame() resets it to 0 once the frame is complete
    uint8_t *data;               ///< collected payload, released in decode_end()
}CFrameBuffer;
112 | |
113 typedef struct FourXContext{ | |
114 AVCodecContext *avctx; | |
115 DSPContext dsp; | |
116 AVFrame current_picture, last_picture; | |
117 GetBitContext pre_gb; ///< ac/dc prefix | |
118 GetBitContext gb; | |
119 uint8_t *bytestream; | |
120 uint16_t *wordstream; | |
121 int mv[256]; | |
122 VLC pre_vlc; | |
123 int last_dc; | |
3089 | 124 DECLARE_ALIGNED_8(DCTELEM, block[6][64]); |
1293 | 125 uint8_t *bitstream_buffer; |
3066
04b924f8f5a5
warning fixes by Luca Abeni, lucabe72 ##@## email ##.## it
diego
parents:
3036
diff
changeset
|
126 unsigned int bitstream_buffer_size; |
1293 | 127 CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; |
128 } FourXContext; | |
129 | |
130 | |
/* Multipliers of the inverse DCT in 16.16 fixed point, i.e. round(x * 65536). */
#define FIX_1_082392200   70936
#define FIX_1_414213562   92682
#define FIX_1_847759065  121095
#define FIX_2_613125930  171254

/* Fixed-point product: multiply, then drop the 16 fractional bits. */
#define MULTIPLY(a, k) (((a)*(k)) >> 16)
137 | |
138 static void idct(DCTELEM block[64]){ | |
139 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | |
140 int tmp10, tmp11, tmp12, tmp13; | |
141 int z5, z10, z11, z12, z13; | |
142 int i; | |
143 int temp[64]; | |
2967 | 144 |
1293 | 145 for(i=0; i<8; i++){ |
146 tmp10 = block[8*0 + i] + block[8*4 + i]; | |
147 tmp11 = block[8*0 + i] - block[8*4 + i]; | |
148 | |
149 tmp13 = block[8*2 + i] + block[8*6 + i]; | |
150 tmp12 = MULTIPLY(block[8*2 + i] - block[8*6 + i], FIX_1_414213562) - tmp13; | |
151 | |
152 tmp0 = tmp10 + tmp13; | |
153 tmp3 = tmp10 - tmp13; | |
154 tmp1 = tmp11 + tmp12; | |
155 tmp2 = tmp11 - tmp12; | |
2967 | 156 |
1293 | 157 z13 = block[8*5 + i] + block[8*3 + i]; |
158 z10 = block[8*5 + i] - block[8*3 + i]; | |
159 z11 = block[8*1 + i] + block[8*7 + i]; | |
160 z12 = block[8*1 + i] - block[8*7 + i]; | |
161 | |
162 tmp7 = z11 + z13; | |
163 tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); | |
164 | |
165 z5 = MULTIPLY(z10 + z12, FIX_1_847759065); | |
166 tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; | |
167 tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; | |
168 | |
169 tmp6 = tmp12 - tmp7; | |
170 tmp5 = tmp11 - tmp6; | |
171 tmp4 = tmp10 + tmp5; | |
172 | |
173 temp[8*0 + i] = tmp0 + tmp7; | |
174 temp[8*7 + i] = tmp0 - tmp7; | |
175 temp[8*1 + i] = tmp1 + tmp6; | |
176 temp[8*6 + i] = tmp1 - tmp6; | |
177 temp[8*2 + i] = tmp2 + tmp5; | |
178 temp[8*5 + i] = tmp2 - tmp5; | |
179 temp[8*4 + i] = tmp3 + tmp4; | |
180 temp[8*3 + i] = tmp3 - tmp4; | |
181 } | |
2967 | 182 |
1293 | 183 for(i=0; i<8*8; i+=8){ |
184 tmp10 = temp[0 + i] + temp[4 + i]; | |
185 tmp11 = temp[0 + i] - temp[4 + i]; | |
186 | |
187 tmp13 = temp[2 + i] + temp[6 + i]; | |
188 tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13; | |
189 | |
190 tmp0 = tmp10 + tmp13; | |
191 tmp3 = tmp10 - tmp13; | |
192 tmp1 = tmp11 + tmp12; | |
193 tmp2 = tmp11 - tmp12; | |
194 | |
195 z13 = temp[5 + i] + temp[3 + i]; | |
196 z10 = temp[5 + i] - temp[3 + i]; | |
197 z11 = temp[1 + i] + temp[7 + i]; | |
198 z12 = temp[1 + i] - temp[7 + i]; | |
199 | |
200 tmp7 = z11 + z13; | |
201 tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); | |
202 | |
203 z5 = MULTIPLY(z10 + z12, FIX_1_847759065); | |
204 tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; | |
205 tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; | |
206 | |
207 tmp6 = tmp12 - tmp7; | |
208 tmp5 = tmp11 - tmp6; | |
209 tmp4 = tmp10 + tmp5; | |
210 | |
211 block[0 + i] = (tmp0 + tmp7)>>6; | |
212 block[7 + i] = (tmp0 - tmp7)>>6; | |
213 block[1 + i] = (tmp1 + tmp6)>>6; | |
214 block[6 + i] = (tmp1 - tmp6)>>6; | |
215 block[2 + i] = (tmp2 + tmp5)>>6; | |
216 block[5 + i] = (tmp2 - tmp5)>>6; | |
217 block[4 + i] = (tmp3 + tmp4)>>6; | |
218 block[3 + i] = (tmp3 - tmp4)>>6; | |
219 } | |
220 } | |
221 | |
222 static void init_vlcs(FourXContext *f){ | |
223 int i; | |
224 | |
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2028
diff
changeset
|
225 for(i=0; i<4; i++){ |
2967 | 226 init_vlc(&block_type_vlc[i], BLOCK_TYPE_VLC_BITS, 7, |
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2028
diff
changeset
|
227 &block_type_tab[i][0][1], 2, 1, |
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2028
diff
changeset
|
228 &block_type_tab[i][0][0], 2, 1, 1); |
1293 | 229 } |
230 } | |
231 | |
232 static void init_mv(FourXContext *f){ | |
233 int i; | |
234 | |
235 for(i=0; i<256; i++){ | |
236 f->mv[i] = mv[i][0] + mv[i][1]*f->current_picture.linesize[0]/2; | |
237 } | |
238 } | |
239 | |
/**
 * Motion compensation with DC offset: dst = scale*src + dc for a block of
 * (1<<log2w) x h pixels of 16 bits each.
 *
 * Blocks wider than one pixel are processed two pixels at a time as one
 * 32 bit word, with dc replicated into both halves.  A carry out of the low
 * half therefore spills into the high half, exactly as a single 32 bit
 * addition does.
 *
 * The words are moved with memcpy() rather than through uint32_t pointer
 * casts, so neither 4 byte alignment of dst/src nor type punning is
 * required, and the dc replication is done in unsigned arithmetic so that
 * dc >= 0x8000 cannot overflow a signed int.
 *
 * @param dst    top-left pixel of the destination block
 * @param src    top-left pixel of the source block, not read when scale is 0
 * @param log2w  log2 of the block width, 0..3
 * @param h      block height in lines
 * @param stride distance between lines, in pixels
 * @param scale  multiplier applied to the source
 * @param dc     value added to every pixel
 */
static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w, int h, int stride, int scale, int dc){
    const uint32_t dc2= (uint32_t)dc * 0x10001u;
    const uint32_t mul= (uint32_t)scale;
    int words, i, j;

    switch(log2w){
    case 0:
        for(i=0; i<h; i++){
            dst[0] = (uint16_t)((scale ? mul*src[0] : 0) + dc2);
            if(scale) src += stride;
            dst += stride;
        }
        return;
    case 1:
    case 2:
    case 3:
        words= 1<<(log2w-1);
        break;
    default:
        assert(0);
        return;
    }

    for(i=0; i<h; i++){
        for(j=0; j<words; j++){
            uint32_t v= 0;

            if(scale){
                memcpy(&v, src + 2*j, sizeof(v));
                v*= mul;
            }
            v+= dc2;
            memcpy(dst + 2*j, &v, sizeof(v));
        }
        if(scale) src += stride;
        dst += stride;
    }
}
280 | |
281 static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, int log2w, int log2h, int stride){ | |
282 const int index= size2index[log2h][log2w]; | |
283 const int h= 1<<log2h; | |
284 int code= get_vlc2(&f->gb, block_type_vlc[index].table, BLOCK_TYPE_VLC_BITS, 1); | |
2967 | 285 |
1293 | 286 assert(code>=0 && code<=6); |
287 | |
288 if(code == 0){ | |
289 src += f->mv[ *f->bytestream++ ]; | |
290 mcdc(dst, src, log2w, h, stride, 1, 0); | |
291 }else if(code == 1){ | |
292 log2h--; | |
293 decode_p_block(f, dst , src , log2w, log2h, stride); | |
294 decode_p_block(f, dst + (stride<<log2h), src + (stride<<log2h), log2w, log2h, stride); | |
295 }else if(code == 2){ | |
296 log2w--; | |
297 decode_p_block(f, dst , src , log2w, log2h, stride); | |
298 decode_p_block(f, dst + (1<<log2w), src + (1<<log2w), log2w, log2h, stride); | |
299 }else if(code == 4){ | |
300 src += f->mv[ *f->bytestream++ ]; | |
301 mcdc(dst, src, log2w, h, stride, 1, le2me_16(*f->wordstream++)); | |
302 }else if(code == 5){ | |
303 mcdc(dst, src, log2w, h, stride, 0, le2me_16(*f->wordstream++)); | |
304 }else if(code == 6){ | |
305 if(log2w){ | |
306 dst[0] = le2me_16(*f->wordstream++); | |
307 dst[1] = le2me_16(*f->wordstream++); | |
308 }else{ | |
309 dst[0 ] = le2me_16(*f->wordstream++); | |
310 dst[stride] = le2me_16(*f->wordstream++); | |
311 } | |
312 } | |
313 } | |
314 | |
/**
 * Read a 32 bit little-endian value.
 * The bytes are fetched one by one, so p needs no particular alignment and
 * the result does not depend on the byte order of the host.
 * @param p address of the first (least significant) byte
 * @return the value converted to int
 */
static int get32(void *p){
    const uint8_t *b= p;

    return (int)(  (uint32_t)b[0]
                | ((uint32_t)b[1]<< 8)
                | ((uint32_t)b[2]<<16)
                | ((uint32_t)b[3]<<24));
}
318 | |
319 static int decode_p_frame(FourXContext *f, uint8_t *buf, int length){ | |
320 int x, y; | |
321 const int width= f->avctx->width; | |
322 const int height= f->avctx->height; | |
323 uint16_t *src= (uint16_t*)f->last_picture.data[0]; | |
324 uint16_t *dst= (uint16_t*)f->current_picture.data[0]; | |
325 const int stride= f->current_picture.linesize[0]>>1; | |
2422 | 326 const unsigned int bitstream_size= get32(buf+8); |
327 const unsigned int bytestream_size= get32(buf+16); | |
328 const unsigned int wordstream_size= get32(buf+12); | |
2967 | 329 |
2422 | 330 if(bitstream_size+ bytestream_size+ wordstream_size + 20 != length |
331 || bitstream_size > (1<<26) | |
332 || bytestream_size > (1<<26) | |
333 || wordstream_size > (1<<26) | |
334 ){ | |
2967 | 335 av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size, |
1293 | 336 bitstream_size+ bytestream_size+ wordstream_size - length); |
2422 | 337 return -1; |
338 } | |
2967 | 339 |
1293 | 340 f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE); |
341 f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)(buf + 20), bitstream_size/4); | |
342 init_get_bits(&f->gb, f->bitstream_buffer, 8*bitstream_size); | |
343 | |
344 f->wordstream= (uint16_t*)(buf + 20 + bitstream_size); | |
345 f->bytestream= buf + 20 + bitstream_size + wordstream_size; | |
2967 | 346 |
1293 | 347 init_mv(f); |
2967 | 348 |
1293 | 349 for(y=0; y<height; y+=8){ |
350 for(x=0; x<width; x+=8){ | |
351 decode_p_block(f, dst + x, src + x, 3, 3, stride); | |
352 } | |
2967 | 353 src += 8*stride; |
354 dst += 8*stride; | |
1293 | 355 } |
2967 | 356 |
1293 | 357 if(bitstream_size != (get_bits_count(&f->gb)+31)/32*4) |
2967 | 358 av_log(f->avctx, AV_LOG_ERROR, " %d %td %td bytes left\n", |
359 bitstream_size - (get_bits_count(&f->gb)+31)/32*4, | |
1293 | 360 bytestream_size - (f->bytestream - (buf + 20 + bitstream_size + wordstream_size)), |
361 wordstream_size - (((uint8_t*)f->wordstream) - (buf + 20 + bitstream_size)) | |
362 ); | |
2967 | 363 |
1293 | 364 return 0; |
365 } | |
366 | |
367 /** | |
368 * decode block and dequantize. | |
369 * Note this is allmost identical to mjpeg | |
370 */ | |
371 static int decode_i_block(FourXContext *f, DCTELEM *block){ | |
372 int code, i, j, level, val; | |
373 | |
374 /* DC coef */ | |
375 val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3); | |
376 if (val>>4){ | |
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
377 av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n"); |
1293 | 378 } |
379 | |
380 if(val) | |
381 val = get_xbits(&f->gb, val); | |
382 | |
383 val = val * dequant_table[0] + f->last_dc; | |
384 f->last_dc = | |
385 block[0] = val; | |
386 /* AC coefs */ | |
387 i = 1; | |
388 for(;;) { | |
389 code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3); | |
2967 | 390 |
1293 | 391 /* EOB */ |
392 if (code == 0) | |
393 break; | |
394 if (code == 0xf0) { | |
395 i += 16; | |
396 } else { | |
397 level = get_xbits(&f->gb, code & 0xf); | |
398 i += code >> 4; | |
399 if (i >= 64) { | |
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
400 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i); |
1293 | 401 return 0; |
402 } | |
403 | |
404 j= ff_zigzag_direct[i]; | |
405 block[j] = level * dequant_table[j]; | |
406 i++; | |
407 if (i >= 64) | |
408 break; | |
409 } | |
410 } | |
411 | |
412 return 0; | |
413 } | |
414 | |
415 static inline void idct_put(FourXContext *f, int x, int y){ | |
416 DCTELEM (*block)[64]= f->block; | |
417 int stride= f->current_picture.linesize[0]>>1; | |
418 int i; | |
419 uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x; | |
2967 | 420 |
1295 | 421 for(i=0; i<4; i++){ |
422 block[i][0] += 0x80*8*8; | |
423 idct(block[i]); | |
424 } | |
1293 | 425 |
1295 | 426 if(!(f->avctx->flags&CODEC_FLAG_GRAY)){ |
427 for(i=4; i<6; i++) idct(block[i]); | |
428 } | |
1293 | 429 |
1514 | 430 /* Note transform is: |
431 y= ( 1b + 4g + 2r)/14 | |
432 cb=( 3b - 2g - 1r)/14 | |
433 cr=(-1b - 4g + 5r)/14 | |
2967 | 434 */ |
1293 | 435 for(y=0; y<8; y++){ |
436 for(x=0; x<8; x++){ | |
437 DCTELEM *temp= block[(x>>2) + 2*(y>>2)] + 2*(x&3) + 2*8*(y&3); //FIXME optimize | |
1295 | 438 int cb= block[4][x + 8*y]; |
439 int cr= block[5][x + 8*y]; | |
1293 | 440 int cg= (cb + cr)>>1; |
441 int y; | |
2967 | 442 |
1295 | 443 cb+=cb; |
2967 | 444 |
1293 | 445 y = temp[0]; |
1295 | 446 dst[0 ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); |
1293 | 447 y = temp[1]; |
1295 | 448 dst[1 ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); |
1293 | 449 y = temp[8]; |
1295 | 450 dst[ stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); |
1293 | 451 y = temp[9]; |
1295 | 452 dst[1+stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); |
1293 | 453 dst += 2; |
454 } | |
455 dst += 2*stride - 2*8; | |
456 } | |
457 } | |
458 | |
459 static int decode_i_mb(FourXContext *f){ | |
460 int i; | |
2967 | 461 |
1293 | 462 f->dsp.clear_blocks(f->block[0]); |
2967 | 463 |
1293 | 464 for(i=0; i<6; i++){ |
465 if(decode_i_block(f, f->block[i]) < 0) | |
466 return -1; | |
467 } | |
2967 | 468 |
1293 | 469 return 0; |
470 } | |
471 | |
472 static uint8_t *read_huffman_tables(FourXContext *f, uint8_t * const buf){ | |
473 int frequency[512]; | |
474 uint8_t flag[512]; | |
475 int up[512]; | |
476 uint8_t len_tab[257]; | |
477 int bits_tab[257]; | |
478 int start, end; | |
479 uint8_t *ptr= buf; | |
480 int j; | |
2967 | 481 |
1293 | 482 memset(frequency, 0, sizeof(frequency)); |
483 memset(up, -1, sizeof(up)); | |
484 | |
485 start= *ptr++; | |
486 end= *ptr++; | |
487 for(;;){ | |
488 int i; | |
2967 | 489 |
1293 | 490 for(i=start; i<=end; i++){ |
491 frequency[i]= *ptr++; | |
492 // printf("%d %d %d\n", start, end, frequency[i]); | |
493 } | |
494 start= *ptr++; | |
495 if(start==0) break; | |
2967 | 496 |
1293 | 497 end= *ptr++; |
498 } | |
499 frequency[256]=1; | |
500 | |
2967 | 501 while((ptr - buf)&3) ptr++; // 4byte align |
1293 | 502 |
503 // for(j=0; j<16; j++) | |
504 // printf("%2X", ptr[j]); | |
2967 | 505 |
1293 | 506 for(j=257; j<512; j++){ |
1294 | 507 int min_freq[2]= {256*256, 256*256}; |
508 int smallest[2]= {0, 0}; | |
1293 | 509 int i; |
510 for(i=0; i<j; i++){ | |
511 if(frequency[i] == 0) continue; | |
1294 | 512 if(frequency[i] < min_freq[1]){ |
513 if(frequency[i] < min_freq[0]){ | |
514 min_freq[1]= min_freq[0]; smallest[1]= smallest[0]; | |
515 min_freq[0]= frequency[i];smallest[0]= i; | |
516 }else{ | |
517 min_freq[1]= frequency[i];smallest[1]= i; | |
518 } | |
1293 | 519 } |
520 } | |
1294 | 521 if(min_freq[1] == 256*256) break; |
2967 | 522 |
1294 | 523 frequency[j]= min_freq[0] + min_freq[1]; |
1293 | 524 flag[ smallest[0] ]= 0; |
525 flag[ smallest[1] ]= 1; | |
2967 | 526 up[ smallest[0] ]= |
1293 | 527 up[ smallest[1] ]= j; |
528 frequency[ smallest[0] ]= frequency[ smallest[1] ]= 0; | |
529 } | |
530 | |
531 for(j=0; j<257; j++){ | |
532 int node; | |
533 int len=0; | |
534 int bits=0; | |
535 | |
536 for(node= j; up[node] != -1; node= up[node]){ | |
537 bits += flag[node]<<len; | |
538 len++; | |
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
539 if(len > 31) av_log(f->avctx, AV_LOG_ERROR, "vlc length overflow\n"); //can this happen at all ? |
1293 | 540 } |
2967 | 541 |
1293 | 542 bits_tab[j]= bits; |
543 len_tab[j]= len; | |
544 } | |
2967 | 545 |
546 init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, | |
1293 | 547 len_tab , 1, 1, |
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2028
diff
changeset
|
548 bits_tab, 4, 4, 0); |
2967 | 549 |
1293 | 550 return ptr; |
551 } | |
552 | |
553 static int decode_i_frame(FourXContext *f, uint8_t *buf, int length){ | |
554 int x, y; | |
555 const int width= f->avctx->width; | |
556 const int height= f->avctx->height; | |
557 uint16_t *dst= (uint16_t*)f->current_picture.data[0]; | |
558 const int stride= f->current_picture.linesize[0]>>1; | |
2422 | 559 const unsigned int bitstream_size= get32(buf); |
1478 | 560 const int token_count __attribute__((unused)) = get32(buf + bitstream_size + 8); |
2422 | 561 unsigned int prestream_size= 4*get32(buf + bitstream_size + 4); |
1293 | 562 uint8_t *prestream= buf + bitstream_size + 12; |
2967 | 563 |
2422 | 564 if(prestream_size + bitstream_size + 12 != length |
565 || bitstream_size > (1<<26) | |
566 || prestream_size > (1<<26)){ | |
2628
511e3afc43e1
Ministry of English Composition, reporting for duty (and the word is "skipped", not "skiped"; "skiped" would rhyme with "hyped")
melanson
parents:
2453
diff
changeset
|
567 av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n", prestream_size, bitstream_size, length); |
2422 | 568 return -1; |
569 } | |
2967 | 570 |
1293 | 571 prestream= read_huffman_tables(f, prestream); |
572 | |
573 init_get_bits(&f->gb, buf + 4, 8*bitstream_size); | |
574 | |
575 prestream_size= length + buf - prestream; | |
1294 | 576 |
1293 | 577 f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, prestream_size + FF_INPUT_BUFFER_PADDING_SIZE); |
578 f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)prestream, prestream_size/4); | |
579 init_get_bits(&f->pre_gb, f->bitstream_buffer, 8*prestream_size); | |
580 | |
581 f->last_dc= 0*128*8*8; | |
2967 | 582 |
1293 | 583 for(y=0; y<height; y+=16){ |
584 for(x=0; x<width; x+=16){ | |
585 if(decode_i_mb(f) < 0) | |
586 return -1; | |
587 | |
588 idct_put(f, x, y); | |
589 } | |
2967 | 590 dst += 16*stride; |
1293 | 591 } |
592 | |
593 if(get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256) | |
2628
511e3afc43e1
Ministry of English Composition, reporting for duty (and the word is "skipped", not "skiped"; "skiped" would rhyme with "hyped")
melanson
parents:
2453
diff
changeset
|
594 av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n"); |
2967 | 595 |
1293 | 596 return 0; |
597 } | |
598 | |
2967 | 599 static int decode_frame(AVCodecContext *avctx, |
1293 | 600 void *data, int *data_size, |
601 uint8_t *buf, int buf_size) | |
602 { | |
603 FourXContext * const f = avctx->priv_data; | |
604 AVFrame *picture = data; | |
605 AVFrame *p, temp; | |
606 int i, frame_4cc, frame_size; | |
607 | |
608 frame_4cc= get32(buf); | |
609 if(buf_size != get32(buf+4)+8){ | |
2628
511e3afc43e1
Ministry of English Composition, reporting for duty (and the word is "skipped", not "skiped"; "skiped" would rhyme with "hyped")
melanson
parents:
2453
diff
changeset
|
610 av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n", buf_size, get32(buf+4)); |
1293 | 611 } |
612 | |
613 if(frame_4cc == ff_get_fourcc("cfrm")){ | |
614 int free_index=-1; | |
615 const int data_size= buf_size - 20; | |
616 const int id= get32(buf+12); | |
617 const int whole_size= get32(buf+16); | |
618 CFrameBuffer *cfrm; | |
619 | |
620 for(i=0; i<CFRAME_BUFFER_COUNT; i++){ | |
621 if(f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number) | |
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
622 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n", f->cfrm[i].id); |
1293 | 623 } |
2967 | 624 |
1293 | 625 for(i=0; i<CFRAME_BUFFER_COUNT; i++){ |
626 if(f->cfrm[i].id == id) break; | |
627 if(f->cfrm[i].size == 0 ) free_index= i; | |
628 } | |
629 | |
630 if(i>=CFRAME_BUFFER_COUNT){ | |
631 i= free_index; | |
632 f->cfrm[i].id= id; | |
633 } | |
634 cfrm= &f->cfrm[i]; | |
2967 | 635 |
1293 | 636 cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE); |
2967 | 637 |
1293 | 638 memcpy(cfrm->data + cfrm->size, buf+20, data_size); |
639 cfrm->size += data_size; | |
2967 | 640 |
1293 | 641 if(cfrm->size >= whole_size){ |
642 buf= cfrm->data; | |
643 frame_size= cfrm->size; | |
2967 | 644 |
1293 | 645 if(id != avctx->frame_number){ |
2628
511e3afc43e1
Ministry of English Composition, reporting for duty (and the word is "skipped", not "skiped"; "skiped" would rhyme with "hyped")
melanson
parents:
2453
diff
changeset
|
646 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n", id, avctx->frame_number); |
1293 | 647 } |
2967 | 648 |
1293 | 649 cfrm->size= cfrm->id= 0; |
650 frame_4cc= ff_get_fourcc("pfrm"); | |
651 }else | |
652 return buf_size; | |
653 }else{ | |
654 buf= buf + 12; | |
655 frame_size= buf_size - 12; | |
2967 | 656 } |
1293 | 657 |
658 temp= f->current_picture; | |
659 f->current_picture= f->last_picture; | |
660 f->last_picture= temp; | |
661 | |
662 p= &f->current_picture; | |
663 avctx->coded_frame= p; | |
664 | |
665 avctx->flags |= CODEC_FLAG_EMU_EDGE; // alternatively we would have to use our own buffer management | |
666 | |
667 if(p->data[0]) | |
668 avctx->release_buffer(avctx, p); | |
669 | |
670 p->reference= 1; | |
671 if(avctx->get_buffer(avctx, p) < 0){ | |
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
672 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); |
1293 | 673 return -1; |
674 } | |
675 | |
676 if(frame_4cc == ff_get_fourcc("ifrm")){ | |
677 p->pict_type= I_TYPE; | |
678 if(decode_i_frame(f, buf, frame_size) < 0) | |
679 return -1; | |
680 }else if(frame_4cc == ff_get_fourcc("pfrm")){ | |
681 p->pict_type= P_TYPE; | |
682 if(decode_p_frame(f, buf, frame_size) < 0) | |
683 return -1; | |
684 }else if(frame_4cc == ff_get_fourcc("snd_")){ | |
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
685 av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n", buf_size); |
1293 | 686 }else{ |
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1514
diff
changeset
|
687 av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n", buf_size); |
1293 | 688 } |
689 | |
690 #if 0 | |
691 for(i=0; i<20; i++){ | |
692 printf("%2X %c ", buf[i], clip(buf[i],16,126)); | |
693 } | |
694 #endif | |
695 | |
696 p->key_frame= p->pict_type == I_TYPE; | |
697 | |
698 *picture= *p; | |
699 *data_size = sizeof(AVPicture); | |
700 | |
701 emms_c(); | |
2967 | 702 |
1293 | 703 return buf_size; |
704 } | |
705 | |
706 | |
707 static void common_init(AVCodecContext *avctx){ | |
708 FourXContext * const f = avctx->priv_data; | |
709 | |
710 dsputil_init(&f->dsp, avctx); | |
711 | |
712 f->avctx= avctx; | |
713 } | |
714 | |
715 static int decode_init(AVCodecContext *avctx){ | |
716 FourXContext * const f = avctx->priv_data; | |
2967 | 717 |
1293 | 718 common_init(avctx); |
719 init_vlcs(f); | |
720 | |
721 avctx->pix_fmt= PIX_FMT_RGB565; | |
722 | |
723 return 0; | |
724 } | |
725 | |
726 | |
727 static int decode_end(AVCodecContext *avctx){ | |
728 FourXContext * const f = avctx->priv_data; | |
729 int i; | |
730 | |
731 av_freep(&f->bitstream_buffer); | |
732 f->bitstream_buffer_size=0; | |
733 for(i=0; i<CFRAME_BUFFER_COUNT; i++){ | |
734 av_freep(&f->cfrm[i].data); | |
735 f->cfrm[i].allocated_size= 0; | |
736 } | |
737 free_vlc(&f->pre_vlc); | |
2967 | 738 |
1293 | 739 return 0; |
740 } | |
741 | |
742 AVCodec fourxm_decoder = { | |
743 "4xm", | |
744 CODEC_TYPE_VIDEO, | |
745 CODEC_ID_4XM, | |
746 sizeof(FourXContext), | |
747 decode_init, | |
748 NULL, | |
749 decode_end, | |
750 decode_frame, | |
751 /*CODEC_CAP_DR1,*/ | |
752 }; | |
753 |