Mercurial > mplayer.hg
comparison libvo/jpeg_enc.c @ 4347:d036a9992baf
fast 4:2:2 jpeg encoder, based on libavcodec. - patch by Rik Snel <rsnel@cube.dyndns.org>
author | arpi |
---|---|
date | Sat, 26 Jan 2002 00:52:59 +0000 |
parents | |
children | 49f2bc22f880 |
comparison
equal
deleted
inserted
replaced
4346:d45744794581 | 4347:d036a9992baf |
---|---|
1 /* Straightforward (to be) optimized JPEG encoder for the YUV422 format | |
2 * based on mjpeg code from ffmpeg. | |
3 * | |
4 * Copyright (c) 2002, Rik Snel | |
5 * Parts from ffmpeg Copyright (c) 2000, 2001 Gerard Lantau | |
6 * | |
7 * This program is free software; you can redistribute it and/or modify | |
8 * it under the terms of the GNU General Public License as published by | |
9 * the Free Software Foundation; either version 2 of the License, or | |
10 * (at your option) any later version. | |
11 * | |
12 * This program is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 * GNU General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU General Public License | |
18 * along with this program; if not, write to the Free Software | |
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
20 * | |
21 * For an excellent introduction to the JPEG format, see: | |
22 * http://www.ece.purdue.edu/~bourman/grad-labs/lab8/pdf/lab.pdf | |
23 */ | |
24 | |
25 | |
26 /* stuff from libavcodec/common.h */ | |
27 | |
28 #include <sys/types.h> | |
29 #include <stdio.h> | |
30 #include <stdlib.h> | |
31 #include "config.h" | |
32 #ifdef USE_FASTMEMCPY | |
33 #include "fastmemcpy.h" | |
34 #endif | |
35 #include "../mp_msg.h" | |
36 #include "../libavcodec/common.h" | |
37 #include "../libavcodec/dsputil.h" | |
38 | |
39 | |
40 static int height, width, fields, cheap_upsample, qscale, bw = 0, first = 1; | |
41 | |
42 /* from dsputils.c */ | |
43 | |
44 static DCTELEM **blck; | |
45 | |
46 extern void (*av_fdct)(DCTELEM *b); | |
47 | |
48 static UINT8 zr_zigzag_direct[64] = { | |
49 0, 1, 8, 16, 9, 2, 3, 10, | |
50 17, 24, 32, 25, 18, 11, 4, 5, | |
51 12, 19, 26, 33, 40, 48, 41, 34, | |
52 27, 20, 13, 6, 7, 14, 21, 28, | |
53 35, 42, 49, 56, 57, 50, 43, 36, | |
54 29, 22, 15, 23, 30, 37, 44, 51, | |
55 58, 59, 52, 45, 38, 31, 39, 46, | |
56 53, 60, 61, 54, 47, 55, 62, 63 | |
57 }; | |
58 | |
59 /* bit output */ | |
60 | |
61 static PutBitContext pb; | |
62 | |
63 /* from mpegvideo.c */ | |
64 | |
65 #define QMAT_SHIFT 25 | |
66 #define QMAT_SHIFT_MMX 19 | |
67 | |
68 static const unsigned short aanscales[64] = { | |
69 /* precomputed values scaled up by 14 bits */ | |
70 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, | |
71 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, | |
72 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, | |
73 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, | |
74 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, | |
75 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, | |
76 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, | |
77 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 | |
78 }; | |
79 | |
80 | |
81 static unsigned int simple_mmx_permutation[64]={ | |
82 0x00, 0x08, 0x01, 0x09, 0x04, 0x0C, 0x05, 0x0D, | |
83 0x10, 0x18, 0x11, 0x19, 0x14, 0x1C, 0x15, 0x1D, | |
84 0x02, 0x0A, 0x03, 0x0B, 0x06, 0x0E, 0x07, 0x0F, | |
85 0x12, 0x1A, 0x13, 0x1B, 0x16, 0x1E, 0x17, 0x1F, | |
86 0x20, 0x28, 0x21, 0x29, 0x24, 0x2C, 0x25, 0x2D, | |
87 0x30, 0x38, 0x31, 0x39, 0x34, 0x3C, 0x35, 0x3D, | |
88 0x22, 0x2A, 0x23, 0x2B, 0x26, 0x2E, 0x27, 0x2F, | |
89 0x32, 0x3A, 0x33, 0x3B, 0x36, 0x3E, 0x37, 0x3F, | |
90 }; | |
91 | |
92 #if 0 | |
/*
 * Reorder the coefficients of each of the eight rows of an 8x8 block in
 * place.  Within a row, columns 0 and 7 stay put; columns 1..6 receive the
 * old values of columns 2, 4, 6, 1, 3, 5 respectively.
 */
void block_permute(short int *block)
{
    short int *row;

    for (row = block; row < block + 64; row += 8) {
        /* the odd columns are overwritten before they are consumed */
        const short int odd1 = row[1];
        const short int odd3 = row[3];
        const short int odd5 = row[5];

        row[1] = row[2];
        row[2] = row[4];
        row[3] = row[6];
        row[4] = odd1;
        row[5] = odd3;
        row[6] = odd5;
    }
}
114 #endif | |
115 | |
116 static int q_intra_matrix[64]; | |
117 | |
118 static int dct_quantize(DCTELEM *block, int n, | |
119 int qscale) | |
120 { | |
121 int i, j, level, last_non_zero, q; | |
122 const int *qmat; | |
123 | |
124 av_fdct (block); | |
125 | |
126 /* we need this permutation so that we correct the IDCT | |
127 permutation. will be moved into DCT code */ | |
128 //block_permute(block); | |
129 | |
130 /*if (n < 4) | |
131 q = s->y_dc_scale; | |
132 else | |
133 q = s->c_dc_scale; | |
134 q = q << 3;*/ | |
135 q = 64; | |
136 /* note: block[0] is assumed to be positive */ | |
137 block[0] = (block[0] + (q >> 1)) / q; | |
138 i = 1; | |
139 last_non_zero = 0; | |
140 | |
141 qmat = q_intra_matrix; | |
142 for(;i<64;i++) { | |
143 j = zr_zigzag_direct[i]; | |
144 level = block[j]; | |
145 level = level * qmat[j]; | |
146 /* XXX: slight error for the low range. Test should be equivalent to | |
147 (level <= -(1 << (QMAT_SHIFT - 3)) || level >= (1 << | |
148 (QMAT_SHIFT - 3))) | |
149 */ | |
150 if (((level << (31 - (QMAT_SHIFT - 3))) >> (31 - (QMAT_SHIFT - 3))) != | |
151 level) { | |
152 level = level / (1 << (QMAT_SHIFT - 3)); | |
153 /* XXX: currently, this code is not optimal. the range should be: | |
154 mpeg1: -255..255 | |
155 mpeg2: -2048..2047 | |
156 h263: -128..127 | |
157 mpeg4: -2048..2047 | |
158 */ | |
159 if (level > 255) | |
160 level = 255; | |
161 else if (level < -255) | |
162 level = -255; | |
163 block[j] = level; | |
164 last_non_zero = i; | |
165 } else { | |
166 block[j] = 0; | |
167 } | |
168 | |
169 } | |
170 return last_non_zero; | |
171 } | |
172 | |
173 static int dct_quantize_mmx(DCTELEM *block, int n, int qscale) | |
174 { | |
175 int i, j, level, last_non_zero, q; | |
176 const int *qmat; | |
177 DCTELEM *b = block; | |
178 | |
179 /*for (i = 0; i < 8; i++) { | |
180 printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2], | |
181 b[8*i+3], b[8*i+4], b[8*i+5], b[8*i+6], b[8*i+7]); | |
182 }*/ | |
183 av_fdct (block); | |
184 /*for (i = 0; i < 8; i++) { | |
185 printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2], | |
186 b[8*i+3], b[8*i+4], b[8*i+5], b[8*i+6], b[8*i+7]); | |
187 }*/ | |
188 | |
189 | |
190 /* we need this permutation so that we correct the IDCT | |
191 permutation. will be moved into DCT code */ | |
192 //block_permute(block); | |
193 | |
194 //if (n < 2) | |
195 q = 8; | |
196 /*else | |
197 q = 8;*/ | |
198 | |
199 /* note: block[0] is assumed to be positive */ | |
200 block[0] = (block[0] + (q >> 1)) / q; | |
201 i = 1; | |
202 last_non_zero = 0; | |
203 qmat = q_intra_matrix; | |
204 | |
205 for(;i<64;i++) { | |
206 j = zr_zigzag_direct[i]; | |
207 level = block[j]; | |
208 level = level * qmat[j]; | |
209 /* XXX: slight error for the low range. Test should be equivalent to | |
210 (level <= -(1 << (QMAT_SHIFT_MMX - 3)) || level >= (1 << | |
211 (QMAT_SHIFT_MMX - 3))) | |
212 */ | |
213 if (((level << (31 - (QMAT_SHIFT_MMX - 3))) >> (31 - (QMAT_SHIFT_MMX - 3))) != | |
214 level) { | |
215 level = level / (1 << (QMAT_SHIFT_MMX - 3)); | |
216 /* XXX: currently, this code is not optimal. the range should be: | |
217 mpeg1: -255..255 | |
218 mpeg2: -2048..2047 | |
219 h263: -128..127 | |
220 mpeg4: -2048..2047 | |
221 * jpeg: -1024..1023 11 bit */ | |
222 if (level > 1023) | |
223 level = 1023; | |
224 else if (level < -1024) | |
225 level = -1024; | |
226 block[j] = level; | |
227 last_non_zero = i; | |
228 } else { | |
229 block[j] = 0; | |
230 } | |
231 } | |
232 /*for (i = 0; i < 8; i++) { | |
233 printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2], | |
234 b[8*i+3], b[8*i+4], b[8*i+5], b[8*i+6], b[8*i+7]); | |
235 }*/ | |
236 | |
237 return last_non_zero; | |
238 } | |
239 | |
240 static void convert_matrix(int *qmat, const unsigned short *quant_matrix, | |
241 int qscale) | |
242 { | |
243 int i; | |
244 | |
245 if (av_fdct == jpeg_fdct_ifast) { | |
246 for(i=0;i<64;i++) { | |
247 /* 16 <= qscale * quant_matrix[i] <= 7905 */ | |
248 /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ | |
249 | |
250 qmat[i] = (int)(((unsigned long long)1 << (QMAT_SHIFT + 11)) / | |
251 (aanscales[i] * qscale * quant_matrix[i])); | |
252 } | |
253 } else { | |
254 for(i=0;i<64;i++) { | |
255 /* We can safely suppose that 16 <= quant_matrix[i] <= 255 | |
256 So 16 <= qscale * quant_matrix[i] <= 7905 | |
257 so (1 << QMAT_SHIFT) / 16 >= qmat[i] >= (1 << QMAT_SHIFT) / 7905 | |
258 */ | |
259 qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); | |
260 } | |
261 } | |
262 } | |
263 | |
264 #define SOF0 0xC0 | |
265 #define SOI 0xD8 | |
266 #define EOI 0xD9 | |
267 #define DQT 0xDB | |
268 #define DHT 0xC4 | |
269 #define SOS 0xDA | |
270 | |
271 /* this is almost the quantisation table, used for luminance and chrominance */ | |
272 /*short int zr_default_intra_matrix[64] = { | |
273 16, 11, 10, 16, 24, 40, 51, 61, | |
274 12, 12, 14, 19, 26, 58, 60, 55, | |
275 14, 13, 16, 24, 40, 57, 69, 56, | |
276 14, 17, 22, 29, 51, 87, 80, 62, | |
277 18, 22, 37, 56, 68, 109, 103, 77, | |
278 24, 35, 55, 64, 81, 104, 113, 92, | |
279 49, 64, 78, 87, 103, 121, 120, 101, | |
280 72, 92, 95, 98, 112, 100, 103, 99 | |
281 };*/ | |
282 /* | |
283 short int default_intra_matrix[64] = { | |
284 8, 16, 19, 22, 26, 27, 29, 34, | |
285 16, 16, 22, 24, 27, 29, 34, 37, | |
286 19, 22, 26, 27, 29, 34, 34, 38, | |
287 22, 22, 26, 27, 29, 34, 37, 40, | |
288 22, 26, 27, 29, 32, 35, 40, 48, | |
289 26, 27, 29, 32, 35, 40, 48, 58, | |
290 26, 27, 29, 34, 38, 46, 56, 69, | |
291 27, 29, 35, 38, 46, 56, 69, 83 | |
292 }; | |
293 */ | |
294 extern short int default_intra_matrix[64]; | |
295 | |
296 static short int intra_matrix[64]; | |
297 | |
298 /* Set up the standard Huffman tables (cf. JPEG standard section K.3) */ | |
299 /* IMPORTANT: these are only valid for 8-bit data precision! */ | |
300 static const unsigned char bits_dc_luminance[17] = | |
301 { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; | |
302 static const unsigned char val_dc_luminance[] = | |
303 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; | |
304 | |
305 #if 0 | |
306 static const unsigned char bits_dc_chrominance[17] = | |
307 { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; | |
308 static const unsigned char val_dc_chrominance[] = | |
309 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; | |
310 #endif | |
311 | |
312 static const unsigned char bits_ac_luminance[17] = | |
313 { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d }; | |
314 static const unsigned char val_ac_luminance[] = | |
315 { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, | |
316 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, | |
317 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, | |
318 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, | |
319 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, | |
320 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, | |
321 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, | |
322 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, | |
323 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, | |
324 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, | |
325 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, | |
326 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, | |
327 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, | |
328 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, | |
329 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, | |
330 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, | |
331 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, | |
332 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, | |
333 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, | |
334 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, | |
335 0xf9, 0xfa | |
336 }; | |
337 | |
338 #if 0 | |
339 static const unsigned char bits_ac_chrominance[17] = | |
340 { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 }; | |
341 | |
342 static const unsigned char val_ac_chrominance[] = | |
343 { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, | |
344 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, | |
345 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, | |
346 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, | |
347 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, | |
348 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, | |
349 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, | |
350 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, | |
351 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, | |
352 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, | |
353 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, | |
354 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, | |
355 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, | |
356 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, | |
357 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, | |
358 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, | |
359 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, | |
360 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, | |
361 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, | |
362 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, | |
363 0xf9, 0xfa | |
364 }; | |
365 #endif | |
366 | |
367 static unsigned char huff_size_dc_luminance[12]; | |
368 static unsigned short huff_code_dc_luminance[12]; | |
369 #if 0 | |
370 unsigned char huff_size_dc_chrominance[12]; | |
371 unsigned short huff_code_dc_chrominance[12]; | |
372 #endif | |
373 | |
374 static unsigned char huff_size_ac_luminance[256]; | |
375 static unsigned short huff_code_ac_luminance[256]; | |
376 #if 0 | |
377 unsigned char huff_size_ac_chrominance[256]; | |
378 unsigned short huff_code_ac_chrominance[256]; | |
379 #endif | |
380 | |
381 static int last_dc[3]; | |
382 static int block_last_index[4]; | |
383 | |
/*
 * Derive the canonical Huffman code of every symbol from a JPEG style table
 * description.
 *
 * huff_size  - out: code length in bits, indexed by symbol
 * huff_code  - out: code value, indexed by symbol
 * bits_table - bits_table[len] is the number of codes of length len,
 *              for len = 1..16 (entry 0 is not read)
 * val_table  - the symbols, listed in order of increasing code length
 *
 * Entries of huff_size/huff_code for symbols that are not listed in
 * val_table are left untouched.
 */
static void build_huffman_codes(unsigned char *huff_size,
                                unsigned short *huff_code, const unsigned char *bits_table,
                                const unsigned char *val_table)
{
    const unsigned char *next_symbol = val_table;
    int next_code = 0;
    int length;

    /* moving on to the next code length appends a zero bit to the counter */
    for (length = 1; length <= 16; length++, next_code <<= 1) {
        int remaining = bits_table[length];

        while (remaining-- > 0) {
            int symbol = *next_symbol++;

            huff_size[symbol] = length;
            huff_code[symbol] = next_code++;
        }
    }
}
404 | |
405 static int zr_mjpeg_init() | |
406 { | |
407 /* build all the huffman tables */ | |
408 build_huffman_codes(huff_size_dc_luminance, huff_code_dc_luminance, | |
409 bits_dc_luminance, val_dc_luminance); | |
410 //build_huffman_codes(huff_size_dc_chrominance, huff_code_dc_chrominance, | |
411 // bits_dc_chrominance, val_dc_chrominance); | |
412 build_huffman_codes(huff_size_ac_luminance, huff_code_ac_luminance, | |
413 bits_ac_luminance, val_ac_luminance); | |
414 //build_huffman_codes(huff_size_ac_chrominance, huff_code_ac_chrominance, | |
415 // bits_ac_chrominance, val_ac_chrominance); | |
416 | |
417 return 0; | |
418 } | |
419 | |
/* Counterpart of zr_mjpeg_init(); there is currently nothing to release. */
static void zr_mjpeg_close()
{
    return;
}
423 | |
424 static inline void put_marker(PutBitContext *p, int code) | |
425 { | |
426 put_bits(p, 8, 0xff); | |
427 put_bits(p, 8, code); | |
428 } | |
429 | |
430 /* table_class: 0 = DC coef, 1 = AC coefs */ | |
431 static int put_huffman_table(int table_class, int table_id, | |
432 const unsigned char *bits_table, | |
433 const unsigned char *value_table) | |
434 { | |
435 PutBitContext *p = &pb; | |
436 int n, i; | |
437 | |
438 put_bits(p, 4, table_class); | |
439 put_bits(p, 4, table_id); | |
440 | |
441 n = 0; | |
442 for(i=1;i<=16;i++) { | |
443 n += bits_table[i]; | |
444 put_bits(p, 8, bits_table[i]); | |
445 } | |
446 | |
447 for(i=0;i<n;i++) | |
448 put_bits(p, 8, value_table[i]); | |
449 | |
450 return n + 17; | |
451 } | |
452 | |
453 static void jpeg_qtable_header() | |
454 { | |
455 PutBitContext *p = &pb; | |
456 int i, j, size; | |
457 | |
458 /* quant matrixes */ | |
459 put_marker(p, DQT); | |
460 put_bits(p, 16, 2 + 1 * (1 + 64)); | |
461 put_bits(p, 4, 0); /* 8 bit precision */ | |
462 put_bits(p, 4, 0); /* table 0 */ | |
463 for(i=0;i<64;i++) { | |
464 j = zr_zigzag_direct[i]; | |
465 put_bits(p, 8, intra_matrix[j]); | |
466 } | |
467 } | |
468 | |
469 static void jpeg_htable_header() { | |
470 PutBitContext *p = &pb; | |
471 int i, j, size; | |
472 unsigned char *ptr; | |
473 /* huffman table */ | |
474 put_marker(p, DHT); | |
475 flush_put_bits(p); | |
476 ptr = p->buf_ptr; | |
477 put_bits(p, 16, 0); /* patched later */ | |
478 size = 2; | |
479 size += put_huffman_table(0, 0, bits_dc_luminance, val_dc_luminance); | |
480 // size += put_huffman_table(0, 1, bits_dc_chrominance, val_dc_chrominance); | |
481 | |
482 ptr[0] = size >> 8; | |
483 ptr[1] = size; | |
484 put_marker(p, DHT); | |
485 flush_put_bits(p); | |
486 ptr = p->buf_ptr; | |
487 put_bits(p, 16, 0); /* patched later */ | |
488 size = 2; | |
489 size += put_huffman_table(1, 0, bits_ac_luminance, val_ac_luminance); | |
490 // size += put_huffman_table(1, 1, bits_ac_chrominance, val_ac_chrominance); | |
491 ptr[0] = size >> 8; | |
492 ptr[1] = size; | |
493 } | |
494 | |
495 static void zr_mjpeg_picture_header() | |
496 { | |
497 put_marker(&pb, SOI); | |
498 | |
499 if (first) { | |
500 jpeg_qtable_header(); | |
501 jpeg_htable_header(); | |
502 first = 0; | |
503 } | |
504 put_marker(&pb, SOF0); | |
505 | |
506 put_bits(&pb, 16, 17); | |
507 put_bits(&pb, 8, 8); /* 8 bits/component */ | |
508 put_bits(&pb, 16, height); | |
509 put_bits(&pb, 16, width); | |
510 put_bits(&pb, 8, 3); /* 3 components */ | |
511 | |
512 /* Y component */ | |
513 put_bits(&pb, 8, 0); /* component number */ | |
514 put_bits(&pb, 4, 2); /* H factor */ | |
515 put_bits(&pb, 4, 1); /* V factor */ | |
516 put_bits(&pb, 8, 0); /* select matrix */ | |
517 | |
518 /* Cb component */ | |
519 put_bits(&pb, 8, 1); /* component number */ | |
520 put_bits(&pb, 4, 1); /* H factor */ | |
521 put_bits(&pb, 4, 1); /* V factor */ | |
522 put_bits(&pb, 8, 0); /* select matrix */ | |
523 | |
524 /* Cr component */ | |
525 put_bits(&pb, 8, 2); /* component number */ | |
526 put_bits(&pb, 4, 1); /* H factor */ | |
527 put_bits(&pb, 4, 1); /* V factor */ | |
528 put_bits(&pb, 8, 0); /* select matrix */ | |
529 | |
530 | |
531 /* scan header */ | |
532 put_marker(&pb, SOS); | |
533 put_bits(&pb, 16, 12); /* length */ | |
534 put_bits(&pb, 8, 3); /* 3 components */ | |
535 | |
536 /* Y component */ | |
537 put_bits(&pb, 8, 0); /* index */ | |
538 put_bits(&pb, 4, 0); /* DC huffman table index */ | |
539 put_bits(&pb, 4, 0); /* AC huffman table index */ | |
540 | |
541 /* Cb component */ | |
542 put_bits(&pb, 8, 1); /* index */ | |
543 put_bits(&pb, 4, 0); /* DC huffman table index */ | |
544 put_bits(&pb, 4, 0); /* AC huffman table index */ | |
545 | |
546 /* Cr component */ | |
547 put_bits(&pb, 8, 2); /* index */ | |
548 put_bits(&pb, 4, 0); /* DC huffman table index */ | |
549 put_bits(&pb, 4, 0); /* AC huffman table index */ | |
550 | |
551 put_bits(&pb, 8, 0); /* Ss (not used) */ | |
552 put_bits(&pb, 8, 63); /* Se (not used) */ | |
553 put_bits(&pb, 8, 0); /* (not used) */ | |
554 } | |
555 | |
556 static void zr_flush_buffer(PutBitContext *s) | |
557 { | |
558 int size; | |
559 if (s->write_data) { | |
560 size = s->buf_ptr - s->buf; | |
561 if (size > 0) | |
562 s->write_data(s->opaque, s->buf, size); | |
563 s->buf_ptr = s->buf; | |
564 s->data_out_size += size; | |
565 } | |
566 } | |
567 | |
568 /* pad the end of the output stream with ones */ | |
569 static void zr_jflush_put_bits(PutBitContext *s) | |
570 { | |
571 unsigned int b; | |
572 s->bit_buf |= ~1U >> s->bit_cnt; /* set all the unused bits to one */ | |
573 | |
574 while (s->bit_cnt > 0) { | |
575 b = s->bit_buf >> 24; | |
576 *s->buf_ptr++ = b; | |
577 if (b == 0xff) | |
578 *s->buf_ptr++ = 0; | |
579 s->bit_buf<<=8; | |
580 s->bit_cnt-=8; | |
581 } | |
582 zr_flush_buffer(s); | |
583 s->bit_cnt=0; | |
584 s->bit_buf=0; | |
585 } | |
586 | |
587 static void zr_mjpeg_picture_trailer() | |
588 { | |
589 zr_jflush_put_bits(&pb); | |
590 put_marker(&pb, EOI); | |
591 } | |
592 | |
593 static inline void encode_dc(int val, unsigned char *huff_size, | |
594 unsigned short *huff_code) | |
595 { | |
596 int mant, nbits; | |
597 | |
598 if (val == 0) { | |
599 // printf("dc val=0 "); | |
600 jput_bits(&pb, huff_size[0], huff_code[0]); | |
601 //printf("dc encoding %d %d\n", huff_size[0], huff_code[0]); | |
602 } else { | |
603 mant = val; | |
604 if (val < 0) { | |
605 val = -val; | |
606 mant--; | |
607 } | |
608 | |
609 /* compute the log (XXX: optimize) */ | |
610 nbits = 0; | |
611 while (val != 0) { | |
612 val = val >> 1; | |
613 nbits++; | |
614 } | |
615 /*nbits = av_log2(val);*/ | |
616 | |
617 //printf("dc "); | |
618 jput_bits(&pb, huff_size[nbits], huff_code[nbits]); | |
619 //printf("dc encoding %d %d\n", huff_size[nbits], huff_code[nbits]); | |
620 | |
621 //printf("dc "); | |
622 jput_bits(&pb, nbits, mant & ((1 << nbits) - 1)); | |
623 //printf("dc encoding %d %d\n", huff_size[nbits], huff_code[nbits]); | |
624 } | |
625 } | |
626 | |
627 static void encode_block(DCTELEM *b, int n) | |
628 { | |
629 int mant, nbits, code, i, j; | |
630 int component, dc, run, last_index, val; | |
631 unsigned char *huff_size_ac; | |
632 unsigned short *huff_code_ac; | |
633 | |
634 /* DC coef */ | |
635 component = (n <= 1 ? 0 : n - 2 + 1); | |
636 dc = b[0]; /* overflow is impossible */ | |
637 /*for (i = 0; i < 8; i++) { | |
638 printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2], | |
639 b[8*i+3], b[8*i+4], b[8+i*5], b[8+i*6], b[8+i*7]); | |
640 }*/ | |
641 val = dc - last_dc[component]; | |
642 //if (n < 2) { | |
643 encode_dc(val, huff_size_dc_luminance, huff_code_dc_luminance); | |
644 huff_size_ac = huff_size_ac_luminance; | |
645 huff_code_ac = huff_code_ac_luminance; | |
646 //} else { | |
647 // encode_dc(val, huff_size_dc_chrominance, huff_code_dc_chrominance); | |
648 // huff_size_ac = huff_size_ac_chrominance; | |
649 // huff_code_ac = huff_code_ac_chrominance; | |
650 //} | |
651 last_dc[component] = dc; | |
652 | |
653 /* AC coefs */ | |
654 | |
655 run = 0; | |
656 last_index = block_last_index[n]; | |
657 for(i=1;i<=last_index;i++) { | |
658 j = zr_zigzag_direct[i]; | |
659 val = b[j]; | |
660 if (val == 0) { | |
661 run++; | |
662 } else { | |
663 while (run >= 16) { | |
664 //printf("ac 16 white "); | |
665 jput_bits(&pb, huff_size_ac[0xf0], huff_code_ac[0xf0]); | |
666 run -= 16; | |
667 } | |
668 mant = val; | |
669 if (val < 0) { | |
670 val = -val; | |
671 mant--; | |
672 } | |
673 | |
674 /* compute the log (XXX: optimize) */ | |
675 nbits = 0; | |
676 while (val != 0) { | |
677 val = val >> 1; | |
678 nbits++; | |
679 } | |
680 code = (run << 4) | nbits; | |
681 | |
682 //printf("ac "); | |
683 jput_bits(&pb, huff_size_ac[code], huff_code_ac[code]); | |
684 | |
685 //printf("ac "); | |
686 jput_bits(&pb, nbits, mant & ((1 << nbits) - 1)); | |
687 run = 0; | |
688 } | |
689 } | |
690 | |
691 /* output EOB only if not already 64 values */ | |
692 if (last_index < 63 || run != 0) { | |
693 //printf("ac EOB "); | |
694 jput_bits(&pb, huff_size_ac[0], huff_code_ac[0]); | |
695 } | |
696 } | |
697 | |
698 static void zr_mjpeg_encode_mb(DCTELEM **bla) | |
699 { | |
700 encode_block(*(bla), 0); | |
701 encode_block(*(bla+1), 1); | |
702 if (bw) { | |
703 jput_bits(&pb, 12, 512+128+8+2); /* 2 times code for 'no color' | |
704 * 001010001010 */ | |
705 } else { | |
706 encode_block(*(bla+2), 2); | |
707 encode_block(*(bla+3), 3); | |
708 } | |
709 } | |
710 | |
711 static int mb_width, mb_height, mb_x, mb_y; | |
712 static unsigned char *y_data, *u_data, *v_data; | |
713 static int y_ps, u_ps, v_ps, y_rs, u_rs, v_rs; | |
714 static char code[256*1024]; // 256kb! | |
715 /* this function can take all kinds of YUV colorspaces | |
716 * YV12, YVYU, UYVY. The necesary parameters must be set up by te caller | |
717 * y_ps means "y pixel size", y_rs means "y row size". | |
718 * For YUYV, for example, is u = y + 1, v = y + 3, y_ps = 2, u_ps = 4 | |
719 * v_ps = 4, y_rs = u_rs = v_rs. | |
720 * | |
721 * The data is straightened out at the moment it is put in DCT | |
722 * blocks, there are therefore no spurious memcopies involved */ | |
723 /* Notice that w must be a multiple of 16 and h must be a multiple of | |
724 * fields*8 */ | |
725 /* We produce YUV422 jpegs, the colors must be subsampled horizontally, | |
726 * if the colors are also subsampled vertically, then this function | |
727 * performs cheap upsampling (better solution will be: a DCT that is | |
728 * optimized in the case that every two rows are the same) */ | |
729 /* cu = 0 means 'No cheap upsampling' | |
730 * cu = 1 means 'perform cheap upsampling' */ | |
731 void mjpeg_encoder_init(int w, int h, | |
732 unsigned char* y, int y_psize, int y_rsize, | |
733 unsigned char* u, int u_psize, int u_rsize, | |
734 unsigned char* v, int v_psize, int v_rsize, | |
735 int f, int cu, int q, int b) { | |
736 int i; | |
737 mp_msg(MSGT_VO, MSGL_V, "JPEnc init: %dx%d %p %d %d %p %d %d %p %d %d\n", | |
738 w, h, y, y_psize, y_rsize, | |
739 u, u_psize, u_rsize, | |
740 v, v_psize, v_rsize); | |
741 y_data = y; u_data = u; v_data = v; | |
742 y_ps = y_psize; u_ps = u_psize; v_ps = v_psize; | |
743 y_rs = y_rsize*f; | |
744 u_rs = u_rsize*f; | |
745 v_rs = v_rsize*f; | |
746 width = w; | |
747 height = h/f; | |
748 fields = f; | |
749 qscale = q; | |
750 cheap_upsample = cu; | |
751 mb_width = width/16; | |
752 mb_height = height/8; | |
753 bw = b; | |
754 zr_mjpeg_init(); | |
755 i = 0; | |
756 intra_matrix[0] = default_intra_matrix[0]; | |
757 for (i = 1; i < 64; i++) { | |
758 intra_matrix[i] = (default_intra_matrix[i]*qscale) >> 3; | |
759 } | |
760 if ( | |
761 #ifdef HAVE_MMX | |
762 av_fdct != fdct_mmx && | |
763 #endif | |
764 av_fdct != jpeg_fdct_ifast) { | |
765 /* libavcodec is probably not yet initialized */ | |
766 av_fdct = jpeg_fdct_ifast; | |
767 #ifdef HAVE_MMX | |
768 dsputil_init_mmx(); | |
769 #endif | |
770 } | |
771 convert_matrix(q_intra_matrix, intra_matrix, 8); | |
772 blck = malloc(4*sizeof(DCTELEM*)); | |
773 blck[0] = malloc(64*sizeof(DCTELEM)); | |
774 blck[1] = malloc(64*sizeof(DCTELEM)); | |
775 blck[2] = malloc(64*sizeof(DCTELEM)); | |
776 blck[3] = malloc(64*sizeof(DCTELEM)); | |
777 } | |
778 | |
779 int mjpeg_encode_frame(char *bufr, int field) { | |
780 int i, j, k, l; | |
781 short int *dest; | |
782 unsigned char *source; | |
783 /* initialize the buffer */ | |
784 if (field == 1) { | |
785 y_data += y_rs/2; | |
786 u_data += u_rs/2; | |
787 v_data += v_rs/2; | |
788 } | |
789 init_put_bits(&pb, bufr, 1024*256, NULL, NULL); | |
790 | |
791 zr_mjpeg_picture_header(); | |
792 | |
793 last_dc[0] = 128; last_dc[1] = 128; last_dc[2] = 128; | |
794 mb_x = 0; | |
795 mb_y = 0; | |
796 for (mb_y = 0; mb_y < mb_height; mb_y++) { | |
797 for (mb_x = 0; mb_x < mb_width; mb_x++) { | |
798 //printf("Processing macroblock mb_x=%d, mb_y=%d, mb_width=%d, mb_height=%d, size=%d\n", mb_x, mb_y, mb_width, mb_height, pb.buf_ptr - pb.buf); | |
799 /* fill 2 Y macroblocks and one U and one V */ | |
800 source = mb_y * 8 * y_rs + 16 * y_ps * mb_x + y_data; | |
801 dest = blck[0]; | |
802 for (i = 0; i < 8; i++) { | |
803 for (j = 0; j < 8; j++) { | |
804 dest[j] = source[j*y_ps]; | |
805 } | |
806 dest += 8; | |
807 source += y_rs; | |
808 } | |
809 source = mb_y * 8 * y_rs + (16*mb_x + 8)*y_ps + y_data; | |
810 dest = blck[1]; | |
811 for (i = 0; i < 8; i++) { | |
812 for (j = 0; j < 8; j++) { | |
813 dest[j] = source[j*y_ps]; | |
814 } | |
815 dest += 8; | |
816 source += y_rs; | |
817 } | |
818 if (!bw) { | |
819 if (cheap_upsample) { | |
820 source = mb_y*4*u_rs + 8*mb_x*u_ps + u_data; | |
821 dest = blck[2]; | |
822 for (i = 0; i < 4; i++) { | |
823 for (j = 0; j < 8; j++) { | |
824 dest[j] = source[j*u_ps]; | |
825 dest[j+8] = source[j*u_ps]; | |
826 } | |
827 dest += 16; | |
828 source += u_rs; | |
829 } | |
830 source = mb_y*4*v_rs + 8*mb_x*v_ps + v_data; | |
831 dest = blck[3]; | |
832 for (i = 0; i < 4; i++) { | |
833 for (j = 0; j < 8; j++) { | |
834 dest[j] = source[j*v_ps]; | |
835 dest[j+8] = source[j*v_ps]; | |
836 } | |
837 dest += 16; | |
838 source += u_rs; | |
839 } | |
840 } else { | |
841 source = mb_y*8*u_rs + 8*mb_x*u_ps + u_data; | |
842 dest = blck[2]; | |
843 for (i = 0; i < 8; i++) { | |
844 for (j = 0; j < 8; j++) { | |
845 dest[j] = source[j*u_ps]; | |
846 } | |
847 dest += 8; | |
848 source += u_rs; | |
849 } | |
850 source = mb_y*8*v_rs + 8*mb_x*v_ps + v_data; | |
851 dest = blck[3]; | |
852 for (i = 0; i < 8; i++) { | |
853 for (j = 0; j < 8; j++) { | |
854 dest[j] = source[j*v_ps]; | |
855 } | |
856 dest += 8; | |
857 source += u_rs; | |
858 } | |
859 } | |
860 } | |
861 /* so, **blck is filled now... */ | |
862 | |
863 for(i = 0; i < 2; i++) { | |
864 if (av_fdct == jpeg_fdct_ifast) | |
865 block_last_index[i] = | |
866 dct_quantize(blck[i], | |
867 i, qscale); | |
868 else | |
869 block_last_index[i] = | |
870 dct_quantize_mmx(blck[i], | |
871 i, qscale); | |
872 } | |
873 if (!bw) { | |
874 for(i = 2; i < 4; i++) { | |
875 if (av_fdct == jpeg_fdct_ifast) | |
876 block_last_index[i] = | |
877 dct_quantize(blck[i], | |
878 i, qscale); | |
879 else | |
880 block_last_index[i] = | |
881 dct_quantize_mmx(blck[i], | |
882 i, qscale); | |
883 } | |
884 } | |
885 zr_mjpeg_encode_mb(blck); | |
886 } | |
887 } | |
888 emms_c(); | |
889 zr_mjpeg_picture_trailer(); | |
890 flush_put_bits(&pb); | |
891 zr_mjpeg_close(); | |
892 if (field == 1) { | |
893 y_data -= y_rs/2; | |
894 u_data -= u_rs/2; | |
895 v_data -= v_rs/2; | |
896 } | |
897 return pb.buf_ptr - pb.buf; | |
898 } | |
899 |