comparison libvo/jpeg_enc.c @ 4347:d036a9992baf

fast 4:2:2 jpeg encoder, based on libavcodec. - patch by Rik Snel <rsnel@cube.dyndns.org>
author arpi
date Sat, 26 Jan 2002 00:52:59 +0000
parents
children 49f2bc22f880
comparison
equal deleted inserted replaced
4346:d45744794581 4347:d036a9992baf
1 /* Straightforward (to be) optimized JPEG encoder for the YUV422 format
2 * based on mjpeg code from ffmpeg.
3 *
4 * Copyright (c) 2002, Rik Snel
5 * Parts from ffmpeg Copyright (c) 2000, 2001 Gerard Lantau
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * For an excellent introduction to the JPEG format, see:
22 * http://www.ece.purdue.edu/~bourman/grad-labs/lab8/pdf/lab.pdf
23 */
24
25
26 /* stuff from libavcodec/common.h */
27
28 #include <sys/types.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include "config.h"
32 #ifdef USE_FASTMEMCPY
33 #include "fastmemcpy.h"
34 #endif
35 #include "../mp_msg.h"
36 #include "../libavcodec/common.h"
37 #include "../libavcodec/dsputil.h"
38
39
40 static int height, width, fields, cheap_upsample, qscale, bw = 0, first = 1;
41
42 /* from dsputils.c */
43
44 static DCTELEM **blck;
45
46 extern void (*av_fdct)(DCTELEM *b);
47
48 static UINT8 zr_zigzag_direct[64] = {
49 0, 1, 8, 16, 9, 2, 3, 10,
50 17, 24, 32, 25, 18, 11, 4, 5,
51 12, 19, 26, 33, 40, 48, 41, 34,
52 27, 20, 13, 6, 7, 14, 21, 28,
53 35, 42, 49, 56, 57, 50, 43, 36,
54 29, 22, 15, 23, 30, 37, 44, 51,
55 58, 59, 52, 45, 38, 31, 39, 46,
56 53, 60, 61, 54, 47, 55, 62, 63
57 };
58
59 /* bit output */
60
61 static PutBitContext pb;
62
63 /* from mpegvideo.c */
64
/* Fixed-point precision of the reciprocal quantiser tables. */
#define QMAT_SHIFT 25
#define QMAT_SHIFT_MMX 19

/* Per-coefficient scale factors, precomputed and scaled up by 14 bits. */
static const unsigned short aanscales[64] = {
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};


/* Coefficient permutation table; not referenced by the code in this file. */
static unsigned int simple_mmx_permutation[64]={
    0x00, 0x08, 0x01, 0x09, 0x04, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x11, 0x19, 0x14, 0x1C, 0x15, 0x1D,
    0x02, 0x0A, 0x03, 0x0B, 0x06, 0x0E, 0x07, 0x0F,
    0x12, 0x1A, 0x13, 0x1B, 0x16, 0x1E, 0x17, 0x1F,
    0x20, 0x28, 0x21, 0x29, 0x24, 0x2C, 0x25, 0x2D,
    0x30, 0x38, 0x31, 0x39, 0x34, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x23, 0x2B, 0x26, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x33, 0x3B, 0x36, 0x3E, 0x37, 0x3F,
};
91
#if 0
/*
 * Disabled helper: in each of the eight rows of an 8x8 block, rearranges
 * coefficients 1..6 into the order 2, 4, 6, 1, 3, 5.  Columns 0 and 7 are
 * left untouched.
 */
void block_permute(short int *block)
{
    short int *row;

    for (row = block; row < block + 64; row += 8) {
        const int c1 = row[1], c2 = row[2], c3 = row[3];
        const int c4 = row[4], c5 = row[5], c6 = row[6];

        row[1] = c2;
        row[2] = c4;
        row[3] = c6;
        row[4] = c1;
        row[5] = c3;
        row[6] = c5;
    }
}
#endif
115
116 static int q_intra_matrix[64];
117
118 static int dct_quantize(DCTELEM *block, int n,
119 int qscale)
120 {
121 int i, j, level, last_non_zero, q;
122 const int *qmat;
123
124 av_fdct (block);
125
126 /* we need this permutation so that we correct the IDCT
127 permutation. will be moved into DCT code */
128 //block_permute(block);
129
130 /*if (n < 4)
131 q = s->y_dc_scale;
132 else
133 q = s->c_dc_scale;
134 q = q << 3;*/
135 q = 64;
136 /* note: block[0] is assumed to be positive */
137 block[0] = (block[0] + (q >> 1)) / q;
138 i = 1;
139 last_non_zero = 0;
140
141 qmat = q_intra_matrix;
142 for(;i<64;i++) {
143 j = zr_zigzag_direct[i];
144 level = block[j];
145 level = level * qmat[j];
146 /* XXX: slight error for the low range. Test should be equivalent to
147 (level <= -(1 << (QMAT_SHIFT - 3)) || level >= (1 <<
148 (QMAT_SHIFT - 3)))
149 */
150 if (((level << (31 - (QMAT_SHIFT - 3))) >> (31 - (QMAT_SHIFT - 3))) !=
151 level) {
152 level = level / (1 << (QMAT_SHIFT - 3));
153 /* XXX: currently, this code is not optimal. the range should be:
154 mpeg1: -255..255
155 mpeg2: -2048..2047
156 h263: -128..127
157 mpeg4: -2048..2047
158 */
159 if (level > 255)
160 level = 255;
161 else if (level < -255)
162 level = -255;
163 block[j] = level;
164 last_non_zero = i;
165 } else {
166 block[j] = 0;
167 }
168
169 }
170 return last_non_zero;
171 }
172
173 static int dct_quantize_mmx(DCTELEM *block, int n, int qscale)
174 {
175 int i, j, level, last_non_zero, q;
176 const int *qmat;
177 DCTELEM *b = block;
178
179 /*for (i = 0; i < 8; i++) {
180 printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2],
181 b[8*i+3], b[8*i+4], b[8*i+5], b[8*i+6], b[8*i+7]);
182 }*/
183 av_fdct (block);
184 /*for (i = 0; i < 8; i++) {
185 printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2],
186 b[8*i+3], b[8*i+4], b[8*i+5], b[8*i+6], b[8*i+7]);
187 }*/
188
189
190 /* we need this permutation so that we correct the IDCT
191 permutation. will be moved into DCT code */
192 //block_permute(block);
193
194 //if (n < 2)
195 q = 8;
196 /*else
197 q = 8;*/
198
199 /* note: block[0] is assumed to be positive */
200 block[0] = (block[0] + (q >> 1)) / q;
201 i = 1;
202 last_non_zero = 0;
203 qmat = q_intra_matrix;
204
205 for(;i<64;i++) {
206 j = zr_zigzag_direct[i];
207 level = block[j];
208 level = level * qmat[j];
209 /* XXX: slight error for the low range. Test should be equivalent to
210 (level <= -(1 << (QMAT_SHIFT_MMX - 3)) || level >= (1 <<
211 (QMAT_SHIFT_MMX - 3)))
212 */
213 if (((level << (31 - (QMAT_SHIFT_MMX - 3))) >> (31 - (QMAT_SHIFT_MMX - 3))) !=
214 level) {
215 level = level / (1 << (QMAT_SHIFT_MMX - 3));
216 /* XXX: currently, this code is not optimal. the range should be:
217 mpeg1: -255..255
218 mpeg2: -2048..2047
219 h263: -128..127
220 mpeg4: -2048..2047
221 * jpeg: -1024..1023 11 bit */
222 if (level > 1023)
223 level = 1023;
224 else if (level < -1024)
225 level = -1024;
226 block[j] = level;
227 last_non_zero = i;
228 } else {
229 block[j] = 0;
230 }
231 }
232 /*for (i = 0; i < 8; i++) {
233 printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2],
234 b[8*i+3], b[8*i+4], b[8*i+5], b[8*i+6], b[8*i+7]);
235 }*/
236
237 return last_non_zero;
238 }
239
240 static void convert_matrix(int *qmat, const unsigned short *quant_matrix,
241 int qscale)
242 {
243 int i;
244
245 if (av_fdct == jpeg_fdct_ifast) {
246 for(i=0;i<64;i++) {
247 /* 16 <= qscale * quant_matrix[i] <= 7905 */
248 /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
249
250 qmat[i] = (int)(((unsigned long long)1 << (QMAT_SHIFT + 11)) /
251 (aanscales[i] * qscale * quant_matrix[i]));
252 }
253 } else {
254 for(i=0;i<64;i++) {
255 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
256 So 16 <= qscale * quant_matrix[i] <= 7905
257 so (1 << QMAT_SHIFT) / 16 >= qmat[i] >= (1 << QMAT_SHIFT) / 7905
258 */
259 qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
260 }
261 }
262 }
263
/* Second byte of the JPEG markers written by this encoder (first is 0xff). */
#define SOI  0xD8  /* written first by zr_mjpeg_picture_header() */
#define DQT  0xDB  /* quantisation table segment */
#define DHT  0xC4  /* Huffman table segment */
#define SOF0 0xC0  /* frame header */
#define SOS  0xDA  /* scan header */
#define EOI  0xD9  /* written by zr_mjpeg_picture_trailer() */
270
271 /* this is almost the quantisation table, used for luminance and chrominance */
272 /*short int zr_default_intra_matrix[64] = {
273 16, 11, 10, 16, 24, 40, 51, 61,
274 12, 12, 14, 19, 26, 58, 60, 55,
275 14, 13, 16, 24, 40, 57, 69, 56,
276 14, 17, 22, 29, 51, 87, 80, 62,
277 18, 22, 37, 56, 68, 109, 103, 77,
278 24, 35, 55, 64, 81, 104, 113, 92,
279 49, 64, 78, 87, 103, 121, 120, 101,
280 72, 92, 95, 98, 112, 100, 103, 99
281 };*/
282 /*
283 short int default_intra_matrix[64] = {
284 8, 16, 19, 22, 26, 27, 29, 34,
285 16, 16, 22, 24, 27, 29, 34, 37,
286 19, 22, 26, 27, 29, 34, 34, 38,
287 22, 22, 26, 27, 29, 34, 37, 40,
288 22, 26, 27, 29, 32, 35, 40, 48,
289 26, 27, 29, 32, 35, 40, 48, 58,
290 26, 27, 29, 34, 38, 46, 56, 69,
291 27, 29, 35, 38, 46, 56, 69, 83
292 };
293 */
/* Base quantisation matrix; defined outside this file. */
extern short int default_intra_matrix[64];

/* Quantisation matrix in use: default_intra_matrix scaled by qscale in
 * mjpeg_encoder_init(); also what jpeg_qtable_header() writes out. */
static short int intra_matrix[64];
297
/* Standard Huffman tables (cf. JPEG standard section K.3). */
/* IMPORTANT: these are only valid for 8-bit data precision! */

/* bits_*[i] is the number of codes of length i; entry 0 is unused.
 * val_* lists the symbols in order of increasing code length. */
static const unsigned char bits_dc_luminance[17] = {
    /* 0-base */ 0,
    0, 1, 5, 1, 1, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0
};
static const unsigned char val_dc_luminance[] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};

#if 0
static const unsigned char bits_dc_chrominance[17] = {
    /* 0-base */ 0,
    0, 3, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 0, 0, 0, 0, 0
};
static const unsigned char val_dc_chrominance[] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};
#endif

static const unsigned char bits_ac_luminance[17] = {
    /* 0-base */ 0,
    0, 2, 1, 3, 3, 2, 4, 3,
    5, 5, 4, 4, 0, 0, 1, 0x7d
};
static const unsigned char val_ac_luminance[] = {
    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};
337
#if 0
/* Disabled: chrominance AC table; the encoder uses the luminance tables for
 * every component. */
static const unsigned char bits_ac_chrominance[17] = {
    /* 0-base */ 0,
    0, 2, 1, 2, 4, 4, 3, 4,
    7, 5, 4, 4, 0, 1, 2, 0x77
};

static const unsigned char val_ac_chrominance[] = {
    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};
#endif
366
/* Symbol -> code length / code value lookups, filled by build_huffman_codes()
 * from zr_mjpeg_init(). */
static unsigned char  huff_size_dc_luminance[12];
static unsigned short huff_code_dc_luminance[12];
#if 0
unsigned char  huff_size_dc_chrominance[12];
unsigned short huff_code_dc_chrominance[12];
#endif

static unsigned char  huff_size_ac_luminance[256];
static unsigned short huff_code_ac_luminance[256];
#if 0
unsigned char  huff_size_ac_chrominance[256];
unsigned short huff_code_ac_chrominance[256];
#endif

/* DC value of the previously encoded block, one entry per colour component. */
static int last_dc[3];
/* Value returned by the quantiser for each of the four macroblock blocks. */
static int block_last_index[4];
383
/*
 * Expand a (bits, values) Huffman table description into per-symbol lookups.
 *
 * bits_table[len], for len = 1..16, is the number of codes that are len bits
 * long; val_table lists the symbols in the order the codes are assigned.
 * For every listed symbol, huff_size[symbol] receives the code length and
 * huff_code[symbol] the code value.  Entries of symbols that are not listed
 * are left untouched.
 */
static void build_huffman_codes(unsigned char *huff_size,
                                unsigned short *huff_code,
                                const unsigned char *bits_table,
                                const unsigned char *val_table)
{
    const unsigned char *next_symbol = val_table;
    int next_code = 0;
    int length;

    for (length = 1; length <= 16; length++) {
        int remaining = bits_table[length];

        while (remaining-- > 0) {
            const int symbol = *next_symbol++;

            huff_size[symbol] = length;
            huff_code[symbol] = next_code++;
        }
        /* Codes of the next length continue from here, one bit longer. */
        next_code <<= 1;
    }
}
404
405 static int zr_mjpeg_init()
406 {
407 /* build all the huffman tables */
408 build_huffman_codes(huff_size_dc_luminance, huff_code_dc_luminance,
409 bits_dc_luminance, val_dc_luminance);
410 //build_huffman_codes(huff_size_dc_chrominance, huff_code_dc_chrominance,
411 // bits_dc_chrominance, val_dc_chrominance);
412 build_huffman_codes(huff_size_ac_luminance, huff_code_ac_luminance,
413 bits_ac_luminance, val_ac_luminance);
414 //build_huffman_codes(huff_size_ac_chrominance, huff_code_ac_chrominance,
415 // bits_ac_chrominance, val_ac_chrominance);
416
417 return 0;
418 }
419
/* Counterpart of zr_mjpeg_init(); there is currently nothing to release. */
static void zr_mjpeg_close()
{
    /* intentionally empty */
}
423
424 static inline void put_marker(PutBitContext *p, int code)
425 {
426 put_bits(p, 8, 0xff);
427 put_bits(p, 8, code);
428 }
429
430 /* table_class: 0 = DC coef, 1 = AC coefs */
431 static int put_huffman_table(int table_class, int table_id,
432 const unsigned char *bits_table,
433 const unsigned char *value_table)
434 {
435 PutBitContext *p = &pb;
436 int n, i;
437
438 put_bits(p, 4, table_class);
439 put_bits(p, 4, table_id);
440
441 n = 0;
442 for(i=1;i<=16;i++) {
443 n += bits_table[i];
444 put_bits(p, 8, bits_table[i]);
445 }
446
447 for(i=0;i<n;i++)
448 put_bits(p, 8, value_table[i]);
449
450 return n + 17;
451 }
452
453 static void jpeg_qtable_header()
454 {
455 PutBitContext *p = &pb;
456 int i, j, size;
457
458 /* quant matrixes */
459 put_marker(p, DQT);
460 put_bits(p, 16, 2 + 1 * (1 + 64));
461 put_bits(p, 4, 0); /* 8 bit precision */
462 put_bits(p, 4, 0); /* table 0 */
463 for(i=0;i<64;i++) {
464 j = zr_zigzag_direct[i];
465 put_bits(p, 8, intra_matrix[j]);
466 }
467 }
468
469 static void jpeg_htable_header() {
470 PutBitContext *p = &pb;
471 int i, j, size;
472 unsigned char *ptr;
473 /* huffman table */
474 put_marker(p, DHT);
475 flush_put_bits(p);
476 ptr = p->buf_ptr;
477 put_bits(p, 16, 0); /* patched later */
478 size = 2;
479 size += put_huffman_table(0, 0, bits_dc_luminance, val_dc_luminance);
480 // size += put_huffman_table(0, 1, bits_dc_chrominance, val_dc_chrominance);
481
482 ptr[0] = size >> 8;
483 ptr[1] = size;
484 put_marker(p, DHT);
485 flush_put_bits(p);
486 ptr = p->buf_ptr;
487 put_bits(p, 16, 0); /* patched later */
488 size = 2;
489 size += put_huffman_table(1, 0, bits_ac_luminance, val_ac_luminance);
490 // size += put_huffman_table(1, 1, bits_ac_chrominance, val_ac_chrominance);
491 ptr[0] = size >> 8;
492 ptr[1] = size;
493 }
494
495 static void zr_mjpeg_picture_header()
496 {
497 put_marker(&pb, SOI);
498
499 if (first) {
500 jpeg_qtable_header();
501 jpeg_htable_header();
502 first = 0;
503 }
504 put_marker(&pb, SOF0);
505
506 put_bits(&pb, 16, 17);
507 put_bits(&pb, 8, 8); /* 8 bits/component */
508 put_bits(&pb, 16, height);
509 put_bits(&pb, 16, width);
510 put_bits(&pb, 8, 3); /* 3 components */
511
512 /* Y component */
513 put_bits(&pb, 8, 0); /* component number */
514 put_bits(&pb, 4, 2); /* H factor */
515 put_bits(&pb, 4, 1); /* V factor */
516 put_bits(&pb, 8, 0); /* select matrix */
517
518 /* Cb component */
519 put_bits(&pb, 8, 1); /* component number */
520 put_bits(&pb, 4, 1); /* H factor */
521 put_bits(&pb, 4, 1); /* V factor */
522 put_bits(&pb, 8, 0); /* select matrix */
523
524 /* Cr component */
525 put_bits(&pb, 8, 2); /* component number */
526 put_bits(&pb, 4, 1); /* H factor */
527 put_bits(&pb, 4, 1); /* V factor */
528 put_bits(&pb, 8, 0); /* select matrix */
529
530
531 /* scan header */
532 put_marker(&pb, SOS);
533 put_bits(&pb, 16, 12); /* length */
534 put_bits(&pb, 8, 3); /* 3 components */
535
536 /* Y component */
537 put_bits(&pb, 8, 0); /* index */
538 put_bits(&pb, 4, 0); /* DC huffman table index */
539 put_bits(&pb, 4, 0); /* AC huffman table index */
540
541 /* Cb component */
542 put_bits(&pb, 8, 1); /* index */
543 put_bits(&pb, 4, 0); /* DC huffman table index */
544 put_bits(&pb, 4, 0); /* AC huffman table index */
545
546 /* Cr component */
547 put_bits(&pb, 8, 2); /* index */
548 put_bits(&pb, 4, 0); /* DC huffman table index */
549 put_bits(&pb, 4, 0); /* AC huffman table index */
550
551 put_bits(&pb, 8, 0); /* Ss (not used) */
552 put_bits(&pb, 8, 63); /* Se (not used) */
553 put_bits(&pb, 8, 0); /* (not used) */
554 }
555
556 static void zr_flush_buffer(PutBitContext *s)
557 {
558 int size;
559 if (s->write_data) {
560 size = s->buf_ptr - s->buf;
561 if (size > 0)
562 s->write_data(s->opaque, s->buf, size);
563 s->buf_ptr = s->buf;
564 s->data_out_size += size;
565 }
566 }
567
568 /* pad the end of the output stream with ones */
569 static void zr_jflush_put_bits(PutBitContext *s)
570 {
571 unsigned int b;
572 s->bit_buf |= ~1U >> s->bit_cnt; /* set all the unused bits to one */
573
574 while (s->bit_cnt > 0) {
575 b = s->bit_buf >> 24;
576 *s->buf_ptr++ = b;
577 if (b == 0xff)
578 *s->buf_ptr++ = 0;
579 s->bit_buf<<=8;
580 s->bit_cnt-=8;
581 }
582 zr_flush_buffer(s);
583 s->bit_cnt=0;
584 s->bit_buf=0;
585 }
586
587 static void zr_mjpeg_picture_trailer()
588 {
589 zr_jflush_put_bits(&pb);
590 put_marker(&pb, EOI);
591 }
592
593 static inline void encode_dc(int val, unsigned char *huff_size,
594 unsigned short *huff_code)
595 {
596 int mant, nbits;
597
598 if (val == 0) {
599 // printf("dc val=0 ");
600 jput_bits(&pb, huff_size[0], huff_code[0]);
601 //printf("dc encoding %d %d\n", huff_size[0], huff_code[0]);
602 } else {
603 mant = val;
604 if (val < 0) {
605 val = -val;
606 mant--;
607 }
608
609 /* compute the log (XXX: optimize) */
610 nbits = 0;
611 while (val != 0) {
612 val = val >> 1;
613 nbits++;
614 }
615 /*nbits = av_log2(val);*/
616
617 //printf("dc ");
618 jput_bits(&pb, huff_size[nbits], huff_code[nbits]);
619 //printf("dc encoding %d %d\n", huff_size[nbits], huff_code[nbits]);
620
621 //printf("dc ");
622 jput_bits(&pb, nbits, mant & ((1 << nbits) - 1));
623 //printf("dc encoding %d %d\n", huff_size[nbits], huff_code[nbits]);
624 }
625 }
626
627 static void encode_block(DCTELEM *b, int n)
628 {
629 int mant, nbits, code, i, j;
630 int component, dc, run, last_index, val;
631 unsigned char *huff_size_ac;
632 unsigned short *huff_code_ac;
633
634 /* DC coef */
635 component = (n <= 1 ? 0 : n - 2 + 1);
636 dc = b[0]; /* overflow is impossible */
637 /*for (i = 0; i < 8; i++) {
638 printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2],
639 b[8*i+3], b[8*i+4], b[8+i*5], b[8+i*6], b[8+i*7]);
640 }*/
641 val = dc - last_dc[component];
642 //if (n < 2) {
643 encode_dc(val, huff_size_dc_luminance, huff_code_dc_luminance);
644 huff_size_ac = huff_size_ac_luminance;
645 huff_code_ac = huff_code_ac_luminance;
646 //} else {
647 // encode_dc(val, huff_size_dc_chrominance, huff_code_dc_chrominance);
648 // huff_size_ac = huff_size_ac_chrominance;
649 // huff_code_ac = huff_code_ac_chrominance;
650 //}
651 last_dc[component] = dc;
652
653 /* AC coefs */
654
655 run = 0;
656 last_index = block_last_index[n];
657 for(i=1;i<=last_index;i++) {
658 j = zr_zigzag_direct[i];
659 val = b[j];
660 if (val == 0) {
661 run++;
662 } else {
663 while (run >= 16) {
664 //printf("ac 16 white ");
665 jput_bits(&pb, huff_size_ac[0xf0], huff_code_ac[0xf0]);
666 run -= 16;
667 }
668 mant = val;
669 if (val < 0) {
670 val = -val;
671 mant--;
672 }
673
674 /* compute the log (XXX: optimize) */
675 nbits = 0;
676 while (val != 0) {
677 val = val >> 1;
678 nbits++;
679 }
680 code = (run << 4) | nbits;
681
682 //printf("ac ");
683 jput_bits(&pb, huff_size_ac[code], huff_code_ac[code]);
684
685 //printf("ac ");
686 jput_bits(&pb, nbits, mant & ((1 << nbits) - 1));
687 run = 0;
688 }
689 }
690
691 /* output EOB only if not already 64 values */
692 if (last_index < 63 || run != 0) {
693 //printf("ac EOB ");
694 jput_bits(&pb, huff_size_ac[0], huff_code_ac[0]);
695 }
696 }
697
698 static void zr_mjpeg_encode_mb(DCTELEM **bla)
699 {
700 encode_block(*(bla), 0);
701 encode_block(*(bla+1), 1);
702 if (bw) {
703 jput_bits(&pb, 12, 512+128+8+2); /* 2 times code for 'no color'
704 * 001010001010 */
705 } else {
706 encode_block(*(bla+2), 2);
707 encode_block(*(bla+3), 3);
708 }
709 }
710
/* Picture size in macroblocks and position of the current macroblock. */
static int mb_width, mb_height;
static int mb_x, mb_y;
/* Start of the Y, U and V sample data. */
static unsigned char *y_data, *u_data, *v_data;
/* Distance between horizontally adjacent samples of each plane. */
static int y_ps, u_ps, v_ps;
/* Distance between consecutive rows of one field of each plane. */
static int y_rs, u_rs, v_rs;
static char code[256*1024]; // 256kb!
715 /* this function can take all kinds of YUV colorspaces
716 * YV12, YVYU, UYVY. The necesary parameters must be set up by te caller
717 * y_ps means "y pixel size", y_rs means "y row size".
718 * For YUYV, for example, is u = y + 1, v = y + 3, y_ps = 2, u_ps = 4
719 * v_ps = 4, y_rs = u_rs = v_rs.
720 *
721 * The data is straightened out at the moment it is put in DCT
722 * blocks, there are therefore no spurious memcopies involved */
723 /* Notice that w must be a multiple of 16 and h must be a multiple of
724 * fields*8 */
725 /* We produce YUV422 jpegs, the colors must be subsampled horizontally,
726 * if the colors are also subsampled vertically, then this function
727 * performs cheap upsampling (better solution will be: a DCT that is
728 * optimized in the case that every two rows are the same) */
729 /* cu = 0 means 'No cheap upsampling'
730 * cu = 1 means 'perform cheap upsampling' */
731 void mjpeg_encoder_init(int w, int h,
732 unsigned char* y, int y_psize, int y_rsize,
733 unsigned char* u, int u_psize, int u_rsize,
734 unsigned char* v, int v_psize, int v_rsize,
735 int f, int cu, int q, int b) {
736 int i;
737 mp_msg(MSGT_VO, MSGL_V, "JPEnc init: %dx%d %p %d %d %p %d %d %p %d %d\n",
738 w, h, y, y_psize, y_rsize,
739 u, u_psize, u_rsize,
740 v, v_psize, v_rsize);
741 y_data = y; u_data = u; v_data = v;
742 y_ps = y_psize; u_ps = u_psize; v_ps = v_psize;
743 y_rs = y_rsize*f;
744 u_rs = u_rsize*f;
745 v_rs = v_rsize*f;
746 width = w;
747 height = h/f;
748 fields = f;
749 qscale = q;
750 cheap_upsample = cu;
751 mb_width = width/16;
752 mb_height = height/8;
753 bw = b;
754 zr_mjpeg_init();
755 i = 0;
756 intra_matrix[0] = default_intra_matrix[0];
757 for (i = 1; i < 64; i++) {
758 intra_matrix[i] = (default_intra_matrix[i]*qscale) >> 3;
759 }
760 if (
761 #ifdef HAVE_MMX
762 av_fdct != fdct_mmx &&
763 #endif
764 av_fdct != jpeg_fdct_ifast) {
765 /* libavcodec is probably not yet initialized */
766 av_fdct = jpeg_fdct_ifast;
767 #ifdef HAVE_MMX
768 dsputil_init_mmx();
769 #endif
770 }
771 convert_matrix(q_intra_matrix, intra_matrix, 8);
772 blck = malloc(4*sizeof(DCTELEM*));
773 blck[0] = malloc(64*sizeof(DCTELEM));
774 blck[1] = malloc(64*sizeof(DCTELEM));
775 blck[2] = malloc(64*sizeof(DCTELEM));
776 blck[3] = malloc(64*sizeof(DCTELEM));
777 }
778
779 int mjpeg_encode_frame(char *bufr, int field) {
780 int i, j, k, l;
781 short int *dest;
782 unsigned char *source;
783 /* initialize the buffer */
784 if (field == 1) {
785 y_data += y_rs/2;
786 u_data += u_rs/2;
787 v_data += v_rs/2;
788 }
789 init_put_bits(&pb, bufr, 1024*256, NULL, NULL);
790
791 zr_mjpeg_picture_header();
792
793 last_dc[0] = 128; last_dc[1] = 128; last_dc[2] = 128;
794 mb_x = 0;
795 mb_y = 0;
796 for (mb_y = 0; mb_y < mb_height; mb_y++) {
797 for (mb_x = 0; mb_x < mb_width; mb_x++) {
798 //printf("Processing macroblock mb_x=%d, mb_y=%d, mb_width=%d, mb_height=%d, size=%d\n", mb_x, mb_y, mb_width, mb_height, pb.buf_ptr - pb.buf);
799 /* fill 2 Y macroblocks and one U and one V */
800 source = mb_y * 8 * y_rs + 16 * y_ps * mb_x + y_data;
801 dest = blck[0];
802 for (i = 0; i < 8; i++) {
803 for (j = 0; j < 8; j++) {
804 dest[j] = source[j*y_ps];
805 }
806 dest += 8;
807 source += y_rs;
808 }
809 source = mb_y * 8 * y_rs + (16*mb_x + 8)*y_ps + y_data;
810 dest = blck[1];
811 for (i = 0; i < 8; i++) {
812 for (j = 0; j < 8; j++) {
813 dest[j] = source[j*y_ps];
814 }
815 dest += 8;
816 source += y_rs;
817 }
818 if (!bw) {
819 if (cheap_upsample) {
820 source = mb_y*4*u_rs + 8*mb_x*u_ps + u_data;
821 dest = blck[2];
822 for (i = 0; i < 4; i++) {
823 for (j = 0; j < 8; j++) {
824 dest[j] = source[j*u_ps];
825 dest[j+8] = source[j*u_ps];
826 }
827 dest += 16;
828 source += u_rs;
829 }
830 source = mb_y*4*v_rs + 8*mb_x*v_ps + v_data;
831 dest = blck[3];
832 for (i = 0; i < 4; i++) {
833 for (j = 0; j < 8; j++) {
834 dest[j] = source[j*v_ps];
835 dest[j+8] = source[j*v_ps];
836 }
837 dest += 16;
838 source += u_rs;
839 }
840 } else {
841 source = mb_y*8*u_rs + 8*mb_x*u_ps + u_data;
842 dest = blck[2];
843 for (i = 0; i < 8; i++) {
844 for (j = 0; j < 8; j++) {
845 dest[j] = source[j*u_ps];
846 }
847 dest += 8;
848 source += u_rs;
849 }
850 source = mb_y*8*v_rs + 8*mb_x*v_ps + v_data;
851 dest = blck[3];
852 for (i = 0; i < 8; i++) {
853 for (j = 0; j < 8; j++) {
854 dest[j] = source[j*v_ps];
855 }
856 dest += 8;
857 source += u_rs;
858 }
859 }
860 }
861 /* so, **blck is filled now... */
862
863 for(i = 0; i < 2; i++) {
864 if (av_fdct == jpeg_fdct_ifast)
865 block_last_index[i] =
866 dct_quantize(blck[i],
867 i, qscale);
868 else
869 block_last_index[i] =
870 dct_quantize_mmx(blck[i],
871 i, qscale);
872 }
873 if (!bw) {
874 for(i = 2; i < 4; i++) {
875 if (av_fdct == jpeg_fdct_ifast)
876 block_last_index[i] =
877 dct_quantize(blck[i],
878 i, qscale);
879 else
880 block_last_index[i] =
881 dct_quantize_mmx(blck[i],
882 i, qscale);
883 }
884 }
885 zr_mjpeg_encode_mb(blck);
886 }
887 }
888 emms_c();
889 zr_mjpeg_picture_trailer();
890 flush_put_bits(&pb);
891 zr_mjpeg_close();
892 if (field == 1) {
893 y_data -= y_rs/2;
894 u_data -= u_rs/2;
895 v_data -= v_rs/2;
896 }
897 return pb.buf_ptr - pb.buf;
898 }
899