Mercurial > libavcodec.hg
annotate roqvideoenc.c @ 7744:7477cbdacb20 libavcodec
Fix lossless JPEG encoder to comply with the spec and store full redundant
residuals. Note that this does not change RGB32, as we need to check this
against some decoder that supports it.
author | michael |
---|---|
date | Sat, 30 Aug 2008 20:39:12 +0000 |
parents | 29319d07aeb4 |
children | 9a72bea281c3 |
rev | line source |
---|---|
5184 | 1 /* |
2 * RoQ Video Encoder. | |
3 * | |
5219 | 4 * Copyright (C) 2007 Vitor Sessak <vitor1001@gmail.com> |
5184 | 5 * Copyright (C) 2004-2007 Eric Lasota |
6 * Based on RoQ specs (C) 2001 Tim Ferguson | |
7 * | |
8 * This file is part of FFmpeg. | |
9 * | |
10 * FFmpeg is free software; you can redistribute it and/or | |
11 * modify it under the terms of the GNU Lesser General Public | |
12 * License as published by the Free Software Foundation; either | |
13 * version 2.1 of the License, or (at your option) any later version. | |
14 * | |
15 * FFmpeg is distributed in the hope that it will be useful, | |
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 * Lesser General Public License for more details. | |
19 * | |
20 * You should have received a copy of the GNU Lesser General Public | |
21 * License along with FFmpeg; if not, write to the Free Software | |
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
23 */ | |
24 | |
25 /** | |
26 * @file roqvideoenc.c | |
6812
0d01bae8d207
cosmetics: s/Id/id/ in libavcodec where Id refers to id Software.
diego
parents:
6788
diff
changeset
|
27 * id RoQ encoder by Vitor. Based on the Switchblade3 library and the |
5184 | 28 * Switchblade3 FFmpeg glue by Eric Lasota. |
29 */ | |
30 | |
31 /* | |
32 * COSTS: | |
33 * Level 1: | |
34 * SKIP - 2 bits | |
35 * MOTION - 2 + 8 bits | |
36 * CODEBOOK - 2 + 8 bits | |
37 * SUBDIVIDE - 2 + combined subcel cost | |
38 * | |
39 * Level 2: | |
40 * SKIP - 2 bits | |
41 * MOTION - 2 + 8 bits | |
42 * CODEBOOK - 2 + 8 bits | |
43 * SUBDIVIDE - 2 + 4*8 bits | |
44 * | |
45 * Maximum cost: 138 bits per cel | |
46 * | |
47 * Proper evaluation requires LCD fraction comparison, which requires | |
48 * Squared Error (SE) loss * savings increase | |
49 * | |
50 * Maximum savings increase: 136 bits | |
51 * Maximum SE loss without overflow: 31580641 | |
52 * Components in 8x8 supercel: 192 | |
53 * Maximum SE precision per component: 164482 | |
54 * >65025, so no truncation is needed (phew) | |
55 */ | |
56 | |
57 #include <string.h> | |
58 #include <unistd.h> | |
59 | |
60 #include "roqvideo.h" | |
61 #include "bytestream.h" | |
62 #include "elbg.h" | |
63 | |
64 #define CHROMA_BIAS 1 | |
65 | |
66 /** | |
67 * Maximum number of generated 4x4 codebooks. Can't be 256 to workaround a | |
68 * Quake 3 bug. | |
69 */ | |
70 #define MAX_CBS_4x4 255 | |
71 | |
72 #define MAX_CBS_2x2 256 ///< Maximum number of 2x2 codebooks. | |
73 | |
74 /* The cast is useful when multiplying it by INT_MAX */ | |
75 #define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE) | |
76 | |
77 /* Macroblock support functions */ | |
78 static void unpack_roq_cell(roq_cell *cell, uint8_t u[4*3]) | |
79 { | |
80 memcpy(u , cell->y, 4); | |
81 memset(u+4, cell->u, 4); | |
82 memset(u+8, cell->v, 4); | |
83 } | |
84 | |
85 static void unpack_roq_qcell(uint8_t cb2[], roq_qcell *qcell, uint8_t u[4*4*3]) | |
86 { | |
87 int i,cp; | |
88 static const int offsets[4] = {0, 2, 8, 10}; | |
89 | |
90 for (cp=0; cp<3; cp++) | |
91 for (i=0; i<4; i++) { | |
92 u[4*4*cp + offsets[i] ] = cb2[qcell->idx[i]*2*2*3 + 4*cp ]; | |
93 u[4*4*cp + offsets[i]+1] = cb2[qcell->idx[i]*2*2*3 + 4*cp+1]; | |
94 u[4*4*cp + offsets[i]+4] = cb2[qcell->idx[i]*2*2*3 + 4*cp+2]; | |
95 u[4*4*cp + offsets[i]+5] = cb2[qcell->idx[i]*2*2*3 + 4*cp+3]; | |
96 } | |
97 } | |
98 | |
99 | |
/**
 * Upscale a planar 4x4 block (3 planes of 16 bytes) to a planar 8x8 block
 * (3 planes of 64 bytes) by duplicating every sample 2x2.
 */
static void enlarge_roq_mb4(uint8_t base[3*16], uint8_t u[3*64])
{
    int plane, row, col;

    for (plane = 0; plane < 3; plane++) {
        const uint8_t *src_plane = base + 16*plane;

        for (row = 0; row < 8; row++) {
            /* Two consecutive output rows read the same source row. */
            const uint8_t *src_row = src_plane + (row/2)*4;

            for (col = 0; col < 8; col++)
                *u++ = src_row[col/2];
        }
    }
}
109 | |
/** Return x multiplied by itself. */
static inline int square(int x)
{
    const int squared = x * x;

    return squared;
}
114 | |
/**
 * Sum of squared differences between the first count bytes of a and b.
 */
static inline int eval_sse(uint8_t *a, uint8_t *b, int count)
{
    int total = 0;

    for (; count != 0; count--) {
        const int delta = *b++ - *a++;

        total += delta * delta;
    }

    return total;
}
124 | |
125 // FIXME Could use DSPContext.sse, but it is not so speed critical (used | |
126 // just for motion estimation). | |
127 static int block_sse(uint8_t **buf1, uint8_t **buf2, int x1, int y1, int x2, | |
5198 | 128 int y2, int *stride1, int *stride2, int size) |
5184 | 129 { |
130 int i, k; | |
131 int sse=0; | |
132 | |
133 for (k=0; k<3; k++) { | |
134 int bias = (k ? CHROMA_BIAS : 4); | |
135 for (i=0; i<size; i++) | |
5198 | 136 sse += bias*eval_sse(buf1[k] + (y1+i)*stride1[k] + x1, |
137 buf2[k] + (y2+i)*stride2[k] + x2, size); | |
5184 | 138 } |
139 | |
140 return sse; | |
141 } | |
142 | |
143 static int eval_motion_dist(RoqContext *enc, int x, int y, motion_vect vect, | |
144 int size) | |
145 { | |
146 int mx=vect.d[0]; | |
147 int my=vect.d[1]; | |
148 | |
149 if (mx < -7 || mx > 7) | |
150 return INT_MAX; | |
151 | |
152 if (my < -7 || my > 7) | |
153 return INT_MAX; | |
154 | |
155 mx += x; | |
156 my += y; | |
157 | |
158 if ((unsigned) mx > enc->width-size || (unsigned) my > enc->height-size) | |
159 return INT_MAX; | |
160 | |
161 return block_sse(enc->frame_to_enc->data, enc->last_frame->data, x, y, | |
5198 | 162 mx, my, |
163 enc->frame_to_enc->linesize, enc->last_frame->linesize, | |
164 size); | |
5184 | 165 } |
166 | |
167 /** | |
168 * Returns distortion between two macroblocks | |
169 */ | |
170 static inline int squared_diff_macroblock(uint8_t a[], uint8_t b[], int size) | |
171 { | |
172 int cp, sdiff=0; | |
173 | |
174 for(cp=0;cp<3;cp++) { | |
175 int bias = (cp ? CHROMA_BIAS : 4); | |
176 sdiff += bias*eval_sse(a, b, size*size); | |
177 a += size*size; | |
178 b += size*size; | |
179 } | |
180 | |
181 return sdiff; | |
182 } | |
183 | |
184 typedef struct | |
185 { | |
186 int eval_dist[4]; | |
187 int best_bit_use; | |
188 int best_coding; | |
189 | |
190 int subCels[4]; | |
191 motion_vect motion; | |
192 int cbEntry; | |
193 } subcel_evaluation_t; | |
194 | |
195 typedef struct | |
196 { | |
197 int eval_dist[4]; | |
198 int best_coding; | |
199 | |
200 subcel_evaluation_t subCels[4]; | |
201 | |
202 motion_vect motion; | |
203 int cbEntry; | |
204 | |
205 int sourceX, sourceY; | |
206 } cel_evaluation_t; | |
207 | |
208 typedef struct | |
209 { | |
210 int numCB4; | |
211 int numCB2; | |
212 int usedCB2[MAX_CBS_2x2]; | |
213 int usedCB4[MAX_CBS_4x4]; | |
214 uint8_t unpacked_cb2[MAX_CBS_2x2*2*2*3]; | |
215 uint8_t unpacked_cb4[MAX_CBS_4x4*4*4*3]; | |
216 uint8_t unpacked_cb4_enlarged[MAX_CBS_4x4*8*8*3]; | |
217 } roq_codebooks_t; | |
218 | |
219 /** | |
220 * Temporary vars | |
221 */ | |
222 typedef struct | |
223 { | |
224 cel_evaluation_t *cel_evals; | |
225 | |
226 int f2i4[MAX_CBS_4x4]; | |
227 int i2f4[MAX_CBS_4x4]; | |
228 int f2i2[MAX_CBS_2x2]; | |
229 int i2f2[MAX_CBS_2x2]; | |
230 | |
231 int mainChunkSize; | |
232 | |
233 int numCB4; | |
234 int numCB2; | |
235 | |
236 roq_codebooks_t codebooks; | |
237 | |
238 int *closest_cb2; | |
239 int used_option[4]; | |
240 } roq_tempdata_t; | |
241 | |
242 /** | |
243 * Initializes cel evaluators and sets their source coordinates | |
244 */ | |
245 static void create_cel_evals(RoqContext *enc, roq_tempdata_t *tempData) | |
246 { | |
247 int n=0, x, y, i; | |
248 | |
249 tempData->cel_evals = av_malloc(enc->width*enc->height/64 * sizeof(cel_evaluation_t)); | |
250 | |
251 /* Map to the ROQ quadtree order */ | |
252 for (y=0; y<enc->height; y+=16) | |
253 for (x=0; x<enc->width; x+=16) | |
254 for(i=0; i<4; i++) { | |
255 tempData->cel_evals[n ].sourceX = x + (i&1)*8; | |
256 tempData->cel_evals[n++].sourceY = y + (i&2)*4; | |
257 } | |
258 } | |
259 | |
260 /** | |
261 * Get macroblocks from parts of the image | |
262 */ | |
263 static void get_frame_mb(AVFrame *frame, int x, int y, uint8_t mb[], int dim) | |
264 { | |
265 int i, j, cp; | |
266 | |
5191 | 267 for (cp=0; cp<3; cp++) { |
268 int stride = frame->linesize[cp]; | |
5184 | 269 for (i=0; i<dim; i++) |
270 for (j=0; j<dim; j++) | |
271 *mb++ = frame->data[cp][(y+i)*stride + x + j]; | |
5191 | 272 } |
5184 | 273 } |
274 | |
/**
 * Find the codebook entry with the lowest distortion against a macroblock.
 *
 * @param cluster  planar dim x dim macroblock to match
 * @param cb       codebook, 3*dim*dim bytes per entry
 * @param numCB    number of entries in cb
 * @param outIndex receives the index of the best entry (0 if numCB is 0)
 * @param dim      macroblock edge length
 * @return distortion of the best entry, INT_MAX if no entry was examined
 */
static int index_mb(uint8_t cluster[], uint8_t cb[], int numCB,
                    int *outIndex, int dim)
{
    const int entry_len = dim*dim*3;
    int best_err = INT_MAX;
    int best_idx = 0;
    int n;

    for (n = 0; n < numCB; n++) {
        const int err = squared_diff_macroblock(cluster, cb + n*entry_len, dim);

        /* Strict comparison: on ties the earliest entry wins. */
        if (err >= best_err)
            continue;

        best_err = err;
        best_idx = n;
    }

    *outIndex = best_idx;
    return best_err;
}
295 | |
296 #define EVAL_MOTION(MOTION) \ | |
297 do { \ | |
298 diff = eval_motion_dist(enc, j, i, MOTION, blocksize); \ | |
299 \ | |
300 if (diff < lowestdiff) { \ | |
301 lowestdiff = diff; \ | |
302 bestpick = MOTION; \ | |
303 } \ | |
304 } while(0) | |
305 | |
306 static void motion_search(RoqContext *enc, int blocksize) | |
307 { | |
308 static const motion_vect offsets[8] = { | |
309 {{ 0,-1}}, | |
310 {{ 0, 1}}, | |
311 {{-1, 0}}, | |
312 {{ 1, 0}}, | |
313 {{-1, 1}}, | |
314 {{ 1,-1}}, | |
315 {{-1,-1}}, | |
316 {{ 1, 1}}, | |
317 }; | |
318 | |
319 int diff, lowestdiff, oldbest; | |
320 int off[3]; | |
321 motion_vect bestpick = {{0,0}}; | |
322 int i, j, k, offset; | |
323 | |
324 motion_vect *last_motion; | |
325 motion_vect *this_motion; | |
326 motion_vect vect, vect2; | |
327 | |
328 int max=(enc->width/blocksize)*enc->height/blocksize; | |
329 | |
330 if (blocksize == 4) { | |
331 last_motion = enc->last_motion4; | |
332 this_motion = enc->this_motion4; | |
333 } else { | |
334 last_motion = enc->last_motion8; | |
335 this_motion = enc->this_motion8; | |
336 } | |
337 | |
338 for (i=0; i<enc->height; i+=blocksize) | |
339 for (j=0; j<enc->width; j+=blocksize) { | |
340 lowestdiff = eval_motion_dist(enc, j, i, (motion_vect) {{0,0}}, | |
341 blocksize); | |
342 bestpick.d[0] = 0; | |
343 bestpick.d[1] = 0; | |
344 | |
345 if (blocksize == 4) | |
346 EVAL_MOTION(enc->this_motion8[(i/8)*(enc->width/8) + j/8]); | |
347 | |
348 offset = (i/blocksize)*enc->width/blocksize + j/blocksize; | |
349 if (offset < max && offset >= 0) | |
350 EVAL_MOTION(last_motion[offset]); | |
351 | |
352 offset++; | |
353 if (offset < max && offset >= 0) | |
354 EVAL_MOTION(last_motion[offset]); | |
355 | |
356 offset = (i/blocksize + 1)*enc->width/blocksize + j/blocksize; | |
357 if (offset < max && offset >= 0) | |
358 EVAL_MOTION(last_motion[offset]); | |
359 | |
360 off[0]= (i/blocksize)*enc->width/blocksize + j/blocksize - 1; | |
361 off[1]= off[0] - enc->width/blocksize + 1; | |
362 off[2]= off[1] + 1; | |
363 | |
364 if (i) { | |
365 | |
366 for(k=0; k<2; k++) | |
367 vect.d[k]= mid_pred(this_motion[off[0]].d[k], | |
368 this_motion[off[1]].d[k], | |
369 this_motion[off[2]].d[k]); | |
370 | |
371 EVAL_MOTION(vect); | |
372 for(k=0; k<3; k++) | |
373 EVAL_MOTION(this_motion[off[k]]); | |
374 } else if(j) | |
375 EVAL_MOTION(this_motion[off[0]]); | |
376 | |
377 vect = bestpick; | |
378 | |
379 oldbest = -1; | |
380 while (oldbest != lowestdiff) { | |
381 oldbest = lowestdiff; | |
382 for (k=0; k<8; k++) { | |
383 vect2 = vect; | |
384 vect2.d[0] += offsets[k].d[0]; | |
385 vect2.d[1] += offsets[k].d[1]; | |
386 EVAL_MOTION(vect2); | |
387 } | |
388 vect = bestpick; | |
389 } | |
390 offset = (i/blocksize)*enc->width/blocksize + j/blocksize; | |
391 this_motion[offset] = bestpick; | |
392 } | |
393 } | |
394 | |
395 /** | |
396 * Gets distortion for all options available to a subcel | |
397 */ | |
398 static void gather_data_for_subcel(subcel_evaluation_t *subcel, int x, | |
399 int y, RoqContext *enc, roq_tempdata_t *tempData) | |
400 { | |
401 uint8_t mb4[4*4*3]; | |
402 uint8_t mb2[2*2*3]; | |
403 int cluster_index; | |
404 int i, best_dist; | |
405 | |
406 static const int bitsUsed[4] = {2, 10, 10, 34}; | |
407 | |
408 if (enc->framesSinceKeyframe >= 1) { | |
409 subcel->motion = enc->this_motion4[y*enc->width/16 + x/4]; | |
410 | |
411 subcel->eval_dist[RoQ_ID_FCC] = | |
412 eval_motion_dist(enc, x, y, | |
413 enc->this_motion4[y*enc->width/16 + x/4], 4); | |
414 } else | |
415 subcel->eval_dist[RoQ_ID_FCC] = INT_MAX; | |
416 | |
417 if (enc->framesSinceKeyframe >= 2) | |
418 subcel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data, | |
419 enc->current_frame->data, x, | |
5191 | 420 y, x, y, |
421 enc->frame_to_enc->linesize, | |
5198 | 422 enc->current_frame->linesize, |
5191 | 423 4); |
5184 | 424 else |
425 subcel->eval_dist[RoQ_ID_MOT] = INT_MAX; | |
426 | |
427 cluster_index = y*enc->width/16 + x/4; | |
428 | |
429 get_frame_mb(enc->frame_to_enc, x, y, mb4, 4); | |
430 | |
431 subcel->eval_dist[RoQ_ID_SLD] = index_mb(mb4, | |
432 tempData->codebooks.unpacked_cb4, | |
433 tempData->codebooks.numCB4, | |
434 &subcel->cbEntry, 4); | |
435 | |
436 subcel->eval_dist[RoQ_ID_CCC] = 0; | |
437 | |
438 for(i=0;i<4;i++) { | |
439 subcel->subCels[i] = tempData->closest_cb2[cluster_index*4+i]; | |
440 | |
441 get_frame_mb(enc->frame_to_enc, x+2*(i&1), | |
442 y+(i&2), mb2, 2); | |
443 | |
444 subcel->eval_dist[RoQ_ID_CCC] += | |
445 squared_diff_macroblock(tempData->codebooks.unpacked_cb2 + subcel->subCels[i]*2*2*3, mb2, 2); | |
446 } | |
447 | |
448 best_dist = INT_MAX; | |
449 for (i=0; i<4; i++) | |
450 if (ROQ_LAMBDA_SCALE*subcel->eval_dist[i] + enc->lambda*bitsUsed[i] < | |
451 best_dist) { | |
452 subcel->best_coding = i; | |
453 subcel->best_bit_use = bitsUsed[i]; | |
454 best_dist = ROQ_LAMBDA_SCALE*subcel->eval_dist[i] + | |
455 enc->lambda*bitsUsed[i]; | |
456 } | |
457 } | |
458 | |
459 /** | |
460 * Gets distortion for all options available to a cel | |
461 */ | |
462 static void gather_data_for_cel(cel_evaluation_t *cel, RoqContext *enc, | |
463 roq_tempdata_t *tempData) | |
464 { | |
465 uint8_t mb8[8*8*3]; | |
466 int index = cel->sourceY*enc->width/64 + cel->sourceX/8; | |
467 int i, j, best_dist, divide_bit_use; | |
468 | |
469 int bitsUsed[4] = {2, 10, 10, 0}; | |
470 | |
471 if (enc->framesSinceKeyframe >= 1) { | |
472 cel->motion = enc->this_motion8[index]; | |
473 | |
474 cel->eval_dist[RoQ_ID_FCC] = | |
475 eval_motion_dist(enc, cel->sourceX, cel->sourceY, | |
476 enc->this_motion8[index], 8); | |
477 } else | |
478 cel->eval_dist[RoQ_ID_FCC] = INT_MAX; | |
479 | |
480 if (enc->framesSinceKeyframe >= 2) | |
481 cel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data, | |
482 enc->current_frame->data, | |
483 cel->sourceX, cel->sourceY, | |
484 cel->sourceX, cel->sourceY, | |
5198 | 485 enc->frame_to_enc->linesize, |
486 enc->current_frame->linesize,8); | |
5184 | 487 else |
488 cel->eval_dist[RoQ_ID_MOT] = INT_MAX; | |
489 | |
490 get_frame_mb(enc->frame_to_enc, cel->sourceX, cel->sourceY, mb8, 8); | |
491 | |
492 cel->eval_dist[RoQ_ID_SLD] = | |
493 index_mb(mb8, tempData->codebooks.unpacked_cb4_enlarged, | |
494 tempData->codebooks.numCB4, &cel->cbEntry, 8); | |
495 | |
496 gather_data_for_subcel(cel->subCels + 0, cel->sourceX+0, cel->sourceY+0, enc, tempData); | |
497 gather_data_for_subcel(cel->subCels + 1, cel->sourceX+4, cel->sourceY+0, enc, tempData); | |
498 gather_data_for_subcel(cel->subCels + 2, cel->sourceX+0, cel->sourceY+4, enc, tempData); | |
499 gather_data_for_subcel(cel->subCels + 3, cel->sourceX+4, cel->sourceY+4, enc, tempData); | |
500 | |
501 cel->eval_dist[RoQ_ID_CCC] = 0; | |
502 divide_bit_use = 0; | |
503 for (i=0; i<4; i++) { | |
504 cel->eval_dist[RoQ_ID_CCC] += | |
505 cel->subCels[i].eval_dist[cel->subCels[i].best_coding]; | |
506 divide_bit_use += cel->subCels[i].best_bit_use; | |
507 } | |
508 | |
509 best_dist = INT_MAX; | |
510 bitsUsed[3] = 2 + divide_bit_use; | |
511 | |
512 for (i=0; i<4; i++) | |
513 if (ROQ_LAMBDA_SCALE*cel->eval_dist[i] + enc->lambda*bitsUsed[i] < | |
514 best_dist) { | |
515 cel->best_coding = i; | |
516 best_dist = ROQ_LAMBDA_SCALE*cel->eval_dist[i] + | |
517 enc->lambda*bitsUsed[i]; | |
518 } | |
519 | |
520 tempData->used_option[cel->best_coding]++; | |
521 tempData->mainChunkSize += bitsUsed[cel->best_coding]; | |
522 | |
523 if (cel->best_coding == RoQ_ID_SLD) | |
524 tempData->codebooks.usedCB4[cel->cbEntry]++; | |
525 | |
526 if (cel->best_coding == RoQ_ID_CCC) | |
527 for (i=0; i<4; i++) { | |
528 if (cel->subCels[i].best_coding == RoQ_ID_SLD) | |
529 tempData->codebooks.usedCB4[cel->subCels[i].cbEntry]++; | |
530 else if (cel->subCels[i].best_coding == RoQ_ID_CCC) | |
531 for (j=0; j<4; j++) | |
532 tempData->codebooks.usedCB2[cel->subCels[i].subCels[j]]++; | |
533 } | |
534 } | |
535 | |
536 static void remap_codebooks(RoqContext *enc, roq_tempdata_t *tempData) | |
537 { | |
538 int i, j, idx=0; | |
539 | |
540 /* Make remaps for the final codebook usage */ | |
541 for (i=0; i<MAX_CBS_4x4; i++) { | |
542 if (tempData->codebooks.usedCB4[i]) { | |
543 tempData->i2f4[i] = idx; | |
544 tempData->f2i4[idx] = i; | |
545 for (j=0; j<4; j++) | |
546 tempData->codebooks.usedCB2[enc->cb4x4[i].idx[j]]++; | |
547 idx++; | |
548 } | |
549 } | |
550 | |
551 tempData->numCB4 = idx; | |
552 | |
553 idx = 0; | |
554 for (i=0; i<MAX_CBS_2x2; i++) { | |
555 if (tempData->codebooks.usedCB2[i]) { | |
556 tempData->i2f2[i] = idx; | |
557 tempData->f2i2[idx] = i; | |
558 idx++; | |
559 } | |
560 } | |
561 tempData->numCB2 = idx; | |
562 | |
563 } | |
564 | |
565 /** | |
566 * Write codebook chunk | |
567 */ | |
568 static void write_codebooks(RoqContext *enc, roq_tempdata_t *tempData) | |
569 { | |
570 int i, j; | |
571 uint8_t **outp= &enc->out_buf; | |
572 | |
573 if (tempData->numCB2) { | |
574 bytestream_put_le16(outp, RoQ_QUAD_CODEBOOK); | |
575 bytestream_put_le32(outp, tempData->numCB2*6 + tempData->numCB4*4); | |
576 bytestream_put_byte(outp, tempData->numCB4); | |
577 bytestream_put_byte(outp, tempData->numCB2); | |
578 | |
579 for (i=0; i<tempData->numCB2; i++) { | |
580 bytestream_put_buffer(outp, enc->cb2x2[tempData->f2i2[i]].y, 4); | |
581 bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].u); | |
582 bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].v); | |
583 } | |
584 | |
585 for (i=0; i<tempData->numCB4; i++) | |
586 for (j=0; j<4; j++) | |
587 bytestream_put_byte(outp, tempData->i2f2[enc->cb4x4[tempData->f2i4[i]].idx[j]]); | |
588 | |
589 } | |
590 } | |
591 | |
592 static inline uint8_t motion_arg(motion_vect mot) | |
593 { | |
594 uint8_t ax = 8 - ((uint8_t) mot.d[0]); | |
595 uint8_t ay = 8 - ((uint8_t) mot.d[1]); | |
596 return ((ax&15)<<4) | (ay&15); | |
597 } | |
598 | |
/**
 * Accumulator for one 16-bit word of 2-bit typecodes and the argument bytes
 * that belong to them.
 */
typedef struct
{
    int typeSpool;              ///< typecodes collected so far, first code in the top bits
    int typeSpoolLength;        ///< number of bits currently held in typeSpool
    uint8_t argumentSpool[64];  ///< argument bytes waiting to be flushed
    uint8_t *args;              ///< write position inside argumentSpool
    uint8_t **pout;             ///< output position the spool is flushed to
} CodingSpool;
607 | |
608 /* NOTE: Typecodes must be spooled AFTER arguments!! */ | |
609 static void write_typecode(CodingSpool *s, uint8_t type) | |
610 { | |
5205 | 611 s->typeSpool |= (type & 3) << (14 - s->typeSpoolLength); |
5184 | 612 s->typeSpoolLength += 2; |
613 if (s->typeSpoolLength == 16) { | |
614 bytestream_put_le16(s->pout, s->typeSpool); | |
615 bytestream_put_buffer(s->pout, s->argumentSpool, | |
616 s->args - s->argumentSpool); | |
617 s->typeSpoolLength = 0; | |
618 s->typeSpool = 0; | |
619 s->args = s->argumentSpool; | |
620 } | |
621 } | |
622 | |
623 static void reconstruct_and_encode_image(RoqContext *enc, roq_tempdata_t *tempData, int w, int h, int numBlocks) | |
624 { | |
625 int i, j, k; | |
626 int x, y; | |
627 int subX, subY; | |
628 int dist=0; | |
629 | |
630 roq_qcell *qcell; | |
631 cel_evaluation_t *eval; | |
632 | |
633 CodingSpool spool; | |
634 | |
635 spool.typeSpool=0; | |
636 spool.typeSpoolLength=0; | |
637 spool.args = spool.argumentSpool; | |
638 spool.pout = &enc->out_buf; | |
639 | |
640 if (tempData->used_option[RoQ_ID_CCC]%2) | |
641 tempData->mainChunkSize+=8; //FIXME | |
642 | |
643 /* Write the video chunk header */ | |
644 bytestream_put_le16(&enc->out_buf, RoQ_QUAD_VQ); | |
645 bytestream_put_le32(&enc->out_buf, tempData->mainChunkSize/8); | |
646 bytestream_put_byte(&enc->out_buf, 0x0); | |
647 bytestream_put_byte(&enc->out_buf, 0x0); | |
648 | |
649 for (i=0; i<numBlocks; i++) { | |
650 eval = tempData->cel_evals + i; | |
651 | |
652 x = eval->sourceX; | |
653 y = eval->sourceY; | |
654 dist += eval->eval_dist[eval->best_coding]; | |
655 | |
656 switch (eval->best_coding) { | |
657 case RoQ_ID_MOT: | |
658 write_typecode(&spool, RoQ_ID_MOT); | |
659 break; | |
660 | |
661 case RoQ_ID_FCC: | |
662 bytestream_put_byte(&spool.args, motion_arg(eval->motion)); | |
663 | |
664 write_typecode(&spool, RoQ_ID_FCC); | |
665 ff_apply_motion_8x8(enc, x, y, | |
666 eval->motion.d[0], eval->motion.d[1]); | |
667 break; | |
668 | |
669 case RoQ_ID_SLD: | |
670 bytestream_put_byte(&spool.args, tempData->i2f4[eval->cbEntry]); | |
671 write_typecode(&spool, RoQ_ID_SLD); | |
672 | |
673 qcell = enc->cb4x4 + eval->cbEntry; | |
674 ff_apply_vector_4x4(enc, x , y , enc->cb2x2 + qcell->idx[0]); | |
675 ff_apply_vector_4x4(enc, x+4, y , enc->cb2x2 + qcell->idx[1]); | |
676 ff_apply_vector_4x4(enc, x , y+4, enc->cb2x2 + qcell->idx[2]); | |
677 ff_apply_vector_4x4(enc, x+4, y+4, enc->cb2x2 + qcell->idx[3]); | |
678 break; | |
679 | |
680 case RoQ_ID_CCC: | |
681 write_typecode(&spool, RoQ_ID_CCC); | |
682 | |
683 for (j=0; j<4; j++) { | |
684 subX = x + 4*(j&1); | |
685 subY = y + 2*(j&2); | |
686 | |
687 switch(eval->subCels[j].best_coding) { | |
688 case RoQ_ID_MOT: | |
689 break; | |
690 | |
691 case RoQ_ID_FCC: | |
692 bytestream_put_byte(&spool.args, | |
693 motion_arg(eval->subCels[j].motion)); | |
694 | |
695 ff_apply_motion_4x4(enc, subX, subY, | |
696 eval->subCels[j].motion.d[0], | |
697 eval->subCels[j].motion.d[1]); | |
698 break; | |
699 | |
700 case RoQ_ID_SLD: | |
701 bytestream_put_byte(&spool.args, | |
702 tempData->i2f4[eval->subCels[j].cbEntry]); | |
703 | |
704 qcell = enc->cb4x4 + eval->subCels[j].cbEntry; | |
705 | |
706 ff_apply_vector_2x2(enc, subX , subY , | |
707 enc->cb2x2 + qcell->idx[0]); | |
708 ff_apply_vector_2x2(enc, subX+2, subY , | |
709 enc->cb2x2 + qcell->idx[1]); | |
710 ff_apply_vector_2x2(enc, subX , subY+2, | |
711 enc->cb2x2 + qcell->idx[2]); | |
712 ff_apply_vector_2x2(enc, subX+2, subY+2, | |
713 enc->cb2x2 + qcell->idx[3]); | |
714 break; | |
715 | |
716 case RoQ_ID_CCC: | |
717 for (k=0; k<4; k++) { | |
718 int cb_idx = eval->subCels[j].subCels[k]; | |
719 bytestream_put_byte(&spool.args, | |
720 tempData->i2f2[cb_idx]); | |
721 | |
722 ff_apply_vector_2x2(enc, subX + 2*(k&1), subY + (k&2), | |
723 enc->cb2x2 + cb_idx); | |
724 } | |
725 break; | |
726 } | |
727 write_typecode(&spool, eval->subCels[j].best_coding); | |
728 } | |
729 break; | |
730 } | |
731 } | |
732 | |
733 /* Flush the remainder of the argument/type spool */ | |
734 while (spool.typeSpoolLength) | |
735 write_typecode(&spool, 0x0); | |
736 | |
737 #if 0 | |
738 uint8_t *fdata[3] = {enc->frame_to_enc->data[0], | |
739 enc->frame_to_enc->data[1], | |
740 enc->frame_to_enc->data[2]}; | |
741 uint8_t *cdata[3] = {enc->current_frame->data[0], | |
742 enc->current_frame->data[1], | |
743 enc->current_frame->data[2]}; | |
744 av_log(enc->avctx, AV_LOG_ERROR, "Expected distortion: %i Actual: %i\n", | |
745 dist, | |
746 block_sse(fdata, cdata, 0, 0, 0, 0, | |
5198 | 747 enc->frame_to_enc->linesize, |
748 enc->current_frame->linesize, | |
749 enc->width)); //WARNING: Square dimensions implied... | |
5184 | 750 #endif |
751 } | |
752 | |
753 | |
/**
 * Create a single YUV cell from a 2x2 section of the image.
 *
 * Writes 6 bytes to block: the four luma samples in raster order, then the
 * rounded average of the four U samples and of the four V samples.
 * stride[1] is used to address both chroma planes.
 */
static inline void frame_block_to_cell(uint8_t *block, uint8_t **data,
                                       int top, int left, int *stride)
{
    int row, col;
    int sum_u = 0;
    int sum_v = 0;

    for (row = top; row < top + 2; row++) {
        const int luma_pos   = row*stride[0] + left;
        const int chroma_pos = row*stride[1] + left;

        for (col = 0; col < 2; col++) {
            *block++ = data[0][luma_pos + col];
            sum_u   += data[1][chroma_pos + col];
            sum_v   += data[2][chroma_pos + col];
        }
    }

    block[0] = (sum_u+2)/4;
    block[1] = (sum_v+2)/4;
}
774 | |
775 /** | |
776 * Creates YUV clusters for the entire image | |
777 */ | |
778 static void create_clusters(AVFrame *frame, int w, int h, uint8_t *yuvClusters) | |
779 { | |
780 int i, j, k, l; | |
781 | |
782 for (i=0; i<h; i+=4) | |
783 for (j=0; j<w; j+=4) { | |
784 for (k=0; k < 2; k++) | |
785 for (l=0; l < 2; l++) | |
786 frame_block_to_cell(yuvClusters + (l + 2*k)*6, frame->data, | |
787 i+2*k, j+2*l, frame->linesize); | |
788 yuvClusters += 24; | |
789 } | |
790 } | |
791 | |
792 static void generate_codebook(RoqContext *enc, roq_tempdata_t *tempdata, | |
793 int *points, int inputCount, roq_cell *results, | |
794 int size, int cbsize) | |
795 { | |
796 int i, j, k; | |
797 int c_size = size*size/4; | |
798 int *buf = points; | |
799 int *codebook = av_malloc(6*c_size*cbsize*sizeof(int)); | |
800 int *closest_cb; | |
801 | |
802 if (size == 4) | |
803 closest_cb = av_malloc(6*c_size*inputCount*sizeof(int)); | |
804 else | |
805 closest_cb = tempdata->closest_cb2; | |
806 | |
807 ff_init_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx); | |
808 ff_do_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx); | |
809 | |
810 if (size == 4) | |
811 av_free(closest_cb); | |
812 | |
813 buf = codebook; | |
814 for (i=0; i<cbsize; i++) | |
815 for (k=0; k<c_size; k++) { | |
816 for(j=0; j<4; j++) | |
817 results->y[j] = *buf++; | |
818 | |
819 results->u = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS; | |
820 results->v = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS; | |
821 results++; | |
822 } | |
823 | |
824 av_free(codebook); | |
825 } | |
826 | |
827 static void generate_new_codebooks(RoqContext *enc, roq_tempdata_t *tempData) | |
828 { | |
829 int i,j; | |
830 roq_codebooks_t *codebooks = &tempData->codebooks; | |
831 int max = enc->width*enc->height/16; | |
832 uint8_t mb2[3*4]; | |
833 roq_cell *results4 = av_malloc(sizeof(roq_cell)*MAX_CBS_4x4*4); | |
834 uint8_t *yuvClusters=av_malloc(sizeof(int)*max*6*4); | |
835 int *points = av_malloc(max*6*4*sizeof(int)); | |
836 int bias; | |
837 | |
838 /* Subsample YUV data */ | |
839 create_clusters(enc->frame_to_enc, enc->width, enc->height, yuvClusters); | |
840 | |
841 /* Cast to integer and apply chroma bias */ | |
842 for (i=0; i<max*24; i++) { | |
843 bias = ((i%6)<4) ? 1 : CHROMA_BIAS; | |
844 points[i] = bias*yuvClusters[i]; | |
845 } | |
846 | |
847 /* Create 4x4 codebooks */ | |
848 generate_codebook(enc, tempData, points, max, results4, 4, MAX_CBS_4x4); | |
849 | |
850 codebooks->numCB4 = MAX_CBS_4x4; | |
851 | |
852 tempData->closest_cb2 = av_malloc(max*4*sizeof(int)); | |
853 | |
854 /* Create 2x2 codebooks */ | |
855 generate_codebook(enc, tempData, points, max*4, enc->cb2x2, 2, MAX_CBS_2x2); | |
856 | |
857 codebooks->numCB2 = MAX_CBS_2x2; | |
858 | |
859 /* Unpack 2x2 codebook clusters */ | |
860 for (i=0; i<codebooks->numCB2; i++) | |
861 unpack_roq_cell(enc->cb2x2 + i, codebooks->unpacked_cb2 + i*2*2*3); | |
862 | |
863 /* Index all 4x4 entries to the 2x2 entries, unpack, and enlarge */ | |
864 for (i=0; i<codebooks->numCB4; i++) { | |
865 for (j=0; j<4; j++) { | |
866 unpack_roq_cell(&results4[4*i + j], mb2); | |
867 index_mb(mb2, codebooks->unpacked_cb2, codebooks->numCB2, | |
868 &enc->cb4x4[i].idx[j], 2); | |
869 } | |
870 unpack_roq_qcell(codebooks->unpacked_cb2, enc->cb4x4 + i, | |
871 codebooks->unpacked_cb4 + i*4*4*3); | |
872 enlarge_roq_mb4(codebooks->unpacked_cb4 + i*4*4*3, | |
873 codebooks->unpacked_cb4_enlarged + i*8*8*3); | |
874 } | |
875 | |
876 av_free(yuvClusters); | |
877 av_free(points); | |
878 av_free(results4); | |
879 } | |
880 | |
881 static void roq_encode_video(RoqContext *enc) | |
882 { | |
883 roq_tempdata_t tempData; | |
884 int i; | |
885 | |
886 memset(&tempData, 0, sizeof(tempData)); | |
887 | |
888 create_cel_evals(enc, &tempData); | |
889 | |
890 generate_new_codebooks(enc, &tempData); | |
891 | |
892 if (enc->framesSinceKeyframe >= 1) { | |
893 motion_search(enc, 8); | |
894 motion_search(enc, 4); | |
895 } | |
896 | |
897 retry_encode: | |
898 for (i=0; i<enc->width*enc->height/64; i++) | |
899 gather_data_for_cel(tempData.cel_evals + i, enc, &tempData); | |
900 | |
901 /* Quake 3 can't handle chunks bigger than 65536 bytes */ | |
902 if (tempData.mainChunkSize/8 > 65536) { | |
903 enc->lambda *= .8; | |
904 goto retry_encode; | |
905 } | |
906 | |
907 remap_codebooks(enc, &tempData); | |
908 | |
909 write_codebooks(enc, &tempData); | |
910 | |
911 reconstruct_and_encode_image(enc, &tempData, enc->width, enc->height, | |
912 enc->width*enc->height/64); | |
913 | |
7308
29319d07aeb4
Set avctx->coded_frame in RoQ encoder. At some point in
vitor
parents:
7040
diff
changeset
|
914 enc->avctx->coded_frame = enc->current_frame; |
29319d07aeb4
Set avctx->coded_frame in RoQ encoder. At some point in
vitor
parents:
7040
diff
changeset
|
915 |
5184 | 916 /* Rotate frame history */ |
917 FFSWAP(AVFrame *, enc->current_frame, enc->last_frame); | |
918 FFSWAP(motion_vect *, enc->last_motion4, enc->this_motion4); | |
919 FFSWAP(motion_vect *, enc->last_motion8, enc->this_motion8); | |
920 | |
921 av_free(tempData.cel_evals); | |
922 av_free(tempData.closest_cb2); | |
923 | |
924 enc->framesSinceKeyframe++; | |
925 } | |
926 | |
927 static int roq_encode_init(AVCodecContext *avctx) | |
928 { | |
929 RoqContext *enc = avctx->priv_data; | |
930 | |
931 av_init_random(1, &enc->randctx); | |
932 | |
933 enc->framesSinceKeyframe = 0; | |
934 if ((avctx->width & 0xf) || (avctx->height & 0xf)) { | |
935 av_log(avctx, AV_LOG_ERROR, "Dimensions must be divisible by 16\n"); | |
936 return -1; | |
937 } | |
938 | |
939 if (((avctx->width)&(avctx->width-1))||((avctx->height)&(avctx->height-1))) | |
940 av_log(avctx, AV_LOG_ERROR, "Warning: dimensions not power of two\n"); | |
941 | |
942 if (avcodec_check_dimensions(avctx, avctx->width, avctx->height)) { | |
943 av_log(avctx, AV_LOG_ERROR, "Invalid dimensions (%dx%d)\n", | |
944 avctx->width, avctx->height); | |
945 return -1; | |
946 } | |
947 | |
948 enc->width = avctx->width; | |
949 enc->height = avctx->height; | |
950 | |
951 enc->framesSinceKeyframe = 0; | |
952 enc->first_frame = 1; | |
953 | |
954 enc->last_frame = &enc->frames[0]; | |
955 enc->current_frame = &enc->frames[1]; | |
956 | |
957 enc->this_motion4 = | |
958 av_mallocz((enc->width*enc->height/16)*sizeof(motion_vect)); | |
959 | |
960 enc->last_motion4 = | |
961 av_malloc ((enc->width*enc->height/16)*sizeof(motion_vect)); | |
962 | |
963 enc->this_motion8 = | |
964 av_mallocz((enc->width*enc->height/64)*sizeof(motion_vect)); | |
965 | |
966 enc->last_motion8 = | |
967 av_malloc ((enc->width*enc->height/64)*sizeof(motion_vect)); | |
968 | |
969 return 0; | |
970 } | |
971 | |
972 static void roq_write_video_info_chunk(RoqContext *enc) | |
973 { | |
974 /* ROQ info chunk */ | |
975 bytestream_put_le16(&enc->out_buf, RoQ_INFO); | |
976 | |
977 /* Size: 8 bytes */ | |
978 bytestream_put_le32(&enc->out_buf, 8); | |
979 | |
980 /* Unused argument */ | |
981 bytestream_put_byte(&enc->out_buf, 0x00); | |
982 bytestream_put_byte(&enc->out_buf, 0x00); | |
983 | |
984 /* Width */ | |
985 bytestream_put_le16(&enc->out_buf, enc->width); | |
986 | |
987 /* Height */ | |
988 bytestream_put_le16(&enc->out_buf, enc->height); | |
989 | |
990 /* Unused in Quake 3, mimics the output of the real encoder */ | |
991 bytestream_put_byte(&enc->out_buf, 0x08); | |
992 bytestream_put_byte(&enc->out_buf, 0x00); | |
993 bytestream_put_byte(&enc->out_buf, 0x04); | |
994 bytestream_put_byte(&enc->out_buf, 0x00); | |
995 } | |
996 | |
997 static int roq_encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data) | |
998 { | |
999 RoqContext *enc = avctx->priv_data; | |
1000 AVFrame *frame= data; | |
1001 uint8_t *buf_start = buf; | |
1002 | |
1003 enc->out_buf = buf; | |
1004 enc->avctx = avctx; | |
1005 | |
1006 enc->frame_to_enc = frame; | |
1007 | |
1008 if (frame->quality) | |
1009 enc->lambda = frame->quality - 1; | |
1010 else | |
1011 enc->lambda = 2*ROQ_LAMBDA_SCALE; | |
1012 | |
1013 /* 138 bits max per 8x8 block + | |
1014 * 256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame header */ | |
1015 if (((enc->width*enc->height/64)*138+7)/8 + 256*(6+4) + 8 > buf_size) { | |
1016 av_log(avctx, AV_LOG_ERROR, " RoQ: Output buffer too small!\n"); | |
1017 return -1; | |
1018 } | |
1019 | |
1020 /* Check for I frame */ | |
1021 if (enc->framesSinceKeyframe == avctx->gop_size) | |
1022 enc->framesSinceKeyframe = 0; | |
1023 | |
1024 if (enc->first_frame) { | |
1025 /* Alloc memory for the reconstruction data (we must know the stride | |
1026 for that) */ | |
1027 if (avctx->get_buffer(avctx, enc->current_frame) || | |
1028 avctx->get_buffer(avctx, enc->last_frame)) { | |
1029 av_log(avctx, AV_LOG_ERROR, " RoQ: get_buffer() failed\n"); | |
1030 return -1; | |
1031 } | |
1032 | |
1033 /* Before the first video frame, write a "video info" chunk */ | |
1034 roq_write_video_info_chunk(enc); | |
1035 | |
1036 enc->first_frame = 0; | |
1037 } | |
1038 | |
1039 /* Encode the actual frame */ | |
1040 roq_encode_video(enc); | |
1041 | |
1042 return enc->out_buf - buf_start; | |
1043 } | |
1044 | |
1045 static int roq_encode_end(AVCodecContext *avctx) | |
1046 { | |
1047 RoqContext *enc = avctx->priv_data; | |
1048 | |
1049 avctx->release_buffer(avctx, enc->last_frame); | |
1050 avctx->release_buffer(avctx, enc->current_frame); | |
1051 | |
1052 av_free(enc->this_motion4); | |
1053 av_free(enc->last_motion4); | |
1054 av_free(enc->this_motion8); | |
1055 av_free(enc->last_motion8); | |
1056 | |
1057 return 0; | |
1058 } | |
1059 | |
1060 AVCodec roq_encoder = | |
1061 { | |
1062 "roqvideo", | |
1063 CODEC_TYPE_VIDEO, | |
1064 CODEC_ID_ROQ, | |
1065 sizeof(RoqContext), | |
1066 roq_encode_init, | |
1067 roq_encode_frame, | |
1068 roq_encode_end, | |
1069 .supported_framerates = (AVRational[]){{30,1}, {0,0}}, | |
6788 | 1070 .pix_fmts = (enum PixelFormat[]){PIX_FMT_YUV444P, PIX_FMT_NONE}, |
7040
e943e1409077
Make AVCodec long_names definition conditional depending on CONFIG_SMALL.
stefano
parents:
6812
diff
changeset
|
1071 .long_name = NULL_IF_CONFIG_SMALL("id RoQ video"), |
5184 | 1072 }; |