Mercurial > libavcodec.hg
annotate roqvideoenc.c @ 8991:ca768cb2bfb6 libavcodec
Use last decoded SPS as current SPS in order to parse picture timing SEI
correctly. This works around an apparent H.264 standard deficiency.
Patch by Ivan Schreter, schreter gmx net
author | cehoyos |
---|---|
date | Fri, 20 Feb 2009 16:20:01 +0000 |
parents | e9d9d946f213 |
children | aa459306ee59 |
rev | line source |
---|---|
5184 | 1 /* |
2 * RoQ Video Encoder. | |
3 * | |
5219 | 4 * Copyright (C) 2007 Vitor Sessak <vitor1001@gmail.com> |
5184 | 5 * Copyright (C) 2004-2007 Eric Lasota |
6 * Based on RoQ specs (C) 2001 Tim Ferguson | |
7 * | |
8 * This file is part of FFmpeg. | |
9 * | |
10 * FFmpeg is free software; you can redistribute it and/or | |
11 * modify it under the terms of the GNU Lesser General Public | |
12 * License as published by the Free Software Foundation; either | |
13 * version 2.1 of the License, or (at your option) any later version. | |
14 * | |
15 * FFmpeg is distributed in the hope that it will be useful, | |
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 * Lesser General Public License for more details. | |
19 * | |
20 * You should have received a copy of the GNU Lesser General Public | |
21 * License along with FFmpeg; if not, write to the Free Software | |
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
23 */ | |
24 | |
25 /** | |
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
8643
diff
changeset
|
26 * @file libavcodec/roqvideoenc.c |
6812
0d01bae8d207
cosmetics: s/Id/id/ in libavcodec where Id refers to id Software.
diego
parents:
6788
diff
changeset
|
27 * id RoQ encoder by Vitor. Based on the Switchblade3 library and the |
5184 | 28 * Switchblade3 FFmpeg glue by Eric Lasota. |
29 */ | |
30 | |
31 /* | |
32 * COSTS: | |
33 * Level 1: | |
34 * SKIP - 2 bits | |
35 * MOTION - 2 + 8 bits | |
36 * CODEBOOK - 2 + 8 bits | |
37 * SUBDIVIDE - 2 + combined subcel cost | |
38 * | |
39 * Level 2: | |
40 * SKIP - 2 bits | |
41 * MOTION - 2 + 8 bits | |
42 * CODEBOOK - 2 + 8 bits | |
43 * SUBDIVIDE - 2 + 4*8 bits | |
44 * | |
45 * Maximum cost: 138 bits per cel | |
46 * | |
47 * Proper evaluation requires LCD fraction comparison, which requires | |
48 * Squared Error (SE) loss * savings increase | |
49 * | |
50 * Maximum savings increase: 136 bits | |
51 * Maximum SE loss without overflow: 31580641 | |
52 * Components in 8x8 supercel: 192 | |
53 * Maximum SE precision per component: 164482 | |
54 * >65025, so no truncation is needed (phew) | |
55 */ | |
56 | |
57 #include <string.h> | |
58 #include <unistd.h> | |
59 | |
60 #include "roqvideo.h" | |
61 #include "bytestream.h" | |
62 #include "elbg.h" | |
8627
d6bab465b82c
moves mid_pred() into mathops.h (with arch specific code split by directory)
aurel
parents:
8296
diff
changeset
|
63 #include "mathops.h" |
5184 | 64 |
/**
 * Weight applied to the squared error of each chroma plane when distortions
 * are summed; the luma plane is weighted by 4. Chroma samples are also
 * multiplied by this value before codebook clustering.
 */
#define CHROMA_BIAS 1

/**
 * Maximum number of generated 4x4 codebooks. Can't be 256 to workaround a
 * Quake 3 bug.
 */
#define MAX_CBS_4x4 255

#define MAX_CBS_2x2 256 ///< Maximum number of 2x2 codebooks.

/* The cast is useful when multiplying it by INT_MAX */
#define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE)
77 | |
78 /* Macroblock support functions */ | |
79 static void unpack_roq_cell(roq_cell *cell, uint8_t u[4*3]) | |
80 { | |
81 memcpy(u , cell->y, 4); | |
82 memset(u+4, cell->u, 4); | |
83 memset(u+8, cell->v, 4); | |
84 } | |
85 | |
86 static void unpack_roq_qcell(uint8_t cb2[], roq_qcell *qcell, uint8_t u[4*4*3]) | |
87 { | |
88 int i,cp; | |
89 static const int offsets[4] = {0, 2, 8, 10}; | |
90 | |
91 for (cp=0; cp<3; cp++) | |
92 for (i=0; i<4; i++) { | |
93 u[4*4*cp + offsets[i] ] = cb2[qcell->idx[i]*2*2*3 + 4*cp ]; | |
94 u[4*4*cp + offsets[i]+1] = cb2[qcell->idx[i]*2*2*3 + 4*cp+1]; | |
95 u[4*4*cp + offsets[i]+4] = cb2[qcell->idx[i]*2*2*3 + 4*cp+2]; | |
96 u[4*4*cp + offsets[i]+5] = cb2[qcell->idx[i]*2*2*3 + 4*cp+3]; | |
97 } | |
98 } | |
99 | |
100 | |
/**
 * Upscale a planar 4x4 block to 8x8 by duplicating every sample into a
 * 2x2 square, independently for each of the three planes.
 *
 * @param base source, 3 planes of 16 bytes
 * @param u    destination, 3 planes of 64 bytes
 */
static void enlarge_roq_mb4(uint8_t base[3*16], uint8_t u[3*64])
{
    int plane, row, col;

    for (plane = 0; plane < 3; plane++) {
        const uint8_t *src = base + 16*plane;
        uint8_t *dst       = u + 64*plane;

        for (row = 0; row < 8; row++) {
            const uint8_t *src_row = src + 4*(row/2);

            for (col = 0; col < 8; col++)
                dst[8*row + col] = src_row[col/2];
        }
    }
}
110 | |
/** Return x multiplied by itself. */
static inline int square(int x)
{
    const int squared = x * x;

    return squared;
}
115 | |
/**
 * Sum of squared differences between two byte sequences.
 *
 * @param a     first sequence
 * @param b     second sequence
 * @param count number of samples to compare
 * @return sum over all samples of (b[n] - a[n])^2
 */
static inline int eval_sse(uint8_t *a, uint8_t *b, int count)
{
    int total = 0;
    int n;

    for (n = 0; n < count; n++) {
        const int delta = b[n] - a[n];

        total += delta * delta;
    }

    return total;
}
125 | |
126 // FIXME Could use DSPContext.sse, but it is not so speed critical (used | |
127 // just for motion estimation). | |
128 static int block_sse(uint8_t **buf1, uint8_t **buf2, int x1, int y1, int x2, | |
5198 | 129 int y2, int *stride1, int *stride2, int size) |
5184 | 130 { |
131 int i, k; | |
132 int sse=0; | |
133 | |
134 for (k=0; k<3; k++) { | |
135 int bias = (k ? CHROMA_BIAS : 4); | |
136 for (i=0; i<size; i++) | |
5198 | 137 sse += bias*eval_sse(buf1[k] + (y1+i)*stride1[k] + x1, |
138 buf2[k] + (y2+i)*stride2[k] + x2, size); | |
5184 | 139 } |
140 | |
141 return sse; | |
142 } | |
143 | |
144 static int eval_motion_dist(RoqContext *enc, int x, int y, motion_vect vect, | |
145 int size) | |
146 { | |
147 int mx=vect.d[0]; | |
148 int my=vect.d[1]; | |
149 | |
150 if (mx < -7 || mx > 7) | |
151 return INT_MAX; | |
152 | |
153 if (my < -7 || my > 7) | |
154 return INT_MAX; | |
155 | |
156 mx += x; | |
157 my += y; | |
158 | |
159 if ((unsigned) mx > enc->width-size || (unsigned) my > enc->height-size) | |
160 return INT_MAX; | |
161 | |
162 return block_sse(enc->frame_to_enc->data, enc->last_frame->data, x, y, | |
5198 | 163 mx, my, |
164 enc->frame_to_enc->linesize, enc->last_frame->linesize, | |
165 size); | |
5184 | 166 } |
167 | |
168 /** | |
169 * Returns distortion between two macroblocks | |
170 */ | |
171 static inline int squared_diff_macroblock(uint8_t a[], uint8_t b[], int size) | |
172 { | |
173 int cp, sdiff=0; | |
174 | |
175 for(cp=0;cp<3;cp++) { | |
176 int bias = (cp ? CHROMA_BIAS : 4); | |
177 sdiff += bias*eval_sse(a, b, size*size); | |
178 a += size*size; | |
179 b += size*size; | |
180 } | |
181 | |
182 return sdiff; | |
183 } | |
184 | |
185 typedef struct | |
186 { | |
187 int eval_dist[4]; | |
188 int best_bit_use; | |
189 int best_coding; | |
190 | |
191 int subCels[4]; | |
192 motion_vect motion; | |
193 int cbEntry; | |
8296 | 194 } SubcelEvaluation; |
5184 | 195 |
196 typedef struct | |
197 { | |
198 int eval_dist[4]; | |
199 int best_coding; | |
200 | |
8296 | 201 SubcelEvaluation subCels[4]; |
5184 | 202 |
203 motion_vect motion; | |
204 int cbEntry; | |
205 | |
206 int sourceX, sourceY; | |
8296 | 207 } CelEvaluation; |
5184 | 208 |
209 typedef struct | |
210 { | |
211 int numCB4; | |
212 int numCB2; | |
213 int usedCB2[MAX_CBS_2x2]; | |
214 int usedCB4[MAX_CBS_4x4]; | |
215 uint8_t unpacked_cb2[MAX_CBS_2x2*2*2*3]; | |
216 uint8_t unpacked_cb4[MAX_CBS_4x4*4*4*3]; | |
217 uint8_t unpacked_cb4_enlarged[MAX_CBS_4x4*8*8*3]; | |
8296 | 218 } RoqCodebooks; |
5184 | 219 |
220 /** | |
221 * Temporary vars | |
222 */ | |
8643 | 223 typedef struct RoqTempData |
5184 | 224 { |
8296 | 225 CelEvaluation *cel_evals; |
5184 | 226 |
227 int f2i4[MAX_CBS_4x4]; | |
228 int i2f4[MAX_CBS_4x4]; | |
229 int f2i2[MAX_CBS_2x2]; | |
230 int i2f2[MAX_CBS_2x2]; | |
231 | |
232 int mainChunkSize; | |
233 | |
234 int numCB4; | |
235 int numCB2; | |
236 | |
8296 | 237 RoqCodebooks codebooks; |
5184 | 238 |
239 int *closest_cb2; | |
240 int used_option[4]; | |
8296 | 241 } RoqTempdata; |
5184 | 242 |
243 /** | |
244 * Initializes cel evaluators and sets their source coordinates | |
245 */ | |
8296 | 246 static void create_cel_evals(RoqContext *enc, RoqTempdata *tempData) |
5184 | 247 { |
248 int n=0, x, y, i; | |
249 | |
8296 | 250 tempData->cel_evals = av_malloc(enc->width*enc->height/64 * sizeof(CelEvaluation)); |
5184 | 251 |
252 /* Map to the ROQ quadtree order */ | |
253 for (y=0; y<enc->height; y+=16) | |
254 for (x=0; x<enc->width; x+=16) | |
255 for(i=0; i<4; i++) { | |
256 tempData->cel_evals[n ].sourceX = x + (i&1)*8; | |
257 tempData->cel_evals[n++].sourceY = y + (i&2)*4; | |
258 } | |
259 } | |
260 | |
261 /** | |
262 * Get macroblocks from parts of the image | |
263 */ | |
264 static void get_frame_mb(AVFrame *frame, int x, int y, uint8_t mb[], int dim) | |
265 { | |
266 int i, j, cp; | |
267 | |
5191 | 268 for (cp=0; cp<3; cp++) { |
269 int stride = frame->linesize[cp]; | |
5184 | 270 for (i=0; i<dim; i++) |
271 for (j=0; j<dim; j++) | |
272 *mb++ = frame->data[cp][(y+i)*stride + x + j]; | |
5191 | 273 } |
5184 | 274 } |
275 | |
/**
 * Find the codebook with the lowest distortion from an image.
 *
 * @param cluster  planar dim x dim block to match
 * @param cb       codebook, numCB consecutive planar dim x dim entries
 * @param numCB    number of entries in cb
 * @param outIndex receives the index of the best entry (0 if numCB <= 0)
 * @param dim      block width and height
 * @return distortion of the best entry (INT_MAX if numCB <= 0)
 */
static int index_mb(uint8_t cluster[], uint8_t cb[], int numCB,
                    int *outIndex, int dim)
{
    const int entry_len = dim*dim*3;
    int best_dist = INT_MAX;
    int best_idx  = 0;
    int n;

    for (n = 0; n < numCB; n++) {
        const int dist = squared_diff_macroblock(cluster, cb + n*entry_len, dim);

        /* strict comparison: the first of several equal entries wins */
        if (dist >= best_dist)
            continue;

        best_dist = dist;
        best_idx  = n;
    }

    *outIndex = best_idx;
    return best_dist;
}
296 | |
297 #define EVAL_MOTION(MOTION) \ | |
298 do { \ | |
299 diff = eval_motion_dist(enc, j, i, MOTION, blocksize); \ | |
300 \ | |
301 if (diff < lowestdiff) { \ | |
302 lowestdiff = diff; \ | |
303 bestpick = MOTION; \ | |
304 } \ | |
305 } while(0) | |
306 | |
307 static void motion_search(RoqContext *enc, int blocksize) | |
308 { | |
309 static const motion_vect offsets[8] = { | |
310 {{ 0,-1}}, | |
311 {{ 0, 1}}, | |
312 {{-1, 0}}, | |
313 {{ 1, 0}}, | |
314 {{-1, 1}}, | |
315 {{ 1,-1}}, | |
316 {{-1,-1}}, | |
317 {{ 1, 1}}, | |
318 }; | |
319 | |
320 int diff, lowestdiff, oldbest; | |
321 int off[3]; | |
322 motion_vect bestpick = {{0,0}}; | |
323 int i, j, k, offset; | |
324 | |
325 motion_vect *last_motion; | |
326 motion_vect *this_motion; | |
327 motion_vect vect, vect2; | |
328 | |
329 int max=(enc->width/blocksize)*enc->height/blocksize; | |
330 | |
331 if (blocksize == 4) { | |
332 last_motion = enc->last_motion4; | |
333 this_motion = enc->this_motion4; | |
334 } else { | |
335 last_motion = enc->last_motion8; | |
336 this_motion = enc->this_motion8; | |
337 } | |
338 | |
339 for (i=0; i<enc->height; i+=blocksize) | |
340 for (j=0; j<enc->width; j+=blocksize) { | |
341 lowestdiff = eval_motion_dist(enc, j, i, (motion_vect) {{0,0}}, | |
342 blocksize); | |
343 bestpick.d[0] = 0; | |
344 bestpick.d[1] = 0; | |
345 | |
346 if (blocksize == 4) | |
347 EVAL_MOTION(enc->this_motion8[(i/8)*(enc->width/8) + j/8]); | |
348 | |
349 offset = (i/blocksize)*enc->width/blocksize + j/blocksize; | |
350 if (offset < max && offset >= 0) | |
351 EVAL_MOTION(last_motion[offset]); | |
352 | |
353 offset++; | |
354 if (offset < max && offset >= 0) | |
355 EVAL_MOTION(last_motion[offset]); | |
356 | |
357 offset = (i/blocksize + 1)*enc->width/blocksize + j/blocksize; | |
358 if (offset < max && offset >= 0) | |
359 EVAL_MOTION(last_motion[offset]); | |
360 | |
361 off[0]= (i/blocksize)*enc->width/blocksize + j/blocksize - 1; | |
362 off[1]= off[0] - enc->width/blocksize + 1; | |
363 off[2]= off[1] + 1; | |
364 | |
365 if (i) { | |
366 | |
367 for(k=0; k<2; k++) | |
368 vect.d[k]= mid_pred(this_motion[off[0]].d[k], | |
369 this_motion[off[1]].d[k], | |
370 this_motion[off[2]].d[k]); | |
371 | |
372 EVAL_MOTION(vect); | |
373 for(k=0; k<3; k++) | |
374 EVAL_MOTION(this_motion[off[k]]); | |
375 } else if(j) | |
376 EVAL_MOTION(this_motion[off[0]]); | |
377 | |
378 vect = bestpick; | |
379 | |
380 oldbest = -1; | |
381 while (oldbest != lowestdiff) { | |
382 oldbest = lowestdiff; | |
383 for (k=0; k<8; k++) { | |
384 vect2 = vect; | |
385 vect2.d[0] += offsets[k].d[0]; | |
386 vect2.d[1] += offsets[k].d[1]; | |
387 EVAL_MOTION(vect2); | |
388 } | |
389 vect = bestpick; | |
390 } | |
391 offset = (i/blocksize)*enc->width/blocksize + j/blocksize; | |
392 this_motion[offset] = bestpick; | |
393 } | |
394 } | |
395 | |
396 /** | |
397 * Gets distortion for all options available to a subcel | |
398 */ | |
8296 | 399 static void gather_data_for_subcel(SubcelEvaluation *subcel, int x, |
400 int y, RoqContext *enc, RoqTempdata *tempData) | |
5184 | 401 { |
402 uint8_t mb4[4*4*3]; | |
403 uint8_t mb2[2*2*3]; | |
404 int cluster_index; | |
405 int i, best_dist; | |
406 | |
407 static const int bitsUsed[4] = {2, 10, 10, 34}; | |
408 | |
409 if (enc->framesSinceKeyframe >= 1) { | |
410 subcel->motion = enc->this_motion4[y*enc->width/16 + x/4]; | |
411 | |
412 subcel->eval_dist[RoQ_ID_FCC] = | |
413 eval_motion_dist(enc, x, y, | |
414 enc->this_motion4[y*enc->width/16 + x/4], 4); | |
415 } else | |
416 subcel->eval_dist[RoQ_ID_FCC] = INT_MAX; | |
417 | |
418 if (enc->framesSinceKeyframe >= 2) | |
419 subcel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data, | |
420 enc->current_frame->data, x, | |
5191 | 421 y, x, y, |
422 enc->frame_to_enc->linesize, | |
5198 | 423 enc->current_frame->linesize, |
5191 | 424 4); |
5184 | 425 else |
426 subcel->eval_dist[RoQ_ID_MOT] = INT_MAX; | |
427 | |
428 cluster_index = y*enc->width/16 + x/4; | |
429 | |
430 get_frame_mb(enc->frame_to_enc, x, y, mb4, 4); | |
431 | |
432 subcel->eval_dist[RoQ_ID_SLD] = index_mb(mb4, | |
433 tempData->codebooks.unpacked_cb4, | |
434 tempData->codebooks.numCB4, | |
435 &subcel->cbEntry, 4); | |
436 | |
437 subcel->eval_dist[RoQ_ID_CCC] = 0; | |
438 | |
439 for(i=0;i<4;i++) { | |
440 subcel->subCels[i] = tempData->closest_cb2[cluster_index*4+i]; | |
441 | |
442 get_frame_mb(enc->frame_to_enc, x+2*(i&1), | |
443 y+(i&2), mb2, 2); | |
444 | |
445 subcel->eval_dist[RoQ_ID_CCC] += | |
446 squared_diff_macroblock(tempData->codebooks.unpacked_cb2 + subcel->subCels[i]*2*2*3, mb2, 2); | |
447 } | |
448 | |
449 best_dist = INT_MAX; | |
450 for (i=0; i<4; i++) | |
451 if (ROQ_LAMBDA_SCALE*subcel->eval_dist[i] + enc->lambda*bitsUsed[i] < | |
452 best_dist) { | |
453 subcel->best_coding = i; | |
454 subcel->best_bit_use = bitsUsed[i]; | |
455 best_dist = ROQ_LAMBDA_SCALE*subcel->eval_dist[i] + | |
456 enc->lambda*bitsUsed[i]; | |
457 } | |
458 } | |
459 | |
460 /** | |
461 * Gets distortion for all options available to a cel | |
462 */ | |
8296 | 463 static void gather_data_for_cel(CelEvaluation *cel, RoqContext *enc, |
464 RoqTempdata *tempData) | |
5184 | 465 { |
466 uint8_t mb8[8*8*3]; | |
467 int index = cel->sourceY*enc->width/64 + cel->sourceX/8; | |
468 int i, j, best_dist, divide_bit_use; | |
469 | |
470 int bitsUsed[4] = {2, 10, 10, 0}; | |
471 | |
472 if (enc->framesSinceKeyframe >= 1) { | |
473 cel->motion = enc->this_motion8[index]; | |
474 | |
475 cel->eval_dist[RoQ_ID_FCC] = | |
476 eval_motion_dist(enc, cel->sourceX, cel->sourceY, | |
477 enc->this_motion8[index], 8); | |
478 } else | |
479 cel->eval_dist[RoQ_ID_FCC] = INT_MAX; | |
480 | |
481 if (enc->framesSinceKeyframe >= 2) | |
482 cel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data, | |
483 enc->current_frame->data, | |
484 cel->sourceX, cel->sourceY, | |
485 cel->sourceX, cel->sourceY, | |
5198 | 486 enc->frame_to_enc->linesize, |
487 enc->current_frame->linesize,8); | |
5184 | 488 else |
489 cel->eval_dist[RoQ_ID_MOT] = INT_MAX; | |
490 | |
491 get_frame_mb(enc->frame_to_enc, cel->sourceX, cel->sourceY, mb8, 8); | |
492 | |
493 cel->eval_dist[RoQ_ID_SLD] = | |
494 index_mb(mb8, tempData->codebooks.unpacked_cb4_enlarged, | |
495 tempData->codebooks.numCB4, &cel->cbEntry, 8); | |
496 | |
497 gather_data_for_subcel(cel->subCels + 0, cel->sourceX+0, cel->sourceY+0, enc, tempData); | |
498 gather_data_for_subcel(cel->subCels + 1, cel->sourceX+4, cel->sourceY+0, enc, tempData); | |
499 gather_data_for_subcel(cel->subCels + 2, cel->sourceX+0, cel->sourceY+4, enc, tempData); | |
500 gather_data_for_subcel(cel->subCels + 3, cel->sourceX+4, cel->sourceY+4, enc, tempData); | |
501 | |
502 cel->eval_dist[RoQ_ID_CCC] = 0; | |
503 divide_bit_use = 0; | |
504 for (i=0; i<4; i++) { | |
505 cel->eval_dist[RoQ_ID_CCC] += | |
506 cel->subCels[i].eval_dist[cel->subCels[i].best_coding]; | |
507 divide_bit_use += cel->subCels[i].best_bit_use; | |
508 } | |
509 | |
510 best_dist = INT_MAX; | |
511 bitsUsed[3] = 2 + divide_bit_use; | |
512 | |
513 for (i=0; i<4; i++) | |
514 if (ROQ_LAMBDA_SCALE*cel->eval_dist[i] + enc->lambda*bitsUsed[i] < | |
515 best_dist) { | |
516 cel->best_coding = i; | |
517 best_dist = ROQ_LAMBDA_SCALE*cel->eval_dist[i] + | |
518 enc->lambda*bitsUsed[i]; | |
519 } | |
520 | |
521 tempData->used_option[cel->best_coding]++; | |
522 tempData->mainChunkSize += bitsUsed[cel->best_coding]; | |
523 | |
524 if (cel->best_coding == RoQ_ID_SLD) | |
525 tempData->codebooks.usedCB4[cel->cbEntry]++; | |
526 | |
527 if (cel->best_coding == RoQ_ID_CCC) | |
528 for (i=0; i<4; i++) { | |
529 if (cel->subCels[i].best_coding == RoQ_ID_SLD) | |
530 tempData->codebooks.usedCB4[cel->subCels[i].cbEntry]++; | |
531 else if (cel->subCels[i].best_coding == RoQ_ID_CCC) | |
532 for (j=0; j<4; j++) | |
533 tempData->codebooks.usedCB2[cel->subCels[i].subCels[j]]++; | |
534 } | |
535 } | |
536 | |
8296 | 537 static void remap_codebooks(RoqContext *enc, RoqTempdata *tempData) |
5184 | 538 { |
539 int i, j, idx=0; | |
540 | |
541 /* Make remaps for the final codebook usage */ | |
542 for (i=0; i<MAX_CBS_4x4; i++) { | |
543 if (tempData->codebooks.usedCB4[i]) { | |
544 tempData->i2f4[i] = idx; | |
545 tempData->f2i4[idx] = i; | |
546 for (j=0; j<4; j++) | |
547 tempData->codebooks.usedCB2[enc->cb4x4[i].idx[j]]++; | |
548 idx++; | |
549 } | |
550 } | |
551 | |
552 tempData->numCB4 = idx; | |
553 | |
554 idx = 0; | |
555 for (i=0; i<MAX_CBS_2x2; i++) { | |
556 if (tempData->codebooks.usedCB2[i]) { | |
557 tempData->i2f2[i] = idx; | |
558 tempData->f2i2[idx] = i; | |
559 idx++; | |
560 } | |
561 } | |
562 tempData->numCB2 = idx; | |
563 | |
564 } | |
565 | |
566 /** | |
567 * Write codebook chunk | |
568 */ | |
8296 | 569 static void write_codebooks(RoqContext *enc, RoqTempdata *tempData) |
5184 | 570 { |
571 int i, j; | |
572 uint8_t **outp= &enc->out_buf; | |
573 | |
574 if (tempData->numCB2) { | |
575 bytestream_put_le16(outp, RoQ_QUAD_CODEBOOK); | |
576 bytestream_put_le32(outp, tempData->numCB2*6 + tempData->numCB4*4); | |
577 bytestream_put_byte(outp, tempData->numCB4); | |
578 bytestream_put_byte(outp, tempData->numCB2); | |
579 | |
580 for (i=0; i<tempData->numCB2; i++) { | |
581 bytestream_put_buffer(outp, enc->cb2x2[tempData->f2i2[i]].y, 4); | |
582 bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].u); | |
583 bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].v); | |
584 } | |
585 | |
586 for (i=0; i<tempData->numCB4; i++) | |
587 for (j=0; j<4; j++) | |
588 bytestream_put_byte(outp, tempData->i2f2[enc->cb4x4[tempData->f2i4[i]].idx[j]]); | |
589 | |
590 } | |
591 } | |
592 | |
593 static inline uint8_t motion_arg(motion_vect mot) | |
594 { | |
595 uint8_t ax = 8 - ((uint8_t) mot.d[0]); | |
596 uint8_t ay = 8 - ((uint8_t) mot.d[1]); | |
597 return ((ax&15)<<4) | (ay&15); | |
598 } | |
599 | |
/**
 * Buffer used while writing the video chunk: typecodes are collected in a
 * 16-bit word and their arguments are held back until that word is full.
 */
typedef struct {
    int typeSpool;             ///< typecodes collected so far
    int typeSpoolLength;       ///< number of bits used in typeSpool
    uint8_t argumentSpool[64]; ///< arguments waiting to be written
    uint8_t *args;             ///< write position inside argumentSpool
    uint8_t **pout;            ///< output position of the encoded stream
} CodingSpool;
608 | |
609 /* NOTE: Typecodes must be spooled AFTER arguments!! */ | |
610 static void write_typecode(CodingSpool *s, uint8_t type) | |
611 { | |
5205 | 612 s->typeSpool |= (type & 3) << (14 - s->typeSpoolLength); |
5184 | 613 s->typeSpoolLength += 2; |
614 if (s->typeSpoolLength == 16) { | |
615 bytestream_put_le16(s->pout, s->typeSpool); | |
616 bytestream_put_buffer(s->pout, s->argumentSpool, | |
617 s->args - s->argumentSpool); | |
618 s->typeSpoolLength = 0; | |
619 s->typeSpool = 0; | |
620 s->args = s->argumentSpool; | |
621 } | |
622 } | |
623 | |
8296 | 624 static void reconstruct_and_encode_image(RoqContext *enc, RoqTempdata *tempData, int w, int h, int numBlocks) |
5184 | 625 { |
626 int i, j, k; | |
627 int x, y; | |
628 int subX, subY; | |
629 int dist=0; | |
630 | |
631 roq_qcell *qcell; | |
8296 | 632 CelEvaluation *eval; |
5184 | 633 |
634 CodingSpool spool; | |
635 | |
636 spool.typeSpool=0; | |
637 spool.typeSpoolLength=0; | |
638 spool.args = spool.argumentSpool; | |
639 spool.pout = &enc->out_buf; | |
640 | |
641 if (tempData->used_option[RoQ_ID_CCC]%2) | |
642 tempData->mainChunkSize+=8; //FIXME | |
643 | |
644 /* Write the video chunk header */ | |
645 bytestream_put_le16(&enc->out_buf, RoQ_QUAD_VQ); | |
646 bytestream_put_le32(&enc->out_buf, tempData->mainChunkSize/8); | |
647 bytestream_put_byte(&enc->out_buf, 0x0); | |
648 bytestream_put_byte(&enc->out_buf, 0x0); | |
649 | |
650 for (i=0; i<numBlocks; i++) { | |
651 eval = tempData->cel_evals + i; | |
652 | |
653 x = eval->sourceX; | |
654 y = eval->sourceY; | |
655 dist += eval->eval_dist[eval->best_coding]; | |
656 | |
657 switch (eval->best_coding) { | |
658 case RoQ_ID_MOT: | |
659 write_typecode(&spool, RoQ_ID_MOT); | |
660 break; | |
661 | |
662 case RoQ_ID_FCC: | |
663 bytestream_put_byte(&spool.args, motion_arg(eval->motion)); | |
664 | |
665 write_typecode(&spool, RoQ_ID_FCC); | |
666 ff_apply_motion_8x8(enc, x, y, | |
667 eval->motion.d[0], eval->motion.d[1]); | |
668 break; | |
669 | |
670 case RoQ_ID_SLD: | |
671 bytestream_put_byte(&spool.args, tempData->i2f4[eval->cbEntry]); | |
672 write_typecode(&spool, RoQ_ID_SLD); | |
673 | |
674 qcell = enc->cb4x4 + eval->cbEntry; | |
675 ff_apply_vector_4x4(enc, x , y , enc->cb2x2 + qcell->idx[0]); | |
676 ff_apply_vector_4x4(enc, x+4, y , enc->cb2x2 + qcell->idx[1]); | |
677 ff_apply_vector_4x4(enc, x , y+4, enc->cb2x2 + qcell->idx[2]); | |
678 ff_apply_vector_4x4(enc, x+4, y+4, enc->cb2x2 + qcell->idx[3]); | |
679 break; | |
680 | |
681 case RoQ_ID_CCC: | |
682 write_typecode(&spool, RoQ_ID_CCC); | |
683 | |
684 for (j=0; j<4; j++) { | |
685 subX = x + 4*(j&1); | |
686 subY = y + 2*(j&2); | |
687 | |
688 switch(eval->subCels[j].best_coding) { | |
689 case RoQ_ID_MOT: | |
690 break; | |
691 | |
692 case RoQ_ID_FCC: | |
693 bytestream_put_byte(&spool.args, | |
694 motion_arg(eval->subCels[j].motion)); | |
695 | |
696 ff_apply_motion_4x4(enc, subX, subY, | |
697 eval->subCels[j].motion.d[0], | |
698 eval->subCels[j].motion.d[1]); | |
699 break; | |
700 | |
701 case RoQ_ID_SLD: | |
702 bytestream_put_byte(&spool.args, | |
703 tempData->i2f4[eval->subCels[j].cbEntry]); | |
704 | |
705 qcell = enc->cb4x4 + eval->subCels[j].cbEntry; | |
706 | |
707 ff_apply_vector_2x2(enc, subX , subY , | |
708 enc->cb2x2 + qcell->idx[0]); | |
709 ff_apply_vector_2x2(enc, subX+2, subY , | |
710 enc->cb2x2 + qcell->idx[1]); | |
711 ff_apply_vector_2x2(enc, subX , subY+2, | |
712 enc->cb2x2 + qcell->idx[2]); | |
713 ff_apply_vector_2x2(enc, subX+2, subY+2, | |
714 enc->cb2x2 + qcell->idx[3]); | |
715 break; | |
716 | |
717 case RoQ_ID_CCC: | |
718 for (k=0; k<4; k++) { | |
719 int cb_idx = eval->subCels[j].subCels[k]; | |
720 bytestream_put_byte(&spool.args, | |
721 tempData->i2f2[cb_idx]); | |
722 | |
723 ff_apply_vector_2x2(enc, subX + 2*(k&1), subY + (k&2), | |
724 enc->cb2x2 + cb_idx); | |
725 } | |
726 break; | |
727 } | |
728 write_typecode(&spool, eval->subCels[j].best_coding); | |
729 } | |
730 break; | |
731 } | |
732 } | |
733 | |
734 /* Flush the remainder of the argument/type spool */ | |
735 while (spool.typeSpoolLength) | |
736 write_typecode(&spool, 0x0); | |
737 | |
738 #if 0 | |
739 uint8_t *fdata[3] = {enc->frame_to_enc->data[0], | |
740 enc->frame_to_enc->data[1], | |
741 enc->frame_to_enc->data[2]}; | |
742 uint8_t *cdata[3] = {enc->current_frame->data[0], | |
743 enc->current_frame->data[1], | |
744 enc->current_frame->data[2]}; | |
745 av_log(enc->avctx, AV_LOG_ERROR, "Expected distortion: %i Actual: %i\n", | |
746 dist, | |
747 block_sse(fdata, cdata, 0, 0, 0, 0, | |
5198 | 748 enc->frame_to_enc->linesize, |
749 enc->current_frame->linesize, | |
750 enc->width)); //WARNING: Square dimensions implied... | |
5184 | 751 #endif |
752 } | |
753 | |
754 | |
/**
 * Create a single YUV cell from a 2x2 section of the image.
 *
 * Writes 6 bytes to block: the four luma samples in raster order, then the
 * rounded average of the four U samples, then the rounded average of the
 * four V samples.
 *
 * @param block  destination, 6 bytes
 * @param data   the three plane pointers (Y, U, V)
 * @param top    row of the section's top-left sample
 * @param left   column of the section's top-left sample
 * @param stride line size of each plane
 */
static inline void frame_block_to_cell(uint8_t *block, uint8_t **data,
                                       int top, int left, int *stride)
{
    int row, col;
    int u_sum = 0, v_sum = 0;

    for (row = 0; row < 2; row++)
        for (col = 0; col < 2; col++) {
            const int line = top + row;
            const int px   = left + col;

            *block++ = data[0][line*stride[0] + px];
            u_sum   += data[1][line*stride[1] + px];
            /* each plane is addressed with its own stride; the V plane used
             * to be indexed with the U stride */
            v_sum   += data[2][line*stride[2] + px];
        }

    *block++ = (u_sum + 2)/4;
    *block++ = (v_sum + 2)/4;
}
775 | |
776 /** | |
777 * Creates YUV clusters for the entire image | |
778 */ | |
779 static void create_clusters(AVFrame *frame, int w, int h, uint8_t *yuvClusters) | |
780 { | |
781 int i, j, k, l; | |
782 | |
783 for (i=0; i<h; i+=4) | |
784 for (j=0; j<w; j+=4) { | |
785 for (k=0; k < 2; k++) | |
786 for (l=0; l < 2; l++) | |
787 frame_block_to_cell(yuvClusters + (l + 2*k)*6, frame->data, | |
788 i+2*k, j+2*l, frame->linesize); | |
789 yuvClusters += 24; | |
790 } | |
791 } | |
792 | |
8296 | 793 static void generate_codebook(RoqContext *enc, RoqTempdata *tempdata, |
5184 | 794 int *points, int inputCount, roq_cell *results, |
795 int size, int cbsize) | |
796 { | |
797 int i, j, k; | |
798 int c_size = size*size/4; | |
799 int *buf = points; | |
800 int *codebook = av_malloc(6*c_size*cbsize*sizeof(int)); | |
801 int *closest_cb; | |
802 | |
803 if (size == 4) | |
804 closest_cb = av_malloc(6*c_size*inputCount*sizeof(int)); | |
805 else | |
806 closest_cb = tempdata->closest_cb2; | |
807 | |
808 ff_init_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx); | |
809 ff_do_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx); | |
810 | |
811 if (size == 4) | |
812 av_free(closest_cb); | |
813 | |
814 buf = codebook; | |
815 for (i=0; i<cbsize; i++) | |
816 for (k=0; k<c_size; k++) { | |
817 for(j=0; j<4; j++) | |
818 results->y[j] = *buf++; | |
819 | |
820 results->u = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS; | |
821 results->v = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS; | |
822 results++; | |
823 } | |
824 | |
825 av_free(codebook); | |
826 } | |
827 | |
8296 | 828 static void generate_new_codebooks(RoqContext *enc, RoqTempdata *tempData) |
5184 | 829 { |
830 int i,j; | |
8296 | 831 RoqCodebooks *codebooks = &tempData->codebooks; |
5184 | 832 int max = enc->width*enc->height/16; |
833 uint8_t mb2[3*4]; | |
834 roq_cell *results4 = av_malloc(sizeof(roq_cell)*MAX_CBS_4x4*4); | |
835 uint8_t *yuvClusters=av_malloc(sizeof(int)*max*6*4); | |
836 int *points = av_malloc(max*6*4*sizeof(int)); | |
837 int bias; | |
838 | |
839 /* Subsample YUV data */ | |
840 create_clusters(enc->frame_to_enc, enc->width, enc->height, yuvClusters); | |
841 | |
842 /* Cast to integer and apply chroma bias */ | |
843 for (i=0; i<max*24; i++) { | |
844 bias = ((i%6)<4) ? 1 : CHROMA_BIAS; | |
845 points[i] = bias*yuvClusters[i]; | |
846 } | |
847 | |
848 /* Create 4x4 codebooks */ | |
849 generate_codebook(enc, tempData, points, max, results4, 4, MAX_CBS_4x4); | |
850 | |
851 codebooks->numCB4 = MAX_CBS_4x4; | |
852 | |
853 tempData->closest_cb2 = av_malloc(max*4*sizeof(int)); | |
854 | |
855 /* Create 2x2 codebooks */ | |
856 generate_codebook(enc, tempData, points, max*4, enc->cb2x2, 2, MAX_CBS_2x2); | |
857 | |
858 codebooks->numCB2 = MAX_CBS_2x2; | |
859 | |
860 /* Unpack 2x2 codebook clusters */ | |
861 for (i=0; i<codebooks->numCB2; i++) | |
862 unpack_roq_cell(enc->cb2x2 + i, codebooks->unpacked_cb2 + i*2*2*3); | |
863 | |
864 /* Index all 4x4 entries to the 2x2 entries, unpack, and enlarge */ | |
865 for (i=0; i<codebooks->numCB4; i++) { | |
866 for (j=0; j<4; j++) { | |
867 unpack_roq_cell(&results4[4*i + j], mb2); | |
868 index_mb(mb2, codebooks->unpacked_cb2, codebooks->numCB2, | |
869 &enc->cb4x4[i].idx[j], 2); | |
870 } | |
871 unpack_roq_qcell(codebooks->unpacked_cb2, enc->cb4x4 + i, | |
872 codebooks->unpacked_cb4 + i*4*4*3); | |
873 enlarge_roq_mb4(codebooks->unpacked_cb4 + i*4*4*3, | |
874 codebooks->unpacked_cb4_enlarged + i*8*8*3); | |
875 } | |
876 | |
877 av_free(yuvClusters); | |
878 av_free(points); | |
879 av_free(results4); | |
880 } | |
881 | |
882 static void roq_encode_video(RoqContext *enc) | |
883 { | |
8643 | 884 RoqTempdata *tempData = enc->tmpData; |
5184 | 885 int i; |
886 | |
8643 | 887 memset(tempData, 0, sizeof(*tempData)); |
5184 | 888 |
8643 | 889 create_cel_evals(enc, tempData); |
5184 | 890 |
8643 | 891 generate_new_codebooks(enc, tempData); |
5184 | 892 |
893 if (enc->framesSinceKeyframe >= 1) { | |
894 motion_search(enc, 8); | |
895 motion_search(enc, 4); | |
896 } | |
897 | |
898 retry_encode: | |
899 for (i=0; i<enc->width*enc->height/64; i++) | |
8643 | 900 gather_data_for_cel(tempData->cel_evals + i, enc, tempData); |
5184 | 901 |
902 /* Quake 3 can't handle chunks bigger than 65536 bytes */ | |
8643 | 903 if (tempData->mainChunkSize/8 > 65536) { |
5184 | 904 enc->lambda *= .8; |
905 goto retry_encode; | |
906 } | |
907 | |
8643 | 908 remap_codebooks(enc, tempData); |
5184 | 909 |
8643 | 910 write_codebooks(enc, tempData); |
5184 | 911 |
8643 | 912 reconstruct_and_encode_image(enc, tempData, enc->width, enc->height, |
5184 | 913 enc->width*enc->height/64); |
914 | |
7308
29319d07aeb4
Set avctx->coded_frame in RoQ encoder. At some point in
vitor
parents:
7040
diff
changeset
|
915 enc->avctx->coded_frame = enc->current_frame; |
29319d07aeb4
Set avctx->coded_frame in RoQ encoder. At some point in
vitor
parents:
7040
diff
changeset
|
916 |
5184 | 917 /* Rotate frame history */ |
918 FFSWAP(AVFrame *, enc->current_frame, enc->last_frame); | |
919 FFSWAP(motion_vect *, enc->last_motion4, enc->this_motion4); | |
920 FFSWAP(motion_vect *, enc->last_motion8, enc->this_motion8); | |
921 | |
8643 | 922 av_free(tempData->cel_evals); |
923 av_free(tempData->closest_cb2); | |
5184 | 924 |
925 enc->framesSinceKeyframe++; | |
926 } | |
927 | |
928 static int roq_encode_init(AVCodecContext *avctx) | |
929 { | |
930 RoqContext *enc = avctx->priv_data; | |
931 | |
8628
e49a2e1f0020
Replace calls to the deprecated function av_init_random() with
stefano
parents:
8627
diff
changeset
|
932 av_random_init(&enc->randctx, 1); |
5184 | 933 |
934 enc->framesSinceKeyframe = 0; | |
935 if ((avctx->width & 0xf) || (avctx->height & 0xf)) { | |
936 av_log(avctx, AV_LOG_ERROR, "Dimensions must be divisible by 16\n"); | |
937 return -1; | |
938 } | |
939 | |
940 if (((avctx->width)&(avctx->width-1))||((avctx->height)&(avctx->height-1))) | |
941 av_log(avctx, AV_LOG_ERROR, "Warning: dimensions not power of two\n"); | |
942 | |
943 if (avcodec_check_dimensions(avctx, avctx->width, avctx->height)) { | |
944 av_log(avctx, AV_LOG_ERROR, "Invalid dimensions (%dx%d)\n", | |
945 avctx->width, avctx->height); | |
946 return -1; | |
947 } | |
948 | |
949 enc->width = avctx->width; | |
950 enc->height = avctx->height; | |
951 | |
952 enc->framesSinceKeyframe = 0; | |
953 enc->first_frame = 1; | |
954 | |
955 enc->last_frame = &enc->frames[0]; | |
956 enc->current_frame = &enc->frames[1]; | |
957 | |
8643 | 958 enc->tmpData = av_malloc(sizeof(RoqTempdata)); |
959 | |
5184 | 960 enc->this_motion4 = |
961 av_mallocz((enc->width*enc->height/16)*sizeof(motion_vect)); | |
962 | |
963 enc->last_motion4 = | |
964 av_malloc ((enc->width*enc->height/16)*sizeof(motion_vect)); | |
965 | |
966 enc->this_motion8 = | |
967 av_mallocz((enc->width*enc->height/64)*sizeof(motion_vect)); | |
968 | |
969 enc->last_motion8 = | |
970 av_malloc ((enc->width*enc->height/64)*sizeof(motion_vect)); | |
971 | |
972 return 0; | |
973 } | |
974 | |
975 static void roq_write_video_info_chunk(RoqContext *enc) | |
976 { | |
977 /* ROQ info chunk */ | |
978 bytestream_put_le16(&enc->out_buf, RoQ_INFO); | |
979 | |
980 /* Size: 8 bytes */ | |
981 bytestream_put_le32(&enc->out_buf, 8); | |
982 | |
983 /* Unused argument */ | |
984 bytestream_put_byte(&enc->out_buf, 0x00); | |
985 bytestream_put_byte(&enc->out_buf, 0x00); | |
986 | |
987 /* Width */ | |
988 bytestream_put_le16(&enc->out_buf, enc->width); | |
989 | |
990 /* Height */ | |
991 bytestream_put_le16(&enc->out_buf, enc->height); | |
992 | |
993 /* Unused in Quake 3, mimics the output of the real encoder */ | |
994 bytestream_put_byte(&enc->out_buf, 0x08); | |
995 bytestream_put_byte(&enc->out_buf, 0x00); | |
996 bytestream_put_byte(&enc->out_buf, 0x04); | |
997 bytestream_put_byte(&enc->out_buf, 0x00); | |
998 } | |
999 | |
1000 static int roq_encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data) | |
1001 { | |
1002 RoqContext *enc = avctx->priv_data; | |
1003 AVFrame *frame= data; | |
1004 uint8_t *buf_start = buf; | |
1005 | |
1006 enc->out_buf = buf; | |
1007 enc->avctx = avctx; | |
1008 | |
1009 enc->frame_to_enc = frame; | |
1010 | |
1011 if (frame->quality) | |
1012 enc->lambda = frame->quality - 1; | |
1013 else | |
1014 enc->lambda = 2*ROQ_LAMBDA_SCALE; | |
1015 | |
1016 /* 138 bits max per 8x8 block + | |
1017 * 256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame header */ | |
1018 if (((enc->width*enc->height/64)*138+7)/8 + 256*(6+4) + 8 > buf_size) { | |
1019 av_log(avctx, AV_LOG_ERROR, " RoQ: Output buffer too small!\n"); | |
1020 return -1; | |
1021 } | |
1022 | |
1023 /* Check for I frame */ | |
1024 if (enc->framesSinceKeyframe == avctx->gop_size) | |
1025 enc->framesSinceKeyframe = 0; | |
1026 | |
1027 if (enc->first_frame) { | |
1028 /* Alloc memory for the reconstruction data (we must know the stride | |
1029 for that) */ | |
1030 if (avctx->get_buffer(avctx, enc->current_frame) || | |
1031 avctx->get_buffer(avctx, enc->last_frame)) { | |
1032 av_log(avctx, AV_LOG_ERROR, " RoQ: get_buffer() failed\n"); | |
1033 return -1; | |
1034 } | |
1035 | |
1036 /* Before the first video frame, write a "video info" chunk */ | |
1037 roq_write_video_info_chunk(enc); | |
1038 | |
1039 enc->first_frame = 0; | |
1040 } | |
1041 | |
1042 /* Encode the actual frame */ | |
1043 roq_encode_video(enc); | |
1044 | |
1045 return enc->out_buf - buf_start; | |
1046 } | |
1047 | |
1048 static int roq_encode_end(AVCodecContext *avctx) | |
1049 { | |
1050 RoqContext *enc = avctx->priv_data; | |
1051 | |
1052 avctx->release_buffer(avctx, enc->last_frame); | |
1053 avctx->release_buffer(avctx, enc->current_frame); | |
1054 | |
8643 | 1055 av_free(enc->tmpData); |
5184 | 1056 av_free(enc->this_motion4); |
1057 av_free(enc->last_motion4); | |
1058 av_free(enc->this_motion8); | |
1059 av_free(enc->last_motion8); | |
1060 | |
1061 return 0; | |
1062 } | |
1063 | |
1064 AVCodec roq_encoder = | |
1065 { | |
1066 "roqvideo", | |
1067 CODEC_TYPE_VIDEO, | |
1068 CODEC_ID_ROQ, | |
1069 sizeof(RoqContext), | |
1070 roq_encode_init, | |
1071 roq_encode_frame, | |
1072 roq_encode_end, | |
1073 .supported_framerates = (AVRational[]){{30,1}, {0,0}}, | |
6788 | 1074 .pix_fmts = (enum PixelFormat[]){PIX_FMT_YUV444P, PIX_FMT_NONE}, |
7040
e943e1409077
Make AVCodec long_names definition conditional depending on CONFIG_SMALL.
stefano
parents:
6812
diff
changeset
|
1075 .long_name = NULL_IF_CONFIG_SMALL("id RoQ video"), |
5184 | 1076 }; |