Mercurial > libavcodec.hg
annotate msmpeg4.c @ 204:fceb435fae6b libavcodec
(commit by michael)
16-bit divide instead of 32-bit on x86 in msmpeg_pred_dc()
author | arpi_esp |
---|---|
date | Mon, 14 Jan 2002 04:34:52 +0000 |
parents | 91ed656e7339 |
children | 2eb04d6be309 |
rev | line source |
---|---|
0 | 1 /* |
2 * MSMPEG4 backend for ffmpeg encoder and decoder | |
3 * Copyright (c) 2001 Gerard Lantau. | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
18 */ | |
19 #include <stdlib.h> | |
20 #include <stdio.h> | |
21 #include "common.h" | |
22 #include "dsputil.h" | |
23 #include "mpegvideo.h" | |
24 | |
25 /* | |
26 * You can also call this codec : MPEG4 with a twist ! | |
27 * | |
28 * TODO: | |
29 * - (encoding) select best mv table (two choices) | |
30 * - (encoding) select best vlc/dc table | |
31 * - (decoding) handle slice indication | |
32 */ | |
33 //#define DEBUG | |
34 | |
35 /* motion vector table */ | |
36 typedef struct MVTable { | |
37 int n; | |
38 const UINT16 *table_mv_code; | |
39 const UINT8 *table_mv_bits; | |
40 const UINT8 *table_mvx; | |
41 const UINT8 *table_mvy; | |
42 UINT16 *table_mv_index; /* encoding: convert mv to index in table_mv */ | |
43 VLC vlc; /* decoding: vlc */ | |
44 } MVTable; | |
45 | |
46 static void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n); | |
47 static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |
48 int n, int coded); | |
49 static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr); | |
50 static int msmpeg4_decode_motion(MpegEncContext * s, | |
51 int *mx_ptr, int *my_ptr); | |
52 | |
#ifdef DEBUG
/* Debug-only counters; both picture-header routines print and bump frame_count. */
int intra_count = 0;
int frame_count = 0;
#endif
/* XXX: move it to mpegvideo.h */

/* Becomes 1 once the encoder has built its MV and RL lookup tables. */
static int init_done = 0;
60 | |
61 #include "msmpeg4data.h" | |
62 | |
63 #ifdef STATS | |
64 | |
65 const char *st_names[ST_NB] = { | |
66 "unknown", | |
67 "dc", | |
68 "intra_ac", | |
69 "inter_ac", | |
70 "intra_mb", | |
71 "inter_mb", | |
72 "mv", | |
73 }; | |
74 | |
75 int st_current_index = 0; | |
76 unsigned int st_bit_counts[ST_NB]; | |
77 unsigned int st_out_bit_counts[ST_NB]; | |
78 | |
79 #define set_stat(var) st_current_index = var; | |
80 | |
81 void print_stats(void) | |
82 { | |
83 unsigned int total; | |
84 int i; | |
85 | |
86 printf("Input:\n"); | |
87 total = 0; | |
88 for(i=0;i<ST_NB;i++) | |
89 total += st_bit_counts[i]; | |
90 if (total == 0) | |
91 total = 1; | |
92 for(i=0;i<ST_NB;i++) { | |
93 printf("%-10s : %10.1f %5.1f%%\n", | |
94 st_names[i], | |
95 (double)st_bit_counts[i] / 8.0, | |
96 (double)st_bit_counts[i] * 100.0 / total); | |
97 } | |
98 printf("%-10s : %10.1f %5.1f%%\n", | |
99 "total", | |
100 (double)total / 8.0, | |
101 100.0); | |
102 | |
103 printf("Output:\n"); | |
104 total = 0; | |
105 for(i=0;i<ST_NB;i++) | |
106 total += st_out_bit_counts[i]; | |
107 if (total == 0) | |
108 total = 1; | |
109 for(i=0;i<ST_NB;i++) { | |
110 printf("%-10s : %10.1f %5.1f%%\n", | |
111 st_names[i], | |
112 (double)st_out_bit_counts[i] / 8.0, | |
113 (double)st_out_bit_counts[i] * 100.0 / total); | |
114 } | |
115 printf("%-10s : %10.1f %5.1f%%\n", | |
116 "total", | |
117 (double)total / 8.0, | |
118 100.0); | |
119 } | |
120 | |
121 #else | |
122 | |
/* Statistics are compiled out: set_stat() expands to nothing. */
#define set_stat(var)
124 | |
125 #endif | |
126 | |
127 /* build the table which associate a (x,y) motion vector to a vlc */ | |
128 static void init_mv_table(MVTable *tab) | |
129 { | |
130 int i, x, y; | |
131 | |
132 tab->table_mv_index = malloc(sizeof(UINT16) * 4096); | |
133 /* mark all entries as not used */ | |
134 for(i=0;i<4096;i++) | |
135 tab->table_mv_index[i] = tab->n; | |
136 | |
137 for(i=0;i<tab->n;i++) { | |
138 x = tab->table_mvx[i]; | |
139 y = tab->table_mvy[i]; | |
140 tab->table_mv_index[(x << 6) | y] = i; | |
141 } | |
142 } | |
143 | |
144 static void code012(PutBitContext *pb, int n) | |
145 { | |
146 if (n == 0) { | |
147 put_bits(pb, 1, 0); | |
148 } else { | |
149 put_bits(pb, 1, 1); | |
150 put_bits(pb, 1, (n >= 2)); | |
151 } | |
152 } | |
153 | |
154 /* write MSMPEG4 V3 compatible frame header */ | |
155 void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number) | |
156 { | |
157 int i; | |
158 | |
159 align_put_bits(&s->pb); | |
160 | |
161 put_bits(&s->pb, 2, s->pict_type - 1); | |
162 | |
163 put_bits(&s->pb, 5, s->qscale); | |
164 | |
165 s->rl_table_index = 2; | |
166 s->rl_chroma_table_index = 1; /* only for I frame */ | |
167 s->dc_table_index = 1; | |
168 s->mv_table_index = 1; /* only if P frame */ | |
169 s->use_skip_mb_code = 1; /* only if P frame */ | |
170 | |
171 if (s->pict_type == I_TYPE) { | |
172 put_bits(&s->pb, 5, 0x17); /* indicate only one "slice" */ | |
173 | |
174 code012(&s->pb, s->rl_chroma_table_index); | |
175 code012(&s->pb, s->rl_table_index); | |
176 | |
177 put_bits(&s->pb, 1, s->dc_table_index); | |
178 s->no_rounding = 1; | |
179 } else { | |
180 put_bits(&s->pb, 1, s->use_skip_mb_code); | |
181 | |
182 s->rl_chroma_table_index = s->rl_table_index; | |
183 code012(&s->pb, s->rl_table_index); | |
184 | |
185 put_bits(&s->pb, 1, s->dc_table_index); | |
186 | |
187 put_bits(&s->pb, 1, s->mv_table_index); | |
188 s->no_rounding ^= 1; | |
189 } | |
190 | |
191 if (!init_done) { | |
192 /* init various encoding tables */ | |
193 init_done = 1; | |
194 init_mv_table(&mv_tables[0]); | |
195 init_mv_table(&mv_tables[1]); | |
196 for(i=0;i<NB_RL_TABLES;i++) | |
197 init_rl(&rl_table[i]); | |
198 } | |
199 | |
200 #ifdef DEBUG | |
201 intra_count = 0; | |
202 printf("*****frame %d:\n", frame_count++); | |
203 #endif | |
204 } | |
205 | |
206 /* predict coded block */ | |
207 static inline int coded_block_pred(MpegEncContext * s, int n, UINT8 **coded_block_ptr) | |
208 { | |
209 int x, y, wrap, pred, a, b, c; | |
210 | |
211 x = 2 * s->mb_x + 1 + (n & 1); | |
212 y = 2 * s->mb_y + 1 + ((n & 2) >> 1); | |
213 wrap = s->mb_width * 2 + 2; | |
214 | |
215 /* B C | |
216 * A X | |
217 */ | |
218 a = s->coded_block[(x - 1) + (y) * wrap]; | |
219 b = s->coded_block[(x - 1) + (y - 1) * wrap]; | |
220 c = s->coded_block[(x) + (y - 1) * wrap]; | |
221 | |
222 if (b == c) { | |
223 pred = a; | |
224 } else { | |
225 pred = c; | |
226 } | |
227 | |
228 /* store value */ | |
229 *coded_block_ptr = &s->coded_block[(x) + (y) * wrap]; | |
230 | |
231 return pred; | |
232 } | |
233 | |
234 static void msmpeg4_encode_motion(MpegEncContext * s, | |
235 int mx, int my) | |
236 { | |
237 int code; | |
238 MVTable *mv; | |
239 | |
240 /* modulo encoding */ | |
241 /* WARNING : you cannot reach all the MVs even with the modulo | |
242 encoding. This is a somewhat strange compromise they took !!! */ | |
243 if (mx <= -64) | |
244 mx += 64; | |
245 else if (mx >= 64) | |
246 mx -= 64; | |
247 if (my <= -64) | |
248 my += 64; | |
249 else if (my >= 64) | |
250 my -= 64; | |
251 | |
252 mx += 32; | |
253 my += 32; | |
254 #if 0 | |
255 if ((unsigned)mx >= 64 || | |
256 (unsigned)my >= 64) | |
257 fprintf(stderr, "error mx=%d my=%d\n", mx, my); | |
258 #endif | |
259 mv = &mv_tables[s->mv_table_index]; | |
260 | |
261 code = mv->table_mv_index[(mx << 6) | my]; | |
262 set_stat(ST_MV); | |
263 put_bits(&s->pb, | |
264 mv->table_mv_bits[code], | |
265 mv->table_mv_code[code]); | |
266 if (code == mv->n) { | |
267 /* escape : code litterally */ | |
268 put_bits(&s->pb, 6, mx); | |
269 put_bits(&s->pb, 6, my); | |
270 } | |
271 } | |
272 | |
273 void msmpeg4_encode_mb(MpegEncContext * s, | |
274 DCTELEM block[6][64], | |
275 int motion_x, int motion_y) | |
276 { | |
277 int cbp, coded_cbp, i; | |
278 int pred_x, pred_y; | |
279 UINT8 *coded_block; | |
280 | |
281 if (!s->mb_intra) { | |
282 /* compute cbp */ | |
283 set_stat(ST_INTER_MB); | |
284 cbp = 0; | |
285 for (i = 0; i < 6; i++) { | |
286 if (s->block_last_index[i] >= 0) | |
287 cbp |= 1 << (5 - i); | |
288 } | |
289 if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) { | |
290 /* skip macroblock */ | |
291 put_bits(&s->pb, 1, 1); | |
292 return; | |
293 } | |
294 if (s->use_skip_mb_code) | |
295 put_bits(&s->pb, 1, 0); /* mb coded */ | |
296 | |
297 put_bits(&s->pb, | |
298 table_mb_non_intra[cbp + 64][1], | |
299 table_mb_non_intra[cbp + 64][0]); | |
300 | |
301 /* motion vector */ | |
302 h263_pred_motion(s, 0, &pred_x, &pred_y); | |
303 msmpeg4_encode_motion(s, motion_x - pred_x, | |
304 motion_y - pred_y); | |
305 } else { | |
306 /* compute cbp */ | |
307 cbp = 0; | |
308 coded_cbp = 0; | |
309 for (i = 0; i < 6; i++) { | |
310 int val, pred; | |
311 val = (s->block_last_index[i] >= 1); | |
312 cbp |= val << (5 - i); | |
313 if (i < 4) { | |
314 /* predict value for close blocks only for luma */ | |
315 pred = coded_block_pred(s, i, &coded_block); | |
316 *coded_block = val; | |
317 val = val ^ pred; | |
318 } | |
319 coded_cbp |= val << (5 - i); | |
320 } | |
321 #if 0 | |
322 if (coded_cbp) | |
323 printf("cbp=%x %x\n", cbp, coded_cbp); | |
324 #endif | |
325 | |
326 if (s->pict_type == I_TYPE) { | |
327 set_stat(ST_INTRA_MB); | |
328 put_bits(&s->pb, | |
329 table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]); | |
330 } else { | |
331 if (s->use_skip_mb_code) | |
332 put_bits(&s->pb, 1, 0); /* mb coded */ | |
333 put_bits(&s->pb, | |
334 table_mb_non_intra[cbp][1], | |
335 table_mb_non_intra[cbp][0]); | |
336 } | |
337 set_stat(ST_INTRA_MB); | |
338 put_bits(&s->pb, 1, 0); /* no AC prediction yet */ | |
339 } | |
340 | |
341 for (i = 0; i < 6; i++) { | |
342 msmpeg4_encode_block(s, block[i], i); | |
343 } | |
344 } | |
345 | |
346 | |
347 /* strongly inspirated from MPEG4, but not exactly the same ! */ | |
348 void msmpeg4_dc_scale(MpegEncContext * s) | |
349 { | |
195
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
350 if (s->qscale < 5){ |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
351 s->y_dc_scale = 8; |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
352 s->c_dc_scale = 8; |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
353 // s->c_dc_scale = (s->qscale + 13)>>1; |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
354 }else if (s->qscale < 9){ |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
355 s->y_dc_scale = 2 * s->qscale; |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
356 s->c_dc_scale = (s->qscale + 13)>>1; |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
357 }else{ |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
358 s->y_dc_scale = s->qscale + 8; |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
359 s->c_dc_scale = (s->qscale + 13)>>1; |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
360 } |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
361 // this differs for quant >24 from mpeg4 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
362 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
363 // if(s->qscale==13) s->c_dc_scale=14; |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
364 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
365 // if(s->qscale>=6) |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
366 // printf("%d", s->qscale); |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
367 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
368 /* s->c_dc_scale values (found by Michael Nidermayer) |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
369 qscale=2 -> 8 (yes iam sure about that) |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
370 qscale=3 -> 8 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
371 qscale=4 -> 8 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
372 qscale=5 -> 9 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
373 qscale=6 -> 9 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
374 qscale=7 -> 10 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
375 qscale=8 -> 10 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
376 qscale=9 -> 11 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
377 qscale=10-> 11 |
92f726205082
s->c_dc_scale was 7 if s->qscale==2 but should be 8 (the bug is visible in deep red areas in high bitrate clips) - patch by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents:
186
diff
changeset
|
378 */ |
0 | 379 } |
380 | |
381 /* dir = 0: left, dir = 1: top prediction */ | |
382 static int msmpeg4_pred_dc(MpegEncContext * s, int n, | |
25 | 383 INT16 **dc_val_ptr, int *dir_ptr) |
0 | 384 { |
385 int a, b, c, x, y, wrap, pred, scale; | |
25 | 386 INT16 *dc_val; |
0 | 387 |
388 /* find prediction */ | |
389 if (n < 4) { | |
390 x = 2 * s->mb_x + 1 + (n & 1); | |
391 y = 2 * s->mb_y + 1 + ((n & 2) >> 1); | |
392 wrap = s->mb_width * 2 + 2; | |
393 dc_val = s->dc_val[0]; | |
394 scale = s->y_dc_scale; | |
395 } else { | |
396 x = s->mb_x + 1; | |
397 y = s->mb_y + 1; | |
398 wrap = s->mb_width + 2; | |
399 dc_val = s->dc_val[n - 4 + 1]; | |
400 scale = s->c_dc_scale; | |
401 } | |
402 | |
403 /* B C | |
404 * A X | |
405 */ | |
406 a = dc_val[(x - 1) + (y) * wrap]; | |
407 b = dc_val[(x - 1) + (y - 1) * wrap]; | |
408 c = dc_val[(x) + (y - 1) * wrap]; | |
409 | |
410 /* XXX: the following solution consumes divisions, but it does not | |
411 necessitate to modify mpegvideo.c. The problem comes from the | |
412 fact they decided to store the quantized DC (which would lead | |
413 to problems if Q could vary !) */ | |
204 | 414 #ifdef ARCH_X86 |
415 /* using 16bit divisions as they are large enough and 2x as fast */ | |
416 asm volatile( | |
417 "movl %3, %%eax \n\t" | |
418 "shrl $1, %%eax \n\t" | |
419 "addl %%eax, %2 \n\t" | |
420 "addl %%eax, %1 \n\t" | |
421 "addl %0, %%eax \n\t" | |
422 "xorl %%edx, %%edx \n\t" | |
423 "divw %w3 \n\t" | |
424 "movzwl %%ax, %0 \n\t" | |
425 "movl %1, %%eax \n\t" | |
426 "xorl %%edx, %%edx \n\t" | |
427 "divw %w3 \n\t" | |
428 "movzwl %%ax, %1 \n\t" | |
429 "movl %2, %%eax \n\t" | |
430 "xorl %%edx, %%edx \n\t" | |
431 "divw %w3 \n\t" | |
432 "movzwl %%ax, %2 \n\t" | |
433 : "+r" (a), "+r" (b), "+r" (c) | |
434 : "r" (scale) | |
435 : "%eax", "%edx" | |
436 ); | |
437 #else | |
0 | 438 a = (a + (scale >> 1)) / scale; |
439 b = (b + (scale >> 1)) / scale; | |
440 c = (c + (scale >> 1)) / scale; | |
204 | 441 #endif |
0 | 442 /* XXX: WARNING: they did not choose the same test as MPEG4. This |
443 is very important ! */ | |
444 if (abs(a - b) <= abs(b - c)) { | |
445 pred = c; | |
446 *dir_ptr = 1; | |
447 } else { | |
448 pred = a; | |
449 *dir_ptr = 0; | |
450 } | |
451 | |
452 /* update predictor */ | |
453 *dc_val_ptr = &dc_val[(x) + (y) * wrap]; | |
454 return pred; | |
455 } | |
456 | |
/* Largest DC difference magnitude that has its own code; the encoder also
   uses this value as the escape code for larger magnitudes. */
#define DC_MAX 119
458 | |
459 static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr) | |
460 { | |
461 int sign, code; | |
462 int pred; | |
25 | 463 INT16 *dc_val; |
0 | 464 |
465 pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr); | |
466 | |
467 /* update predictor */ | |
468 if (n < 4) { | |
469 *dc_val = level * s->y_dc_scale; | |
470 } else { | |
471 *dc_val = level * s->c_dc_scale; | |
472 } | |
473 | |
474 /* do the prediction */ | |
475 level -= pred; | |
476 | |
477 sign = 0; | |
478 if (level < 0) { | |
479 level = -level; | |
480 sign = 1; | |
481 } | |
482 | |
483 code = level; | |
484 if (code > DC_MAX) | |
485 code = DC_MAX; | |
486 | |
487 if (s->dc_table_index == 0) { | |
488 if (n < 4) { | |
489 put_bits(&s->pb, table0_dc_lum[code][1], table0_dc_lum[code][0]); | |
490 } else { | |
491 put_bits(&s->pb, table0_dc_chroma[code][1], table0_dc_chroma[code][0]); | |
492 } | |
493 } else { | |
494 if (n < 4) { | |
495 put_bits(&s->pb, table1_dc_lum[code][1], table1_dc_lum[code][0]); | |
496 } else { | |
497 put_bits(&s->pb, table1_dc_chroma[code][1], table1_dc_chroma[code][0]); | |
498 } | |
499 } | |
500 | |
501 if (code == DC_MAX) | |
502 put_bits(&s->pb, 8, level); | |
503 | |
504 if (level != 0) { | |
505 put_bits(&s->pb, 1, sign); | |
506 } | |
507 } | |
508 | |
509 /* Encoding of a block. Very similar to MPEG4 except for a different | |
510 escape coding (same as H263) and more vlc tables. | |
511 */ | |
512 static void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n) | |
513 { | |
514 int level, run, last, i, j, last_index; | |
515 int last_non_zero, sign, slevel; | |
516 int code, run_diff, dc_pred_dir; | |
517 const RLTable *rl; | |
518 | |
519 if (s->mb_intra) { | |
520 set_stat(ST_DC); | |
521 msmpeg4_encode_dc(s, block[0], n, &dc_pred_dir); | |
522 i = 1; | |
523 if (n < 4) { | |
524 rl = &rl_table[s->rl_table_index]; | |
525 } else { | |
526 rl = &rl_table[3 + s->rl_chroma_table_index]; | |
527 } | |
528 run_diff = 0; | |
529 set_stat(ST_INTRA_AC); | |
530 } else { | |
531 i = 0; | |
532 rl = &rl_table[3 + s->rl_table_index]; | |
533 run_diff = 1; | |
534 set_stat(ST_INTER_AC); | |
535 } | |
536 | |
537 /* AC coefs */ | |
538 last_index = s->block_last_index[n]; | |
539 last_non_zero = i - 1; | |
540 for (; i <= last_index; i++) { | |
541 j = zigzag_direct[i]; | |
542 level = block[j]; | |
543 if (level) { | |
544 run = i - last_non_zero - 1; | |
545 last = (i == last_index); | |
546 sign = 0; | |
547 slevel = level; | |
548 if (level < 0) { | |
549 sign = 1; | |
550 level = -level; | |
551 } | |
552 code = get_rl_index(rl, last, run, level); | |
553 put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); | |
554 if (code == rl->n) { | |
555 int level1, run1; | |
556 | |
557 level1 = level - rl->max_level[last][run]; | |
558 if (level1 < 1) | |
559 goto esc2; | |
560 code = get_rl_index(rl, last, run, level1); | |
561 if (code == rl->n) { | |
562 esc2: | |
563 put_bits(&s->pb, 1, 0); | |
564 if (level > MAX_LEVEL) | |
565 goto esc3; | |
566 run1 = run - rl->max_run[last][level] - run_diff; | |
567 if (run1 < 0) | |
568 goto esc3; | |
569 code = get_rl_index(rl, last, run1, level); | |
570 if (code == rl->n) { | |
571 esc3: | |
572 /* third escape */ | |
573 put_bits(&s->pb, 1, 0); | |
574 put_bits(&s->pb, 1, last); | |
575 put_bits(&s->pb, 6, run); | |
576 put_bits(&s->pb, 8, slevel & 0xff); | |
577 } else { | |
578 /* second escape */ | |
579 put_bits(&s->pb, 1, 1); | |
580 put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); | |
581 put_bits(&s->pb, 1, sign); | |
582 } | |
583 } else { | |
584 /* first escape */ | |
585 put_bits(&s->pb, 1, 1); | |
586 put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); | |
587 put_bits(&s->pb, 1, sign); | |
588 } | |
589 } else { | |
590 put_bits(&s->pb, 1, sign); | |
591 } | |
592 last_non_zero = i; | |
593 } | |
594 } | |
595 } | |
596 | |
597 /****************************************/ | |
598 /* decoding stuff */ | |
599 | |
600 static VLC mb_non_intra_vlc; | |
601 static VLC mb_intra_vlc; | |
602 static VLC dc_lum_vlc[2]; | |
603 static VLC dc_chroma_vlc[2]; | |
604 | |
605 /* init all vlc decoding tables */ | |
606 int msmpeg4_decode_init_vlc(MpegEncContext *s) | |
607 { | |
608 int i; | |
609 MVTable *mv; | |
610 | |
611 for(i=0;i<NB_RL_TABLES;i++) { | |
612 init_rl(&rl_table[i]); | |
613 init_vlc_rl(&rl_table[i]); | |
614 } | |
615 for(i=0;i<2;i++) { | |
616 mv = &mv_tables[i]; | |
617 init_vlc(&mv->vlc, 9, mv->n + 1, | |
618 mv->table_mv_bits, 1, 1, | |
619 mv->table_mv_code, 2, 2); | |
620 } | |
621 | |
622 init_vlc(&dc_lum_vlc[0], 9, 120, | |
623 &table0_dc_lum[0][1], 8, 4, | |
624 &table0_dc_lum[0][0], 8, 4); | |
625 init_vlc(&dc_chroma_vlc[0], 9, 120, | |
626 &table0_dc_chroma[0][1], 8, 4, | |
627 &table0_dc_chroma[0][0], 8, 4); | |
628 init_vlc(&dc_lum_vlc[1], 9, 120, | |
629 &table1_dc_lum[0][1], 8, 4, | |
630 &table1_dc_lum[0][0], 8, 4); | |
631 init_vlc(&dc_chroma_vlc[1], 9, 120, | |
632 &table1_dc_chroma[0][1], 8, 4, | |
633 &table1_dc_chroma[0][0], 8, 4); | |
634 | |
635 init_vlc(&mb_non_intra_vlc, 9, 128, | |
636 &table_mb_non_intra[0][1], 8, 4, | |
637 &table_mb_non_intra[0][0], 8, 4); | |
48 | 638 init_vlc(&mb_intra_vlc, 9, 64, |
0 | 639 &table_mb_intra[0][1], 4, 2, |
640 &table_mb_intra[0][0], 4, 2); | |
641 return 0; | |
642 } | |
643 | |
644 static int decode012(GetBitContext *gb) | |
645 { | |
646 int n; | |
21 | 647 n = get_bits1(gb); |
0 | 648 if (n == 0) |
649 return 0; | |
650 else | |
21 | 651 return get_bits1(gb) + 1; |
0 | 652 } |
653 | |
654 int msmpeg4_decode_picture_header(MpegEncContext * s) | |
655 { | |
656 int code; | |
200 | 657 static int weirdAl=0; |
0 | 658 |
659 s->pict_type = get_bits(&s->gb, 2) + 1; | |
660 if (s->pict_type != I_TYPE && | |
661 s->pict_type != P_TYPE) | |
662 return -1; | |
663 | |
664 s->qscale = get_bits(&s->gb, 5); | |
665 | |
666 if (s->pict_type == I_TYPE) { | |
667 code = get_bits(&s->gb, 5); | |
668 /* 0x17: one slice, 0x18: three slices */ | |
669 /* XXX: implement it */ | |
200 | 670 //printf("%d %d %d\n", code, s->slice_height, s->first_slice_line); |
0 | 671 if (code < 0x17) |
672 return -1; | |
673 s->slice_height = s->mb_height / (code - 0x16); | |
674 s->rl_chroma_table_index = decode012(&s->gb); | |
675 s->rl_table_index = decode012(&s->gb); | |
676 | |
21 | 677 s->dc_table_index = get_bits1(&s->gb); |
0 | 678 s->no_rounding = 1; |
200 | 679 /* printf(" %d %d %d %d \n", |
680 s->qscale, | |
681 s->rl_chroma_table_index, | |
682 s->rl_table_index, | |
683 s->dc_table_index);*/ | |
0 | 684 } else { |
21 | 685 s->use_skip_mb_code = get_bits1(&s->gb); |
0 | 686 |
687 s->rl_table_index = decode012(&s->gb); | |
688 s->rl_chroma_table_index = s->rl_table_index; | |
689 | |
21 | 690 s->dc_table_index = get_bits1(&s->gb); |
0 | 691 |
21 | 692 s->mv_table_index = get_bits1(&s->gb); |
200 | 693 /* printf(" %d %d %d %d %d \n", |
694 s->use_skip_mb_code, | |
695 s->rl_table_index, | |
696 s->rl_chroma_table_index, | |
697 s->dc_table_index, | |
698 s->mv_table_index);*/ | |
699 if(weirdAl) | |
700 s->no_rounding = 0; | |
701 else | |
702 s->no_rounding ^= 1; | |
0 | 703 } |
704 #ifdef DEBUG | |
705 printf("*****frame %d:\n", frame_count++); | |
706 #endif | |
707 return 0; | |
708 } | |
709 | |
/*
 * Store val into the first n elements of tab.
 * Nothing is written when n is zero or negative.
 */
void memsetw(short *tab, int val, int n)
{
    int remaining = n;

    while (remaining > 0) {
        *tab++ = val;
        remaining--;
    }
}
716 | |
717 int msmpeg4_decode_mb(MpegEncContext *s, | |
718 DCTELEM block[6][64]) | |
719 { | |
720 int cbp, code, i; | |
721 int pred, val; | |
722 UINT8 *coded_val; | |
723 | |
724 /* special slice handling */ | |
725 if (s->mb_x == 0) { | |
122 | 726 if (s->slice_height && (s->mb_y % s->slice_height) == 0) { |
0 | 727 int wrap; |
728 /* reset DC pred (set previous line to 1024) */ | |
729 wrap = 2 * s->mb_width + 2; | |
730 memsetw(&s->dc_val[0][(1) + (2 * s->mb_y) * wrap], | |
731 1024, 2 * s->mb_width); | |
732 wrap = s->mb_width + 2; | |
733 memsetw(&s->dc_val[1][(1) + (s->mb_y) * wrap], | |
734 1024, s->mb_width); | |
735 memsetw(&s->dc_val[2][(1) + (s->mb_y) * wrap], | |
736 1024, s->mb_width); | |
186
cf37da86d990
fix slices when code=0x18, patch by Michael Niedermayer <michael@mplayer.dev.hu>
arpi_esp
parents:
122
diff
changeset
|
737 |
cf37da86d990
fix slices when code=0x18, patch by Michael Niedermayer <michael@mplayer.dev.hu>
arpi_esp
parents:
122
diff
changeset
|
738 /* reset AC pred (set previous line to 0) */ |
cf37da86d990
fix slices when code=0x18, patch by Michael Niedermayer <michael@mplayer.dev.hu>
arpi_esp
parents:
122
diff
changeset
|
739 wrap = s->mb_width * 2 + 2; |
cf37da86d990
fix slices when code=0x18, patch by Michael Niedermayer <michael@mplayer.dev.hu>
arpi_esp
parents:
122
diff
changeset
|
740 memsetw(s->ac_val[0][0] + (1 + (2 * s->mb_y) * wrap)*16, |
cf37da86d990
fix slices when code=0x18, patch by Michael Niedermayer <michael@mplayer.dev.hu>
arpi_esp
parents:
122
diff
changeset
|
741 0, 2 * s->mb_width*16); |
cf37da86d990
fix slices when code=0x18, patch by Michael Niedermayer <michael@mplayer.dev.hu>
arpi_esp
parents:
122
diff
changeset
|
742 wrap = s->mb_width + 2; |
cf37da86d990
fix slices when code=0x18, patch by Michael Niedermayer <michael@mplayer.dev.hu>
arpi_esp
parents:
122
diff
changeset
|
743 memsetw(s->ac_val[1][0] + (1 + (s->mb_y) * wrap)*16, |
cf37da86d990
fix slices when code=0x18, patch by Michael Niedermayer <michael@mplayer.dev.hu>
arpi_esp
parents:
122
diff
changeset
|
744 0, s->mb_width*16); |
cf37da86d990
fix slices when code=0x18, patch by Michael Niedermayer <michael@mplayer.dev.hu>
arpi_esp
parents:
122
diff
changeset
|
745 memsetw(s->ac_val[2][0] + (1 + (s->mb_y) * wrap)*16, |
cf37da86d990
fix slices when code=0x18, patch by Michael Niedermayer <michael@mplayer.dev.hu>
arpi_esp
parents:
122
diff
changeset
|
746 0, s->mb_width*16); |
0 | 747 |
748 s->first_slice_line = 1; | |
749 } else { | |
750 s->first_slice_line = 0; | |
751 } | |
752 } | |
753 | |
754 if (s->pict_type == P_TYPE) { | |
755 set_stat(ST_INTER_MB); | |
756 if (s->use_skip_mb_code) { | |
21 | 757 if (get_bits1(&s->gb)) { |
0 | 758 /* skip mb */ |
759 s->mb_intra = 0; | |
760 for(i=0;i<6;i++) | |
761 s->block_last_index[i] = -1; | |
762 s->mv_dir = MV_DIR_FORWARD; | |
763 s->mv_type = MV_TYPE_16X16; | |
764 s->mv[0][0][0] = 0; | |
765 s->mv[0][0][1] = 0; | |
7
1d3ac9654178
added skip macroblock optimization (big perf win on black regions for example)
glantau
parents:
0
diff
changeset
|
766 s->mb_skiped = 1; |
0 | 767 return 0; |
768 } | |
769 } | |
770 | |
771 code = get_vlc(&s->gb, &mb_non_intra_vlc); | |
772 if (code < 0) | |
773 return -1; | |
774 if (code & 0x40) | |
775 s->mb_intra = 0; | |
776 else | |
777 s->mb_intra = 1; | |
778 | |
779 cbp = code & 0x3f; | |
780 } else { | |
781 set_stat(ST_INTRA_MB); | |
782 s->mb_intra = 1; | |
783 code = get_vlc(&s->gb, &mb_intra_vlc); | |
784 if (code < 0) | |
785 return -1; | |
786 /* predict coded block pattern */ | |
787 cbp = 0; | |
788 for(i=0;i<6;i++) { | |
789 val = ((code >> (5 - i)) & 1); | |
790 if (i < 4) { | |
791 pred = coded_block_pred(s, i, &coded_val); | |
792 val = val ^ pred; | |
793 *coded_val = val; | |
794 } | |
795 cbp |= val << (5 - i); | |
796 } | |
797 } | |
798 | |
799 if (!s->mb_intra) { | |
800 int mx, my; | |
801 set_stat(ST_MV); | |
802 h263_pred_motion(s, 0, &mx, &my); | |
803 if (msmpeg4_decode_motion(s, &mx, &my) < 0) | |
804 return -1; | |
805 s->mv_dir = MV_DIR_FORWARD; | |
806 s->mv_type = MV_TYPE_16X16; | |
807 s->mv[0][0][0] = mx; | |
808 s->mv[0][0][1] = my; | |
809 } else { | |
810 set_stat(ST_INTRA_MB); | |
21 | 811 s->ac_pred = get_bits1(&s->gb); |
0 | 812 } |
813 | |
814 for (i = 0; i < 6; i++) { | |
815 if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0) | |
816 return -1; | |
817 } | |
818 return 0; | |
819 } | |
820 | |
821 static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |
822 int n, int coded) | |
823 { | |
824 int code, level, i, j, last, run, run_diff; | |
825 int dc_pred_dir; | |
826 RLTable *rl; | |
827 const UINT8 *scan_table; | |
200 | 828 int qmul, qadd; |
0 | 829 |
830 if (s->mb_intra) { | |
200 | 831 qmul=1; |
832 qadd=0; | |
833 | |
0 | 834 /* DC coef */ |
835 set_stat(ST_DC); | |
836 level = msmpeg4_decode_dc(s, n, &dc_pred_dir); | |
837 if (level < 0) | |
838 return -1; | |
839 block[0] = level; | |
840 if (n < 4) { | |
841 rl = &rl_table[s->rl_table_index]; | |
842 } else { | |
843 rl = &rl_table[3 + s->rl_chroma_table_index]; | |
844 } | |
200 | 845 |
0 | 846 run_diff = 0; |
847 i = 1; | |
848 if (!coded) { | |
849 goto not_coded; | |
850 } | |
851 if (s->ac_pred) { | |
852 if (dc_pred_dir == 0) | |
853 scan_table = ff_alternate_vertical_scan; /* left */ | |
854 else | |
855 scan_table = ff_alternate_horizontal_scan; /* top */ | |
856 } else { | |
857 scan_table = zigzag_direct; | |
858 } | |
859 set_stat(ST_INTRA_AC); | |
860 } else { | |
200 | 861 qmul = s->qscale << 1; |
862 qadd = (s->qscale - 1) | 1; | |
0 | 863 i = 0; |
864 rl = &rl_table[3 + s->rl_table_index]; | |
865 run_diff = 1; | |
866 if (!coded) { | |
867 s->block_last_index[n] = i - 1; | |
868 return 0; | |
869 } | |
870 scan_table = zigzag_direct; | |
871 set_stat(ST_INTER_AC); | |
872 } | |
873 | |
874 for(;;) { | |
875 code = get_vlc(&s->gb, &rl->vlc); | |
876 if (code < 0) | |
877 return -1; | |
878 if (code == rl->n) { | |
879 /* escape */ | |
21 | 880 if (get_bits1(&s->gb) == 0) { |
881 if (get_bits1(&s->gb) == 0) { | |
0 | 882 /* third escape */ |
21 | 883 last = get_bits1(&s->gb); |
0 | 884 run = get_bits(&s->gb, 6); |
885 level = get_bits(&s->gb, 8); | |
886 level = (level << 24) >> 24; /* sign extend */ | |
200 | 887 if(level>0) level= level * qmul + qadd; |
888 else level= level * qmul - qadd; | |
0 | 889 } else { |
890 /* second escape */ | |
891 code = get_vlc(&s->gb, &rl->vlc); | |
892 if (code < 0 || code >= rl->n) | |
893 return -1; | |
894 run = rl->table_run[code]; | |
201 | 895 level = rl->table_level[code]; |
0 | 896 last = code >= rl->last; |
897 run += rl->max_run[last][level] + run_diff; | |
201 | 898 level= level * qmul + qadd; |
21 | 899 if (get_bits1(&s->gb)) |
0 | 900 level = -level; |
901 } | |
902 } else { | |
903 /* first escape */ | |
904 code = get_vlc(&s->gb, &rl->vlc); | |
905 if (code < 0 || code >= rl->n) | |
906 return -1; | |
907 run = rl->table_run[code]; | |
908 level = rl->table_level[code]; | |
909 last = code >= rl->last; | |
910 level += rl->max_level[last][run]; | |
200 | 911 level= level * qmul + qadd; |
21 | 912 if (get_bits1(&s->gb)) |
0 | 913 level = -level; |
914 } | |
915 } else { | |
916 run = rl->table_run[code]; | |
200 | 917 level = rl->table_level[code] * qmul + qadd; |
0 | 918 last = code >= rl->last; |
21 | 919 if (get_bits1(&s->gb)) |
0 | 920 level = -level; |
921 } | |
922 i += run; | |
923 if (i >= 64) | |
924 return -1; | |
925 j = scan_table[i]; | |
926 block[j] = level; | |
927 i++; | |
928 if (last) | |
929 break; | |
930 } | |
931 not_coded: | |
932 if (s->mb_intra) { | |
933 mpeg4_pred_ac(s, block, n, dc_pred_dir); | |
934 if (s->ac_pred) { | |
935 i = 64; /* XXX: not optimal */ | |
936 } | |
937 } | |
938 s->block_last_index[n] = i - 1; | |
939 | |
940 return 0; | |
941 } | |
942 | |
943 static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) | |
944 { | |
945 int level, pred; | |
25 | 946 INT16 *dc_val; |
0 | 947 |
948 if (n < 4) { | |
949 level = get_vlc(&s->gb, &dc_lum_vlc[s->dc_table_index]); | |
950 } else { | |
951 level = get_vlc(&s->gb, &dc_chroma_vlc[s->dc_table_index]); | |
952 } | |
953 if (level < 0) | |
954 return -1; | |
955 | |
956 if (level == DC_MAX) { | |
957 level = get_bits(&s->gb, 8); | |
21 | 958 if (get_bits1(&s->gb)) |
0 | 959 level = -level; |
960 } else if (level != 0) { | |
21 | 961 if (get_bits1(&s->gb)) |
0 | 962 level = -level; |
963 } | |
964 | |
965 pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr); | |
966 level += pred; | |
967 | |
968 /* update predictor */ | |
969 if (n < 4) { | |
970 *dc_val = level * s->y_dc_scale; | |
971 } else { | |
972 *dc_val = level * s->c_dc_scale; | |
973 } | |
974 | |
975 return level; | |
976 } | |
977 | |
978 static int msmpeg4_decode_motion(MpegEncContext * s, | |
979 int *mx_ptr, int *my_ptr) | |
980 { | |
981 MVTable *mv; | |
982 int code, mx, my; | |
983 | |
984 mv = &mv_tables[s->mv_table_index]; | |
985 | |
986 code = get_vlc(&s->gb, &mv->vlc); | |
987 if (code < 0) | |
988 return -1; | |
989 if (code == mv->n) { | |
990 mx = get_bits(&s->gb, 6); | |
991 my = get_bits(&s->gb, 6); | |
992 } else { | |
993 mx = mv->table_mvx[code]; | |
994 my = mv->table_mvy[code]; | |
995 } | |
996 | |
997 mx += *mx_ptr - 32; | |
998 my += *my_ptr - 32; | |
999 /* WARNING : they do not do exactly modulo encoding */ | |
1000 if (mx <= -64) | |
1001 mx += 64; | |
1002 else if (mx >= 64) | |
1003 mx -= 64; | |
1004 | |
1005 if (my <= -64) | |
1006 my += 64; | |
1007 else if (my >= 64) | |
1008 my -= 64; | |
1009 *mx_ptr = mx; | |
1010 *my_ptr = my; | |
1011 return 0; | |
1012 } |