Mercurial > libavcodec.hg
annotate jpeglsdec.c @ 12197:fbf4d5b1b664 libavcodec
Remove FF_MM_SSE2/3 flags for CPUs where this is generally not faster than
regular MMX code. Examples of this are the Core1 CPU. Instead, set a new flag,
FF_MM_SSE2/3SLOW, which can be checked for particular SSE2/3 functions that
have been checked specifically on such CPUs and are actually faster than
their MMX counterparts.
In addition, use this flag to enable particular VP8 and LPC SSE2 functions
that are faster than their MMX counterparts.
Based on a patch by Loren Merritt <lorenm AT u washington edu>.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 22:38:23 +0000 |
parents | 7dd2a45249a9 |
children |
rev | line source |
---|---|
5003 | 1 /* |
2 * JPEG-LS decoder | |
3 * Copyright (c) 2003 Michael Niedermayer | |
4 * Copyright (c) 2006 Konstantin Shishkov | |
5 * | |
6 * This file is part of FFmpeg. | |
7 * | |
8 * FFmpeg is free software; you can redistribute it and/or | |
9 * modify it under the terms of the GNU Lesser General Public | |
10 * License as published by the Free Software Foundation; either | |
11 * version 2.1 of the License, or (at your option) any later version. | |
12 * | |
13 * FFmpeg is distributed in the hope that it will be useful, | |
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 * Lesser General Public License for more details. | |
17 * | |
18 * You should have received a copy of the GNU Lesser General Public | |
19 * License along with FFmpeg; if not, write to the Free Software | |
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 */ | |
22 | |
23 /** | |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11560
diff
changeset
|
24 * @file |
5003 | 25 * JPEG-LS decoder. |
26 */ | |
27 | |
28 #include "avcodec.h" | |
9428 | 29 #include "get_bits.h" |
5003 | 30 #include "golomb.h" |
8627
d6bab465b82c
moves mid_pred() into mathops.h (with arch specific code split by directory)
aurel
parents:
7040
diff
changeset
|
31 #include "mathops.h" |
5003 | 32 #include "mjpeg.h" |
5041 | 33 #include "mjpegdec.h" |
5003 | 34 #include "jpegls.h" |
35 #include "jpeglsdec.h" | |
36 | |
37 | |
38 /* | |
39 * Uncomment this to significantly speed up decoding of broken JPEG-LS | |
40 * (or test broken JPEG-LS decoder) and slow down ordinary decoding a bit. | |
41 * | |
42 * No Golomb code of length >= 32 bits is possible, so check for and | |
43 * avoid a run of 32 zero bits; the FFmpeg Golomb decoder is painfully slow | |
44 * on such errors. | |
45 */ | |
46 //#define JLS_BROKEN | |
47 | |
48 | |
49 /** | |
50 * Decode LSE block with initialization parameters | |
51 */ | |
52 int ff_jpegls_decode_lse(MJpegDecodeContext *s) | |
53 { | |
54 int len, id; | |
55 | |
56 /* XXX: verify len field validity */ | |
57 len = get_bits(&s->gb, 16); | |
58 id = get_bits(&s->gb, 8); | |
59 | |
60 switch(id){ | |
61 case 1: | |
62 s->maxval= get_bits(&s->gb, 16); | |
63 s->t1= get_bits(&s->gb, 16); | |
64 s->t2= get_bits(&s->gb, 16); | |
65 s->t3= get_bits(&s->gb, 16); | |
66 s->reset= get_bits(&s->gb, 16); | |
67 | |
68 // ff_jpegls_reset_coding_parameters(s, 0); | |
69 //FIXME quant table? | |
70 break; | |
71 case 2: | |
72 case 3: | |
73 av_log(s->avctx, AV_LOG_ERROR, "palette not supported\n"); | |
74 return -1; | |
75 case 4: | |
76 av_log(s->avctx, AV_LOG_ERROR, "oversize image not supported\n"); | |
77 return -1; | |
78 default: | |
79 av_log(s->avctx, AV_LOG_ERROR, "invalid id %d\n", id); | |
80 return -1; | |
81 } | |
82 // av_log(s->avctx, AV_LOG_DEBUG, "ID=%i, T=%i,%i,%i\n", id, s->t1, s->t2, s->t3); | |
83 | |
84 return 0; | |
85 } | |
86 | |
87 /** | |
88 * Get context-dependent Golomb code, decode it and update context | |
89 */ | |
90 static inline int ls_get_code_regular(GetBitContext *gb, JLSState *state, int Q){ | |
91 int k, ret; | |
92 | |
93 for(k = 0; (state->N[Q] << k) < state->A[Q]; k++); | |
94 | |
95 #ifdef JLS_BROKEN | |
96 if(!show_bits_long(gb, 32))return -1; | |
97 #endif | |
98 ret = get_ur_golomb_jpegls(gb, k, state->limit, state->qbpp); | |
99 | |
100 /* decode mapped error */ | |
101 if(ret & 1) | |
102 ret = -((ret + 1) >> 1); | |
103 else | |
104 ret >>= 1; | |
105 | |
106 /* for NEAR=0, k=0 and 2*B[Q] <= - N[Q] mapping is reversed */ | |
107 if(!state->near && !k && (2 * state->B[Q] <= -state->N[Q])) | |
108 ret = -(ret + 1); | |
109 | |
110 ret= ff_jpegls_update_state_regular(state, Q, ret); | |
111 | |
112 return ret; | |
113 } | |
114 | |
115 /** | |
116 * Get Golomb code, decode it and update state for run termination | |
117 */ | |
118 static inline int ls_get_code_runterm(GetBitContext *gb, JLSState *state, int RItype, int limit_add){ | |
119 int k, ret, temp, map; | |
120 int Q = 365 + RItype; | |
121 | |
122 temp= state->A[Q]; | |
123 if(RItype) | |
124 temp += state->N[Q] >> 1; | |
125 | |
126 for(k = 0; (state->N[Q] << k) < temp; k++); | |
127 | |
128 #ifdef JLS_BROKEN | |
129 if(!show_bits_long(gb, 32))return -1; | |
130 #endif | |
131 ret = get_ur_golomb_jpegls(gb, k, state->limit - limit_add - 1, state->qbpp); | |
132 | |
133 /* decode mapped error */ | |
134 map = 0; | |
135 if(!k && (RItype || ret) && (2 * state->B[Q] < state->N[Q])) | |
136 map = 1; | |
137 ret += RItype + map; | |
138 | |
139 if(ret & 1){ | |
140 ret = map - ((ret + 1) >> 1); | |
141 state->B[Q]++; | |
142 } else { | |
143 ret = ret >> 1; | |
144 } | |
145 | |
146 /* update state */ | |
147 state->A[Q] += FFABS(ret) - RItype; | |
148 ret *= state->twonear; | |
149 ff_jpegls_downscale_state(state, Q); | |
150 | |
151 return ret; | |
152 } | |
153 | |
154 /** | |
155 * Decode one line of image | |
156 */ | |
157 static inline void ls_decode_line(JLSState *state, MJpegDecodeContext *s, void *last, void *dst, int last2, int w, int stride, int comp, int bits){ | |
158 int i, x = 0; | |
159 int Ra, Rb, Rc, Rd; | |
160 int D0, D1, D2; | |
161 | |
162 while(x < w) { | |
163 int err, pred; | |
164 | |
165 /* compute gradients */ | |
166 Ra = x ? R(dst, x - stride) : R(last, x); | |
167 Rb = R(last, x); | |
168 Rc = x ? R(last, x - stride) : last2; | |
169 Rd = (x >= w - stride) ? R(last, x) : R(last, x + stride); | |
170 D0 = Rd - Rb; | |
171 D1 = Rb - Rc; | |
172 D2 = Rc - Ra; | |
173 /* run mode */ | |
174 if((FFABS(D0) <= state->near) && (FFABS(D1) <= state->near) && (FFABS(D2) <= state->near)) { | |
175 int r; | |
176 int RItype; | |
177 | |
178 /* decode full runs while available */ | |
179 while(get_bits1(&s->gb)) { | |
180 int r; | |
181 r = 1 << ff_log2_run[state->run_index[comp]]; | |
182 if(x + r * stride > w) { | |
183 r = (w - x) / stride; | |
184 } | |
185 for(i = 0; i < r; i++) { | |
186 W(dst, x, Ra); | |
187 x += stride; | |
188 } | |
189 /* if EOL reached, we stop decoding */ | |
190 if(r != (1 << ff_log2_run[state->run_index[comp]])) | |
191 return; | |
192 if(state->run_index[comp] < 31) | |
193 state->run_index[comp]++; | |
194 if(x + stride > w) | |
195 return; | |
196 } | |
197 /* decode aborted run */ | |
198 r = ff_log2_run[state->run_index[comp]]; | |
199 if(r) | |
200 r = get_bits_long(&s->gb, r); | |
201 for(i = 0; i < r; i++) { | |
202 W(dst, x, Ra); | |
203 x += stride; | |
204 } | |
205 | |
206 /* decode run termination value */ | |
207 Rb = R(last, x); | |
208 RItype = (FFABS(Ra - Rb) <= state->near) ? 1 : 0; | |
209 err = ls_get_code_runterm(&s->gb, state, RItype, ff_log2_run[state->run_index[comp]]); | |
210 if(state->run_index[comp]) | |
211 state->run_index[comp]--; | |
212 | |
213 if(state->near && RItype){ | |
214 pred = Ra + err; | |
215 } else { | |
216 if(Rb < Ra) | |
217 pred = Rb - err; | |
218 else | |
219 pred = Rb + err; | |
220 } | |
221 } else { /* regular mode */ | |
222 int context, sign; | |
223 | |
224 context = ff_jpegls_quantize(state, D0) * 81 + ff_jpegls_quantize(state, D1) * 9 + ff_jpegls_quantize(state, D2); | |
225 pred = mid_pred(Ra, Ra + Rb - Rc, Rb); | |
226 | |
227 if(context < 0){ | |
228 context = -context; | |
229 sign = 1; | |
230 }else{ | |
231 sign = 0; | |
232 } | |
233 | |
234 if(sign){ | |
235 pred = av_clip(pred - state->C[context], 0, state->maxval); | |
236 err = -ls_get_code_regular(&s->gb, state, context); | |
237 } else { | |
238 pred = av_clip(pred + state->C[context], 0, state->maxval); | |
239 err = ls_get_code_regular(&s->gb, state, context); | |
240 } | |
241 | |
242 /* we have to do something more for near-lossless coding */ | |
243 pred += err; | |
244 } | |
245 if(state->near){ | |
246 if(pred < -state->near) | |
247 pred += state->range * state->twonear; | |
248 else if(pred > state->maxval + state->near) | |
249 pred -= state->range * state->twonear; | |
250 pred = av_clip(pred, 0, state->maxval); | |
251 } | |
252 | |
253 pred &= state->maxval; | |
254 W(dst, x, pred); | |
255 x += stride; | |
256 } | |
257 } | |
258 | |
259 int ff_jpegls_decode_picture(MJpegDecodeContext *s, int near, int point_transform, int ilv){ | |
260 int i, t = 0; | |
261 uint8_t *zero, *last, *cur; | |
262 JLSState *state; | |
263 int off = 0, stride = 1, width, shift; | |
264 | |
265 zero = av_mallocz(s->picture.linesize[0]); | |
266 last = zero; | |
267 cur = s->picture.data[0]; | |
268 | |
269 state = av_mallocz(sizeof(JLSState)); | |
270 /* initialize JPEG-LS state from JPEG parameters */ | |
271 state->near = near; | |
272 state->bpp = (s->bits < 2) ? 2 : s->bits; | |
273 state->maxval = s->maxval; | |
274 state->T1 = s->t1; | |
275 state->T2 = s->t2; | |
276 state->T3 = s->t3; | |
277 state->reset = s->reset; | |
278 ff_jpegls_reset_coding_parameters(state, 0); | |
279 ff_jpegls_init_state(state); | |
280 | |
281 if(s->bits <= 8) | |
282 shift = point_transform + (8 - s->bits); | |
283 else | |
284 shift = point_transform + (16 - s->bits); | |
285 | |
286 // av_log(s->avctx, AV_LOG_DEBUG, "JPEG-LS params: %ix%i NEAR=%i MV=%i T(%i,%i,%i) RESET=%i, LIMIT=%i, qbpp=%i, RANGE=%i\n",s->width,s->height,state->near,state->maxval,state->T1,state->T2,state->T3,state->reset,state->limit,state->qbpp, state->range); | |
287 // av_log(s->avctx, AV_LOG_DEBUG, "JPEG params: ILV=%i Pt=%i BPP=%i, scan = %i\n", ilv, point_transform, s->bits, s->cur_scan); | |
288 if(ilv == 0) { /* separate planes */ | |
289 off = s->cur_scan - 1; | |
290 stride = (s->nb_components > 1) ? 3 : 1; | |
291 width = s->width * stride; | |
292 cur += off; | |
293 for(i = 0; i < s->height; i++) { | |
294 if(s->bits <= 8){ | |
295 ls_decode_line(state, s, last, cur, t, width, stride, off, 8); | |
296 t = last[0]; | |
297 }else{ | |
298 ls_decode_line(state, s, last, cur, t, width, stride, off, 16); | |
299 t = *((uint16_t*)last); | |
300 } | |
301 last = cur; | |
302 cur += s->picture.linesize[0]; | |
303 | |
304 if (s->restart_interval && !--s->restart_count) { | |
305 align_get_bits(&s->gb); | |
306 skip_bits(&s->gb, 16); /* skip RSTn */ | |
307 } | |
308 } | |
309 } else if(ilv == 1) { /* line interleaving */ | |
310 int j; | |
311 int Rc[3] = {0, 0, 0}; | |
312 memset(cur, 0, s->picture.linesize[0]); | |
313 width = s->width * 3; | |
314 for(i = 0; i < s->height; i++) { | |
315 for(j = 0; j < 3; j++) { | |
316 ls_decode_line(state, s, last + j, cur + j, Rc[j], width, 3, j, 8); | |
317 Rc[j] = last[j]; | |
318 | |
319 if (s->restart_interval && !--s->restart_count) { | |
320 align_get_bits(&s->gb); | |
321 skip_bits(&s->gb, 16); /* skip RSTn */ | |
322 } | |
323 } | |
324 last = cur; | |
325 cur += s->picture.linesize[0]; | |
326 } | |
327 } else if(ilv == 2) { /* sample interleaving */ | |
328 av_log(s->avctx, AV_LOG_ERROR, "Sample interleaved images are not supported.\n"); | |
329 av_free(state); | |
330 av_free(zero); | |
331 return -1; | |
332 } | |
333 | |
334 if(shift){ /* we need to do point transform or normalize samples */ | |
335 int x, w; | |
336 | |
337 w = s->width * s->nb_components; | |
338 | |
339 if(s->bits <= 8){ | |
340 uint8_t *src = s->picture.data[0]; | |
341 | |
342 for(i = 0; i < s->height; i++){ | |
343 for(x = off; x < w; x+= stride){ | |
344 src[x] <<= shift; | |
345 } | |
346 src += s->picture.linesize[0]; | |
347 } | |
348 }else{ | |
349 uint16_t *src = (uint16_t*) s->picture.data[0]; | |
350 | |
351 for(i = 0; i < s->height; i++){ | |
352 for(x = 0; x < w; x++){ | |
353 src[x] <<= shift; | |
354 } | |
355 src += s->picture.linesize[0]/2; | |
356 } | |
357 } | |
358 } | |
359 av_free(state); | |
360 av_free(zero); | |
361 | |
362 return 0; | |
363 } | |
5041 | 364 |
365 | |
366 AVCodec jpegls_decoder = { | |
367 "jpegls", | |
11560
8a4984c5cacc
Define AVMediaType enum, and use it instead of enum CodecType, which
stefano
parents:
9428
diff
changeset
|
368 AVMEDIA_TYPE_VIDEO, |
5041 | 369 CODEC_ID_JPEGLS, |
370 sizeof(MJpegDecodeContext), | |
371 ff_mjpeg_decode_init, | |
372 NULL, | |
373 ff_mjpeg_decode_end, | |
374 ff_mjpeg_decode_frame, | |
375 CODEC_CAP_DR1, | |
7040
e943e1409077
Make AVCodec long_names definition conditional depending on CONFIG_SMALL.
stefano
parents:
6712
diff
changeset
|
376 .long_name = NULL_IF_CONFIG_SMALL("JPEG-LS"), |
5041 | 377 }; |