Mercurial > libavcodec.hg
comparison pngdec.c @ 6384:0a403ade8c81 libavcodec
SIMD-optimize and unroll png_filter_row.
Cycles per 1000 pixels on Core 2, per filter type (before -> after; "left" is the SUB filter, "top" is the UP filter):
left: 9211 -> 5170
top: 9283 -> 2138
avg: 12215 -> 7611
paeth: 64024 -> 17360
Overall RGB PNG decoding speed: +45%
Overall greyscale PNG decoding speed: +6%
author | lorenm |
---|---|
date | Thu, 21 Feb 2008 07:10:46 +0000 |
parents | 817e302aae08 |
children | 1a9af4a496f2 |
Comparison legend: equal, deleted, inserted, replaced
6383:7ba06222bda7 | 6384:0a403ade8c81 |
---|---|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 */ | 20 */ |
21 #include "avcodec.h" | 21 #include "avcodec.h" |
22 #include "bytestream.h" | 22 #include "bytestream.h" |
23 #include "png.h" | 23 #include "png.h" |
24 #include "dsputil.h" | |
24 | 25 |
25 /* TODO: | 26 /* TODO: |
26 * - add 2, 4 and 16 bit depth support | 27 * - add 2, 4 and 16 bit depth support |
27 */ | 28 */ |
28 | 29 |
29 #include <zlib.h> | 30 #include <zlib.h> |
30 | 31 |
31 //#define DEBUG | 32 //#define DEBUG |
32 | 33 |
33 typedef struct PNGDecContext { | 34 typedef struct PNGDecContext { |
35 DSPContext dsp; | |
36 | |
34 const uint8_t *bytestream; | 37 const uint8_t *bytestream; |
35 const uint8_t *bytestream_start; | 38 const uint8_t *bytestream_start; |
36 const uint8_t *bytestream_end; | 39 const uint8_t *bytestream_end; |
37 AVFrame picture; | 40 AVFrame picture; |
38 | 41 |
127 } | 130 } |
128 break; | 131 break; |
129 } | 132 } |
130 } | 133 } |
131 | 134 |
132 /* XXX: optimize */ | 135 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp) |
136 { | |
137 int i; | |
138 for(i = 0; i < w; i++) { | |
139 int a, b, c, p, pa, pb, pc; | |
140 | |
141 a = dst[i - bpp]; | |
142 b = top[i]; | |
143 c = top[i - bpp]; | |
144 | |
145 p = b - c; | |
146 pc = a - c; | |
147 | |
148 pa = abs(p); | |
149 pb = abs(pc); | |
150 pc = abs(p + pc); | |
151 | |
152 if (pa <= pb && pa <= pc) | |
153 p = a; | |
154 else if (pb <= pc) | |
155 p = b; | |
156 else | |
157 p = c; | |
158 dst[i] = p + src[i]; | |
159 } | |
160 } | |
161 | |
162 #define UNROLL1(bpp, op) {\ | |
163 r = dst[0];\ | |
164 if(bpp >= 2) g = dst[1];\ | |
165 if(bpp >= 3) b = dst[2];\ | |
166 if(bpp >= 4) a = dst[3];\ | |
167 for(; i < size; i+=bpp) {\ | |
168 dst[i+0] = r = op(r, src[i+0], last[i+0]);\ | |
169 if(bpp == 1) continue;\ | |
170 dst[i+1] = g = op(g, src[i+1], last[i+1]);\ | |
171 if(bpp == 2) continue;\ | |
172 dst[i+2] = b = op(b, src[i+2], last[i+2]);\ | |
173 if(bpp == 3) continue;\ | |
174 dst[i+3] = a = op(a, src[i+3], last[i+3]);\ | |
175 }\ | |
176 } | |
177 | |
178 #define UNROLL_FILTER(op)\ | |
179 if(bpp == 1) UNROLL1(1, op)\ | |
180 else if(bpp == 2) UNROLL1(2, op)\ | |
181 else if(bpp == 3) UNROLL1(3, op)\ | |
182 else if(bpp == 4) UNROLL1(4, op)\ | |
183 | |
133 /* NOTE: 'dst' can be equal to 'last' */ | 184 /* NOTE: 'dst' can be equal to 'last' */ |
134 static void png_filter_row(uint8_t *dst, int filter_type, | 185 static void png_filter_row(DSPContext *dsp, uint8_t *dst, int filter_type, |
135 uint8_t *src, uint8_t *last, int size, int bpp) | 186 uint8_t *src, uint8_t *last, int size, int bpp) |
136 { | 187 { |
137 int i, p; | 188 int i, p, r, g, b, a; |
138 | 189 |
139 switch(filter_type) { | 190 switch(filter_type) { |
140 case PNG_FILTER_VALUE_NONE: | 191 case PNG_FILTER_VALUE_NONE: |
141 memcpy(dst, src, size); | 192 memcpy(dst, src, size); |
142 break; | 193 break; |
143 case PNG_FILTER_VALUE_SUB: | 194 case PNG_FILTER_VALUE_SUB: |
144 for(i = 0; i < bpp; i++) { | 195 for(i = 0; i < bpp; i++) { |
145 dst[i] = src[i]; | 196 dst[i] = src[i]; |
146 } | 197 } |
147 for(i = bpp; i < size; i++) { | 198 if(bpp == 4) { |
148 p = dst[i - bpp]; | 199 p = *(int*)dst; |
149 dst[i] = p + src[i]; | 200 for(; i < size; i+=bpp) { |
201 int s = *(int*)(src+i); | |
202 p = ((s&0x7f7f7f7f) + (p&0x7f7f7f7f)) ^ ((s^p)&0x80808080); | |
203 *(int*)(dst+i) = p; | |
204 } | |
205 } else { | |
206 #define OP_SUB(x,s,l) x+s | |
207 UNROLL_FILTER(OP_SUB); | |
150 } | 208 } |
151 break; | 209 break; |
152 case PNG_FILTER_VALUE_UP: | 210 case PNG_FILTER_VALUE_UP: |
153 for(i = 0; i < size; i++) { | 211 dsp->add_bytes_l2(dst, src, last, size); |
154 p = last[i]; | |
155 dst[i] = p + src[i]; | |
156 } | |
157 break; | 212 break; |
158 case PNG_FILTER_VALUE_AVG: | 213 case PNG_FILTER_VALUE_AVG: |
159 for(i = 0; i < bpp; i++) { | 214 for(i = 0; i < bpp; i++) { |
160 p = (last[i] >> 1); | 215 p = (last[i] >> 1); |
161 dst[i] = p + src[i]; | 216 dst[i] = p + src[i]; |
162 } | 217 } |
163 for(i = bpp; i < size; i++) { | 218 #define OP_AVG(x,s,l) (((x + l) >> 1) + s) & 0xff |
164 p = ((dst[i - bpp] + last[i]) >> 1); | 219 UNROLL_FILTER(OP_AVG); |
165 dst[i] = p + src[i]; | |
166 } | |
167 break; | 220 break; |
168 case PNG_FILTER_VALUE_PAETH: | 221 case PNG_FILTER_VALUE_PAETH: |
169 for(i = 0; i < bpp; i++) { | 222 for(i = 0; i < bpp; i++) { |
170 p = last[i]; | 223 p = last[i]; |
171 dst[i] = p + src[i]; | 224 dst[i] = p + src[i]; |
172 } | 225 } |
173 for(i = bpp; i < size; i++) { | 226 if(bpp > 1 && size > 4) { |
174 int a, b, c, pa, pb, pc; | 227 // would write off the end of the array if we let it process the last pixel with bpp=3 |
175 | 228 int w = bpp==4 ? size : size-3; |
176 a = dst[i - bpp]; | 229 dsp->add_png_paeth_prediction(dst+i, src+i, last+i, w-i, bpp); |
177 b = last[i]; | 230 i = w; |
178 c = last[i - bpp]; | 231 } |
179 | 232 ff_add_png_paeth_prediction(dst+i, src+i, last+i, size-i, bpp); |
180 p = b - c; | |
181 pc = a - c; | |
182 | |
183 pa = abs(p); | |
184 pb = abs(pc); | |
185 pc = abs(p + pc); | |
186 | |
187 if (pa <= pb && pa <= pc) | |
188 p = a; | |
189 else if (pb <= pc) | |
190 p = b; | |
191 else | |
192 p = c; | |
193 dst[i] = p + src[i]; | |
194 } | |
195 break; | 233 break; |
196 } | 234 } |
197 } | 235 } |
198 | 236 |
199 static void convert_to_rgb32(uint8_t *dst, const uint8_t *src, int width) | 237 static void convert_to_rgb32(uint8_t *dst, const uint8_t *src, int width) |
220 | 258 |
221 if (!s->interlace_type) { | 259 if (!s->interlace_type) { |
222 ptr = s->image_buf + s->image_linesize * s->y; | 260 ptr = s->image_buf + s->image_linesize * s->y; |
223 /* need to swap bytes correctly for RGB_ALPHA */ | 261 /* need to swap bytes correctly for RGB_ALPHA */ |
224 if (s->color_type == PNG_COLOR_TYPE_RGB_ALPHA) { | 262 if (s->color_type == PNG_COLOR_TYPE_RGB_ALPHA) { |
225 png_filter_row(s->tmp_row, s->crow_buf[0], s->crow_buf + 1, | 263 png_filter_row(&s->dsp, s->tmp_row, s->crow_buf[0], s->crow_buf + 1, |
226 s->last_row, s->row_size, s->bpp); | 264 s->last_row, s->row_size, s->bpp); |
227 memcpy(s->last_row, s->tmp_row, s->row_size); | 265 memcpy(s->last_row, s->tmp_row, s->row_size); |
228 convert_to_rgb32(ptr, s->tmp_row, s->width); | 266 convert_to_rgb32(ptr, s->tmp_row, s->width); |
229 } else { | 267 } else { |
230 /* in normal case, we avoid one copy */ | 268 /* in normal case, we avoid one copy */ |
231 if (s->y == 0) | 269 if (s->y == 0) |
232 last_row = s->last_row; | 270 last_row = s->last_row; |
233 else | 271 else |
234 last_row = ptr - s->image_linesize; | 272 last_row = ptr - s->image_linesize; |
235 | 273 |
236 png_filter_row(ptr, s->crow_buf[0], s->crow_buf + 1, | 274 png_filter_row(&s->dsp, ptr, s->crow_buf[0], s->crow_buf + 1, |
237 last_row, s->row_size, s->bpp); | 275 last_row, s->row_size, s->bpp); |
238 } | 276 } |
239 s->y++; | 277 s->y++; |
240 if (s->y == s->height) { | 278 if (s->y == s->height) { |
241 s->state |= PNG_ALLIMAGE; | 279 s->state |= PNG_ALLIMAGE; |
247 if ((ff_png_pass_ymask[s->pass] << (s->y & 7)) & 0x80) { | 285 if ((ff_png_pass_ymask[s->pass] << (s->y & 7)) & 0x80) { |
248 /* if we already read one row, it is time to stop to | 286 /* if we already read one row, it is time to stop to |
249 wait for the next one */ | 287 wait for the next one */ |
250 if (got_line) | 288 if (got_line) |
251 break; | 289 break; |
252 png_filter_row(s->tmp_row, s->crow_buf[0], s->crow_buf + 1, | 290 png_filter_row(&s->dsp, s->tmp_row, s->crow_buf[0], s->crow_buf + 1, |
253 s->last_row, s->pass_row_size, s->bpp); | 291 s->last_row, s->pass_row_size, s->bpp); |
254 memcpy(s->last_row, s->tmp_row, s->pass_row_size); | 292 memcpy(s->last_row, s->tmp_row, s->pass_row_size); |
255 got_line = 1; | 293 got_line = 1; |
256 } | 294 } |
257 if ((png_pass_dsp_ymask[s->pass] << (s->y & 7)) & 0x80) { | 295 if ((png_pass_dsp_ymask[s->pass] << (s->y & 7)) & 0x80) { |
532 static int png_dec_init(AVCodecContext *avctx){ | 570 static int png_dec_init(AVCodecContext *avctx){ |
533 PNGDecContext *s = avctx->priv_data; | 571 PNGDecContext *s = avctx->priv_data; |
534 | 572 |
535 avcodec_get_frame_defaults((AVFrame*)&s->picture); | 573 avcodec_get_frame_defaults((AVFrame*)&s->picture); |
536 avctx->coded_frame= (AVFrame*)&s->picture; | 574 avctx->coded_frame= (AVFrame*)&s->picture; |
575 dsputil_init(&s->dsp, avctx); | |
537 | 576 |
538 return 0; | 577 return 0; |
539 } | 578 } |
540 | 579 |
541 AVCodec png_decoder = { | 580 AVCodec png_decoder = { |