Mercurial > libavcodec.hg
comparison imgresample.c @ 0:986e461dc072 libavcodec
Initial revision
author | glantau |
---|---|
date | Sun, 22 Jul 2001 14:18:56 +0000 |
parents | |
children | 2e2c46c87460 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:986e461dc072 |
---|---|
1 /* | |
2 * High quality image resampling with polyphase filters | |
3 * Copyright (c) 2001 Gerard Lantau. | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
18 */ | |
19 #include <stdlib.h> | |
20 #include <stdio.h> | |
21 #include <string.h> | |
22 #include <math.h> | |
23 #include "dsputil.h" | |
24 #include "avcodec.h" | |
25 | |
/* number of colour components (planes) of a picture */
#define NB_COMPONENTS 3

/* the filter bank holds one filter per sub-pixel phase */
#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
/* number of coefficients of each filter */
#define NB_TAPS    4
/* index of the center of the filter */
#define FCENTER    1

/* source positions are fixed point values with POS_FRAC_BITS fractional bits */
#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* filter coefficients are fixed point values with FILTER_BITS fractional bits */
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

/* number of rows of the ring buffer of horizontally filtered lines
   (the buffer itself holds NB_TAPS extra rows, see img_resample_init()) */
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
39 | |
40 struct ImgReSampleContext { | |
41 int iwidth, iheight, owidth, oheight; | |
42 int h_incr, v_incr; | |
43 INT16 h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */ | |
44 INT16 v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */ | |
45 UINT8 *line_buf; | |
46 }; | |
47 | |
48 static inline int get_phase(int pos) | |
49 { | |
50 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1); | |
51 } | |
52 | |
53 /* This function must be optimized */ | |
54 static void h_resample_fast(UINT8 *dst, int dst_width, UINT8 *src, int src_width, | |
55 int src_start, int src_incr, INT16 *filters) | |
56 { | |
57 int src_pos, phase, sum, i; | |
58 UINT8 *s; | |
59 INT16 *filter; | |
60 | |
61 src_pos = src_start; | |
62 for(i=0;i<dst_width;i++) { | |
63 #ifdef TEST | |
64 /* test */ | |
65 if ((src_pos >> POS_FRAC_BITS) < 0 || | |
66 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS)) | |
67 abort(); | |
68 #endif | |
69 s = src + (src_pos >> POS_FRAC_BITS); | |
70 phase = get_phase(src_pos); | |
71 filter = filters + phase * NB_TAPS; | |
72 #if NB_TAPS == 4 | |
73 sum = s[0] * filter[0] + | |
74 s[1] * filter[1] + | |
75 s[2] * filter[2] + | |
76 s[3] * filter[3]; | |
77 #else | |
78 { | |
79 int j; | |
80 sum = 0; | |
81 for(j=0;j<NB_TAPS;j++) | |
82 sum += s[j] * filter[j]; | |
83 } | |
84 #endif | |
85 sum = sum >> FILTER_BITS; | |
86 if (sum < 0) | |
87 sum = 0; | |
88 else if (sum > 255) | |
89 sum = 255; | |
90 dst[0] = sum; | |
91 src_pos += src_incr; | |
92 dst++; | |
93 } | |
94 } | |
95 | |
96 /* This function must be optimized */ | |
97 static void v_resample(UINT8 *dst, int dst_width, UINT8 *src, int wrap, | |
98 INT16 *filter) | |
99 { | |
100 int sum, i; | |
101 UINT8 *s; | |
102 | |
103 s = src; | |
104 for(i=0;i<dst_width;i++) { | |
105 #if NB_TAPS == 4 | |
106 sum = s[0 * wrap] * filter[0] + | |
107 s[1 * wrap] * filter[1] + | |
108 s[2 * wrap] * filter[2] + | |
109 s[3 * wrap] * filter[3]; | |
110 #else | |
111 { | |
112 int j; | |
113 UINT8 *s1 = s; | |
114 | |
115 sum = 0; | |
116 for(j=0;j<NB_TAPS;j++) { | |
117 sum += s1[0] * filter[j]; | |
118 s1 += wrap; | |
119 } | |
120 } | |
121 #endif | |
122 sum = sum >> FILTER_BITS; | |
123 if (sum < 0) | |
124 sum = 0; | |
125 else if (sum > 255) | |
126 sum = 255; | |
127 dst[0] = sum; | |
128 dst++; | |
129 s++; | |
130 } | |
131 } | |
132 | |
133 #ifdef CONFIG_MMX | |
134 | |
135 #include "i386/mmx.h" | |
136 | |
/*
 * Compute one horizontally filtered pixel and leave it, still as a 32 bit
 * value (sum >> FILTER_BITS), in the low double word of 'reg'.
 *
 * The macro uses and updates the following variables of the enclosing
 * function: src, src_pos, src_incr, filters, s, phase and filter.
 * mm7 must contain zero; mm6 is clobbered.
 *
 * Only the NB_TAPS (4) source bytes that are actually filtered are loaded
 * (movd): a 64 bit load would read 4 more bytes, which can lie beyond the
 * end of the source line.
 * NOTE(review): assumes i386/mmx.h provides movd_m2r() next to movq_m2r().
 */
#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movd_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

/* debugging aid: print the content of 'reg'; needs a local 'mmx_t tmp' */
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
154 | |
155 /* XXX: do four pixels at a time */ | |
156 static void h_resample_fast4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int src_width, | |
157 int src_start, int src_incr, INT16 *filters) | |
158 { | |
159 int src_pos, phase; | |
160 UINT8 *s; | |
161 INT16 *filter; | |
162 mmx_t tmp; | |
163 | |
164 src_pos = src_start; | |
165 pxor_r2r(mm7, mm7); | |
166 | |
167 while (dst_width >= 4) { | |
168 | |
169 FILTER4(mm0); | |
170 FILTER4(mm1); | |
171 FILTER4(mm2); | |
172 FILTER4(mm3); | |
173 | |
174 packuswb_r2r(mm7, mm0); | |
175 packuswb_r2r(mm7, mm1); | |
176 packuswb_r2r(mm7, mm3); | |
177 packuswb_r2r(mm7, mm2); | |
178 movq_r2m(mm0, tmp); | |
179 dst[0] = tmp.ub[0]; | |
180 movq_r2m(mm1, tmp); | |
181 dst[1] = tmp.ub[0]; | |
182 movq_r2m(mm2, tmp); | |
183 dst[2] = tmp.ub[0]; | |
184 movq_r2m(mm3, tmp); | |
185 dst[3] = tmp.ub[0]; | |
186 dst += 4; | |
187 dst_width -= 4; | |
188 } | |
189 while (dst_width > 0) { | |
190 FILTER4(mm0); | |
191 packuswb_r2r(mm7, mm0); | |
192 movq_r2m(mm0, tmp); | |
193 dst[0] = tmp.ub[0]; | |
194 dst++; | |
195 dst_width--; | |
196 } | |
197 emms(); | |
198 } | |
199 | |
200 static void v_resample4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int wrap, | |
201 INT16 *filter) | |
202 { | |
203 int sum, i, v; | |
204 UINT8 *s; | |
205 mmx_t tmp; | |
206 mmx_t coefs[4]; | |
207 | |
208 for(i=0;i<4;i++) { | |
209 v = filter[i]; | |
210 coefs[i].uw[0] = v; | |
211 coefs[i].uw[1] = v; | |
212 coefs[i].uw[2] = v; | |
213 coefs[i].uw[3] = v; | |
214 } | |
215 | |
216 pxor_r2r(mm7, mm7); | |
217 s = src; | |
218 while (dst_width >= 4) { | |
219 movq_m2r(s[0 * wrap], mm0); | |
220 punpcklbw_r2r(mm7, mm0); | |
221 movq_m2r(s[1 * wrap], mm1); | |
222 punpcklbw_r2r(mm7, mm1); | |
223 movq_m2r(s[2 * wrap], mm2); | |
224 punpcklbw_r2r(mm7, mm2); | |
225 movq_m2r(s[3 * wrap], mm3); | |
226 punpcklbw_r2r(mm7, mm3); | |
227 | |
228 pmullw_m2r(coefs[0], mm0); | |
229 pmullw_m2r(coefs[1], mm1); | |
230 pmullw_m2r(coefs[2], mm2); | |
231 pmullw_m2r(coefs[3], mm3); | |
232 | |
233 paddw_r2r(mm1, mm0); | |
234 paddw_r2r(mm3, mm2); | |
235 paddw_r2r(mm2, mm0); | |
236 psraw_i2r(FILTER_BITS, mm0); | |
237 | |
238 packuswb_r2r(mm7, mm0); | |
239 movq_r2m(mm0, tmp); | |
240 | |
241 *(UINT32 *)dst = tmp.ud[0]; | |
242 dst += 4; | |
243 s += 4; | |
244 dst_width -= 4; | |
245 } | |
246 while (dst_width > 0) { | |
247 sum = s[0 * wrap] * filter[0] + | |
248 s[1 * wrap] * filter[1] + | |
249 s[2 * wrap] * filter[2] + | |
250 s[3 * wrap] * filter[3]; | |
251 sum = sum >> FILTER_BITS; | |
252 if (sum < 0) | |
253 sum = 0; | |
254 else if (sum > 255) | |
255 sum = 255; | |
256 dst[0] = sum; | |
257 dst++; | |
258 s++; | |
259 dst_width--; | |
260 } | |
261 emms(); | |
262 } | |
263 #endif | |
264 | |
265 /* slow version to handle limit cases. Does not need optimisation */ | |
266 static void h_resample_slow(UINT8 *dst, int dst_width, UINT8 *src, int src_width, | |
267 int src_start, int src_incr, INT16 *filters) | |
268 { | |
269 int src_pos, phase, sum, j, v, i; | |
270 UINT8 *s, *src_end; | |
271 INT16 *filter; | |
272 | |
273 src_end = src + src_width; | |
274 src_pos = src_start; | |
275 for(i=0;i<dst_width;i++) { | |
276 s = src + (src_pos >> POS_FRAC_BITS); | |
277 phase = get_phase(src_pos); | |
278 filter = filters + phase * NB_TAPS; | |
279 sum = 0; | |
280 for(j=0;j<NB_TAPS;j++) { | |
281 if (s < src) | |
282 v = src[0]; | |
283 else if (s >= src_end) | |
284 v = src_end[-1]; | |
285 else | |
286 v = s[0]; | |
287 sum += v * filter[j]; | |
288 s++; | |
289 } | |
290 sum = sum >> FILTER_BITS; | |
291 if (sum < 0) | |
292 sum = 0; | |
293 else if (sum > 255) | |
294 sum = 255; | |
295 dst[0] = sum; | |
296 src_pos += src_incr; | |
297 dst++; | |
298 } | |
299 } | |
300 | |
301 static void h_resample(UINT8 *dst, int dst_width, UINT8 *src, int src_width, | |
302 int src_start, int src_incr, INT16 *filters) | |
303 { | |
304 int n, src_end; | |
305 | |
306 if (src_start < 0) { | |
307 n = (0 - src_start + src_incr - 1) / src_incr; | |
308 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters); | |
309 dst += n; | |
310 dst_width -= n; | |
311 src_start += n * src_incr; | |
312 } | |
313 src_end = src_start + dst_width * src_incr; | |
314 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) { | |
315 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / | |
316 src_incr; | |
317 } else { | |
318 n = dst_width; | |
319 } | |
320 #ifdef CONFIG_MMX | |
321 if ((mm_flags & MM_MMX) && NB_TAPS == 4) | |
322 h_resample_fast4_mmx(dst, n, | |
323 src, src_width, src_start, src_incr, filters); | |
324 else | |
325 #endif | |
326 h_resample_fast(dst, n, | |
327 src, src_width, src_start, src_incr, filters); | |
328 if (n < dst_width) { | |
329 dst += n; | |
330 dst_width -= n; | |
331 src_start += n * src_incr; | |
332 h_resample_slow(dst, dst_width, | |
333 src, src_width, src_start, src_incr, filters); | |
334 } | |
335 } | |
336 | |
337 static void component_resample(ImgReSampleContext *s, | |
338 UINT8 *output, int owrap, int owidth, int oheight, | |
339 UINT8 *input, int iwrap, int iwidth, int iheight) | |
340 { | |
341 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y; | |
342 UINT8 *new_line, *src_line; | |
343 | |
344 last_src_y = - FCENTER - 1; | |
345 /* position of the bottom of the filter in the source image */ | |
346 src_y = (last_src_y + NB_TAPS) * POS_FRAC; | |
347 ring_y = NB_TAPS; /* position in ring buffer */ | |
348 for(y=0;y<oheight;y++) { | |
349 /* apply horizontal filter on new lines from input if needed */ | |
350 src_y1 = src_y >> POS_FRAC_BITS; | |
351 while (last_src_y < src_y1) { | |
352 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS) | |
353 ring_y = NB_TAPS; | |
354 last_src_y++; | |
355 /* handle limit conditions : replicate line (slighly | |
356 inefficient because we filter multiple times */ | |
357 y1 = last_src_y; | |
358 if (y1 < 0) { | |
359 y1 = 0; | |
360 } else if (y1 >= iheight) { | |
361 y1 = iheight - 1; | |
362 } | |
363 src_line = input + y1 * iwrap; | |
364 new_line = s->line_buf + ring_y * owidth; | |
365 /* apply filter and handle limit cases correctly */ | |
366 h_resample(new_line, owidth, | |
367 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr, | |
368 &s->h_filters[0][0]); | |
369 /* handle ring buffer wraping */ | |
370 if (ring_y >= LINE_BUF_HEIGHT) { | |
371 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth, | |
372 new_line, owidth); | |
373 } | |
374 } | |
375 /* apply vertical filter */ | |
376 phase_y = get_phase(src_y); | |
377 #ifdef CONFIG_MMX | |
378 /* desactivated MMX because loss of precision */ | |
379 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0) | |
380 v_resample4_mmx(output, owidth, | |
381 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, | |
382 &s->v_filters[phase_y][0]); | |
383 else | |
384 #endif | |
385 v_resample(output, owidth, | |
386 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, | |
387 &s->v_filters[phase_y][0]); | |
388 | |
389 src_y += s->v_incr; | |
390 output += owrap; | |
391 } | |
392 } | |
393 | |
394 /* XXX: the following filter is quite naive, but it seems to suffice | |
395 for 4 taps */ | |
396 static void build_filter(INT16 *filter, float factor) | |
397 { | |
398 int ph, i, v; | |
399 float x, y, tab[NB_TAPS], norm, mult; | |
400 | |
401 /* if upsampling, only need to interpolate, no filter */ | |
402 if (factor > 1.0) | |
403 factor = 1.0; | |
404 | |
405 for(ph=0;ph<NB_PHASES;ph++) { | |
406 norm = 0; | |
407 for(i=0;i<NB_TAPS;i++) { | |
408 | |
409 x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor; | |
410 if (x == 0) | |
411 y = 1.0; | |
412 else | |
413 y = sin(x) / x; | |
414 tab[i] = y; | |
415 norm += y; | |
416 } | |
417 | |
418 /* normalize so that an uniform color remains the same */ | |
419 mult = (float)(1 << FILTER_BITS) / norm; | |
420 for(i=0;i<NB_TAPS;i++) { | |
421 v = (int)(tab[i] * mult); | |
422 filter[ph * NB_TAPS + i] = v; | |
423 } | |
424 } | |
425 } | |
426 | |
427 ImgReSampleContext *img_resample_init(int owidth, int oheight, | |
428 int iwidth, int iheight) | |
429 { | |
430 ImgReSampleContext *s; | |
431 | |
432 s = av_mallocz(sizeof(ImgReSampleContext)); | |
433 if (!s) | |
434 return NULL; | |
435 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS)); | |
436 if (!s->line_buf) | |
437 goto fail; | |
438 | |
439 s->owidth = owidth; | |
440 s->oheight = oheight; | |
441 s->iwidth = iwidth; | |
442 s->iheight = iheight; | |
443 | |
444 s->h_incr = (iwidth * POS_FRAC) / owidth; | |
445 s->v_incr = (iheight * POS_FRAC) / oheight; | |
446 | |
447 build_filter(&s->h_filters[0][0], (float)owidth / (float)iwidth); | |
448 build_filter(&s->v_filters[0][0], (float)oheight / (float)iheight); | |
449 | |
450 return s; | |
451 fail: | |
452 free(s); | |
453 return NULL; | |
454 } | |
455 | |
456 void img_resample(ImgReSampleContext *s, | |
457 AVPicture *output, AVPicture *input) | |
458 { | |
459 int i, shift; | |
460 | |
461 for(i=0;i<3;i++) { | |
462 shift = (i == 0) ? 0 : 1; | |
463 component_resample(s, output->data[i], output->linesize[i], | |
464 s->owidth >> shift, s->oheight >> shift, | |
465 input->data[i], input->linesize[i], | |
466 s->iwidth >> shift, s->iheight >> shift); | |
467 } | |
468 } | |
469 | |
470 void img_resample_close(ImgReSampleContext *s) | |
471 { | |
472 free(s->line_buf); | |
473 free(s); | |
474 } | |
475 | |
476 #ifdef TEST | |
477 | |
/*
 * Test replacement of av_mallocz(): allocate 'size' bytes set to zero.
 * Returns NULL if 'size' is negative or if the allocation fails (the
 * memory is only cleared when it has really been allocated).
 */
void *av_mallocz(int size)
{
    void *ptr;

    if (size < 0)
        return NULL;
    ptr = malloc(size);
    if (ptr)
        memset(ptr, 0, size);
    return ptr;
}
485 | |
486 /* input */ | |
487 #define XSIZE 256 | |
488 #define YSIZE 256 | |
489 UINT8 img[XSIZE * YSIZE]; | |
490 | |
491 /* output */ | |
492 #define XSIZE1 512 | |
493 #define YSIZE1 512 | |
494 UINT8 img1[XSIZE1 * YSIZE1]; | |
495 UINT8 img2[XSIZE1 * YSIZE1]; | |
496 | |
497 void save_pgm(const char *filename, UINT8 *img, int xsize, int ysize) | |
498 { | |
499 FILE *f; | |
500 f=fopen(filename,"w"); | |
501 fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255); | |
502 fwrite(img,1, xsize * ysize,f); | |
503 fclose(f); | |
504 } | |
505 | |
506 static void dump_filter(INT16 *filter) | |
507 { | |
508 int i, ph; | |
509 | |
510 for(ph=0;ph<NB_PHASES;ph++) { | |
511 printf("%2d: ", ph); | |
512 for(i=0;i<NB_TAPS;i++) { | |
513 printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0); | |
514 } | |
515 printf("\n"); | |
516 } | |
517 } | |
518 | |
#ifdef CONFIG_MMX
/* CPU capability flags tested by the resampling code (MM_MMX);
   the test program defines and sets them itself */
int mm_flags;
#endif
522 | |
523 int main(int argc, char **argv) | |
524 { | |
525 int x, y, v, i, xsize, ysize; | |
526 ImgReSampleContext *s; | |
527 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 }; | |
528 char buf[256]; | |
529 | |
530 /* build test image */ | |
531 for(y=0;y<YSIZE;y++) { | |
532 for(x=0;x<XSIZE;x++) { | |
533 if (x < XSIZE/2 && y < YSIZE/2) { | |
534 if (x < XSIZE/4 && y < YSIZE/4) { | |
535 if ((x % 10) <= 6 && | |
536 (y % 10) <= 6) | |
537 v = 0xff; | |
538 else | |
539 v = 0x00; | |
540 } else if (x < XSIZE/4) { | |
541 if (x & 1) | |
542 v = 0xff; | |
543 else | |
544 v = 0; | |
545 } else if (y < XSIZE/4) { | |
546 if (y & 1) | |
547 v = 0xff; | |
548 else | |
549 v = 0; | |
550 } else { | |
551 if (y < YSIZE*3/8) { | |
552 if ((y+x) & 1) | |
553 v = 0xff; | |
554 else | |
555 v = 0; | |
556 } else { | |
557 if (((x+3) % 4) <= 1 && | |
558 ((y+3) % 4) <= 1) | |
559 v = 0xff; | |
560 else | |
561 v = 0x00; | |
562 } | |
563 } | |
564 } else if (x < XSIZE/2) { | |
565 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2); | |
566 } else if (y < XSIZE/2) { | |
567 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2); | |
568 } else { | |
569 v = ((x + y - XSIZE) * 255) / XSIZE; | |
570 } | |
571 img[y * XSIZE + x] = v; | |
572 } | |
573 } | |
574 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE); | |
575 for(i=0;i<sizeof(factors)/sizeof(float);i++) { | |
576 fact = factors[i]; | |
577 xsize = (int)(XSIZE * fact); | |
578 ysize = (int)(YSIZE * fact); | |
579 s = img_resample_init(xsize, ysize, XSIZE, YSIZE); | |
580 printf("Factor=%0.2f\n", fact); | |
581 dump_filter(&s->h_filters[0][0]); | |
582 component_resample(s, img1, xsize, xsize, ysize, | |
583 img, XSIZE, XSIZE, YSIZE); | |
584 img_resample_close(s); | |
585 | |
586 sprintf(buf, "/tmp/out%d.pgm", i); | |
587 save_pgm(buf, img1, xsize, ysize); | |
588 } | |
589 | |
590 /* mmx test */ | |
591 #ifdef CONFIG_MMX | |
592 printf("MMX test\n"); | |
593 fact = 0.72; | |
594 xsize = (int)(XSIZE * fact); | |
595 ysize = (int)(YSIZE * fact); | |
596 mm_flags = MM_MMX; | |
597 s = img_resample_init(xsize, ysize, XSIZE, YSIZE); | |
598 component_resample(s, img1, xsize, xsize, ysize, | |
599 img, XSIZE, XSIZE, YSIZE); | |
600 | |
601 mm_flags = 0; | |
602 s = img_resample_init(xsize, ysize, XSIZE, YSIZE); | |
603 component_resample(s, img2, xsize, xsize, ysize, | |
604 img, XSIZE, XSIZE, YSIZE); | |
605 if (memcmp(img1, img2, xsize * ysize) != 0) { | |
606 fprintf(stderr, "mmx error\n"); | |
607 exit(1); | |
608 } | |
609 printf("MMX OK\n"); | |
610 #endif | |
611 return 0; | |
612 } | |
613 | |
614 #endif |