Mercurial > libavcodec.hg
annotate imgresample.c @ 2892:41315d0120b3 libavcodec
replace a few mov + psrlq with pshufw, there are more cases which could benefit from this but they would require us to duplicate some functions ...
the trick is from various places (my own code in libpostproc, a patch on the x264 list, ...)
author | michael |
---|---|
date | Wed, 21 Sep 2005 21:17:09 +0000 |
parents | 40765c51a7a9 |
children | 84404858c529 |
rev | line source |
---|---|
0 | 1 /* |
2 * High quality image resampling with polyphase filters | |
429 | 3 * Copyright (c) 2001 Fabrice Bellard. |
0 | 4 * |
429 | 5 * This library is free software; you can redistribute it and/or |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
0 | 9 * |
429 | 10 * This library is distributed in the hope that it will be useful, |
0 | 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 * Lesser General Public License for more details. | |
0 | 14 * |
429 | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
0 | 18 */ |
1106 | 19 |
20 /** | |
21 * @file imgresample.c | |
22 * High quality image resampling with polyphase filters. | 
23 */ | |
24 | |
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
18
diff
changeset
|
25 #include "avcodec.h" |
0 | 26 #include "dsputil.h" |
27 | |
17 | 28 #ifdef USE_FASTMEMCPY |
29 #include "fastmemcpy.h" | |
30 #endif | |
31 | |
/* NOTE(review): not referenced in this part of the file - presumably the
   number of picture planes processed; confirm at the point of use. */
#define NB_COMPONENTS 3

/* Source positions are fixed point with POS_FRAC_BITS fractional bits.
   The top PHASE_BITS bits of that fraction select one of the NB_PHASES
   coefficient banks (see get_phase()); each bank holds NB_TAPS taps. */
#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST    1  /* Test it */

#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* Right shift applied to every weighted tap sum to bring it back to pixel
   range. */
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

/* NOTE(review): not referenced in this part of the file - presumably the
   number of rows kept in ImgReSampleContext.line_buf; confirm. */
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
46 | |
47 struct ImgReSampleContext { | |
1928
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
48 int iwidth, iheight, owidth, oheight; |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
49 int topBand, bottomBand, leftBand, rightBand; |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
50 int padtop, padbottom, padleft, padright; |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
51 int pad_owidth, pad_oheight; |
0 | 52 int h_incr, v_incr; |
1064 | 53 int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */ |
54 int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */ | |
55 uint8_t *line_buf; | |
0 | 56 }; |
57 | |
2082
3dc9bbe1b152
polyphase kaiser windowed sinc and blackman nuttall windowed sinc audio resample filters
michael
parents:
2064
diff
changeset
|
/**
 * Filter-table builder, defined outside this part of the file.
 * NOTE(review): presumably fills filter with phase_count banks of tap_count
 * coefficients each; the meaning of factor, scale and type must be confirmed
 * against the definition.
 */
void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
3dc9bbe1b152
polyphase kaiser windowed sinc and blackman nuttall windowed sinc audio resample filters
michael
parents:
2064
diff
changeset
|
59 |
0 | 60 static inline int get_phase(int pos) |
61 { | |
62 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1); | |
63 } | |
64 | |
65 /* This function must be optimized */ | |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
66 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src, |
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
67 int src_width, int src_start, int src_incr, |
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
68 int16_t *filters) |
0 | 69 { |
70 int src_pos, phase, sum, i; | |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
71 const uint8_t *s; |
1064 | 72 int16_t *filter; |
0 | 73 |
74 src_pos = src_start; | |
75 for(i=0;i<dst_width;i++) { | |
76 #ifdef TEST | |
77 /* test */ | |
78 if ((src_pos >> POS_FRAC_BITS) < 0 || | |
79 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS)) | |
653 | 80 av_abort(); |
0 | 81 #endif |
82 s = src + (src_pos >> POS_FRAC_BITS); | |
83 phase = get_phase(src_pos); | |
84 filter = filters + phase * NB_TAPS; | |
85 #if NB_TAPS == 4 | |
86 sum = s[0] * filter[0] + | |
87 s[1] * filter[1] + | |
88 s[2] * filter[2] + | |
89 s[3] * filter[3]; | |
90 #else | |
91 { | |
92 int j; | |
93 sum = 0; | |
94 for(j=0;j<NB_TAPS;j++) | |
95 sum += s[j] * filter[j]; | |
96 } | |
97 #endif | |
98 sum = sum >> FILTER_BITS; | |
99 if (sum < 0) | |
100 sum = 0; | |
101 else if (sum > 255) | |
102 sum = 255; | |
103 dst[0] = sum; | |
104 src_pos += src_incr; | |
105 dst++; | |
106 } | |
107 } | |
108 | |
109 /* This function must be optimized */ | |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
110 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src, |
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
111 int wrap, int16_t *filter) |
0 | 112 { |
113 int sum, i; | |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
114 const uint8_t *s; |
0 | 115 |
116 s = src; | |
117 for(i=0;i<dst_width;i++) { | |
118 #if NB_TAPS == 4 | |
119 sum = s[0 * wrap] * filter[0] + | |
120 s[1 * wrap] * filter[1] + | |
121 s[2 * wrap] * filter[2] + | |
122 s[3 * wrap] * filter[3]; | |
123 #else | |
124 { | |
125 int j; | |
1064 | 126 uint8_t *s1 = s; |
0 | 127 |
128 sum = 0; | |
129 for(j=0;j<NB_TAPS;j++) { | |
130 sum += s1[0] * filter[j]; | |
131 s1 += wrap; | |
132 } | |
133 } | |
134 #endif | |
135 sum = sum >> FILTER_BITS; | |
136 if (sum < 0) | |
137 sum = 0; | |
138 else if (sum > 255) | |
139 sum = 255; | |
140 dst[0] = sum; | |
141 dst++; | |
142 s++; | |
143 } | |
144 } | |
145 | |
2 | 146 #ifdef HAVE_MMX |
0 | 147 |
148 #include "i386/mmx.h" | |
149 | |
/*
 * Compute one horizontally filtered pixel into MMX register reg.
 *
 * Uses and updates the caller's locals s, phase, filter and src_pos, reads
 * src, filters and src_incr, clobbers mm6 and expects mm7 to be zero.
 * The *_m2r / *_r2r / *_i2r helpers come from i386/mmx.h and are assumed to
 * wrap the MMX instructions of the same name:
 *   - load 8 source bytes and widen the low 4 to 16-bit words,
 *   - multiply-accumulate them against the 4 coefficients (pmaddwd),
 *   - add the two 32-bit partial sums, leaving the total in the low dword,
 *   - arithmetic shift right by FILTER_BITS.
 * The result is not yet saturated to 8 bits; the caller packs it.
 * NOTE(review): the 8-byte load reads 4 bytes more than the 4 taps need.
 */
#define FILTER4(reg) \
do {\
    s = src + (src_pos >> POS_FRAC_BITS);\
    phase = get_phase(src_pos);\
    filter = filters + phase * NB_TAPS;\
    movq_m2r(*s, reg);\
    punpcklbw_r2r(mm7, reg);\
    movq_m2r(*filter, mm6);\
    pmaddwd_r2r(reg, mm6);\
    movq_r2r(mm6, reg);\
    psrlq_i2r(32, reg);\
    paddd_r2r(mm6, reg);\
    psrad_i2r(FILTER_BITS, reg);\
    src_pos += src_incr;\
} while (0)
165 | |
/*
 * Debug helper: store MMX register reg into the caller's mmx_t variable
 * named tmp and print it as 16 hex digits.
 * Wrapped in do/while(0) so it behaves as a single statement, and printed
 * with %llx because the L length modifier is not defined for integer
 * conversions.
 * NOTE(review): assumes tmp.uq is the 64-bit integer view of mmx_t.
 */
#define DUMP(reg) \
do {\
    movq_r2m(reg, tmp);\
    printf(#reg "=%016llx\n", (unsigned long long)tmp.uq);\
} while (0)
167 | |
168 /* XXX: do four pixels at a time */ | |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
169 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, |
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
170 const uint8_t *src, int src_width, |
1064 | 171 int src_start, int src_incr, int16_t *filters) |
0 | 172 { |
173 int src_pos, phase; | |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
174 const uint8_t *s; |
1064 | 175 int16_t *filter; |
0 | 176 mmx_t tmp; |
177 | |
178 src_pos = src_start; | |
179 pxor_r2r(mm7, mm7); | |
180 | |
181 while (dst_width >= 4) { | |
182 | |
183 FILTER4(mm0); | |
184 FILTER4(mm1); | |
185 FILTER4(mm2); | |
186 FILTER4(mm3); | |
187 | |
188 packuswb_r2r(mm7, mm0); | |
189 packuswb_r2r(mm7, mm1); | |
190 packuswb_r2r(mm7, mm3); | |
191 packuswb_r2r(mm7, mm2); | |
192 movq_r2m(mm0, tmp); | |
193 dst[0] = tmp.ub[0]; | |
194 movq_r2m(mm1, tmp); | |
195 dst[1] = tmp.ub[0]; | |
196 movq_r2m(mm2, tmp); | |
197 dst[2] = tmp.ub[0]; | |
198 movq_r2m(mm3, tmp); | |
199 dst[3] = tmp.ub[0]; | |
200 dst += 4; | |
201 dst_width -= 4; | |
202 } | |
203 while (dst_width > 0) { | |
204 FILTER4(mm0); | |
205 packuswb_r2r(mm7, mm0); | |
206 movq_r2m(mm0, tmp); | |
207 dst[0] = tmp.ub[0]; | |
208 dst++; | |
209 dst_width--; | |
210 } | |
211 emms(); | |
212 } | |
213 | |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
214 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, |
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
215 int wrap, int16_t *filter) |
0 | 216 { |
217 int sum, i, v; | |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
218 const uint8_t *s; |
0 | 219 mmx_t tmp; |
220 mmx_t coefs[4]; | |
221 | |
222 for(i=0;i<4;i++) { | |
223 v = filter[i]; | |
224 coefs[i].uw[0] = v; | |
225 coefs[i].uw[1] = v; | |
226 coefs[i].uw[2] = v; | |
227 coefs[i].uw[3] = v; | |
228 } | |
229 | |
230 pxor_r2r(mm7, mm7); | |
231 s = src; | |
232 while (dst_width >= 4) { | |
233 movq_m2r(s[0 * wrap], mm0); | |
234 punpcklbw_r2r(mm7, mm0); | |
235 movq_m2r(s[1 * wrap], mm1); | |
236 punpcklbw_r2r(mm7, mm1); | |
237 movq_m2r(s[2 * wrap], mm2); | |
238 punpcklbw_r2r(mm7, mm2); | |
239 movq_m2r(s[3 * wrap], mm3); | |
240 punpcklbw_r2r(mm7, mm3); | |
241 | |
242 pmullw_m2r(coefs[0], mm0); | |
243 pmullw_m2r(coefs[1], mm1); | |
244 pmullw_m2r(coefs[2], mm2); | |
245 pmullw_m2r(coefs[3], mm3); | |
246 | |
247 paddw_r2r(mm1, mm0); | |
248 paddw_r2r(mm3, mm2); | |
249 paddw_r2r(mm2, mm0); | |
250 psraw_i2r(FILTER_BITS, mm0); | |
251 | |
252 packuswb_r2r(mm7, mm0); | |
253 movq_r2m(mm0, tmp); | |
254 | |
1064 | 255 *(uint32_t *)dst = tmp.ud[0]; |
0 | 256 dst += 4; |
257 s += 4; | |
258 dst_width -= 4; | |
259 } | |
260 while (dst_width > 0) { | |
261 sum = s[0 * wrap] * filter[0] + | |
262 s[1 * wrap] * filter[1] + | |
263 s[2 * wrap] * filter[2] + | |
264 s[3 * wrap] * filter[3]; | |
265 sum = sum >> FILTER_BITS; | |
266 if (sum < 0) | |
267 sum = 0; | |
268 else if (sum > 255) | |
269 sum = 255; | |
270 dst[0] = sum; | |
271 dst++; | |
272 s++; | |
273 dst_width--; | |
274 } | |
275 emms(); | |
276 } | |
277 #endif | |
278 | |
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
279 #ifdef HAVE_ALTIVEC |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
280 typedef union { |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
281 vector unsigned char v; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
282 unsigned char c[16]; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
283 } vec_uc_t; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
284 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
285 typedef union { |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
286 vector signed short v; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
287 signed short s[8]; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
288 } vec_ss_t; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
289 |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
290 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src, |
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
291 int wrap, int16_t *filter) |
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
292 { |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
293 int sum, i; |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
294 const uint8_t *s; |
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
295 vector unsigned char *tv, tmp, dstv, zero; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
296 vec_ss_t srchv[4], srclv[4], fv[4]; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
297 vector signed short zeros, sumhv, sumlv; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
298 s = src; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
299 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
300 for(i=0;i<4;i++) |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
301 { |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
302 /* |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
303 The vec_madds later on does an implicit >>15 on the result. |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
304 Since FILTER_BITS is 8, and we have 15 bits of magnitude in |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
305 a signed short, we have just enough bits to pre-shift our |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
306 filter constants <<7 to compensate for vec_madds. |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
307 */ |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
308 fv[i].s[0] = filter[i] << (15-FILTER_BITS); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
309 fv[i].v = vec_splat(fv[i].v, 0); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
310 } |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
311 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
312 zero = vec_splat_u8(0); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
313 zeros = vec_splat_s16(0); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
314 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
315 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
316 /* |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
317 When we're resampling, we'd ideally like both our input buffers, |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
318 and output buffers to be 16-byte aligned, so we can do both aligned |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
319 reads and writes. Sadly we can't always have this at the moment, so |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
320 we opt for aligned writes, as unaligned writes have a huge overhead. |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
321 To do this, do enough scalar resamples to get dst 16-byte aligned. |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
322 */ |
898
6d5e3fe7aea1
Simplify an expression and eliminate a compile warning
philipjsg
parents:
894
diff
changeset
|
323 i = (-(int)dst) & 0xf; |
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
324 while(i>0) { |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
325 sum = s[0 * wrap] * filter[0] + |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
326 s[1 * wrap] * filter[1] + |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
327 s[2 * wrap] * filter[2] + |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
328 s[3 * wrap] * filter[3]; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
329 sum = sum >> FILTER_BITS; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
330 if (sum<0) sum = 0; else if (sum>255) sum=255; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
331 dst[0] = sum; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
332 dst++; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
333 s++; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
334 dst_width--; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
335 i--; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
336 } |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
337 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
338 /* Do our altivec resampling on 16 pixels at once. */ |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
339 while(dst_width>=16) { |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
340 /* |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
341 Read 16 (potentially unaligned) bytes from each of |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
342 4 lines into 4 vectors, and split them into shorts. |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
343 Interleave the multipy/accumulate for the resample |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
344 filter with the loads to hide the 3 cycle latency |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
345 the vec_madds have. |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
346 */ |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
347 tv = (vector unsigned char *) &s[0 * wrap]; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
348 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap])); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
349 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
350 srclv[0].v = (vector signed short) vec_mergel(zero, tmp); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
351 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
352 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
353 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
354 tv = (vector unsigned char *) &s[1 * wrap]; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
355 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap])); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
356 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
357 srclv[1].v = (vector signed short) vec_mergel(zero, tmp); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
358 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
359 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
360 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
361 tv = (vector unsigned char *) &s[2 * wrap]; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
362 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap])); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
363 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
364 srclv[2].v = (vector signed short) vec_mergel(zero, tmp); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
365 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
366 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
367 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
368 tv = (vector unsigned char *) &s[3 * wrap]; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
369 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap])); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
370 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
371 srclv[3].v = (vector signed short) vec_mergel(zero, tmp); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
372 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
373 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
374 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
375 /* |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
376 Pack the results into our destination vector, |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
377 and do an aligned write of that back to memory. |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
378 */ |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
379 dstv = vec_packsu(sumhv, sumlv) ; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
380 vec_st(dstv, 0, (vector unsigned char *) dst); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
381 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
382 dst+=16; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
383 s+=16; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
384 dst_width-=16; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
385 } |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
386 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
387 /* |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
388 If there are any leftover pixels, resample them |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
389 with the slow scalar method. |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
390 */ |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
391 while(dst_width>0) { |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
392 sum = s[0 * wrap] * filter[0] + |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
393 s[1 * wrap] * filter[1] + |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
394 s[2 * wrap] * filter[2] + |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
395 s[3 * wrap] * filter[3]; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
396 sum = sum >> FILTER_BITS; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
397 if (sum<0) sum = 0; else if (sum>255) sum=255; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
398 dst[0] = sum; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
399 dst++; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
400 s++; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
401 dst_width--; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
402 } |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
403 } |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
404 #endif |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
405 |
0 | 406 /* slow version to handle limit cases. Does not need optimisation */ |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
407 static void h_resample_slow(uint8_t *dst, int dst_width, |
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
408 const uint8_t *src, int src_width, |
1064 | 409 int src_start, int src_incr, int16_t *filters) |
0 | 410 { |
411 int src_pos, phase, sum, j, v, i; | |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
412 const uint8_t *s, *src_end; |
1064 | 413 int16_t *filter; |
0 | 414 |
415 src_end = src + src_width; | |
416 src_pos = src_start; | |
417 for(i=0;i<dst_width;i++) { | |
418 s = src + (src_pos >> POS_FRAC_BITS); | |
419 phase = get_phase(src_pos); | |
420 filter = filters + phase * NB_TAPS; | |
421 sum = 0; | |
422 for(j=0;j<NB_TAPS;j++) { | |
423 if (s < src) | |
424 v = src[0]; | |
425 else if (s >= src_end) | |
426 v = src_end[-1]; | |
427 else | |
428 v = s[0]; | |
429 sum += v * filter[j]; | |
430 s++; | |
431 } | |
432 sum = sum >> FILTER_BITS; | |
433 if (sum < 0) | |
434 sum = 0; | |
435 else if (sum > 255) | |
436 sum = 255; | |
437 dst[0] = sum; | |
438 src_pos += src_incr; | |
439 dst++; | |
440 } | |
441 } | |
442 | |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
443 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src, |
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
444 int src_width, int src_start, int src_incr, |
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
445 int16_t *filters) |
0 | 446 { |
447 int n, src_end; | |
448 | |
449 if (src_start < 0) { | |
450 n = (0 - src_start + src_incr - 1) / src_incr; | |
451 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters); | |
452 dst += n; | |
453 dst_width -= n; | |
454 src_start += n * src_incr; | |
455 } | |
456 src_end = src_start + dst_width * src_incr; | |
457 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) { | |
458 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / | |
459 src_incr; | |
460 } else { | |
461 n = dst_width; | |
462 } | |
2 | 463 #ifdef HAVE_MMX |
0 | 464 if ((mm_flags & MM_MMX) && NB_TAPS == 4) |
465 h_resample_fast4_mmx(dst, n, | |
466 src, src_width, src_start, src_incr, filters); | |
467 else | |
468 #endif | |
469 h_resample_fast(dst, n, | |
470 src, src_width, src_start, src_incr, filters); | |
471 if (n < dst_width) { | |
472 dst += n; | |
473 dst_width -= n; | |
474 src_start += n * src_incr; | |
475 h_resample_slow(dst, dst_width, | |
476 src, src_width, src_start, src_incr, filters); | |
477 } | |
478 } | |
479 | |
480 static void component_resample(ImgReSampleContext *s, | |
1064 | 481 uint8_t *output, int owrap, int owidth, int oheight, |
482 uint8_t *input, int iwrap, int iwidth, int iheight) | |
0 | 483 { |
484 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y; | |
1064 | 485 uint8_t *new_line, *src_line; |
0 | 486 |
487 last_src_y = - FCENTER - 1; | |
488 /* position of the bottom of the filter in the source image */ | |
489 src_y = (last_src_y + NB_TAPS) * POS_FRAC; | |
490 ring_y = NB_TAPS; /* position in ring buffer */ | |
491 for(y=0;y<oheight;y++) { | |
492 /* apply horizontal filter on new lines from input if needed */ | |
493 src_y1 = src_y >> POS_FRAC_BITS; | |
494 while (last_src_y < src_y1) { | |
495 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS) | |
496 ring_y = NB_TAPS; | |
497 last_src_y++; | |
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
498 /* handle limit conditions : replicate line (slightly |
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
499 inefficient because we filter multiple times) */ |
0 | 500 y1 = last_src_y; |
501 if (y1 < 0) { | |
502 y1 = 0; | |
503 } else if (y1 >= iheight) { | |
504 y1 = iheight - 1; | |
505 } | |
506 src_line = input + y1 * iwrap; | |
507 new_line = s->line_buf + ring_y * owidth; | |
508 /* apply filter and handle limit cases correctly */ | |
509 h_resample(new_line, owidth, | |
510 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr, | |
511 &s->h_filters[0][0]); | |
512 /* handle ring buffer wraping */ | |
513 if (ring_y >= LINE_BUF_HEIGHT) { | |
514 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth, | |
515 new_line, owidth); | |
516 } | |
517 } | |
518 /* apply vertical filter */ | |
519 phase_y = get_phase(src_y); | |
2 | 520 #ifdef HAVE_MMX |
0 | 521 /* desactivated MMX because loss of precision */ |
522 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0) | |
523 v_resample4_mmx(output, owidth, | |
524 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, | |
525 &s->v_filters[phase_y][0]); | |
526 else | |
527 #endif | |
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
528 #ifdef HAVE_ALTIVEC |
920
a0ad8e3452f2
practically disabling altivec resampling code (some ppl said its broken) patch by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
898
diff
changeset
|
529 if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6) |
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
530 v_resample16_altivec(output, owidth, |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
531 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
532 &s->v_filters[phase_y][0]); |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
533 else |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
653
diff
changeset
|
534 #endif |
0 | 535 v_resample(output, owidth, |
536 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, | |
537 &s->v_filters[phase_y][0]); | |
538 | |
539 src_y += s->v_incr; | |
1928
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
540 |
0 | 541 output += owrap; |
542 } | |
543 } | |
544 | |
545 ImgReSampleContext *img_resample_init(int owidth, int oheight, | |
546 int iwidth, int iheight) | |
547 { | |
1928
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
548 return img_resample_full_init(owidth, oheight, iwidth, iheight, |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
549 0, 0, 0, 0, 0, 0, 0, 0); |
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
550 } |
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
551 |
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
552 ImgReSampleContext *img_resample_full_init(int owidth, int oheight, |
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
553 int iwidth, int iheight, |
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
554 int topBand, int bottomBand, |
1928
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
555 int leftBand, int rightBand, |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
556 int padtop, int padbottom, |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
557 int padleft, int padright) |
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
558 { |
0 | 559 ImgReSampleContext *s; |
560 | |
561 s = av_mallocz(sizeof(ImgReSampleContext)); | |
562 if (!s) | |
563 return NULL; | |
2422 | 564 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS)) |
565 return NULL; | |
0 | 566 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS)); |
567 if (!s->line_buf) | |
568 goto fail; | |
569 | |
570 s->owidth = owidth; | |
571 s->oheight = oheight; | |
572 s->iwidth = iwidth; | |
573 s->iheight = iheight; | |
1928
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
574 |
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
575 s->topBand = topBand; |
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
576 s->bottomBand = bottomBand; |
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
577 s->leftBand = leftBand; |
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
578 s->rightBand = rightBand; |
0 | 579 |
1928
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
580 s->padtop = padtop; |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
581 s->padbottom = padbottom; |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
582 s->padleft = padleft; |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
583 s->padright = padright; |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
584 |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
585 s->pad_owidth = owidth - (padleft + padright); |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
586 s->pad_oheight = oheight - (padtop + padbottom); |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
587 |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
588 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth; |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
589 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight; |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
590 |
2082
3dc9bbe1b152
polyphase kaiser windowed sinc and blackman nuttall windowed sinc audio resample filters
michael
parents:
2064
diff
changeset
|
591 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth / |
3dc9bbe1b152
polyphase kaiser windowed sinc and blackman nuttall windowed sinc audio resample filters
michael
parents:
2064
diff
changeset
|
592 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0); |
3dc9bbe1b152
polyphase kaiser windowed sinc and blackman nuttall windowed sinc audio resample filters
michael
parents:
2064
diff
changeset
|
593 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight / |
3dc9bbe1b152
polyphase kaiser windowed sinc and blackman nuttall windowed sinc audio resample filters
michael
parents:
2064
diff
changeset
|
594 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0); |
0 | 595 |
596 return s; | |
1928
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
597 fail: |
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
18
diff
changeset
|
598 av_free(s); |
0 | 599 return NULL; |
600 } | |
601 | |
602 void img_resample(ImgReSampleContext *s, | |
1488
766a2f4edbea
avcodec const correctness patch by (Drew Hess <dhess at ilm dot com>)
michaelni
parents:
1106
diff
changeset
|
603 AVPicture *output, const AVPicture *input) |
0 | 604 { |
605 int i, shift; | |
1928
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
606 uint8_t* optr; |
0 | 607 |
1928
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
608 for (i=0;i<3;i++) { |
0 | 609 shift = (i == 0) ? 0 : 1; |
1928
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
610 |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
611 optr = output->data[i] + (((output->linesize[i] * |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
612 s->padtop) + s->padleft) >> shift); |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
613 |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
614 component_resample(s, optr, output->linesize[i], |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
615 s->pad_owidth >> shift, s->pad_oheight >> shift, |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
616 input->data[i] + (input->linesize[i] * |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
617 (s->topBand >> shift)) + (s->leftBand >> shift), |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
618 input->linesize[i], ((s->iwidth - s->leftBand - |
0c23a5564489
padding support in ffmpeg patch by (Todd Kirby <doubleshot at pacbell dot net>)
michael
parents:
1488
diff
changeset
|
619 s->rightBand) >> shift), |
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
620 (s->iheight - s->topBand - s->bottomBand) >> shift); |
0 | 621 } |
622 } | |
623 | |
624 void img_resample_close(ImgReSampleContext *s) | |
625 { | |
396
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
18
diff
changeset
|
626 av_free(s->line_buf); |
fce0a2520551
removed useless header includes - use av memory functions
glantau
parents:
18
diff
changeset
|
627 av_free(s); |
0 | 628 } |
629 | |
630 #ifdef TEST | |
2400
17ec73c65748
imgresample test cleanup patch by (Panagiotis Issaris <takis )( lumumba d0t luc d0t ac.be>)
michael
parents:
2082
diff
changeset
|
631 #include <stdio.h> |
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
632 |
/* input test image: XSIZE x YSIZE, one byte per pixel */
#define XSIZE 256
#define YSIZE 256
static uint8_t img[XSIZE * YSIZE];

/* output buffers, sized for the largest scale factor exercised (2x);
   file-local so the generic names cannot clash with other objects */
#define XSIZE1 512
#define YSIZE1 512
static uint8_t img1[XSIZE1 * YSIZE1];
static uint8_t img2[XSIZE1 * YSIZE1];
0 | 643 |
/**
 * Write an 8-bit single-channel image as a binary PGM ("P5") file.
 *
 * Failures are reported on stderr and the function returns without writing;
 * it never dereferences a NULL stream.
 *
 * @param filename  path of the file to create or overwrite
 * @param img       xsize * ysize pixels, row after row
 * @param xsize     image width in pixels
 * @param ysize     image height in pixels
 */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
#undef fprintf
    FILE *f;
    size_t count = (size_t)xsize * (size_t)ysize;

    /* "wb": the pixel data is binary and must not be newline-translated */
    f=fopen(filename,"wb");
    if (!f) {
        fprintf(stderr, "save_pgm: cannot open %s\n", filename);
    } else {
        fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
        if (fwrite(img, 1, count, f) != count)
            fprintf(stderr, "save_pgm: short write to %s\n", filename);
        fclose(f);
    }
#define fprintf please_use_av_log
}
654 | |
1064 | 655 static void dump_filter(int16_t *filter) |
0 | 656 { |
657 int i, ph; | |
658 | |
659 for(ph=0;ph<NB_PHASES;ph++) { | |
2400
17ec73c65748
imgresample test cleanup patch by (Panagiotis Issaris <takis )( lumumba d0t luc d0t ac.be>)
michael
parents:
2082
diff
changeset
|
660 av_log(NULL, AV_LOG_INFO, "%2d: ", ph); |
0 | 661 for(i=0;i<NB_TAPS;i++) { |
2400
17ec73c65748
imgresample test cleanup patch by (Panagiotis Issaris <takis )( lumumba d0t luc d0t ac.be>)
michael
parents:
2082
diff
changeset
|
662 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0); |
0 | 663 } |
2400
17ec73c65748
imgresample test cleanup patch by (Panagiotis Issaris <takis )( lumumba d0t luc d0t ac.be>)
michael
parents:
2082
diff
changeset
|
664 av_log(NULL, AV_LOG_INFO, "\n"); |
0 | 665 } |
666 } | |
667 | |
2 | 668 #ifdef HAVE_MMX |
644 | 669 int mm_flags; |
0 | 670 #endif |
671 | |
672 int main(int argc, char **argv) | |
673 { | |
674 int x, y, v, i, xsize, ysize; | |
675 ImgReSampleContext *s; | |
676 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 }; | |
677 char buf[256]; | |
678 | |
679 /* build test image */ | |
680 for(y=0;y<YSIZE;y++) { | |
681 for(x=0;x<XSIZE;x++) { | |
682 if (x < XSIZE/2 && y < YSIZE/2) { | |
683 if (x < XSIZE/4 && y < YSIZE/4) { | |
684 if ((x % 10) <= 6 && | |
685 (y % 10) <= 6) | |
686 v = 0xff; | |
687 else | |
688 v = 0x00; | |
689 } else if (x < XSIZE/4) { | |
690 if (x & 1) | |
691 v = 0xff; | |
692 else | |
693 v = 0; | |
694 } else if (y < XSIZE/4) { | |
695 if (y & 1) | |
696 v = 0xff; | |
697 else | |
698 v = 0; | |
699 } else { | |
700 if (y < YSIZE*3/8) { | |
701 if ((y+x) & 1) | |
702 v = 0xff; | |
703 else | |
704 v = 0; | |
705 } else { | |
706 if (((x+3) % 4) <= 1 && | |
707 ((y+3) % 4) <= 1) | |
708 v = 0xff; | |
709 else | |
710 v = 0x00; | |
711 } | |
712 } | |
713 } else if (x < XSIZE/2) { | |
714 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2); | |
715 } else if (y < XSIZE/2) { | |
716 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2); | |
717 } else { | |
718 v = ((x + y - XSIZE) * 255) / XSIZE; | |
719 } | |
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
720 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v; |
0 | 721 } |
722 } | |
723 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE); | |
724 for(i=0;i<sizeof(factors)/sizeof(float);i++) { | |
725 fact = factors[i]; | |
726 xsize = (int)(XSIZE * fact); | |
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
727 ysize = (int)((YSIZE - 100) * fact); |
2400
17ec73c65748
imgresample test cleanup patch by (Panagiotis Issaris <takis )( lumumba d0t luc d0t ac.be>)
michael
parents:
2082
diff
changeset
|
728 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0); |
17ec73c65748
imgresample test cleanup patch by (Panagiotis Issaris <takis )( lumumba d0t luc d0t ac.be>)
michael
parents:
2082
diff
changeset
|
729 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact); |
0 | 730 dump_filter(&s->h_filters[0][0]); |
731 component_resample(s, img1, xsize, xsize, ysize, | |
630
b4ee42142ad1
croping patch by (talus25 at speakeasy dot net) with fixes from atmos & me
michaelni
parents:
429
diff
changeset
|
732 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100); |
0 | 733 img_resample_close(s); |
734 | |
2423 | 735 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i); |
0 | 736 save_pgm(buf, img1, xsize, ysize); |
737 } | |
738 | |
739 /* mmx test */ | |
2 | 740 #ifdef HAVE_MMX |
2400
17ec73c65748
imgresample test cleanup patch by (Panagiotis Issaris <takis )( lumumba d0t luc d0t ac.be>)
michael
parents:
2082
diff
changeset
|
741 av_log(NULL, AV_LOG_INFO, "MMX test\n"); |
0 | 742 fact = 0.72; |
743 xsize = (int)(XSIZE * fact); | |
744 ysize = (int)(YSIZE * fact); | |
745 mm_flags = MM_MMX; | |
746 s = img_resample_init(xsize, ysize, XSIZE, YSIZE); | |
747 component_resample(s, img1, xsize, xsize, ysize, | |
748 img, XSIZE, XSIZE, YSIZE); | |
749 | |
750 mm_flags = 0; | |
751 s = img_resample_init(xsize, ysize, XSIZE, YSIZE); | |
752 component_resample(s, img2, xsize, xsize, ysize, | |
753 img, XSIZE, XSIZE, YSIZE); | |
754 if (memcmp(img1, img2, xsize * ysize) != 0) { | |
2400
17ec73c65748
imgresample test cleanup patch by (Panagiotis Issaris <takis )( lumumba d0t luc d0t ac.be>)
michael
parents:
2082
diff
changeset
|
755 av_log(NULL, AV_LOG_ERROR, "mmx error\n"); |
0 | 756 exit(1); |
757 } | |
2400
17ec73c65748
imgresample test cleanup patch by (Panagiotis Issaris <takis )( lumumba d0t luc d0t ac.be>)
michael
parents:
2082
diff
changeset
|
758 av_log(NULL, AV_LOG_INFO, "MMX OK\n"); |
0 | 759 #endif |
760 return 0; | |
761 } | |
762 | |
763 #endif |