Mercurial > mplayer.hg
annotate libmpcodecs/vf_gradfun.c @ 31824:e26b1c667fd8
Add const to avoid warnings.
The const on the return type is not correct compared to the real win32 API
functions, but that really does not matter for us, avoiding the warning is
more useful.
author | reimar |
---|---|
date | Mon, 02 Aug 2010 17:32:42 +0000 |
parents | 8b6727d2d479 |
children | b4ce15212bfc |
rev | line source |
---|---|
29371 | 1 /* |
2 * Copyright (C) 2009 Loren Merritt <lorenm@u.washington.edu> | |
3 * | |
4 * This file is part of MPlayer. | |
5 * | |
6 * MPlayer is free software; you can redistribute it and/or modify | |
7 * it under the terms of the GNU General Public License as published by | |
8 * the Free Software Foundation; either version 2 of the License, or | |
9 * (at your option) any later version. | |
10 * | |
11 * MPlayer is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 * GNU General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU General Public License along | |
17 * with MPlayer; if not, write to the Free Software Foundation, Inc., | |
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
19 */ | |
20 | |
21 /* | |
22 * Debanding algorithm (from gradfun2db by prunedtree): | |
23 * Boxblur. | |
24 * Foreach pixel, if it's within threshold of the blurred value, make it closer. | |
25 * So now we have a smoothed and higher bitdepth version of all the shallow | |
26 * gradients, while leaving detailed areas untouched. | |
27 * Dither it back to 8bit. | |
28 */ | |
29 | |
30 #include <stdio.h> | |
31 #include <stdlib.h> | |
32 #include <string.h> | |
33 #include <inttypes.h> | |
34 | |
35 #include "config.h" | |
36 #include "cpudetect.h" | |
37 #include "img_format.h" | |
38 #include "mp_image.h" | |
39 #include "vf.h" | |
40 #include "libvo/fastmemcpy.h" | |
41 #include "libavutil/avutil.h" | |
42 #include "libavutil/x86_cpu.h" | |
43 | |
/* Per-instance state of the gradfun filter. */
struct vf_priv_s {
    /* Strength value handed to filter_line; derived from the user
     * threshold in vf_open(). */
    int thresh;
    /* Blur radius for the luma plane; clipped to an even value in
     * vf_open(). */
    int radius;
    /* Scratch memory (re)allocated in config() and carved up by filter(). */
    uint16_t *buf;

    /* Debands and dithers one output line (C or SIMD implementation). */
    void (*filter_line)(uint8_t *dst, uint8_t *src, uint16_t *dc,
                        int width, int thresh, const uint16_t *dithers);
    /* Accumulates one line of the box blur (C or SIMD implementation). */
    void (*blur_line)(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                      uint8_t *src, int sstride, int width);
};
53 | |
/* 16-byte aligned word constants used as memory operands by the SIMD code. */
static const uint16_t __attribute__((aligned(16))) pw_7f[8] = {
    127, 127, 127, 127, 127, 127, 127, 127
};
static const uint16_t __attribute__((aligned(16))) pw_ff[8] = {
    255, 255, 255, 255, 255, 255, 255, 255
};

/* 8x8 dither matrix: filter() selects the row with (y & 7) and
 * filter_line_c() the column with (x & 7). */
static const uint16_t __attribute__((aligned(16))) dither[8][8] = {
    {  0,  96,  24, 120,   6, 102,  30, 126 },
    { 64,  32,  88,  56,  70,  38,  94,  62 },
    { 16, 112,   8, 104,  22, 118,  14, 110 },
    { 80,  48,  72,  40,  86,  54,  78,  46 },
    {  4, 100,  28, 124,   2,  98,  26, 122 },
    { 68,  36,  92,  60,  66,  34,  90,  58 },
    { 20, 116,  12, 108,  18, 114,  10, 106 },
    { 84,  52,  76,  44,  82,  50,  74,  42 },
};
66 | |
/*
 * Portable implementation of the per-line deband + dither step.
 *
 * dst     output line, width bytes
 * src     input line, width bytes
 * dc      blurred reference values; pixel i uses entry (i+1)/2
 * width   number of pixels to process
 * thresh  strength factor applied to |dc - pixel|
 * dithers dither values, indexed with (i & 7)
 */
static void filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc,
                          int width, int thresh, const uint16_t *dithers)
{
    int i;

    for (i = 0; i < width; i++) {
        /* work with 7 fractional bits */
        int value      = src[i] << 7;
        const int diff = dc[(i + 1) >> 1] - value;
        int weight     = abs(diff) * thresh >> 16;

        /* the pull towards the blurred value fades out as the difference
         * grows and is zero once the scaled difference reaches 127 */
        weight = FFMAX(0, 127 - weight);
        weight = weight * weight * diff >> 14;

        value += weight + dithers[i & 7];
        dst[i] = av_clip_uint8(value >> 7);
    }
}
81 | |
/*
 * Portable implementation of one box-blur accumulation step.
 *
 * For every output column i the 2x2 source pixels at columns 2*i, 2*i+1 of
 * the rows src and src+sstride are added to buf1[i]; the sum is stored in
 * buf[i] and the difference to the previous content of buf[i] is written
 * to dc[i].
 */
static void blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                        uint8_t *src, int sstride, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        const uint8_t *top    = src + 2 * i;
        const uint8_t *bottom = top + sstride;
        const int sum  = buf1[i] + top[0] + top[1] + bottom[0] + bottom[1];
        const int prev = buf[i];

        buf[i] = sum;
        dc[i]  = sum - prev;
    }
}
93 | |
30976
6955998c187e
Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents:
30920
diff
changeset
|
94 #if HAVE_MMX2 |
/*
 * MMX2 version of the filter_line callback (same arguments as
 * filter_line_c).
 *
 * The asm loop handles 4 pixels per iteration. If width is not a multiple
 * of 4, the trailing (width & 3) pixels are processed by filter_line_c
 * first and width is rounded down.
 * Only the first four dither words are loaded (movq of *dithers) and
 * reused for every group of 4 pixels.
 */
29371 | 95 static void filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, |
96 int width, int thresh, const uint16_t *dithers) | |
97 { | |
98 intptr_t x; | |
/* let the C code handle the pixels that do not fill a group of 4 */
99 if (width&3) { | |
100 x = width&~3; | |
101 filter_line_c(dst+x, src+x, dc+x/2, width-x, thresh, dithers); | |
102 width = x; | |
103 } | |
/* x counts from -width up to 0; the pointer operands below are biased by
 * +width (dc by width/2) so that (ptr,x) addresses the current pixels and
 * "add $4 / jl" ends the loop when x reaches 0 */
104 x = -width; | |
/* operands: %0 = x, %1 = dst+width, %2 = src+width, %3 = dc+width/2,
 * %4 = thresh, %5 = *dithers, %6 = *pw_7f */
30920 | 105 __asm__ volatile( |
29371 | 106 "movd %4, %%mm5 \n" |
107 "pxor %%mm7, %%mm7 \n" | |
108 "pshufw $0, %%mm5, %%mm5 \n" | |
109 "movq %6, %%mm6 \n" | |
110 "movq %5, %%mm4 \n" | |
111 "1: \n" | |
112 "movd (%2,%0), %%mm0 \n" | |
113 "movd (%3,%0), %%mm1 \n" | |
114 "punpcklbw %%mm7, %%mm0 \n" | |
115 "punpcklwd %%mm1, %%mm1 \n" | |
116 "psllw $7, %%mm0 \n" | |
117 "pxor %%mm2, %%mm2 \n" | |
118 "psubw %%mm0, %%mm1 \n" // delta = dc - pix |
119 "psubw %%mm1, %%mm2 \n" | |
120 "pmaxsw %%mm1, %%mm2 \n" | |
121 "pmulhuw %%mm5, %%mm2 \n" // m = abs(delta) * thresh >> 16 |
122 "psubw %%mm6, %%mm2 \n" | |
123 "pminsw %%mm7, %%mm2 \n" // m = -max(0, 127-m) |
124 "pmullw %%mm2, %%mm2 \n" | |
125 "paddw %%mm4, %%mm0 \n" // pix += dither |
126 "pmulhw %%mm2, %%mm1 \n" | |
127 "psllw $2, %%mm1 \n" // m = m*m*delta >> 14 |
128 "paddw %%mm1, %%mm0 \n" // pix += m |
129 "psraw $7, %%mm0 \n" | |
130 "packuswb %%mm0, %%mm0 \n" | |
131 "movd %%mm0, (%1,%0) \n" // dst = clip(pix>>7) |
132 "add $4, %0 \n" | |
133 "jl 1b \n" | |
134 "emms \n" | |
135 :"+r"(x) | |
136 :"r"(dst+width), "r"(src+width), "r"(dc+width/2), | |
137 "rm"(thresh), "m"(*dithers), "m"(*pw_7f) | |
138 :"memory" | |
139 ); | |
140 } | |
30976
6955998c187e
Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents:
30920
diff
changeset
|
141 #endif |
29371 | 142 |
30976
6955998c187e
Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents:
30920
diff
changeset
|
143 #if HAVE_SSSE3 |
/*
 * SSSE3 version of the filter_line callback (same arguments as
 * filter_line_c).
 *
 * The asm loop handles 8 pixels per iteration. If width is not a multiple
 * of 8, the trailing (width & 7) pixels are processed by filter_line_c
 * first and width is rounded down.
 * *dithers and *pw_7f are loaded with movdqa, so both must be 16-byte
 * aligned.
 */
29371 | 144 static void filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, |
145 int width, int thresh, const uint16_t *dithers) | |
146 { | |
147 intptr_t x; | |
148 if (width&7) { | |
149 // could be 10% faster if I somehow eliminated this | |
150 x = width&~7; | |
151 filter_line_c(dst+x, src+x, dc+x/2, width-x, thresh, dithers); | |
152 width = x; | |
153 } | |
/* x counts from -width up to 0; the pointer operands below are biased by
 * +width (dc by width/2) so that (ptr,x) addresses the current pixels and
 * "add $8 / jl" ends the loop when x reaches 0 */
154 x = -width; | |
/* operands: %0 = x, %1 = dst+width, %2 = src+width, %3 = dc+width/2,
 * %4 = thresh, %5 = *dithers, %6 = *pw_7f */
30920 | 155 __asm__ volatile( |
29371 | 156 "movd %4, %%xmm5 \n" |
157 "pxor %%xmm7, %%xmm7 \n" | |
158 "pshuflw $0,%%xmm5, %%xmm5 \n" | |
159 "movdqa %6, %%xmm6 \n" | |
160 "punpcklqdq %%xmm5, %%xmm5 \n" | |
161 "movdqa %5, %%xmm4 \n" | |
162 "1: \n" | |
163 "movq (%2,%0), %%xmm0 \n" | |
164 "movq (%3,%0), %%xmm1 \n" | |
165 "punpcklbw %%xmm7, %%xmm0 \n" | |
166 "punpcklwd %%xmm1, %%xmm1 \n" | |
167 "psllw $7, %%xmm0 \n" | |
168 "psubw %%xmm0, %%xmm1 \n" // delta = dc - pix |
169 "pabsw %%xmm1, %%xmm2 \n" | |
170 "pmulhuw %%xmm5, %%xmm2 \n" // m = abs(delta) * thresh >> 16 |
171 "psubw %%xmm6, %%xmm2 \n" | |
172 "pminsw %%xmm7, %%xmm2 \n" // m = -max(0, 127-m) |
173 "pmullw %%xmm2, %%xmm2 \n" | |
174 "psllw $1, %%xmm2 \n" | |
175 "paddw %%xmm4, %%xmm0 \n" // pix += dither |
176 "pmulhrsw %%xmm2, %%xmm1 \n" // m = m*m*delta >> 14 |
177 "paddw %%xmm1, %%xmm0 \n" // pix += m |
178 "psraw $7, %%xmm0 \n" | |
179 "packuswb %%xmm0, %%xmm0 \n" | |
180 "movq %%xmm0, (%1,%0) \n" // dst = clip(pix>>7) |
181 "add $8, %0 \n" | |
182 "jl 1b \n" | |
183 :"+&r"(x) | |
184 :"r"(dst+width), "r"(src+width), "r"(dc+width/2), | |
185 "rm"(thresh), "m"(*dithers), "m"(*pw_7f) | |
186 :"memory" | |
187 ); | |
188 } | |
30976
6955998c187e
Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents:
30920
diff
changeset
|
189 #endif // HAVE_SSSE3 |
29371 | 190 |
30983
8b6727d2d479
cosmetics: Reorder some x86-related preprocessor conditionals.
diego
parents:
30976
diff
changeset
|
191 #if HAVE_SSE2 && HAVE_6REGS |
/*
 * Body of blur_line_sse2; expects dc, buf, buf1, src, sstride and width to
 * be in scope and declares the loop counter x itself.
 *
 * load is the SSE2 mnemonic used to read the two source rows ("movdqu" or
 * "movdqa"). x is a byte offset running from -2*width up to 0; every
 * iteration reads 16 bytes from each of the rows src and src+sstride, adds
 * horizontally neighbouring bytes (even bytes masked with pw_ff, odd bytes
 * shifted down by 8), adds the 8 words at buf1, stores the sums to buf and
 * stores (sum - previous buf content) to dc.
 * buf and dc are accessed with movdqa and buf1 is a direct paddw memory
 * operand, so all three must be 16-byte aligned.
 */
29371 | 192 #define BLURV(load)\
193 intptr_t x = -2*width;\ | |
30920 | 194 __asm__ volatile(\ |
29371 | 195 "movdqa %6, %%xmm7 \n"\ |
196 "1: \n"\ | |
197 load" (%4,%0), %%xmm0 \n"\ | |
198 load" (%5,%0), %%xmm1 \n"\ | |
199 "movdqa %%xmm0, %%xmm2 \n"\ | |
200 "movdqa %%xmm1, %%xmm3 \n"\ | |
201 "psrlw $8, %%xmm0 \n"\ | |
202 "psrlw $8, %%xmm1 \n"\ | |
203 "pand %%xmm7, %%xmm2 \n"\ | |
204 "pand %%xmm7, %%xmm3 \n"\ | |
205 "paddw %%xmm1, %%xmm0 \n"\ | |
206 "paddw %%xmm3, %%xmm2 \n"\ | |
207 "paddw %%xmm2, %%xmm0 \n"\ | |
208 "paddw (%2,%0), %%xmm0 \n"\ | |
209 "movdqa (%1,%0), %%xmm1 \n"\ | |
210 "movdqa %%xmm0, (%1,%0) \n"\ | |
211 "psubw %%xmm1, %%xmm0 \n"\ | |
212 "movdqa %%xmm0, (%3,%0) \n"\ | |
213 "add $16, %0 \n"\ | |
214 "jl 1b \n"\ | |
215 :"+&r"(x)\ | |
216 :"r"(buf+width),\ | |
217 "r"(buf1+width),\ | |
218 "r"(dc+width),\ | |
219 "r"(src+width*2),\ | |
220 "r"(src+width*2+sstride),\ | |
221 "m"(*pw_ff)\ | |
222 :"memory"\ | |
223 ); | |
224 | |
/*
 * SSE2 version of the blur_line callback (same arguments as blur_line_c).
 * Aligned loads are used for the source rows only when both the source
 * pointer and the stride are multiples of 16; otherwise unaligned loads
 * are used.
 */
static void blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                           uint8_t *src, int sstride, int width)
{
    const int rows_aligned = !(((intptr_t)src | sstride) & 15);

    if (rows_aligned) {
        BLURV("movdqa");
    } else {
        BLURV("movdqu");
    }
}
30976
6955998c187e
Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents:
30920
diff
changeset
|
234 #endif // HAVE_6REGS && HAVE_SSE2 |
29371 | 235 |
/*
 * Deband one plane.
 *
 * ctx      filter state; supplies the scratch buffer, the threshold and
 *          the blur_line / filter_line implementations
 * dst/src  output and input plane, with line strides dstride / sstride
 * width,
 * height   plane dimensions in pixels
 * r        blur radius used for this plane
 *
 * The caller (put_image) only calls this when min(width, height) > 2*r.
 */
236 static void filter(struct vf_priv_s *ctx, uint8_t *dst, uint8_t *src, | |
237 int width, int height, int dstride, int sstride, int r) | |
238 { | |
/* stride (in uint16_t elements) of one row of the blur buffers: half of
 * the width rounded up to a multiple of 16 */
239 int bstride = ((width+15)&~15)/2; | |
240 int y; | |
241 uint32_t dc_factor = (1<<21)/(r*r); | |
/* scratch layout: dc starts 16 elements into ctx->buf, the r rows of
 * vertical sums start at element bstride+32 */
242 uint16_t *dc = ctx->buf+16; | |
243 uint16_t *buf = ctx->buf+bstride+32; | |
244 int thresh = ctx->thresh; | |
245 | |
/* clears dc and everything up to the start of buf; for y == 0 the loop
 * below passes buf-bstride as buf1, which lies inside this cleared area */
246 memset(dc, 0, (bstride+16)*sizeof(*buf)); | |
/* prime the r buffer rows, each from the previous row plus two source
 * lines starting at line 2*y */
247 for (y=0; y<r; y++) | |
248 ctx->blur_line(dc, buf+y*bstride, buf+(y-1)*bstride, src+2*y*sstride, sstride, width/2); | |
/* each pass of this loop emits two output lines (all of the first r+2
 * lines on the first pass) */
249 for (;;) { | |
250 if (y < height-r) { | |
/* buffer rows are reused cyclically: buf0 is overwritten, buf1 is the
 * row written on the previous pass */
251 int mod = ((y+r)/2)%r; | |
252 uint16_t *buf0 = buf+mod*bstride; | |
253 uint16_t *buf1 = buf+(mod?mod-1:r-1)*bstride; | |
254 int x, v; | |
/* NOTE(review): blur_line_c also reads the line following src+(y+r)*sstride;
 * for odd heights that looks like one line past the plane — confirm the
 * image buffers are padded */
255 ctx->blur_line(dc, buf0, buf1, src+(y+r)*sstride, sstride, width/2); | |
/* horizontal running sum over r entries of dc, computed in place: the
 * scaled result for window end x is stored at dc[x-r] */
256 for (x=v=0; x<r; x++) | |
257 v += dc[x]; | |
258 for (; x<width/2; x++) { | |
259 v += dc[x] - dc[x-r]; | |
260 dc[x-r] = v * dc_factor >> 16; | |
261 } | |
/* right edge: repeat the last window value */
262 for (; x<(width+r+1)/2; x++) | |
263 dc[x-r] = v * dc_factor >> 16; | |
/* left edge: replicate dc[0] into the r/2 entries before it, which
 * filter_line reads because it is passed dc-r/2 */
264 for (x=-r/2; x<0; x++) | |
265 dc[x] = dc[0]; | |
266 } | |
/* first pass only: the top r lines are all filtered with the first dc */
267 if (y == r) { | |
268 for (y=0; y<r; y++) | |
269 ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]); | |
270 } | |
/* once y >= height-r, dc is no longer updated and the last computed
 * values are reused for the remaining lines */
271 ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]); | |
272 if (++y >= height) break; | |
273 ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]); | |
274 if (++y >= height) break; | |
275 } | |
276 } | |
277 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
278 static void get_image(struct vf_instance *vf, mp_image_t *mpi) |
29371 | 279 { |
280 if (mpi->flags&MP_IMGFLAG_PRESERVE) return; // don't change | |
281 // ok, we can do pp in-place: | |
282 vf->dmpi = vf_get_image(vf->next, mpi->imgfmt, | |
283 mpi->type, mpi->flags, mpi->width, mpi->height); | |
284 mpi->planes[0] = vf->dmpi->planes[0]; | |
285 mpi->stride[0] = vf->dmpi->stride[0]; | |
286 mpi->width = vf->dmpi->width; | |
287 if (mpi->flags&MP_IMGFLAG_PLANAR){ | |
288 mpi->planes[1] = vf->dmpi->planes[1]; | |
289 mpi->planes[2] = vf->dmpi->planes[2]; | |
290 mpi->stride[1] = vf->dmpi->stride[1]; | |
291 mpi->stride[2] = vf->dmpi->stride[2]; | |
292 } | |
293 mpi->flags |= MP_IMGFLAG_DIRECT; | |
294 } | |
295 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
296 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts) |
29371 | 297 { |
298 mp_image_t *dmpi = vf->dmpi; | |
299 int p; | |
300 | |
301 if (!(mpi->flags&MP_IMGFLAG_DIRECT)) { | |
302 // no DR, so get a new image. hope we'll get DR buffer: | |
303 dmpi = vf_get_image(vf->next,mpi->imgfmt, MP_IMGTYPE_TEMP, | |
304 MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE, | |
305 mpi->w, mpi->h); | |
306 } | |
307 vf_clone_mpi_attributes(dmpi, mpi); | |
308 | |
309 for (p=0; p<mpi->num_planes; p++) { | |
310 int w = mpi->w; | |
311 int h = mpi->h; | |
312 int r = vf->priv->radius; | |
313 if (p) { | |
314 w >>= mpi->chroma_x_shift; | |
315 h >>= mpi->chroma_y_shift; | |
316 r = ((r>>mpi->chroma_x_shift) + (r>>mpi->chroma_y_shift)) / 2; | |
317 r = av_clip((r+1)&~1,4,32); | |
318 } | |
319 if (FFMIN(w,h) > 2*r) | |
320 filter(vf->priv, dmpi->planes[p], mpi->planes[p], w, h, | |
321 dmpi->stride[p], mpi->stride[p], r); | |
322 else if (dmpi->planes[p] != mpi->planes[p]) | |
323 memcpy_pic(dmpi->planes[p], mpi->planes[p], w, h, | |
324 dmpi->stride[p], mpi->stride[p]); | |
325 } | |
326 | |
327 return vf_next_put_image(vf, dmpi, pts); | |
328 } | |
329 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
330 static int query_format(struct vf_instance *vf, unsigned int fmt) |
29371 | 331 { |
332 switch (fmt){ | |
333 case IMGFMT_YVU9: | |
334 case IMGFMT_IF09: | |
335 case IMGFMT_YV12: | |
336 case IMGFMT_I420: | |
337 case IMGFMT_IYUV: | |
338 case IMGFMT_CLPL: | |
339 case IMGFMT_Y800: | |
340 case IMGFMT_Y8: | |
341 case IMGFMT_NV12: | |
342 case IMGFMT_NV21: | |
343 case IMGFMT_444P: | |
344 case IMGFMT_422P: | |
345 case IMGFMT_411P: | |
346 case IMGFMT_HM12: | |
347 return vf_next_query_format(vf,fmt); | |
348 } | |
349 return 0; | |
350 } | |
351 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
352 static int config(struct vf_instance *vf, |
29371 | 353 int width, int height, int d_width, int d_height, |
354 unsigned int flags, unsigned int outfmt) | |
355 { | |
356 free(vf->priv->buf); | |
357 vf->priv->buf = av_mallocz((((width+15)&~15)*(vf->priv->radius+1)/2+32)*sizeof(uint16_t)); | |
358 return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt); | |
359 } | |
360 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
361 static void uninit(struct vf_instance *vf) |
29371 | 362 { |
363 if (!vf->priv) return; | |
364 av_free(vf->priv->buf); | |
365 free(vf->priv); | |
366 vf->priv = NULL; | |
367 } | |
368 | |
30638
a7b908875c14
Rename open() vf initialization function to vf_open().
diego
parents:
29371
diff
changeset
|
369 static int vf_open(vf_instance_t *vf, char *args) |
29371 | 370 { |
371 float thresh = 1.2; | |
372 int radius = 16; | |
373 | |
374 vf->get_image=get_image; | |
375 vf->put_image=put_image; | |
376 vf->query_format=query_format; | |
377 vf->config=config; | |
378 vf->uninit=uninit; | |
379 vf->priv=malloc(sizeof(struct vf_priv_s)); | |
380 memset(vf->priv, 0, sizeof(struct vf_priv_s)); | |
381 | |
382 if (args) sscanf(args, "%f:%d", &thresh, &radius); | |
383 vf->priv->thresh = (1<<15)/av_clipf(thresh,0.51,255); | |
384 vf->priv->radius = av_clip((radius+1)&~1,4,32); | |
385 | |
386 vf->priv->blur_line = blur_line_c; | |
387 vf->priv->filter_line = filter_line_c; | |
30983
8b6727d2d479
cosmetics: Reorder some x86-related preprocessor conditionals.
diego
parents:
30976
diff
changeset
|
388 #if HAVE_SSE2 && HAVE_6REGS |
29371 | 389 if (gCpuCaps.hasSSE2) |
390 vf->priv->blur_line = blur_line_sse2; | |
391 #endif | |
30976
6955998c187e
Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents:
30920
diff
changeset
|
392 #if HAVE_MMX2 |
29371 | 393 if (gCpuCaps.hasMMX2) |
394 vf->priv->filter_line = filter_line_mmx2; | |
30976
6955998c187e
Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents:
30920
diff
changeset
|
395 #endif |
6955998c187e
Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents:
30920
diff
changeset
|
396 #if HAVE_SSSE3 |
29371 | 397 if (gCpuCaps.hasSSSE3) |
398 vf->priv->filter_line = filter_line_ssse3; | |
399 #endif | |
400 | |
401 return 1; | |
402 } | |
403 | |
404 const vf_info_t vf_info_gradfun = { | |
405 "gradient deband", | |
406 "gradfun", | |
407 "Loren Merritt", | |
408 "", | |
30638
a7b908875c14
Rename open() vf initialization function to vf_open().
diego
parents:
29371
diff
changeset
|
409 vf_open, |
29371 | 410 NULL |
411 }; |