annotate libmpcodecs/vf_gradfun.c @ 31824:e26b1c667fd8

Add const to avoid warnings. The const on the return type is not correct compared to the real win32 API functions, but that really does not matter for us, avoiding the warning is more useful.
author reimar
date Mon, 02 Aug 2010 17:32:42 +0000
parents 8b6727d2d479
children b4ce15212bfc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
1 /*
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
2 * Copyright (C) 2009 Loren Merritt <lorenm@u.washington.edu>
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
3 *
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
4 * This file is part of MPlayer.
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
5 *
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
6 * MPlayer is free software; you can redistribute it and/or modify
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
7 * it under the terms of the GNU General Public License as published by
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
8 * the Free Software Foundation; either version 2 of the License, or
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
9 * (at your option) any later version.
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
10 *
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
11 * MPlayer is distributed in the hope that it will be useful,
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
14 * GNU General Public License for more details.
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
15 *
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
16 * You should have received a copy of the GNU General Public License along
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
17 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
19 */
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
20
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
21 /*
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
22 * Debanding algorithm (from gradfun2db by prunedtree):
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
23 * Boxblur.
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
24 * Foreach pixel, if it's within threshold of the blurred value, make it closer.
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
25 * So now we have a smoothed and higher bitdepth version of all the shallow
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
26 * gradients, while leaving detailed areas untouched.
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
27 * Dither it back to 8bit.
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
28 */
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
29
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
30 #include <stdio.h>
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
31 #include <stdlib.h>
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
32 #include <string.h>
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
33 #include <inttypes.h>
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
34
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
35 #include "config.h"
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
36 #include "cpudetect.h"
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
37 #include "img_format.h"
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
38 #include "mp_image.h"
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
39 #include "vf.h"
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
40 #include "libvo/fastmemcpy.h"
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
41 #include "libavutil/avutil.h"
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
42 #include "libavutil/x86_cpu.h"
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
43
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
44 struct vf_priv_s {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
45 int thresh;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
46 int radius;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
47 uint16_t *buf;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
48 void (*filter_line)(uint8_t *dst, uint8_t *src, uint16_t *dc,
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
49 int width, int thresh, const uint16_t *dithers);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
50 void (*blur_line)(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
51 uint8_t *src, int sstride, int width);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
52 };
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
53
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
/* Eight 16-bit lanes of 127; the SIMD filter_line kernels subtract it when
 * computing the blend weight. */
static const uint16_t __attribute__((aligned(16))) pw_7f[8] = {
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f
};

/* Eight 16-bit lanes of 255; the SSE2 blur kernel uses it to mask out the
 * low byte of every 16-bit word. */
static const uint16_t __attribute__((aligned(16))) pw_ff[8] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/* 8x8 ordered-dither matrix. filter() picks the row with (y & 7) and
 * filter_line_c() picks the column with (x & 7); the entry is added to the
 * pixel while it still carries 7 fractional bits. Each row is 16 bytes so it
 * can be loaded as one aligned vector. */
static const uint16_t __attribute__((aligned(16))) dither[8][8] = {
    {   0,  96,  24, 120,   6, 102,  30, 126 },
    {  64,  32,  88,  56,  70,  38,  94,  62 },
    {  16, 112,   8, 104,  22, 118,  14, 110 },
    {  80,  48,  72,  40,  86,  54,  78,  46 },
    {   4, 100,  28, 124,   2,  98,  26, 122 },
    {  68,  36,  92,  60,  66,  34,  90,  58 },
    {  20, 116,  12, 108,  18, 114,  10, 106 },
    {  84,  52,  76,  44,  82,  50,  74,  42 },
};
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
66
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
/*
 * Portable row kernel: pull each pixel towards its blurred value and dither
 * the result back to 8 bits.
 *
 * dst     - output row (may be the same memory as src; each src[x] is read
 *           before dst[x] is written)
 * src     - input row
 * dc      - blurred reference values at half horizontal resolution; the
 *           pointer advances every time x becomes odd
 * width   - number of pixels to process
 * thresh  - scale applied to |delta|; a larger product gives a smaller
 *           blend weight
 * dithers - eight dither offsets, indexed with (x & 7)
 */
static void filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc,
                          int width, int thresh, const uint16_t *dithers)
{
    int x = 0;

    while (x < width) {
        /* work with 7 fractional bits */
        const int pix    = src[x] << 7;
        const int delta  = dc[0] - pix;
        /* weight drops to 0 once |delta|*thresh reaches 127<<16 */
        const int dist   = (abs(delta) * thresh) >> 16;
        const int weight = FFMAX(0, 127 - dist);
        const int corr   = (weight * weight * delta) >> 14;

        dst[x] = av_clip_uint8((pix + corr + dithers[x & 7]) >> 7);

        x++;
        dc += x & 1;   /* step the reference after pixels 0, 2, 4, ... */
    }
}
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
81
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
/*
 * Portable vertical blur step for one pair of source rows.
 *
 * For every output column the 2x2 block of source pixels (two adjacent
 * pixels on the row at src and the two below them at src+sstride) is added
 * to the running sum in buf1. The new sum replaces the entry in buf and the
 * difference between the new sum and the value previously stored in buf is
 * written to dc.
 *
 * dc      - receives new_sum - old buf value, one entry per column
 * buf     - row-sum slot that is read (old value) and then overwritten
 * buf1    - row-sum slot the new sum is accumulated from
 * src     - first of the two source rows
 * sstride - distance in bytes from src to the second source row
 * width   - number of output columns (each consumes two source pixels)
 */
static void blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                        uint8_t *src, int sstride, int width)
{
    int col;

    for (col = 0; col < width; col++) {
        const uint8_t *upper = src + 2 * col;
        const uint8_t *lower = upper + sstride;
        const int sum  = buf1[col] + upper[0] + upper[1] + lower[0] + lower[1];
        const int prev = buf[col];

        buf[col] = sum;
        dc[col]  = sum - prev;
    }
}
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
93
30976
6955998c187e Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents: 30920
diff changeset
#if HAVE_MMX2
/*
 * MMX2 row kernel, four pixels per iteration. Same parameters as
 * filter_line_c(). Pixels beyond the last multiple of four are delegated to
 * filter_line_c() first, then the vector loop handles the rest.
 *
 * NOTE(review): the asm loop tests its counter after the body, so it runs
 * once even if the rounded-down width is 0; callers appear to pass
 * width >= 4 - confirm.
 */
static void filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc,
                             int width, int thresh, const uint16_t *dithers)
{
    intptr_t x;
    const int tail = width & 3;

    if (tail) {
        width -= tail;
        filter_line_c(dst + width, src + width, dc + width / 2, tail, thresh, dithers);
    }

    /* x counts up from -width to 0; the pointers are pre-advanced to the end */
    x = -width;
    __asm__ volatile(
        "movd %4, %%mm5 \n"
        "pxor %%mm7, %%mm7 \n"
        "pshufw $0, %%mm5, %%mm5 \n"
        "movq %6, %%mm6 \n"
        "movq %5, %%mm4 \n"
        "1: \n"
        "movd (%2,%0), %%mm0 \n"
        "movd (%3,%0), %%mm1 \n"
        "punpcklbw %%mm7, %%mm0 \n"
        "punpcklwd %%mm1, %%mm1 \n"
        "psllw $7, %%mm0 \n"
        "pxor %%mm2, %%mm2 \n"
        "psubw %%mm0, %%mm1 \n" // delta = dc - pix
        "psubw %%mm1, %%mm2 \n"
        "pmaxsw %%mm1, %%mm2 \n"
        "pmulhuw %%mm5, %%mm2 \n" // m = abs(delta) * thresh >> 16
        "psubw %%mm6, %%mm2 \n"
        "pminsw %%mm7, %%mm2 \n" // m = -max(0, 127-m)
        "pmullw %%mm2, %%mm2 \n"
        "paddw %%mm4, %%mm0 \n" // pix += dither
        "pmulhw %%mm2, %%mm1 \n"
        "psllw $2, %%mm1 \n" // m = m*m*delta >> 14
        "paddw %%mm1, %%mm0 \n" // pix += m
        "psraw $7, %%mm0 \n"
        "packuswb %%mm0, %%mm0 \n"
        "movd %%mm0, (%1,%0) \n" // dst = clip(pix>>7)
        "add $4, %0 \n"
        "jl 1b \n"
        "emms \n"
        :"+r"(x)
        :"r"(dst+width), "r"(src+width), "r"(dc+width/2),
         "rm"(thresh), "m"(*dithers), "m"(*pw_7f)
        :"memory"
    );
}
#endif
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
142
30976
6955998c187e Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents: 30920
diff changeset
#if HAVE_SSSE3
/*
 * SSSE3 row kernel, eight pixels per iteration. Same parameters as
 * filter_line_c(). Pixels beyond the last multiple of eight are delegated to
 * filter_line_c() first, then the vector loop handles the rest.
 *
 * NOTE(review): the asm loop tests its counter after the body, so it runs
 * once even if the rounded-down width is 0; callers appear to pass
 * width >= 8 - confirm.
 */
static void filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc,
                              int width, int thresh, const uint16_t *dithers)
{
    intptr_t x;
    const int tail = width & 7;

    if (tail) {
        // the original author estimates ~10% speedup if this scalar tail
        // could somehow be eliminated
        width -= tail;
        filter_line_c(dst + width, src + width, dc + width / 2, tail, thresh, dithers);
    }

    /* x counts up from -width to 0; the pointers are pre-advanced to the end */
    x = -width;
    __asm__ volatile(
        "movd %4, %%xmm5 \n"
        "pxor %%xmm7, %%xmm7 \n"
        "pshuflw $0,%%xmm5, %%xmm5 \n"
        "movdqa %6, %%xmm6 \n"
        "punpcklqdq %%xmm5, %%xmm5 \n"
        "movdqa %5, %%xmm4 \n"
        "1: \n"
        "movq (%2,%0), %%xmm0 \n"
        "movq (%3,%0), %%xmm1 \n"
        "punpcklbw %%xmm7, %%xmm0 \n"
        "punpcklwd %%xmm1, %%xmm1 \n"
        "psllw $7, %%xmm0 \n"
        "psubw %%xmm0, %%xmm1 \n" // delta = dc - pix
        "pabsw %%xmm1, %%xmm2 \n"
        "pmulhuw %%xmm5, %%xmm2 \n" // m = abs(delta) * thresh >> 16
        "psubw %%xmm6, %%xmm2 \n"
        "pminsw %%xmm7, %%xmm2 \n" // m = -max(0, 127-m)
        "pmullw %%xmm2, %%xmm2 \n"
        "psllw $1, %%xmm2 \n"
        "paddw %%xmm4, %%xmm0 \n" // pix += dither
        "pmulhrsw %%xmm2, %%xmm1 \n" // m = m*m*delta >> 14
        "paddw %%xmm1, %%xmm0 \n" // pix += m
        "psraw $7, %%xmm0 \n"
        "packuswb %%xmm0, %%xmm0 \n"
        "movq %%xmm0, (%1,%0) \n" // dst = clip(pix>>7)
        "add $8, %0 \n"
        "jl 1b \n"
        :"+&r"(x)
        :"r"(dst+width), "r"(src+width), "r"(dc+width/2),
         "rm"(thresh), "m"(*dithers), "m"(*pw_7f)
        :"memory"
    );
}
#endif // HAVE_SSSE3
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
190
30983
8b6727d2d479 cosmetics: Reorder some x86-related preprocessor conditionals.
diego
parents: 30976
diff changeset
#if HAVE_SSE2 && HAVE_6REGS
/*
 * Body of blur_line_sse2(), parameterised on the instruction used to load
 * the two source rows ("movdqu" or "movdqa"). It expands to a declaration
 * plus an asm statement, so it must be used inside its own braces, and it
 * refers to the enclosing function's dc, buf, buf1, src, sstride and width.
 * Each iteration handles 16 source bytes per row, i.e. eight 16-bit outputs;
 * buf, buf1 and dc are always accessed with aligned vector instructions.
 */
#define BLURV(load)\
    intptr_t x = -2*width;\
    __asm__ volatile(\
        "movdqa %6, %%xmm7 \n"\
        "1: \n"\
        load" (%4,%0), %%xmm0 \n"\
        load" (%5,%0), %%xmm1 \n"\
        "movdqa %%xmm0, %%xmm2 \n"\
        "movdqa %%xmm1, %%xmm3 \n"\
        "psrlw $8, %%xmm0 \n"\
        "psrlw $8, %%xmm1 \n"\
        "pand %%xmm7, %%xmm2 \n"\
        "pand %%xmm7, %%xmm3 \n"\
        "paddw %%xmm1, %%xmm0 \n"\
        "paddw %%xmm3, %%xmm2 \n"\
        "paddw %%xmm2, %%xmm0 \n"\
        "paddw (%2,%0), %%xmm0 \n"\
        "movdqa (%1,%0), %%xmm1 \n"\
        "movdqa %%xmm0, (%1,%0) \n"\
        "psubw %%xmm1, %%xmm0 \n"\
        "movdqa %%xmm0, (%3,%0) \n"\
        "add $16, %0 \n"\
        "jl 1b \n"\
        :"+&r"(x)\
        :"r"(buf+width),\
         "r"(buf1+width),\
         "r"(dc+width),\
         "r"(src+width*2),\
         "r"(src+width*2+sstride),\
         "m"(*pw_ff)\
        :"memory"\
    );

/*
 * SSE2 counterpart of blur_line_c() with the same parameters. Aligned source
 * loads are used only when both the row pointer and the stride are multiples
 * of 16; otherwise unaligned loads are used.
 */
static void blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                           uint8_t *src, int sstride, int width)
{
    if (!(((intptr_t)src|sstride)&15)) {
        BLURV("movdqa");
    } else {
        BLURV("movdqu");
    }
}
#endif // HAVE_6REGS && HAVE_SSE2
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
235
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
236 static void filter(struct vf_priv_s *ctx, uint8_t *dst, uint8_t *src,
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
237 int width, int height, int dstride, int sstride, int r)
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
238 {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
239 int bstride = ((width+15)&~15)/2;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
240 int y;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
241 uint32_t dc_factor = (1<<21)/(r*r);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
242 uint16_t *dc = ctx->buf+16;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
243 uint16_t *buf = ctx->buf+bstride+32;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
244 int thresh = ctx->thresh;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
245
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
246 memset(dc, 0, (bstride+16)*sizeof(*buf));
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
247 for (y=0; y<r; y++)
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
248 ctx->blur_line(dc, buf+y*bstride, buf+(y-1)*bstride, src+2*y*sstride, sstride, width/2);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
249 for (;;) {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
250 if (y < height-r) {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
251 int mod = ((y+r)/2)%r;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
252 uint16_t *buf0 = buf+mod*bstride;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
253 uint16_t *buf1 = buf+(mod?mod-1:r-1)*bstride;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
254 int x, v;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
255 ctx->blur_line(dc, buf0, buf1, src+(y+r)*sstride, sstride, width/2);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
256 for (x=v=0; x<r; x++)
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
257 v += dc[x];
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
258 for (; x<width/2; x++) {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
259 v += dc[x] - dc[x-r];
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
260 dc[x-r] = v * dc_factor >> 16;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
261 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
262 for (; x<(width+r+1)/2; x++)
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
263 dc[x-r] = v * dc_factor >> 16;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
264 for (x=-r/2; x<0; x++)
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
265 dc[x] = dc[0];
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
266 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
267 if (y == r) {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
268 for (y=0; y<r; y++)
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
269 ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
270 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
271 ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
272 if (++y >= height) break;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
273 ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
274 if (++y >= height) break;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
275 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
276 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
277
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
278 static void get_image(struct vf_instance *vf, mp_image_t *mpi)
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
279 {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
280 if (mpi->flags&MP_IMGFLAG_PRESERVE) return; // don't change
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
281 // ok, we can do pp in-place:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
282 vf->dmpi = vf_get_image(vf->next, mpi->imgfmt,
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
283 mpi->type, mpi->flags, mpi->width, mpi->height);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
284 mpi->planes[0] = vf->dmpi->planes[0];
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
285 mpi->stride[0] = vf->dmpi->stride[0];
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
286 mpi->width = vf->dmpi->width;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
287 if (mpi->flags&MP_IMGFLAG_PLANAR){
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
288 mpi->planes[1] = vf->dmpi->planes[1];
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
289 mpi->planes[2] = vf->dmpi->planes[2];
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
290 mpi->stride[1] = vf->dmpi->stride[1];
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
291 mpi->stride[2] = vf->dmpi->stride[2];
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
292 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
293 mpi->flags |= MP_IMGFLAG_DIRECT;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
294 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
295
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
296 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
297 {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
298 mp_image_t *dmpi = vf->dmpi;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
299 int p;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
300
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
301 if (!(mpi->flags&MP_IMGFLAG_DIRECT)) {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
302 // no DR, so get a new image. hope we'll get DR buffer:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
303 dmpi = vf_get_image(vf->next,mpi->imgfmt, MP_IMGTYPE_TEMP,
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
304 MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE,
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
305 mpi->w, mpi->h);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
306 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
307 vf_clone_mpi_attributes(dmpi, mpi);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
308
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
309 for (p=0; p<mpi->num_planes; p++) {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
310 int w = mpi->w;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
311 int h = mpi->h;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
312 int r = vf->priv->radius;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
313 if (p) {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
314 w >>= mpi->chroma_x_shift;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
315 h >>= mpi->chroma_y_shift;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
316 r = ((r>>mpi->chroma_x_shift) + (r>>mpi->chroma_y_shift)) / 2;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
317 r = av_clip((r+1)&~1,4,32);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
318 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
319 if (FFMIN(w,h) > 2*r)
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
320 filter(vf->priv, dmpi->planes[p], mpi->planes[p], w, h,
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
321 dmpi->stride[p], mpi->stride[p], r);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
322 else if (dmpi->planes[p] != mpi->planes[p])
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
323 memcpy_pic(dmpi->planes[p], mpi->planes[p], w, h,
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
324 dmpi->stride[p], mpi->stride[p]);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
325 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
326
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
327 return vf_next_put_image(vf, dmpi, pts);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
328 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
329
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
330 static int query_format(struct vf_instance *vf, unsigned int fmt)
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
331 {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
332 switch (fmt){
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
333 case IMGFMT_YVU9:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
334 case IMGFMT_IF09:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
335 case IMGFMT_YV12:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
336 case IMGFMT_I420:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
337 case IMGFMT_IYUV:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
338 case IMGFMT_CLPL:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
339 case IMGFMT_Y800:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
340 case IMGFMT_Y8:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
341 case IMGFMT_NV12:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
342 case IMGFMT_NV21:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
343 case IMGFMT_444P:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
344 case IMGFMT_422P:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
345 case IMGFMT_411P:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
346 case IMGFMT_HM12:
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
347 return vf_next_query_format(vf,fmt);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
348 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
349 return 0;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
350 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
351
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
352 static int config(struct vf_instance *vf,
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
353 int width, int height, int d_width, int d_height,
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
354 unsigned int flags, unsigned int outfmt)
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
355 {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
356 free(vf->priv->buf);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
357 vf->priv->buf = av_mallocz((((width+15)&~15)*(vf->priv->radius+1)/2+32)*sizeof(uint16_t));
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
358 return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
359 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
360
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
361 static void uninit(struct vf_instance *vf)
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
362 {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
363 if (!vf->priv) return;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
364 av_free(vf->priv->buf);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
365 free(vf->priv);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
366 vf->priv = NULL;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
367 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
368
30638
a7b908875c14 Rename open() vf initialization function to vf_open().
diego
parents: 29371
diff changeset
369 static int vf_open(vf_instance_t *vf, char *args)
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
370 {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
371 float thresh = 1.2;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
372 int radius = 16;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
373
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
374 vf->get_image=get_image;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
375 vf->put_image=put_image;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
376 vf->query_format=query_format;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
377 vf->config=config;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
378 vf->uninit=uninit;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
379 vf->priv=malloc(sizeof(struct vf_priv_s));
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
380 memset(vf->priv, 0, sizeof(struct vf_priv_s));
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
381
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
382 if (args) sscanf(args, "%f:%d", &thresh, &radius);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
383 vf->priv->thresh = (1<<15)/av_clipf(thresh,0.51,255);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
384 vf->priv->radius = av_clip((radius+1)&~1,4,32);
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
385
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
386 vf->priv->blur_line = blur_line_c;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
387 vf->priv->filter_line = filter_line_c;
30983
8b6727d2d479 cosmetics: Reorder some x86-related preprocessor conditionals.
diego
parents: 30976
diff changeset
388 #if HAVE_SSE2 && HAVE_6REGS
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
389 if (gCpuCaps.hasSSE2)
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
390 vf->priv->blur_line = blur_line_sse2;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
391 #endif
30976
6955998c187e Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents: 30920
diff changeset
392 #if HAVE_MMX2
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
393 if (gCpuCaps.hasMMX2)
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
394 vf->priv->filter_line = filter_line_mmx2;
30976
6955998c187e Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents: 30920
diff changeset
395 #endif
6955998c187e Change ifdefs to make more sense: HAVE_SSSE3 should only be around SSSE3-code,
reimar
parents: 30920
diff changeset
396 #if HAVE_SSSE3
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
397 if (gCpuCaps.hasSSSE3)
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
398 vf->priv->filter_line = filter_line_ssse3;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
399 #endif
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
400
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
401 return 1;
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
402 }
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
403
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
404 const vf_info_t vf_info_gradfun = {
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
405 "gradient deband",
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
406 "gradfun",
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
407 "Loren Merritt",
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
408 "",
30638
a7b908875c14 Rename open() vf initialization function to vf_open().
diego
parents: 29371
diff changeset
409 vf_open,
29371
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
410 NULL
3fa15eca924e new debanding filter
lorenm
parents:
diff changeset
411 };