Mercurial > mplayer.hg
annotate libmpcodecs/vf_spp.c @ 17197:0ab565f7ed60
Avoid gcc warnings:
'...' might be used uninitialized in this function
In this case 'H', 'N', 'D', and 'F' can indeed be
used uninitialized, thus possibly causing all sorts of problems.
Patch by Peter Breitenlohner
author | rathann |
---|---|
date | Thu, 15 Dec 2005 20:39:59 +0000 |
parents | ba9bf9beb23f |
children | 401b440a6d76 |
rev | line source |
---|---|
11277 | 1 /* |
2 Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at> | |
3 | |
4 This program is free software; you can redistribute it and/or modify | |
5 it under the terms of the GNU General Public License as published by | |
6 the Free Software Foundation; either version 2 of the License, or | |
7 (at your option) any later version. | |
8 | |
9 This program is distributed in the hope that it will be useful, | |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
15 along with this program; if not, write to the Free Software | |
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 */ | |
18 | |
19 /* | |
20 * This implementation is based on an algorithm described in | |
21 * "Aria Nosratinia Embedded Post-Processing for | |
22 * Enhancement of Compressed Images (1999)" | |
23 * (http://citeseer.nj.nec.com/nosratinia99embedded.html) | |
24 */ | |
25 | |
11334 | 26 |
11277 | 27 #include <stdio.h> |
28 #include <stdlib.h> | |
29 #include <string.h> | |
30 #include <inttypes.h> | |
31 #include <math.h> | |
32 | |
17012 | 33 #include "config.h" |
11335 | 34 |
35 #ifdef USE_LIBAVCODEC | |
36 | |
17012 | 37 #include "mp_msg.h" |
38 #include "cpudetect.h" | |
11568
cf76671b3d77
Fix compilation if lavc is compiled as a shared lib. Patch by Panagiotis Issaris <takis@lumumba.luc.ac.be>
alex
parents:
11477
diff
changeset
|
39 |
cf76671b3d77
Fix compilation if lavc is compiled as a shared lib. Patch by Panagiotis Issaris <takis@lumumba.luc.ac.be>
alex
parents:
11477
diff
changeset
|
40 #ifdef USE_LIBAVCODEC_SO |
cf76671b3d77
Fix compilation if lavc is compiled as a shared lib. Patch by Panagiotis Issaris <takis@lumumba.luc.ac.be>
alex
parents:
11477
diff
changeset
|
41 #include <ffmpeg/avcodec.h> |
cf76671b3d77
Fix compilation if lavc is compiled as a shared lib. Patch by Panagiotis Issaris <takis@lumumba.luc.ac.be>
alex
parents:
11477
diff
changeset
|
42 #include <ffmpeg/dsputil.h> |
cf76671b3d77
Fix compilation if lavc is compiled as a shared lib. Patch by Panagiotis Issaris <takis@lumumba.luc.ac.be>
alex
parents:
11477
diff
changeset
|
43 #else |
17012 | 44 #include "libavcodec/avcodec.h" |
45 #include "libavcodec/dsputil.h" | |
11568
cf76671b3d77
Fix compilation if lavc is compiled as a shared lib. Patch by Panagiotis Issaris <takis@lumumba.luc.ac.be>
alex
parents:
11477
diff
changeset
|
46 #endif |
11277 | 47 |
48 #ifdef HAVE_MALLOC_H | |
49 #include <malloc.h> | |
50 #endif | |
51 | |
52 #include "img_format.h" | |
53 #include "mp_image.h" | |
54 #include "vf.h" | |
17012 | 55 #include "libvo/fastmemcpy.h" |
11277 | 56 |
57 #define XMIN(a,b) ((a) < (b) ? (a) : (b)) | |
58 | |
59 //===========================================================================// | |
11477
9785bff83777
memcpy pix instead of black screen if no DR and codec provides no qscale table
michael
parents:
11335
diff
changeset
|
/*
 * 8x8 table of dither offsets with entries in the range 0..63.
 * store_slice_c() picks row dither[y] for output row y and adds entry
 * [pos] to the scaled accumulator value before the final >>6;
 * store_slice_mmx() loads the same row as one 8-byte quadword.
 */
60 static const uint8_t __attribute__((aligned(8))) dither[8][8]={ |
11277 | 61 { 0, 48, 12, 60, 3, 51, 15, 63, }, |
62 { 32, 16, 44, 28, 35, 19, 47, 31, }, | |
63 { 8, 56, 4, 52, 11, 59, 7, 55, }, | |
64 { 40, 24, 36, 20, 43, 27, 39, 23, }, | |
65 { 2, 50, 14, 62, 1, 49, 13, 61, }, | |
66 { 34, 18, 46, 30, 33, 17, 45, 29, }, | |
67 { 10, 58, 6, 54, 9, 57, 5, 53, }, | |
68 { 42, 26, 38, 22, 41, 25, 37, 21, }, | |
69 }; | |
70 | |
11477
9785bff83777
memcpy pix instead of black screen if no DR and codec provides no qscale table
michael
parents:
11335
diff
changeset
|
/*
 * Block shift offsets used by filter(); each entry is {x offset, y offset}.
 *
 * filter() reads offset[i + count - 1] for i = 0 .. count-1, where
 * count = 1 << log2_count.  The table is therefore laid out as consecutive
 * groups of 1, 2, 4, 8, 16, 32 and 64 entries (127 in total):
 *   count  1 -> index   0
 *   count  2 -> indices 1..2
 *   count  4 -> indices 3..6
 *   count  8 -> indices 7..14
 *   count 16 -> indices 15..30
 *   count 32 -> indices 31..62
 *   count 64 -> indices 63..126
 * A log2_count above 6 would index past the end of this table.
 */
71 static const uint8_t offset[127][2]= { |
11298 | 72 {0,0}, |
73 {0,0}, {4,4}, | |
74 {0,0}, {2,2}, {6,4}, {4,6}, | |
75 {0,0}, {5,1}, {2,2}, {7,3}, {4,4}, {1,5}, {6,6}, {3,7}, | |
76 | |
77 {0,0}, {4,0}, {1,1}, {5,1}, {3,2}, {7,2}, {2,3}, {6,3}, | |
78 {0,4}, {4,4}, {1,5}, {5,5}, {3,6}, {7,6}, {2,7}, {6,7}, | |
79 | |
80 {0,0}, {0,2}, {0,4}, {0,6}, {1,1}, {1,3}, {1,5}, {1,7}, | |
81 {2,0}, {2,2}, {2,4}, {2,6}, {3,1}, {3,3}, {3,5}, {3,7}, | |
82 {4,0}, {4,2}, {4,4}, {4,6}, {5,1}, {5,3}, {5,5}, {5,7}, | |
83 {6,0}, {6,2}, {6,4}, {6,6}, {7,1}, {7,3}, {7,5}, {7,7}, | |
84 | |
11277 | 85 {0,0}, {4,4}, {0,4}, {4,0}, {2,2}, {6,6}, {2,6}, {6,2}, |
86 {0,2}, {4,6}, {0,6}, {4,2}, {2,0}, {6,4}, {2,4}, {6,0}, | |
87 {1,1}, {5,5}, {1,5}, {5,1}, {3,3}, {7,7}, {3,7}, {7,3}, | |
88 {1,3}, {5,7}, {1,7}, {5,3}, {3,1}, {7,5}, {3,5}, {7,1}, | |
89 {0,1}, {4,5}, {0,5}, {4,1}, {2,3}, {6,7}, {2,7}, {6,3}, | |
90 {0,3}, {4,7}, {0,7}, {4,3}, {2,1}, {6,5}, {2,5}, {6,1}, | |
91 {1,0}, {5,4}, {1,4}, {5,0}, {3,2}, {7,6}, {3,6}, {7,2}, | |
92 {1,2}, {5,6}, {1,6}, {5,2}, {3,0}, {7,4}, {3,4}, {7,0}, | |
93 }; | |
94 | |
/*
 * Per-instance state of the spp filter.
 *
 *   log2_count  filter() runs 1<<log2_count shifted passes per 8x8 block;
 *               open() defaults it to 3 and accepts 0..6 from the arguments.
 *   qp          forced quantizer; 0 means "read the per-block qscale table"
 *               (see the if(p->qp) branch in filter()).
 *   mode        bits 0-1 select the requantizer in open() (0 hard, 1 soft);
 *               bit 2 (value 4) makes put_image() use the current frame's
 *               qscale table instead of the saved non_b_qp copy.
 *   mpeg2       copy of mpi->qscale_type; when non-zero, filter() halves
 *               the qp read from the table.
 *   temp_stride row stride (in elements) of temp/src: width+16 rounded up
 *               to a multiple of 16, set in config().
 *   src         copy of the input plane with an 8 pixel mirrored border.
 *   temp        int16_t accumulator that add_block() sums into.
 *   avctx, dsp  libavcodec context and the DSP function table providing
 *               get_pixels / fdct / idct / idct_permutation.
 *   non_b_qp    copy of the qscale table of the last frame whose pict_type
 *               was not 3 (presumably "not a B-frame" - confirm).
 */
95 struct vf_priv_s { | |
96 int log2_count; | |
97 int qp; | |
11993
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
98 int mode; |
11277 | 99 int mpeg2; |
100 int temp_stride; | |
101 uint8_t *src; | |
102 int16_t *temp; | |
103 AVCodecContext *avctx; | |
104 DSPContext dsp; | |
17110
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
105 char *non_b_qp; |
11277 | 106 }; |
107 | |
108 #define SHIFT 22 | |
109 | |
11993
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
110 static void hardthresh_c(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){ |
11277 | 111 int i; |
112 int bias= 0; //FIXME | |
113 unsigned int threshold1, threshold2; | |
11296 | 114 |
11301 | 115 threshold1= qp*((1<<4) - bias) - 1; |
11277 | 116 threshold2= (threshold1<<1); |
117 | |
118 memset(dst, 0, 64*sizeof(DCTELEM)); | |
11296 | 119 dst[0]= (src[0] + 4)>>3; |
11277 | 120 |
11296 | 121 for(i=1; i<64; i++){ |
122 int level= src[i]; | |
11277 | 123 if(((unsigned)(level+threshold1))>threshold2){ |
124 const int j= permutation[i]; | |
11296 | 125 dst[j]= (level + 4)>>3; |
11277 | 126 } |
127 } | |
128 } | |
129 | |
11993
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
130 static void softthresh_c(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){ |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
131 int i; |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
132 int bias= 0; //FIXME |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
133 unsigned int threshold1, threshold2; |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
134 |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
135 threshold1= qp*((1<<4) - bias) - 1; |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
136 threshold2= (threshold1<<1); |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
137 |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
138 memset(dst, 0, 64*sizeof(DCTELEM)); |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
139 dst[0]= (src[0] + 4)>>3; |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
140 |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
141 for(i=1; i<64; i++){ |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
142 int level= src[i]; |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
143 if(((unsigned)(level+threshold1))>threshold2){ |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
144 const int j= permutation[i]; |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
145 if(level>0) |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
146 dst[j]= (level - threshold1 + 4)>>3; |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
147 else |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
148 dst[j]= (level + threshold1 + 4)>>3; |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
149 } |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
150 } |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
151 } |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
152 |
11301 | 153 #ifdef HAVE_MMX |
11993
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
/*
 * MMX variant of the hard-threshold requantizer.
 *
 * asm operands: %0 = src, %1 = dst, %2 = threshold1+1, %3 = threshold1+5,
 * %4 = threshold1-4, with threshold1 = qp*16 - 1.  %2/%3/%4 are broadcast
 * into all four 16-bit lanes of mm4/mm5/mm6 (movd + two packssdw).
 *
 * Each REQUANT_CORE expansion loads four quadwords (16 coefficients),
 * applies psubw mm4 / paddusw mm5 / paddw mm6 / psubusw mm6 / psraw $3,
 * interleaves the words of the four registers (punpck*wd) and stores four
 * quadwords, so coefficients are written in a different order than they
 * are read.  The `permutation` argument is never read here.
 * NOTE(review): presumably the store order hard-codes the permutation of
 * the MMX idct - confirm against the dsputil idct in use.
 *
 * The DC coefficient is recomputed in C after the asm block.
 *
 * NOTE(review): the asm statement declares no outputs and no clobbers even
 * though it stores through %1 and uses mm0-mm7 - confirm this is safe with
 * the compilers in use.
 */
154 static void hardthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){ |
11301 | 155 int bias= 0; //FIXME |
156 unsigned int threshold1; | |
157 | |
158 threshold1= qp*((1<<4) - bias) - 1; | |
159 | |
160 asm volatile( | |
161 #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \ | |
162 "movq " #src0 ", %%mm0 \n\t"\ | |
163 "movq " #src1 ", %%mm1 \n\t"\ | |
164 "movq " #src2 ", %%mm2 \n\t"\ | |
165 "movq " #src3 ", %%mm3 \n\t"\ | |
166 "psubw %%mm4, %%mm0 \n\t"\ | |
167 "psubw %%mm4, %%mm1 \n\t"\ | |
168 "psubw %%mm4, %%mm2 \n\t"\ | |
169 "psubw %%mm4, %%mm3 \n\t"\ | |
170 "paddusw %%mm5, %%mm0 \n\t"\ | |
171 "paddusw %%mm5, %%mm1 \n\t"\ | |
172 "paddusw %%mm5, %%mm2 \n\t"\ | |
173 "paddusw %%mm5, %%mm3 \n\t"\ | |
174 "paddw %%mm6, %%mm0 \n\t"\ | |
175 "paddw %%mm6, %%mm1 \n\t"\ | |
176 "paddw %%mm6, %%mm2 \n\t"\ | |
177 "paddw %%mm6, %%mm3 \n\t"\ | |
178 "psubusw %%mm6, %%mm0 \n\t"\ | |
179 "psubusw %%mm6, %%mm1 \n\t"\ | |
180 "psubusw %%mm6, %%mm2 \n\t"\ | |
181 "psubusw %%mm6, %%mm3 \n\t"\ | |
182 "psraw $3, %%mm0 \n\t"\ | |
183 "psraw $3, %%mm1 \n\t"\ | |
184 "psraw $3, %%mm2 \n\t"\ | |
185 "psraw $3, %%mm3 \n\t"\ | |
186 \ | |
187 "movq %%mm0, %%mm7 \n\t"\ | |
188 "punpcklwd %%mm2, %%mm0 \n\t" /*A*/\ | |
189 "punpckhwd %%mm2, %%mm7 \n\t" /*C*/\ | |
190 "movq %%mm1, %%mm2 \n\t"\ | |
191 "punpcklwd %%mm3, %%mm1 \n\t" /*B*/\ | |
192 "punpckhwd %%mm3, %%mm2 \n\t" /*D*/\ | |
193 "movq %%mm0, %%mm3 \n\t"\ | |
194 "punpcklwd %%mm1, %%mm0 \n\t" /*A*/\ | |
195 "punpckhwd %%mm7, %%mm3 \n\t" /*C*/\ | |
13393 | 196 "punpcklwd %%mm2, %%mm7 \n\t" /*B*/\ |
11301 | 197 "punpckhwd %%mm2, %%mm1 \n\t" /*D*/\ |
198 \ | |
199 "movq %%mm0, " #dst0 " \n\t"\ | |
200 "movq %%mm7, " #dst1 " \n\t"\ | |
201 "movq %%mm3, " #dst2 " \n\t"\ | |
202 "movq %%mm1, " #dst3 " \n\t" | |
203 | |
204 "movd %2, %%mm4 \n\t" | |
205 "movd %3, %%mm5 \n\t" | |
206 "movd %4, %%mm6 \n\t" | |
207 "packssdw %%mm4, %%mm4 \n\t" | |
208 "packssdw %%mm5, %%mm5 \n\t" | |
209 "packssdw %%mm6, %%mm6 \n\t" | |
210 "packssdw %%mm4, %%mm4 \n\t" | |
211 "packssdw %%mm5, %%mm5 \n\t" | |
212 "packssdw %%mm6, %%mm6 \n\t" | |
213 REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0)) | |
214 REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0)) | |
215 REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0)) | |
216 REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0)) | |
217 : : "r" (src), "r" (dst), "g" (threshold1+1), "g" (threshold1+5), "g" (threshold1-4) //FIXME maybe more accurate then needed? | |
218 ); | |
219 dst[0]= (src[0] + 4)>>3; | |
220 } | |
11994 | 221 |
/*
 * MMX variant of the soft-threshold requantizer.
 *
 * asm operands: %0 = src, %1 = dst, %2 = threshold1 (qp*16 - 1),
 * %3 = the constant 4 (rounding term).  %2 and %3 are broadcast into all
 * four 16-bit lanes of mm4 and mm5 (movd + two packssdw).
 *
 * Each REQUANT_CORE expansion (redefined here, replacing the one from
 * hardthresh_mmx) handles four quadwords: it builds a sign mask with
 * pcmpgtw against zero, xors the value with that mask, subtracts the
 * threshold with unsigned saturation (psubusw), xors the mask back, adds
 * the rounding term (paddsw) and shifts right by 3.  The words of the four
 * registers are then interleaved (punpck*wd) and stored, so coefficients
 * are written in a different order than they are read.  The `permutation`
 * argument is never read here.
 * NOTE(review): presumably the store order hard-codes the permutation of
 * the MMX idct - confirm against the dsputil idct in use.
 *
 * The DC coefficient is recomputed in C after the asm block.
 *
 * NOTE(review): the asm statement declares no outputs and no clobbers even
 * though it stores through %1 and uses mm0-mm7 - confirm this is safe with
 * the compilers in use.
 */
222 static void softthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){ | |
223 int bias= 0; //FIXME | |
224 unsigned int threshold1; | |
225 | |
226 threshold1= qp*((1<<4) - bias) - 1; | |
227 | |
228 asm volatile( | |
229 #undef REQUANT_CORE | |
230 #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \ | |
231 "movq " #src0 ", %%mm0 \n\t"\ | |
232 "movq " #src1 ", %%mm1 \n\t"\ | |
233 "pxor %%mm6, %%mm6 \n\t"\ | |
234 "pxor %%mm7, %%mm7 \n\t"\ | |
235 "pcmpgtw %%mm0, %%mm6 \n\t"\ | |
236 "pcmpgtw %%mm1, %%mm7 \n\t"\ | |
237 "pxor %%mm6, %%mm0 \n\t"\ | |
238 "pxor %%mm7, %%mm1 \n\t"\ | |
239 "psubusw %%mm4, %%mm0 \n\t"\ | |
240 "psubusw %%mm4, %%mm1 \n\t"\ | |
241 "pxor %%mm6, %%mm0 \n\t"\ | |
242 "pxor %%mm7, %%mm1 \n\t"\ | |
243 "movq " #src2 ", %%mm2 \n\t"\ | |
244 "movq " #src3 ", %%mm3 \n\t"\ | |
245 "pxor %%mm6, %%mm6 \n\t"\ | |
246 "pxor %%mm7, %%mm7 \n\t"\ | |
247 "pcmpgtw %%mm2, %%mm6 \n\t"\ | |
248 "pcmpgtw %%mm3, %%mm7 \n\t"\ | |
249 "pxor %%mm6, %%mm2 \n\t"\ | |
250 "pxor %%mm7, %%mm3 \n\t"\ | |
251 "psubusw %%mm4, %%mm2 \n\t"\ | |
252 "psubusw %%mm4, %%mm3 \n\t"\ | |
253 "pxor %%mm6, %%mm2 \n\t"\ | |
254 "pxor %%mm7, %%mm3 \n\t"\ | |
255 \ | |
256 "paddsw %%mm5, %%mm0 \n\t"\ | |
257 "paddsw %%mm5, %%mm1 \n\t"\ | |
258 "paddsw %%mm5, %%mm2 \n\t"\ | |
259 "paddsw %%mm5, %%mm3 \n\t"\ | |
260 "psraw $3, %%mm0 \n\t"\ | |
261 "psraw $3, %%mm1 \n\t"\ | |
262 "psraw $3, %%mm2 \n\t"\ | |
263 "psraw $3, %%mm3 \n\t"\ | |
264 \ | |
265 "movq %%mm0, %%mm7 \n\t"\ | |
266 "punpcklwd %%mm2, %%mm0 \n\t" /*A*/\ | |
267 "punpckhwd %%mm2, %%mm7 \n\t" /*C*/\ | |
268 "movq %%mm1, %%mm2 \n\t"\ | |
269 "punpcklwd %%mm3, %%mm1 \n\t" /*B*/\ | |
270 "punpckhwd %%mm3, %%mm2 \n\t" /*D*/\ | |
271 "movq %%mm0, %%mm3 \n\t"\ | |
272 "punpcklwd %%mm1, %%mm0 \n\t" /*A*/\ | |
273 "punpckhwd %%mm7, %%mm3 \n\t" /*C*/\ | |
13393 | 274 "punpcklwd %%mm2, %%mm7 \n\t" /*B*/\ |
11994 | 275 "punpckhwd %%mm2, %%mm1 \n\t" /*D*/\ |
276 \ | |
277 "movq %%mm0, " #dst0 " \n\t"\ | |
278 "movq %%mm7, " #dst1 " \n\t"\ | |
279 "movq %%mm3, " #dst2 " \n\t"\ | |
280 "movq %%mm1, " #dst3 " \n\t" | |
281 | |
282 "movd %2, %%mm4 \n\t" | |
283 "movd %3, %%mm5 \n\t" | |
284 "packssdw %%mm4, %%mm4 \n\t" | |
285 "packssdw %%mm5, %%mm5 \n\t" | |
286 "packssdw %%mm4, %%mm4 \n\t" | |
287 "packssdw %%mm5, %%mm5 \n\t" | |
288 REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0)) | |
289 REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0)) | |
290 REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0)) | |
291 REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0)) | |
292 : : "r" (src), "r" (dst), "g" (threshold1), "rm" (4) //FIXME maybe more accurate then needed? | |
293 ); | |
294 | |
295 dst[0]= (src[0] + 4)>>3; | |
296 } | |
11301 | 297 #endif |
298 | |
11277 | 299 static inline void add_block(int16_t *dst, int stride, DCTELEM block[64]){ |
11280 | 300 int y; |
11277 | 301 |
302 for(y=0; y<8; y++){ | |
11280 | 303 *(uint32_t*)&dst[0 + y*stride]+= *(uint32_t*)&block[0 + y*8]; |
304 *(uint32_t*)&dst[2 + y*stride]+= *(uint32_t*)&block[2 + y*8]; | |
305 *(uint32_t*)&dst[4 + y*stride]+= *(uint32_t*)&block[4 + y*8]; | |
306 *(uint32_t*)&dst[6 + y*stride]+= *(uint32_t*)&block[6 + y*8]; | |
11277 | 307 } |
308 } | |
309 | |
11305 | 310 static void store_slice_c(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale){ |
11299 | 311 int y, x; |
312 | |
313 #define STORE(pos) \ | |
314 temp= ((src[x + y*src_stride + pos]<<log2_scale) + d[pos])>>6;\ | |
315 if(temp & 0x100) temp= ~(temp>>31);\ | |
316 dst[x + y*dst_stride + pos]= temp; | |
317 | |
11305 | 318 for(y=0; y<height; y++){ |
12157 | 319 const uint8_t *d= dither[y]; |
11299 | 320 for(x=0; x<width; x+=8){ |
321 int temp; | |
322 STORE(0); | |
323 STORE(1); | |
324 STORE(2); | |
325 STORE(3); | |
326 STORE(4); | |
327 STORE(5); | |
328 STORE(6); | |
329 STORE(7); | |
330 } | |
331 } | |
332 } | |
333 | |
334 #ifdef HAVE_MMX | |
/*
 * MMX variant of store_slice: converts a slice of the int16_t accumulator
 * to 8-bit pixels, one row per asm statement.
 *
 * asm operands: %0 = src1 (row read pointer, updated), %1 = dst1 (row
 * write pointer, updated), %2 = dst + width (end of the output row),
 * %3 = dither[y], %4 = log2_scale, %5 = 6 - log2_scale.
 *
 * Per row: the 8 dither bytes are unpacked to words in mm3/mm4 and shifted
 * right by log2_scale.  The loop at label 1 then loads 16 accumulator
 * bytes (8 values), adds the dither words, shifts right by 6-log2_scale,
 * packs to unsigned bytes with saturation (packuswb) and stores 8 pixels,
 * advancing src by 16 bytes and dst by 8 until dst reaches %2.
 * Note that the C version shifts the sample left by log2_scale and then
 * right by 6, whereas this version pre-shifts the dither value instead.
 *
 * NOTE(review): the asm statement lists no clobbers although it writes
 * memory through %1 and uses mm0-mm4 - confirm this is safe with the
 * compilers in use.
 */
11305 | 335 static void store_slice_mmx(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale){ |
11299 | 336 int y; |
337 | |
11305 | 338 for(y=0; y<height; y++){ |
11299 | 339 uint8_t *dst1= dst; |
340 int16_t *src1= src; | |
341 asm volatile( | |
342 "movq (%3), %%mm3 \n\t" | |
343 "movq (%3), %%mm4 \n\t" | |
344 "movd %4, %%mm2 \n\t" | |
345 "pxor %%mm0, %%mm0 \n\t" | |
346 "punpcklbw %%mm0, %%mm3 \n\t" | |
347 "punpckhbw %%mm0, %%mm4 \n\t" | |
348 "psraw %%mm2, %%mm3 \n\t" | |
349 "psraw %%mm2, %%mm4 \n\t" | |
350 "movd %5, %%mm2 \n\t" | |
351 "1: \n\t" | |
352 "movq (%0), %%mm0 \n\t" | |
353 "movq 8(%0), %%mm1 \n\t" | |
354 "paddw %%mm3, %%mm0 \n\t" | |
355 "paddw %%mm4, %%mm1 \n\t" | |
356 "psraw %%mm2, %%mm0 \n\t" | |
357 "psraw %%mm2, %%mm1 \n\t" | |
358 "packuswb %%mm1, %%mm0 \n\t" | |
359 "movq %%mm0, (%1) \n\t" | |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
13393
diff
changeset
|
360 "add $16, %0 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
13393
diff
changeset
|
361 "add $8, %1 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
13393
diff
changeset
|
362 "cmp %2, %1 \n\t" |
11299 | 363 " jb 1b \n\t" |
364 : "+r" (src1), "+r"(dst1) | |
365 : "r"(dst + width), "r"(dither[y]), "g"(log2_scale), "g"(6-log2_scale) | |
366 ); | |
367 src += src_stride; | |
368 dst += dst_stride; | |
369 } | |
370 // if(width != mmxw) | |
371 // store_slice_c(dst + mmxw, src + mmxw, dst_stride, src_stride, width - mmxw, log2_scale); | |
372 } | |
373 #endif | |
374 | |
11305 | 375 static void (*store_slice)(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale)= store_slice_c; |
11299 | 376 |
11993
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
377 static void (*requantize)(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation)= hardthresh_c; |
11301 | 378 |
11277 | 379 static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma){ |
380 int x, y, i; | |
381 const int count= 1<<p->log2_count; | |
11307 | 382 const int stride= is_luma ? p->temp_stride : ((width+16+15)&(~15)); |
17082 | 383 uint64_t __attribute__((aligned(16))) block_align[32]; |
11277 | 384 DCTELEM *block = (DCTELEM *)block_align; |
385 DCTELEM *block2= (DCTELEM *)(block_align+16); | |
11301 | 386 |
13729 | 387 if (!src || !dst) return; // HACK avoid crash for Y8 colourspace |
11277 | 388 for(y=0; y<height; y++){ |
11299 | 389 int index= 8 + 8*stride + y*stride; |
390 memcpy(p->src + index, src + y*src_stride, width); | |
11277 | 391 for(x=0; x<8; x++){ |
392 p->src[index - x - 1]= p->src[index + x ]; | |
393 p->src[index + width + x ]= p->src[index + width - x - 1]; | |
394 } | |
395 } | |
396 for(y=0; y<8; y++){ | |
397 memcpy(p->src + ( 7-y)*stride, p->src + ( y+8)*stride, stride); | |
11281 | 398 memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride); |
11277 | 399 } |
400 //FIXME (try edge emu) | |
401 | |
402 for(y=0; y<height+8; y+=8){ | |
11299 | 403 memset(p->temp + (8+y)*stride, 0, 8*stride*sizeof(int16_t)); |
11277 | 404 for(x=0; x<width+8; x+=8){ |
405 const int qps= 3 + is_luma; | |
406 int qp; | |
407 | |
408 if(p->qp) | |
409 qp= p->qp; | |
410 else{ | |
411 qp= qp_store[ (XMIN(x, width-1)>>qps) + (XMIN(y, height-1)>>qps) * qp_stride]; | |
412 if(p->mpeg2) qp>>=1; | |
413 } | |
414 for(i=0; i<count; i++){ | |
11298 | 415 const int x1= x + offset[i+count-1][0]; |
416 const int y1= y + offset[i+count-1][1]; | |
11277 | 417 const int index= x1 + y1*stride; |
418 p->dsp.get_pixels(block, p->src + index, stride); | |
419 p->dsp.fdct(block); | |
420 requantize(block2, block, qp, p->dsp.idct_permutation); | |
421 p->dsp.idct(block2); | |
422 add_block(p->temp + index, stride, block2); | |
423 } | |
424 } | |
11299 | 425 if(y) |
11305 | 426 store_slice(dst + (y-8)*dst_stride, p->temp + 8 + y*stride, dst_stride, stride, width, XMIN(8, height+8-y), 6-p->log2_count); |
11277 | 427 } |
11295 | 428 #if 0 |
429 for(y=0; y<height; y++){ | |
430 for(x=0; x<width; x++){ | |
431 if((((x>>6) ^ (y>>6)) & 1) == 0) | |
432 dst[x + y*dst_stride]= p->src[8 + 8*stride + x + y*stride]; | |
433 if((x&63) == 0 || (y&63)==0) | |
434 dst[x + y*dst_stride] += 128; | |
435 } | |
436 } | |
437 #endif | |
11277 | 438 //FIXME reorder for better caching |
439 } | |
440 | |
441 static int config(struct vf_instance_s* vf, | |
442 int width, int height, int d_width, int d_height, | |
443 unsigned int flags, unsigned int outfmt){ | |
11305 | 444 int h= (height+16+15)&(~15); |
11277 | 445 |
446 vf->priv->temp_stride= (width+16+15)&(~15); | |
11305 | 447 vf->priv->temp= malloc(vf->priv->temp_stride*h*sizeof(int16_t)); |
448 vf->priv->src = malloc(vf->priv->temp_stride*h*sizeof(uint8_t)); | |
11277 | 449 |
450 return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt); | |
451 } | |
452 | |
453 static void get_image(struct vf_instance_s* vf, mp_image_t *mpi){ | |
454 if(mpi->flags&MP_IMGFLAG_PRESERVE) return; // don't change | |
455 // ok, we can do pp in-place (or pp disabled): | |
456 vf->dmpi=vf_get_image(vf->next,mpi->imgfmt, | |
16018
bdf1b4ecb906
use stored dimensions instead of visible one when (vf_)get_image is called
iive
parents:
15965
diff
changeset
|
457 mpi->type, mpi->flags | MP_IMGFLAG_READABLE, mpi->width, mpi->height); |
11277 | 458 mpi->planes[0]=vf->dmpi->planes[0]; |
459 mpi->stride[0]=vf->dmpi->stride[0]; | |
460 mpi->width=vf->dmpi->width; | |
461 if(mpi->flags&MP_IMGFLAG_PLANAR){ | |
462 mpi->planes[1]=vf->dmpi->planes[1]; | |
463 mpi->planes[2]=vf->dmpi->planes[2]; | |
464 mpi->stride[1]=vf->dmpi->stride[1]; | |
465 mpi->stride[2]=vf->dmpi->stride[2]; | |
466 } | |
467 mpi->flags|=MP_IMGFLAG_DIRECT; | |
468 } | |
469 | |
470 static int put_image(struct vf_instance_s* vf, mp_image_t *mpi){ | |
471 mp_image_t *dmpi; | |
472 | |
473 if(!(mpi->flags&MP_IMGFLAG_DIRECT)){ | |
474 // no DR, so get a new image! hope we'll get DR buffer: | |
14022
803be9a78854
removing strange csp matching code (was copy&pasted from vf_pp where it originated from arpi 2.5 years ago) -> fixes spp+scale+x11 crash
michael
parents:
13729
diff
changeset
|
475 dmpi=vf_get_image(vf->next,mpi->imgfmt, |
12631
6cbccc0c7d7b
Fix memory corruption, noticable at reallocate image
iive
parents:
12163
diff
changeset
|
476 MP_IMGTYPE_TEMP, |
6cbccc0c7d7b
Fix memory corruption, noticable at reallocate image
iive
parents:
12163
diff
changeset
|
477 MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE, |
16018
bdf1b4ecb906
use stored dimensions instead of visible one when (vf_)get_image is called
iive
parents:
15965
diff
changeset
|
478 mpi->width,mpi->height); |
12631
6cbccc0c7d7b
Fix memory corruption, noticable at reallocate image
iive
parents:
12163
diff
changeset
|
479 vf_clone_mpi_attributes(dmpi, mpi); |
6cbccc0c7d7b
Fix memory corruption, noticable at reallocate image
iive
parents:
12163
diff
changeset
|
480 }else{ |
13149 | 481 dmpi=vf->dmpi; |
12631
6cbccc0c7d7b
Fix memory corruption, noticable at reallocate image
iive
parents:
12163
diff
changeset
|
482 } |
11277 | 483 |
484 vf->priv->mpeg2= mpi->qscale_type; | |
17110
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
485 if(mpi->pict_type != 3 && mpi->qscale && !vf->priv->qp){ |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
486 if(!vf->priv->non_b_qp) |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
487 vf->priv->non_b_qp= malloc(mpi->qstride * mpi->h); |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
488 memcpy(vf->priv->non_b_qp, mpi->qscale, mpi->qstride * mpi->h); |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
489 } |
11307 | 490 if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){ |
17110
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
491 char *qp_tab= vf->priv->non_b_qp; |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
492 if((vf->priv->mode&4) || !qp_tab) |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
493 qp_tab= mpi->qscale; |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
494 |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
495 if(qp_tab || vf->priv->qp){ |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
496 filter(vf->priv, dmpi->planes[0], mpi->planes[0], dmpi->stride[0], mpi->stride[0], mpi->w, mpi->h, qp_tab, mpi->qstride, 1); |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
497 filter(vf->priv, dmpi->planes[1], mpi->planes[1], dmpi->stride[1], mpi->stride[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0); |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
498 filter(vf->priv, dmpi->planes[2], mpi->planes[2], dmpi->stride[2], mpi->stride[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0); |
11477
9785bff83777
memcpy pix instead of black screen if no DR and codec provides no qscale table
michael
parents:
11335
diff
changeset
|
499 }else{ |
9785bff83777
memcpy pix instead of black screen if no DR and codec provides no qscale table
michael
parents:
11335
diff
changeset
|
500 memcpy_pic(dmpi->planes[0], mpi->planes[0], mpi->w, mpi->h, dmpi->stride[0], mpi->stride[0]); |
9785bff83777
memcpy pix instead of black screen if no DR and codec provides no qscale table
michael
parents:
11335
diff
changeset
|
501 memcpy_pic(dmpi->planes[1], mpi->planes[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[1], mpi->stride[1]); |
9785bff83777
memcpy pix instead of black screen if no DR and codec provides no qscale table
michael
parents:
11335
diff
changeset
|
502 memcpy_pic(dmpi->planes[2], mpi->planes[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[2], mpi->stride[2]); |
11308
0c8d12a58a29
skip filter if codec doesnt provide the QP array and user didnt force a QP (fixes diegos segfault)
michael
parents:
11307
diff
changeset
|
503 } |
11307 | 504 } |
11277 | 505 |
506 #ifdef HAVE_MMX | |
507 if(gCpuCaps.hasMMX) asm volatile ("emms\n\t"); | |
508 #endif | |
509 #ifdef HAVE_MMX2 | |
510 if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t"); | |
511 #endif | |
512 | |
513 return vf_next_put_image(vf,dmpi); | |
514 } | |
515 | |
516 static void uninit(struct vf_instance_s* vf){ | |
517 if(!vf->priv) return; | |
518 | |
519 if(vf->priv->temp) free(vf->priv->temp); | |
520 vf->priv->temp= NULL; | |
521 if(vf->priv->src) free(vf->priv->src); | |
522 vf->priv->src= NULL; | |
523 if(vf->priv->avctx) free(vf->priv->avctx); | |
524 vf->priv->avctx= NULL; | |
17110
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
525 if(vf->priv->non_b_qp) free(vf->priv->non_b_qp); |
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
526 vf->priv->non_b_qp= NULL; |
11277 | 527 |
528 free(vf->priv); | |
529 vf->priv=NULL; | |
530 } | |
531 | |
532 //===========================================================================// | |
533 static int query_format(struct vf_instance_s* vf, unsigned int fmt){ | |
11307 | 534 switch(fmt){ |
535 case IMGFMT_YVU9: | |
536 case IMGFMT_IF09: | |
11277 | 537 case IMGFMT_YV12: |
538 case IMGFMT_I420: | |
539 case IMGFMT_IYUV: | |
11307 | 540 case IMGFMT_CLPL: |
541 case IMGFMT_Y800: | |
542 case IMGFMT_Y8: | |
543 case IMGFMT_444P: | |
544 case IMGFMT_422P: | |
545 case IMGFMT_411P: | |
546 return vf_next_query_format(vf,fmt); | |
547 } | |
548 return 0; | |
11277 | 549 } |
550 | |
551 static unsigned int fmt_list[]={ | |
11307 | 552 IMGFMT_YVU9, |
553 IMGFMT_IF09, | |
554 IMGFMT_YV12, | |
555 IMGFMT_I420, | |
556 IMGFMT_IYUV, | |
557 IMGFMT_CLPL, | |
558 IMGFMT_Y800, | |
559 IMGFMT_Y8, | |
560 IMGFMT_444P, | |
561 IMGFMT_422P, | |
562 IMGFMT_411P, | |
563 0 | |
11277 | 564 }; |
565 | |
11307 | 566 static int control(struct vf_instance_s* vf, int request, void* data){ |
567 switch(request){ | |
568 case VFCTRL_QUERY_MAX_PP_LEVEL: | |
569 return 6; | |
570 case VFCTRL_SET_PP_LEVEL: | |
571 vf->priv->log2_count= *((unsigned int*)data); | |
572 return CONTROL_TRUE; | |
573 } | |
574 return vf_next_control(vf,request,data); | |
575 } | |
576 | |
11277 | 577 static int open(vf_instance_t *vf, char* args){ |
12157 | 578 |
14022
803be9a78854
removing strange csp matching code (was copy&pasted from vf_pp where it originated from arpi 2.5 years ago) -> fixes spp+scale+x11 crash
michael
parents:
13729
diff
changeset
|
579 int log2c=-1; |
12157 | 580 |
11277 | 581 vf->config=config; |
582 vf->put_image=put_image; | |
583 vf->get_image=get_image; | |
584 vf->query_format=query_format; | |
585 vf->uninit=uninit; | |
11307 | 586 vf->control= control; |
11277 | 587 vf->priv=malloc(sizeof(struct vf_priv_s)); |
588 memset(vf->priv, 0, sizeof(struct vf_priv_s)); | |
11280 | 589 |
590 avcodec_init(); | |
591 | |
11277 | 592 vf->priv->avctx= avcodec_alloc_context(); |
593 dsputil_init(&vf->priv->dsp, vf->priv->avctx); | |
11993
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
594 |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
595 vf->priv->log2_count= 3; |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
596 |
12157 | 597 if (args) sscanf(args, "%d:%d:%d", &log2c, &vf->priv->qp, &vf->priv->mode); |
598 | |
599 if( log2c >=0 && log2c <=6 ) | |
600 vf->priv->log2_count = log2c; | |
601 | |
602 if(vf->priv->qp < 0) | |
603 vf->priv->qp = 0; | |
604 | |
17110
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
605 switch(vf->priv->mode&3){ |
12157 | 606 default: |
11993
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
607 case 0: requantize= hardthresh_c; break; |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
608 case 1: requantize= softthresh_c; break; |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
609 } |
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
610 |
11304 | 611 #ifdef HAVE_MMX |
11299 | 612 if(gCpuCaps.hasMMX){ |
613 store_slice= store_slice_mmx; | |
17110
ba9bf9beb23f
prevent flicker, to get old behaviour use spp=x:y:4 / x:y:5
michael
parents:
17082
diff
changeset
|
614 switch(vf->priv->mode&3){ |
11993
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
615 case 0: requantize= hardthresh_mmx; break; |
11994 | 616 case 1: requantize= softthresh_mmx; break; |
11993
4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
michael
parents:
11568
diff
changeset
|
617 } |
11299 | 618 } |
11304 | 619 #endif |
11299 | 620 |
11277 | 621 return 1; |
622 } | |
623 | |
624 vf_info_t vf_info_spp = { | |
625 "simple postprocess", | |
626 "spp", | |
627 "Michael Niedermayer", | |
628 "", | |
629 open, | |
630 NULL | |
631 }; | |
632 | |
11335 | 633 #endif //USE_LIBAVCODEC |