Mercurial > mplayer.hg
annotate libmpcodecs/vf_ass.c @ 36835:6bc38224c74a
Fix bug with Win32 GUI preferences.
The combobox entry must not be used as the priority option's name.
Translate between the text presented to the user and the internal
name to be used.
author | ib |
---|---|
date | Mon, 24 Feb 2014 15:20:34 +0000 |
parents | b4ce15212bfc |
children |
rev | line source |
---|---|
20008
fa122b7c71c6
Add copyright notice and vim/emacs comments to libass and vf_ass.c.
eugeni
parents:
19563
diff
changeset
|
1 /* |
26727 | 2 * Copyright (C) 2006 Evgeniy Stepanov <eugeni.stepanov@gmail.com> |
35244 | 3 * Copyright (C) 2012 Xidorn Quan <quanxunzhen@gmail.com> |
26727 | 4 * |
5 * This file is part of MPlayer. | |
6 * | |
7 * MPlayer is free software; you can redistribute it and/or modify | |
8 * it under the terms of the GNU General Public License as published by | |
9 * the Free Software Foundation; either version 2 of the License, or | |
10 * (at your option) any later version. | |
11 * | |
12 * MPlayer is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 * GNU General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU General Public License along | |
18 * with MPlayer; if not, write to the Free Software Foundation, Inc., | |
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
20 */ | |
20008
fa122b7c71c6
Add copyright notice and vim/emacs comments to libass and vf_ass.c.
eugeni
parents:
19563
diff
changeset
|
21 |
18937 | 22 #include "config.h" |
23 | |
24 #include <stdio.h> | |
25 #include <stdlib.h> | |
26 #include <string.h> | |
24545
9e5126679d44
Replace stdint.h #include by functionally equivalent inttypes.h.
diego
parents:
23134
diff
changeset
|
27 #include <inttypes.h> |
18937 | 28 #include <assert.h> |
29 | |
30 #include "config.h" | |
31 #include "mp_msg.h" | |
32 #include "help_mp.h" | |
31489 | 33 #include "mpcommon.h" |
18937 | 34 #include "img_format.h" |
35 #include "mp_image.h" | |
30653
3d23e24c5c60
Declare externally used variables from vd.c as extern in vd.h.
diego
parents:
30642
diff
changeset
|
36 #include "vd.h" |
18937 | 37 #include "vf.h" |
38 | |
39 #include "libvo/fastmemcpy.h" | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
40 #include "libavutil/intreadwrite.h" |
32467 | 41 #include "sub/sub.h" |
18937 | 42 #include "m_option.h" |
43 #include "m_struct.h" | |
44 | |
32461 | 45 #include "sub/ass_mp.h" |
32460 | 46 #include "sub/eosd.h" |
18937 | 47 |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
48 #include "cpudetect.h" |
35705
b4ce15212bfc
Replace obsolete x86_cpu.h #includes by the correct header.
diego
parents:
35618
diff
changeset
|
49 #include "libavutil/x86/asm.h" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
50 |
/*
 * Component extractors for a packed 32-bit colour: red is taken from bits
 * 24-31, green from 16-23, blue from 8-15 and alpha from bits 0-7.
 * _r() does not mask, so it relies on (c) being an unsigned 32-bit value
 * (NOTE(review): callers in this file pass uint32_t - confirm elsewhere).
 */
#define _r(c) ((c)>>24)
#define _g(c) (((c)>>16)&0xFF)
#define _b(c) (((c)>>8)&0xFF)
#define _a(c) ((c)&0xFF)
/*
 * RGB -> Y/U/V conversion in fixed point: the coefficients are scaled by
 * 1024 (hence the >> 10), then an offset of 16 (luma) or 128 (chroma) is
 * added.  NOTE(review): the coefficients look like limited-range BT.601 -
 * confirm.  For rgba2u/rgba2v the intermediate sum can be "negative"; with
 * an unsigned argument the result is only correct once truncated to 8 bits,
 * which is how this file uses it (assigned to uint8_t).
 */
#define rgba2y(c) ( (( 263*_r(c) + 516*_g(c) + 100*_b(c)) >> 10) + 16 )
#define rgba2u(c) ( ((-152*_r(c) - 298*_g(c) + 450*_b(c)) >> 10) + 128 )
#define rgba2v(c) ( (( 450*_r(c) - 376*_g(c) - 73*_b(c)) >> 10) + 128 )

/* map 0 - 0xFF -> 0 - 0x101 (RSHIFT is defined outside this file) */
#define MAP_16BIT(v) RSHIFT(0x102 * (v), 8)
/* map 0 - 0xFF -> 0 - 0x10101 */
#define MAP_24BIT(v) RSHIFT(0x10203 * (v), 8)
18937 | 63 |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
64 #if HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
65 |
/*
 * Constant vectors referenced by name (via MANGLE) from the SSE4 inline asm.
 * The first DECLARE_ASM_CONST argument is presumably the alignment in bytes
 * (macro defined outside this file - confirm).
 */
/* four 32-bit lanes of 0x80: added before the ">> 8" in the asm */
DECLARE_ASM_CONST(16, uint32_t, sse_int32_80h[4])
    = { 0x80, 0x80, 0x80, 0x80 };
/* four 32-bit lanes of 0x102: the same multiplier MAP_16BIT() uses */
DECLARE_ASM_CONST(16, uint32_t, sse_int32_map_factor[4])
    = { 0x102, 0x102, 0x102, 0x102 };
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
70 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
71 #endif // HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
72 |
24969
c2b7ba444ade
begin moving const filter data to .text/.rodata sections
rfelker
parents:
24545
diff
changeset
|
/*
 * Private state of one filter instance.  vf_priv_dflt is declared together
 * with the type; having static storage and no initializer it is all-zero
 * (presumably used as the default/template instance - confirm at the use
 * site, which is outside this view).
 */
static const struct vf_priv_s {
    // output height and width; outw is also the row pitch of the private
    // planes[] / alphas[] buffers below
    int outh, outw;

    int is_planar;
    // output image format (compared against IMGFMT_UYVY by the renderers)
    unsigned int outfmt;

    // 1 = auto-added filter: insert only if chain does not support EOSD already
    // 0 = insert always
    int auto_insert;

    // planar data to be directly rendered on frames
    // (index 0 = Y, 1 = U, 2 = V as used by the draw/render functions)
    uint8_t *planes[MP_MAX_PLANES];
    // alpha here is actually transparency, not opacity
    // (0xFF means "leave the frame pixel untouched")
    uint8_t *alphas[MP_MAX_PLANES];
    // per output row: horizontal extent [xmin, xmax) touched by subtitles
    struct dirty_rows_extent {
        int xmin, xmax;
    } *dirty_rows;

    // called for every eosd image when the subtitle has changed
    void (*draw_image)(vf_instance_t *, struct mp_eosd_image *);
    // called every time the subtitle has changed
    void (*prepare_buffer)(vf_instance_t *);
    // called for every frame
    void (*render_frame)(vf_instance_t *);
} vf_priv_dflt;
18937 | 98 |
35244 | 99 static void draw_image_yuv(vf_instance_t *vf, struct mp_eosd_image *img) |
100 { | |
101 uint32_t color = img->color; | |
102 uint32_t opacity = 0xFF - _a(color); | |
103 uint8_t y = rgba2y(color), | |
104 u = rgba2u(color), | |
105 v = rgba2v(color); | |
35268 | 106 int outw = vf->priv->outw; |
35244 | 107 uint8_t *alpha = vf->priv->alphas[0], |
108 *dst_y = vf->priv->planes[0], | |
109 *dst_u = vf->priv->planes[1], | |
110 *dst_v = vf->priv->planes[2]; | |
111 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
112 int src_x = img->dst_x, src_w = img->w, | |
113 src_y = img->dst_y, src_h = img->h, | |
114 stride = img->stride; | |
115 uint8_t *src = img->bitmap; | |
116 int i, j; | |
117 | |
118 opacity = MAP_24BIT(opacity); | |
119 for (i = 0; i < src_h; i++) { | |
120 struct dirty_rows_extent *dirty_row = &dirty_rows[src_y + i]; | |
121 dirty_row->xmin = FFMIN(dirty_row->xmin, src_x); | |
122 dirty_row->xmax = FFMAX(dirty_row->xmax, src_x + src_w); | |
123 | |
124 for (j = 0; j < src_w; j++) { | |
125 uint32_t k = src[i * stride + j]; | |
126 if (k) { | |
127 size_t p = (src_y + i) * outw + src_x + j; | |
128 k *= opacity; | |
129 alpha[p] = RSHIFT((0xFFFFFF - k) * alpha[p], 24); | |
130 dst_y[p] = RSHIFT((0xFFFFFF - k) * dst_y[p] + k * y, 24); | |
131 dst_u[p] = RSHIFT((0xFFFFFF - k) * dst_u[p] + k * u, 24); | |
132 dst_v[p] = RSHIFT((0xFFFFFF - k) * dst_v[p] + k * v, 24); | |
133 } | |
134 } | |
135 } | |
136 } | |
137 | |
138 static void prepare_buffer_422(vf_instance_t *vf) | |
139 { | |
140 uint8_t *dst_u = vf->priv->planes[1], | |
141 *dst_v = vf->priv->planes[2]; | |
142 int outw = vf->priv->outw, | |
143 outh = vf->priv->outh; | |
144 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
145 int i, j; | |
146 | |
147 for (i = 0; i < outh; i++) { | |
148 int xmin = dirty_rows[i].xmin & ~1, | |
149 xmax = dirty_rows[i].xmax; | |
150 for (j = xmin; j < xmax; j += 2) { | |
151 size_t p = i * outw + j; | |
152 dst_u[p] = (dst_u[p] + dst_u[p + 1]) / 2; | |
153 dst_v[p] = (dst_v[p] + dst_v[p + 1]) / 2; | |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
154 dst_u[p + 1] = dst_v[p + 1] = 0; |
35244 | 155 } |
156 } | |
157 } | |
158 | |
159 static void render_frame_yuv422(vf_instance_t *vf) | |
160 { | |
161 uint8_t *alpha = vf->priv->alphas[0]; | |
162 uint8_t *src_y = vf->priv->planes[0], | |
163 *src_u = vf->priv->planes[1], | |
164 *src_v = vf->priv->planes[2]; | |
165 int outw = vf->priv->outw, | |
166 outh = vf->priv->outh; | |
167 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
168 uint8_t *dest = vf->dmpi->planes[0]; | |
169 int stride = vf->dmpi->stride[0]; | |
170 int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY; | |
171 int i, j; | |
172 | |
173 for (i = 0; i < outh; i++) { | |
174 int xmin = dirty_rows[i].xmin & ~1, | |
175 xmax = dirty_rows[i].xmax; | |
176 for (j = xmin; j < xmax; j += 2) { | |
177 size_t src = i * outw + j, | |
178 dst = i * stride + j * 2; | |
179 uint_fast16_t a0 = alpha[src], | |
180 a1 = alpha[src + 1]; | |
181 uint8_t y0, y1, u, v; | |
182 | |
183 if (a0 == 0xFF && a1 == 0xFF) | |
184 continue; | |
185 | |
186 y0 = dest[dst + is_uyvy + 0]; | |
187 y1 = dest[dst + is_uyvy + 2]; | |
188 u = dest[dst - is_uyvy + 1]; | |
189 v = dest[dst - is_uyvy + 3]; | |
190 | |
191 a0 = MAP_16BIT(a0); | |
192 a1 = MAP_16BIT(a1); | |
193 y0 = ((a0 * y0) >> 8) + src_y[src]; | |
194 y1 = ((a1 * y1) >> 8) + src_y[src + 1]; | |
195 | |
196 a0 = (a0 + a1) / 2; | |
197 u = ((a0 * u) >> 8) + src_u[src]; | |
198 v = ((a0 * v) >> 8) + src_v[src]; | |
199 | |
200 dest[dst + is_uyvy + 0] = y0; | |
201 dest[dst + is_uyvy + 2] = y1; | |
202 dest[dst - is_uyvy + 1] = u; | |
203 dest[dst - is_uyvy + 3] = v; | |
204 } | |
205 } | |
206 } | |
207 | |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
208 #if HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
209 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
210 static void render_frame_yuv422_sse4(vf_instance_t *vf) |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
211 { |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
212 uint8_t *alpha = vf->priv->alphas[0]; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
213 uint8_t *src_y = vf->priv->planes[0], |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
214 *src_u = vf->priv->planes[1], |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
215 *src_v = vf->priv->planes[2]; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
216 int outw = vf->priv->outw, |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
217 outh = vf->priv->outh; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
218 struct dirty_rows_extent *dr = vf->priv->dirty_rows; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
219 uint8_t *dst = vf->dmpi->planes[0]; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
220 int stride = vf->dmpi->stride[0]; |
35579
6b169870ae30
Reduce register usage to fix the compilation in x86.
upsuper
parents:
35576
diff
changeset
|
221 int32_t is_uyvy = vf->priv->outfmt == IMGFMT_UYVY; |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
222 int i; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
223 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
224 for (i = 0; i < outh; i++) { |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
225 size_t xmin = dr[i].xmin & ~7, |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
226 xmax = dr[i].xmax; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
227 __asm__ volatile ( |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
228 "pxor %%xmm7, %%xmm7 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
229 "jmp 4f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
230 "1: \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
231 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
232 "cmpl $-1, 0(%[alpha], %[j], 1) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
233 "jne 2f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
234 "cmpl $-1, 4(%[alpha], %[j], 1) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
235 "jne 2f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
236 "jmp 3f \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
237 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
238 "2: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
239 "movq (%[alpha], %[j], 1), %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
240 "punpcklbw %%xmm7, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
241 "movdqa %%xmm0, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
242 "punpcklwd %%xmm7, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
243 "punpckhwd %%xmm7, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
244 "pmulld "MANGLE(sse_int32_map_factor)", %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
245 "pmulld "MANGLE(sse_int32_map_factor)", %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
246 "paddd "MANGLE(sse_int32_80h)", %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
247 "paddd "MANGLE(sse_int32_80h)", %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
248 "psrld $8, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
249 "psrld $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
250 "movdqa %%xmm0, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
251 "movdqa %%xmm1, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
252 "packssdw %%xmm1, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
253 "phaddd %%xmm3, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
254 "psrld $1, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
255 "packssdw %%xmm7, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
256 "punpcklwd %%xmm2, %%xmm2 \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
257 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
258 "movdqu (%[dst], %[j], 2), %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
259 "movdqa %%xmm1, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
260 "cmpl $0, %[f] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
261 "je 11f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
262 "psrlw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
263 "psllw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
264 "psrlw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
265 "jmp 12f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
266 "11: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
267 "psllw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
268 "psrlw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
269 "psrlw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
270 "12: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
271 "pmullw %%xmm0, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
272 "pmullw %%xmm2, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
273 "psrlw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
274 "psrlw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
275 "packuswb %%xmm7, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
276 "packuswb %%xmm7, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
277 "mov %[src_y], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
278 "movq (%%"REG_S", %[j], 1), %%xmm4 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
279 "mov %[src_u], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
280 "movq (%%"REG_S", %[j], 1), %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
281 "mov %[src_v], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
282 "movq (%%"REG_S", %[j], 1), %%xmm6 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
283 "packuswb %%xmm7, %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
284 "packuswb %%xmm7, %%xmm6 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
285 "punpcklbw %%xmm6, %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
286 "cmpl $0, %[f] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
287 "je 21f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
288 "punpcklbw %%xmm1, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
289 "punpcklbw %%xmm4, %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
290 "paddb %%xmm5, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
291 "movdqu %%xmm3, (%[dst], %[j], 2) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
292 "jmp 22f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
293 "21: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
294 "punpcklbw %%xmm3, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
295 "punpcklbw %%xmm5, %%xmm4 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
296 "paddb %%xmm4, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
297 "movdqu %%xmm1, (%[dst], %[j], 2) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
298 "22: \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
299 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
300 "3: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
301 "add $8, %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
302 "4: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
303 "cmp %[xmax], %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
304 "jl 1b \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
305 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
306 : : [dst] "r" (dst + i * stride), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
307 [alpha] "r" (alpha + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
308 [src_y] "g" (src_y + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
309 [src_u] "g" (src_u + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
310 [src_v] "g" (src_v + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
311 [j] "r" (xmin), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
312 [xmax] "g" (xmax), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
313 [f] "g" (is_uyvy) |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
314 : REG_S |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
315 ); |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
316 } |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
317 } |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
318 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
319 #endif // HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
320 |
35244 | 321 static void prepare_buffer_420p(vf_instance_t *vf) |
322 { | |
323 int outw = vf->priv->outw, | |
324 outh = vf->priv->outh; | |
325 uint8_t *dst_u = vf->priv->planes[1], | |
326 *dst_v = vf->priv->planes[2]; | |
327 uint8_t *src_a = vf->priv->alphas[0], | |
328 *dst_a = vf->priv->alphas[1]; | |
329 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
330 int i, j; | |
331 | |
332 for (i = 0; i < outh; i += 2) { | |
333 int xmin = FFMIN(dirty_rows[i].xmin, dirty_rows[i + 1].xmin) & ~1, | |
334 xmax = FFMAX(dirty_rows[i].xmax, dirty_rows[i + 1].xmax); | |
335 for (j = xmin; j < xmax; j += 2) { | |
35618 | 336 size_t p = i * outw / 2 + j / 2, |
35244 | 337 q1 = i * outw + j, |
338 q2 = q1 + outw; | |
339 dst_a[p] = (src_a[q1] + src_a[q1 + 1] + | |
340 src_a[q2] + src_a[q2 + 1] + 2) / 4; | |
341 dst_u[p] = (dst_u[q1] + dst_u[q1 + 1] + | |
342 dst_u[q2] + dst_u[q2 + 1] + 2) / 4; | |
343 dst_v[p] = (dst_v[q1] + dst_v[q1 + 1] + | |
344 dst_v[q2] + dst_v[q2 + 1] + 2) / 4; | |
345 } | |
346 } | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
347 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
348 #if HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
349 // for render_frame_yuv420p_sse4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
350 if (gCpuCaps.hasSSE4 && outw % 32 == 0) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
351 for (i = 0; i < outh; i += 2) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
352 int xmin = FFMIN(dirty_rows[i].xmin, dirty_rows[i + 1].xmin) & ~1, |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
353 xmax = FFMAX(dirty_rows[i].xmax, dirty_rows[i + 1].xmax); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
354 if (xmin >= xmax) |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
355 continue; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
356 for (j = xmin & ~31; j < xmin; j += 2) { |
35618 | 357 size_t p = i * outw / 2 + j / 2; |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
358 dst_a[p] = 0xFF; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
359 dst_u[p] = dst_v[p] = 0; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
360 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
361 for (j = xmax; j < FFALIGN(xmax, 32); j += 2) { |
35618 | 362 size_t p = i * outw / 2 + j / 2; |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
363 dst_a[p] = 0xFF; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
364 dst_u[p] = dst_v[p] = 0; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
365 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
366 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
367 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
368 #endif // HAVE_SSE4 |
35244 | 369 } |
370 | |
371 static void render_frame_yuv420p(vf_instance_t *vf) | |
372 { | |
373 uint8_t **planes = vf->priv->planes; | |
374 uint8_t **dest = vf->dmpi->planes; | |
375 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
376 uint8_t *alpha; | |
35268 | 377 uint8_t *src_y = planes[0], |
378 *src_u = planes[1], | |
379 *src_v = planes[2]; | |
380 uint8_t *dst_y = dest[0], | |
381 *dst_u = dest[1], | |
382 *dst_v = dest[2]; | |
35244 | 383 int stride; |
384 int outw = vf->priv->outw, | |
385 outh = vf->priv->outh; | |
386 int i, j; | |
387 | |
388 // y | |
389 alpha = vf->priv->alphas[0]; | |
390 stride = vf->dmpi->stride[0]; | |
391 for (i = 0; i < outh; i++) { | |
392 int xmin = dirty_rows[i].xmin, | |
393 xmax = dirty_rows[i].xmax; | |
394 for (j = xmin; j < xmax; j++) { | |
395 size_t s = i * outw + j, | |
396 d = i * stride + j; | |
397 if (alpha[s] != 0xFF) | |
398 dst_y[d] = ((MAP_16BIT(alpha[s]) * dst_y[d]) >> 8) + src_y[s]; | |
399 } | |
400 } | |
401 | |
402 // u & v | |
403 alpha = vf->priv->alphas[1]; | |
404 stride = vf->dmpi->stride[1]; | |
405 for (i = 0; i < outh / 2; i++) { | |
406 int xmin = FFMIN(dirty_rows[i * 2].xmin, dirty_rows[i * 2 + 1].xmin), | |
407 xmax = FFMAX(dirty_rows[i * 2].xmax, dirty_rows[i * 2 + 1].xmax); | |
408 for (j = xmin / 2; j < (xmax + 1) / 2; j++) { | |
35618 | 409 size_t s = i * outw + j, |
35244 | 410 d = i * stride + j; |
411 if (alpha[s] != 0xFF) { | |
412 uint_fast16_t a = MAP_16BIT(alpha[s]); | |
413 dst_u[d] = ((a * dst_u[d]) >> 8) + src_u[s]; | |
414 dst_v[d] = ((a * dst_v[d]) >> 8) + src_v[s]; | |
415 } | |
416 } | |
417 } | |
418 } | |
419 | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
420 #if HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
421 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
/*
 * Asm fragment: test the 16 transparency bytes at alpha + j, four at a time.
 * If any 32-bit group differs from -1 (i.e. some byte is not 0xFF) control
 * continues at local label "2"; if all 16 bytes are 0xFF it jumps to local
 * label "3".  The including asm statement must define both labels and the
 * [alpha] and [j] operands.
 */
#define CHECK_16_ALPHA \
    "cmpl $-1, 0(%[alpha], %[j], 1) \n\t" \
    "jne 2f \n\t" \
    "cmpl $-1, 4(%[alpha], %[j], 1) \n\t" \
    "jne 2f \n\t" \
    "cmpl $-1, 8(%[alpha], %[j], 1) \n\t" \
    "jne 2f \n\t" \
    "cmpl $-1, 12(%[alpha], %[j], 1) \n\t" \
    "jne 2f \n\t" \
    "jmp 3f \n\t"
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
432 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
/*
 * Asm fragment: load the 16 transparency bytes at alpha + j and turn them
 * into sixteen 16-bit factors, computed per value as (a * 0x102 + 0x80) >> 8
 * using the sse_int32_map_factor / sse_int32_80h constants.
 * Result: factors 0-7 in xmm0, factors 8-15 in xmm2.  xmm1 and xmm3 are used
 * as scratch.  xmm7 is used as the zero source for unpacking, so it is
 * expected to be zero on entry (NOTE(review): confirm in the including asm).
 */
#define MAP_16_ALPHA \
    "movq 0(%[alpha], %[j], 1), %%xmm0 \n\t" \
    "movq 8(%[alpha], %[j], 1), %%xmm2 \n\t" \
    "punpcklbw %%xmm7, %%xmm0 \n\t" \
    "punpcklbw %%xmm7, %%xmm2 \n\t" \
    "movdqa %%xmm0, %%xmm1 \n\t" \
    "movdqa %%xmm2, %%xmm3 \n\t" \
    "punpcklwd %%xmm7, %%xmm0 \n\t" \
    "punpckhwd %%xmm7, %%xmm1 \n\t" \
    "punpcklwd %%xmm7, %%xmm2 \n\t" \
    "punpckhwd %%xmm7, %%xmm3 \n\t" \
    "pmulld "MANGLE(sse_int32_map_factor)", %%xmm0 \n\t"\
    "pmulld "MANGLE(sse_int32_map_factor)", %%xmm1 \n\t"\
    "pmulld "MANGLE(sse_int32_map_factor)", %%xmm2 \n\t"\
    "pmulld "MANGLE(sse_int32_map_factor)", %%xmm3 \n\t"\
    "paddd "MANGLE(sse_int32_80h)", %%xmm0 \n\t" \
    "paddd "MANGLE(sse_int32_80h)", %%xmm1 \n\t" \
    "paddd "MANGLE(sse_int32_80h)", %%xmm2 \n\t" \
    "paddd "MANGLE(sse_int32_80h)", %%xmm3 \n\t" \
    "psrld $8, %%xmm0 \n\t" \
    "psrld $8, %%xmm1 \n\t" \
    "psrld $8, %%xmm2 \n\t" \
    "psrld $8, %%xmm3 \n\t" \
    "packssdw %%xmm1, %%xmm0 \n\t" \
    "packssdw %%xmm3, %%xmm2 \n\t"
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
458 |
35584 | 459 #define DO_RENDER \ |
35602 | 460 "movq 0(%%"REG_D", %[j], 1), %%xmm1 \n\t" \ |
461 "movq 8(%%"REG_D", %[j], 1), %%xmm3 \n\t" \ | |
462 "punpcklbw %%xmm7, %%xmm1 \n\t" \ | |
463 "punpcklbw %%xmm7, %%xmm3 \n\t" \ | |
464 "pmullw %%xmm0, %%xmm1 \n\t" \ | |
465 "pmullw %%xmm2, %%xmm3 \n\t" \ | |
466 "psrlw $8, %%xmm1 \n\t" \ | |
467 "psrlw $8, %%xmm3 \n\t" \ | |
468 "packuswb %%xmm3, %%xmm1 \n\t" \ | |
469 "movdqa (%%"REG_S", %[j], 1), %%xmm4 \n\t" \ | |
470 "paddb %%xmm4, %%xmm1 \n\t" \ | |
471 "movdqu %%xmm1, (%%"REG_D", %[j], 1) \n\t" | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
472 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
473 static void render_frame_yuv420p_sse4(vf_instance_t *vf) |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
474 { |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
475 struct dirty_rows_extent *dr = vf->priv->dirty_rows; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
476 uint8_t *alpha; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
477 uint8_t *src_y = vf->priv->planes[0], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
478 *src_u = vf->priv->planes[1], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
479 *src_v = vf->priv->planes[2]; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
480 uint8_t *dst_y = vf->dmpi->planes[0], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
481 *dst_u = vf->dmpi->planes[1], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
482 *dst_v = vf->dmpi->planes[2]; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
483 int stride; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
484 int outw = vf->priv->outw, |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
485 outh = vf->priv->outh; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
486 int i; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
487 |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
488 // y |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
489 alpha = vf->priv->alphas[0]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
490 stride = vf->dmpi->stride[0]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
491 for (i = 0; i < outh; i++) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
492 size_t xmin = dr[i].xmin & ~15, |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
493 xmax = dr[i].xmax; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
494 __asm__ volatile ( |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
495 "pxor %%xmm7, %%xmm7 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
496 "jmp 4f \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
497 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
498 "1: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
499 CHECK_16_ALPHA |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
500 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
501 "2: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
502 MAP_16_ALPHA |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
503 DO_RENDER |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
504 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
505 "3: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
506 "add $16, %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
507 "4: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
508 "cmp %[xmax], %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
509 "jl 1b \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
510 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
511 : : [j] "r" (xmin), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
512 [xmax] "g" (xmax), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
513 [alpha] "r" (alpha + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
514 [src] "S" (src_y + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
515 [dst] "D" (dst_y + i * stride) |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
516 ); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
517 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
518 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
519 // u & v |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
520 alpha = vf->priv->alphas[1]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
521 stride = vf->dmpi->stride[1]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
522 for (i = 0; i < outh / 2; i++) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
523 size_t xmin = FFMIN(dr[i * 2].xmin, dr[i * 2 + 1].xmin) & ~31, |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
524 xmax = FFMAX(dr[i * 2].xmax, dr[i * 2 + 1].xmax); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
525 __asm__ volatile ( |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
526 "pxor %%xmm7, %%xmm7 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
527 "jmp 4f \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
528 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
529 "1: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
530 CHECK_16_ALPHA |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
531 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
532 "2: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
533 MAP_16_ALPHA |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
534 "mov %[src_u], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
535 "mov %[dst_u], %%"REG_D" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
536 DO_RENDER |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
537 "mov %[src_v], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
538 "mov %[dst_v], %%"REG_D" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
539 DO_RENDER |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
540 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
541 "3: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
542 "add $16, %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
543 "4: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
544 "cmp %[xmax], %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
545 "jl 1b \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
546 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
547 : : [j] "r" (xmin / 2), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
548 [xmax] "g" ((xmax + 1) / 2), |
35618 | 549 [alpha] "r" (alpha + i * outw), |
550 [src_u] "g" (src_u + i * outw), | |
551 [src_v] "g" (src_v + i * outw), | |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
552 [dst_u] "g" (dst_u + i * stride), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
553 [dst_v] "g" (dst_v + i * stride) |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
554 : REG_S, REG_D |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
555 ); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
556 } |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
557 } |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
558 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
559 #undef CHECK_16_ALPHA |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
560 #undef MAP_16_ALPHA |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
561 #undef MUL_ALPHA |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
562 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
563 #endif // HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
564 |
35244 | 565 static void clean_buffer(vf_instance_t *vf) |
566 { | |
567 int outw = vf->priv->outw, | |
568 outh = vf->priv->outh; | |
569 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
570 uint8_t **planes = vf->priv->planes; | |
571 uint8_t *alpha = vf->priv->alphas[0]; | |
572 int i, j; | |
573 | |
35618 | 574 if (vf->priv->prepare_buffer == prepare_buffer_420p) { |
575 // HACK: prepare_buffer_420p touched u & v planes | |
576 // so we want to clean them here. | |
577 for (i = 0; i < outh; i += 2) { | |
578 int xmin = FFMIN(dirty_rows[i].xmin, dirty_rows[i + 1].xmin) & ~1, | |
579 xmax = FFMAX(dirty_rows[i].xmax, dirty_rows[i + 1].xmax); | |
580 dirty_rows[i / 2].xmin = FFMIN(dirty_rows[i / 2].xmin, xmin / 2); | |
581 dirty_rows[i / 2].xmax = FFMAX(dirty_rows[i / 2].xmax, xmax / 2); | |
582 } | |
583 } | |
35244 | 584 for (i = 0; i < MP_MAX_PLANES; i++) { |
585 uint8_t *plane = planes[i]; | |
586 if (!plane) | |
587 break; | |
588 for (j = 0; j < outh; j++) { | |
589 int xmin = dirty_rows[j].xmin; | |
590 int width = dirty_rows[j].xmax - xmin; | |
591 if (width > 0) | |
592 memset(plane + j * outw + xmin, 0, width); | |
593 } | |
594 } | |
595 for (i = 0; i < outh; i++) { | |
596 int xmin = dirty_rows[i].xmin; | |
597 int width = dirty_rows[i].xmax - xmin; | |
598 if (width > 0) | |
599 memset(alpha + i * outw + xmin, -1, width); | |
600 } | |
601 for (i = 0; i < outh; i++) { | |
602 dirty_rows[i].xmin = outw; | |
603 dirty_rows[i].xmax = 0; | |
604 } | |
605 } | |
18937 | 606 |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
607 static int config(struct vf_instance *vf, |
32096 | 608 int width, int height, int d_width, int d_height, |
609 unsigned int flags, unsigned int outfmt) | |
18937 | 610 { |
32391
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
611 struct mp_eosd_settings res = {0}; |
35244 | 612 struct dirty_rows_extent *dirty_rows; |
613 int outw, outh; | |
614 int planes, alphas; | |
615 int i; | |
31927 | 616 |
35574 | 617 vf->priv->outfmt = outfmt; |
618 vf->priv->outh = outh = height + ass_top_margin + ass_bottom_margin; | |
619 vf->priv->outw = outw = width; | |
620 | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
621 switch (outfmt) { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
622 case IMGFMT_YV12: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
623 case IMGFMT_I420: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
624 case IMGFMT_IYUV: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
625 vf->priv->is_planar = 1; |
35244 | 626 planes = 3; |
627 alphas = 2; | |
628 vf->priv->draw_image = draw_image_yuv; | |
629 vf->priv->render_frame = render_frame_yuv420p; | |
630 vf->priv->prepare_buffer = prepare_buffer_420p; | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
631 #if HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
632 if (gCpuCaps.hasSSE4 && outw % 32 == 0) |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
633 vf->priv->render_frame = render_frame_yuv420p_sse4; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
634 #endif |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
635 break; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
636 case IMGFMT_UYVY: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
637 case IMGFMT_YUY2: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
638 vf->priv->is_planar = 0; |
35244 | 639 planes = 3; |
640 alphas = 1; | |
641 vf->priv->draw_image = draw_image_yuv; | |
642 vf->priv->render_frame = render_frame_yuv422; | |
643 vf->priv->prepare_buffer = prepare_buffer_422; | |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
644 #if HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
645 if (gCpuCaps.hasSSE4 && outw % 8 == 0) |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
646 vf->priv->render_frame = render_frame_yuv422_sse4; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
647 #endif |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
648 break; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
649 default: |
32096 | 650 return 0; |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
651 } |
18937 | 652 |
32096 | 653 if (!opt_screen_size_x && !opt_screen_size_y) { |
654 d_width = d_width * vf->priv->outw / width; | |
655 d_height = d_height * vf->priv->outh / height; | |
656 } | |
18937 | 657 |
35244 | 658 for (i = 0; i < planes; i++) |
659 vf->priv->planes[i] = av_malloc(outw * outh); | |
660 for (i = 0; i < alphas; i++) | |
661 vf->priv->alphas[i] = av_malloc(outw * outh); | |
662 dirty_rows = av_malloc(outh * sizeof(*dirty_rows)); | |
663 // mark all rows dirty here | |
664 // so that they can be properly cleaned in clear_buffer() | |
665 for (i = 0; i < outh; i++) { | |
666 dirty_rows[i].xmin = 0; | |
667 dirty_rows[i].xmax = outw; | |
668 } | |
669 vf->priv->dirty_rows = dirty_rows; | |
670 clean_buffer(vf); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
671 |
32096 | 672 res.w = vf->priv->outw; |
673 res.h = vf->priv->outh; | |
674 res.srcw = width; | |
675 res.srch = height; | |
676 res.mt = ass_top_margin; | |
677 res.mb = ass_bottom_margin; | |
32391
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
678 eosd_configure(&res); |
18937 | 679 |
32096 | 680 return vf_next_config(vf, vf->priv->outw, vf->priv->outh, d_width, |
681 d_height, flags, outfmt); | |
18937 | 682 } |
683 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
684 static void get_image(struct vf_instance *vf, mp_image_t *mpi) |
18937 | 685 { |
32096 | 686 if (mpi->type == MP_IMGTYPE_IPB) |
687 return; | |
688 if (mpi->flags & MP_IMGFLAG_PRESERVE) | |
689 return; | |
690 if (mpi->imgfmt != vf->priv->outfmt) | |
691 return; // colorspace differ | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
692 |
32096 | 693 // width never changes, always try full DR |
694 mpi->priv = vf->dmpi = vf_get_image(vf->next, mpi->imgfmt, mpi->type, | |
695 mpi->flags | MP_IMGFLAG_READABLE, | |
34882
649d4cad4619
Request a sufficiently large image for direct rendering.
reimar
parents:
34863
diff
changeset
|
696 FFMAX(mpi->width, vf->priv->outw), |
649d4cad4619
Request a sufficiently large image for direct rendering.
reimar
parents:
34863
diff
changeset
|
697 FFMAX(mpi->height, vf->priv->outh)); |
18937 | 698 |
32096 | 699 if ( (vf->dmpi->flags & MP_IMGFLAG_DRAW_CALLBACK) && |
700 !(vf->dmpi->flags & MP_IMGFLAG_DIRECT)) { | |
701 mp_msg(MSGT_ASS, MSGL_INFO, MSGTR_MPCODECS_FullDRNotPossible); | |
702 return; | |
703 } | |
704 // set up mpi as a cropped-down image of dmpi: | |
705 if (mpi->flags & MP_IMGFLAG_PLANAR) { | |
706 mpi->planes[0] = vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0]; | |
707 mpi->planes[1] = vf->dmpi->planes[1] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[1]; | |
708 mpi->planes[2] = vf->dmpi->planes[2] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[2]; | |
709 mpi->stride[1] = vf->dmpi->stride[1]; | |
710 mpi->stride[2] = vf->dmpi->stride[2]; | |
711 } else { | |
712 mpi->planes[0] = vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0]; | |
713 } | |
714 mpi->stride[0] = vf->dmpi->stride[0]; | |
715 mpi->width = vf->dmpi->width; | |
716 mpi->flags |= MP_IMGFLAG_DIRECT; | |
717 mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK; | |
718 // vf->dmpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK; | |
18937 | 719 } |
720 | |
721 static void blank(mp_image_t *mpi, int y1, int y2) | |
722 { | |
32096 | 723 int color[3] = { 16, 128, 128 }; // black (YUV) |
724 int y; | |
725 unsigned char *dst; | |
726 int chroma_rows = (y2 - y1) >> mpi->chroma_y_shift; | |
18937 | 727 |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
728 if (mpi->flags & MP_IMGFLAG_PLANAR) { |
35436 | 729 dst = mpi->planes[0] + y1 * mpi->stride[0]; |
730 for (y = 0; y < y2 - y1; ++y) { | |
731 memset(dst, color[0], mpi->w); | |
732 dst += mpi->stride[0]; | |
733 } | |
734 dst = mpi->planes[1] + (y1 >> mpi->chroma_y_shift) * mpi->stride[1]; | |
735 for (y = 0; y < chroma_rows; ++y) { | |
736 memset(dst, color[1], mpi->chroma_width); | |
737 dst += mpi->stride[1]; | |
738 } | |
739 dst = mpi->planes[2] + (y1 >> mpi->chroma_y_shift) * mpi->stride[2]; | |
740 for (y = 0; y < chroma_rows; ++y) { | |
741 memset(dst, color[2], mpi->chroma_width); | |
742 dst += mpi->stride[2]; | |
743 } | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
744 } else { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
745 unsigned char packed_color[4]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
746 int x; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
747 if (mpi->imgfmt == IMGFMT_UYVY) { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
748 packed_color[0] = color[1]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
749 packed_color[1] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
750 packed_color[2] = color[2]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
751 packed_color[3] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
752 } else { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
753 packed_color[0] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
754 packed_color[1] = color[1]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
755 packed_color[2] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
756 packed_color[3] = color[2]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
757 } |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
758 dst = mpi->planes[0] + y1 * mpi->stride[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
759 for (y = y1; y < y2; ++y) { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
760 for (x = 0; x < mpi->w / 2; ++x) |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
761 AV_COPY32(dst + 4 * x, packed_color); |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
762 dst += mpi->stride[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
763 } |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
764 } |
18937 | 765 } |
766 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
767 static int prepare_image(struct vf_instance *vf, mp_image_t *mpi) |
18937 | 768 { |
32096 | 769 if (mpi->flags & MP_IMGFLAG_DIRECT || |
770 mpi->flags & MP_IMGFLAG_DRAW_CALLBACK) { | |
771 vf->dmpi = mpi->priv; | |
772 if (!vf->dmpi) { | |
773 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_MPCODECS_FunWhydowegetNULL); | |
774 return 0; | |
775 } | |
776 mpi->priv = NULL; | |
777 // we've used DR, so we're ready... | |
778 if (ass_top_margin) | |
779 blank(vf->dmpi, 0, ass_top_margin); | |
780 if (ass_bottom_margin) | |
781 blank(vf->dmpi, vf->priv->outh - ass_bottom_margin, vf->priv->outh); | |
782 if (!(mpi->flags & MP_IMGFLAG_PLANAR)) | |
783 vf->dmpi->planes[1] = mpi->planes[1]; // passthrough rgb8 palette | |
784 return 0; | |
785 } | |
786 // hope we'll get DR buffer: | |
787 vf->dmpi = vf_get_image(vf->next, vf->priv->outfmt, MP_IMGTYPE_TEMP, | |
788 MP_IMGFLAG_ACCEPT_STRIDE | MP_IMGFLAG_READABLE, | |
789 vf->priv->outw, vf->priv->outh); | |
18937 | 790 |
32096 | 791 // copy mpi->dmpi... |
792 if (mpi->flags & MP_IMGFLAG_PLANAR) { | |
793 memcpy_pic(vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0], | |
35436 | 794 mpi->planes[0], |
795 mpi->w, | |
796 mpi->h, | |
797 vf->dmpi->stride[0], | |
798 mpi->stride[0]); | |
32096 | 799 memcpy_pic(vf->dmpi->planes[1] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[1], |
35436 | 800 mpi->planes[1], |
801 mpi->w >> mpi->chroma_x_shift, | |
32096 | 802 mpi->h >> mpi->chroma_y_shift, |
35436 | 803 vf->dmpi->stride[1], |
32096 | 804 mpi->stride[1]); |
805 memcpy_pic(vf->dmpi->planes[2] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[2], | |
35436 | 806 mpi->planes[2], |
32096 | 807 mpi->w >> mpi->chroma_x_shift, |
808 mpi->h >> mpi->chroma_y_shift, | |
35436 | 809 vf->dmpi->stride[2], |
32096 | 810 mpi->stride[2]); |
811 } else { | |
812 memcpy_pic(vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0], | |
35436 | 813 mpi->planes[0], |
32096 | 814 mpi->w * (vf->dmpi->bpp / 8), |
35436 | 815 mpi->h, |
32096 | 816 vf->dmpi->stride[0], |
35436 | 817 mpi->stride[0]); |
32096 | 818 vf->dmpi->planes[1] = mpi->planes[1]; // passthrough rgb8 palette |
819 } | |
820 if (ass_top_margin) | |
821 blank(vf->dmpi, 0, ass_top_margin); | |
822 if (ass_bottom_margin) | |
823 blank(vf->dmpi, vf->priv->outh - ass_bottom_margin, vf->priv->outh); | |
824 return 0; | |
18937 | 825 } |
826 | |
35244 | 827 static void prepare_eosd(vf_instance_t *vf, struct mp_eosd_image_list *imgs) |
18937 | 828 { |
35244 | 829 struct mp_eosd_image *img = eosd_image_first(imgs); |
830 void (*draw_image)(vf_instance_t *, struct mp_eosd_image *); | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
831 |
35244 | 832 clean_buffer(vf); |
833 draw_image = vf->priv->draw_image; | |
834 for (; img; img = eosd_image_next(imgs)) | |
835 draw_image(vf, img); | |
836 vf->priv->prepare_buffer(vf); | |
18937 | 837 } |
838 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
839 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts) |
18937 | 840 { |
32391
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
841 struct mp_eosd_image_list images; |
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
842 eosd_render_frame(pts, &images); |
32096 | 843 prepare_image(vf, mpi); |
35244 | 844 if (images.changed) |
845 prepare_eosd(vf, &images); | |
846 vf->priv->render_frame(vf); | |
32096 | 847 return vf_next_put_image(vf, vf->dmpi, pts); |
18937 | 848 } |
849 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
850 static int query_format(struct vf_instance *vf, unsigned int fmt) |
18937 | 851 { |
32096 | 852 switch (fmt) { |
853 case IMGFMT_YV12: | |
854 case IMGFMT_I420: | |
855 case IMGFMT_IYUV: | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
856 case IMGFMT_UYVY: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
857 case IMGFMT_YUY2: |
35033 | 858 return vf_next_query_format(vf, fmt) | VFCAP_EOSD; |
32096 | 859 } |
860 return 0; | |
18937 | 861 } |
862 | |
863 static int control(vf_instance_t *vf, int request, void *data) | |
864 { | |
32096 | 865 switch (request) { |
866 case VFCTRL_INIT_EOSD: | |
867 return CONTROL_TRUE; | |
868 case VFCTRL_DRAW_EOSD: | |
869 return CONTROL_TRUE; | |
870 } | |
871 return vf_next_control(vf, request, data); | |
18937 | 872 } |
873 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
874 static void uninit(struct vf_instance *vf) |
18937 | 875 { |
35244 | 876 int i; |
877 for (i = 0; i < MP_MAX_PLANES; i++) | |
878 av_free(vf->priv->planes[i]); | |
879 for (i = 0; i < MP_MAX_PLANES; i++) | |
880 av_free(vf->priv->alphas[i]); | |
881 av_free(vf->priv->dirty_rows); | |
18937 | 882 } |
883 | |
32096 | 884 static const unsigned int fmt_list[] = { |
885 IMGFMT_YV12, | |
886 IMGFMT_I420, | |
887 IMGFMT_IYUV, | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
888 IMGFMT_UYVY, |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
889 IMGFMT_YUY2, |
32096 | 890 0 |
18937 | 891 }; |
892 | |
30638
a7b908875c14
Rename open() vf initialization function to vf_open().
diego
parents:
30633
diff
changeset
|
893 static int vf_open(vf_instance_t *vf, char *args) |
18937 | 894 { |
32096 | 895 int flags; |
35033 | 896 unsigned outfmt = vf_match_csp(&vf->next, fmt_list, IMGFMT_YV12); |
897 if (outfmt) | |
898 flags = vf_next_query_format(vf, outfmt); | |
899 if (!outfmt || (vf->priv->auto_insert && flags & VFCAP_EOSD)) { | |
32096 | 900 uninit(vf); |
901 return 0; | |
902 } | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
903 |
32096 | 904 if (vf->priv->auto_insert) |
905 mp_msg(MSGT_ASS, MSGL_INFO, "[ass] auto-open\n"); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
906 |
32096 | 907 vf->config = config; |
908 vf->query_format = query_format; | |
909 vf->uninit = uninit; | |
910 vf->control = control; | |
911 vf->get_image = get_image; | |
912 vf->put_image = put_image; | |
913 vf->default_caps = VFCAP_EOSD; | |
914 return 1; | |
18937 | 915 } |
916 | |
917 #define ST_OFF(f) M_ST_OFF(struct vf_priv_s,f) | |
24969
c2b7ba444ade
begin moving const filter data to .text/.rodata sections
rfelker
parents:
24545
diff
changeset
|
918 static const m_option_t vf_opts_fields[] = { |
32096 | 919 {"auto", ST_OFF(auto_insert), CONF_TYPE_FLAG, 0, 0, 1, NULL}, |
920 {NULL, NULL, 0, 0, 0, 0, NULL} | |
18937 | 921 }; |
922 | |
24969
c2b7ba444ade
begin moving const filter data to .text/.rodata sections
rfelker
parents:
24545
diff
changeset
|
923 static const m_struct_t vf_opts = { |
32096 | 924 "ass", |
925 sizeof(struct vf_priv_s), | |
926 &vf_priv_dflt, | |
927 vf_opts_fields | |
18937 | 928 }; |
929 | |
24969
c2b7ba444ade
begin moving const filter data to .text/.rodata sections
rfelker
parents:
24545
diff
changeset
|
930 const vf_info_t vf_info_ass = { |
32096 | 931 "Render ASS/SSA subtitles", |
932 "ass", | |
35244 | 933 "Evgeniy Stepanov, Xidorn Quan", |
32096 | 934 "", |
935 vf_open, | |
936 &vf_opts | |
18937 | 937 }; |