Mercurial > mplayer.hg
annotate libmpcodecs/vf_ass.c @ 35610:187738d40b0b
Add support for FFmpeg's libilbc decoder.
author | cehoyos |
---|---|
date | Sat, 22 Dec 2012 11:21:35 +0000 |
parents | 80521cdbc272 |
children | 2cf9f80c4023 |
rev | line source |
---|---|
20008
fa122b7c71c6
Add copyright notice and vim/emacs comments to libass and vf_ass.c.
eugeni
parents:
19563
diff
changeset
|
1 /* |
26727 | 2 * Copyright (C) 2006 Evgeniy Stepanov <eugeni.stepanov@gmail.com> |
35244 | 3 * Copyright (C) 2012 Xidorn Quan <quanxunzhen@gmail.com> |
26727 | 4 * |
5 * This file is part of MPlayer. | |
6 * | |
7 * MPlayer is free software; you can redistribute it and/or modify | |
8 * it under the terms of the GNU General Public License as published by | |
9 * the Free Software Foundation; either version 2 of the License, or | |
10 * (at your option) any later version. | |
11 * | |
12 * MPlayer is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 * GNU General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU General Public License along | |
18 * with MPlayer; if not, write to the Free Software Foundation, Inc., | |
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
20 */ | |
20008
fa122b7c71c6
Add copyright notice and vim/emacs comments to libass and vf_ass.c.
eugeni
parents:
19563
diff
changeset
|
21 |
18937 | 22 #include "config.h" |
23 | |
24 #include <stdio.h> | |
25 #include <stdlib.h> | |
26 #include <string.h> | |
24545
9e5126679d44
Replace stdint.h #include by functionally equivalent inttypes.h.
diego
parents:
23134
diff
changeset
|
27 #include <inttypes.h> |
18937 | 28 #include <assert.h> |
29 | |
30 #include "config.h" | |
31 #include "mp_msg.h" | |
32 #include "help_mp.h" | |
31489 | 33 #include "mpcommon.h" |
18937 | 34 #include "img_format.h" |
35 #include "mp_image.h" | |
30653
3d23e24c5c60
Declare externally used variables from vd.c as extern in vd.h.
diego
parents:
30642
diff
changeset
|
36 #include "vd.h" |
18937 | 37 #include "vf.h" |
38 | |
39 #include "libvo/fastmemcpy.h" | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
40 #include "libavutil/intreadwrite.h" |
32467 | 41 #include "sub/sub.h" |
18937 | 42 #include "m_option.h" |
43 #include "m_struct.h" | |
44 | |
32461 | 45 #include "sub/ass_mp.h" |
32460 | 46 #include "sub/eosd.h" |
18937 | 47 |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
48 #include "cpudetect.h" |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
49 #include "libavutil/x86_cpu.h" |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
50 |
/* Extract one 8-bit channel from a packed colour: R in bits 31-24, G in 23-16,
 * B in 15-8 and A in 7-0.  The result has the (promoted) type of the argument. */
#define _r(c)  ((c)>>24)
#define _g(c)  (((c)>>16)&0xFF)
#define _b(c)  (((c)>>8)&0xFF)
#define _a(c)  ((c)&0xFF)
/*
 * RGB -> Y/U/V conversion with coefficients scaled by 1024 (hence ">> 10").
 *
 * U and V contain negative terms.  They are evaluated in int and the +128
 * bias is added (as 128 << 10) before the shift, so the shifted value is
 * never negative and the macro itself yields a value in 0..255.  Evaluating
 * them in the unsigned type of the argument would wrap around and only be
 * correct after truncation to 8 bits.
 */
#define rgba2y(c)  ( (( 263*_r(c)  + 516*_g(c) + 100*_b(c)) >> 10) + 16  )
#define rgba2u(c)  ( (-152*(int)_r(c) - 298*(int)_g(c) + 450*(int)_b(c) + (128 << 10)) >> 10 )
#define rgba2v(c)  ( ( 450*(int)_r(c) - 376*(int)_g(c) -  73*(int)_b(c) + (128 << 10)) >> 10 )

/* map 0 - 0xFF -> 0 - 0x101, so that (MAP_16BIT(0xFF) * v) >> 8 == v
 * for every 8-bit v */
#define MAP_16BIT(v)    RSHIFT(0x102 * (v), 8)
/* map 0 - 0xFF -> 0 - 0x10101, so that 0xFF * MAP_24BIT(0xFF) == 0xFFFFFF */
#define MAP_24BIT(v)    RSHIFT(0x10203 * (v), 8)
18937 | 63 |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
64 #if HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
65 |
/* Four 32-bit lanes of 0x80: the SSE4 code adds this before its ">> 8"
 * (psrld $8) shift. */
35582 | 66 DECLARE_ASM_CONST(16, uint32_t, sse_int32_80h[4]) |
67 = { 0x80, 0x80, 0x80, 0x80 }; | |
/* Four 32-bit lanes of 0x102: the factor the SSE4 code multiplies alpha
 * values by (the same constant MAP_16BIT uses). */
68 DECLARE_ASM_CONST(16, uint32_t, sse_int32_map_factor[4]) | |
69 = { 0x102, 0x102, 0x102, 0x102 }; | |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
70 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
71 #endif // HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
72 |
24969
c2b7ba444ade
begin moving const filter data to .text/.rodata sections
rfelker
parents:
24545
diff
changeset
|
73 static const struct vf_priv_s { |
32096 | 74 int outh, outw; |
18937 | 75 |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
76 int is_planar; |
32096 | 77 unsigned int outfmt; |
18937 | 78 |
32096 | 79 // 1 = auto-added filter: insert only if chain does not support EOSD already |
80 // 0 = insert always | |
81 int auto_insert; | |
18937 | 82 |
35244 | 83 // planar data to be directly rendered on frames |
84 uint8_t *planes[MP_MAX_PLANES]; | |
85 // alpha here is actually transparency, not opacity | |
86 uint8_t *alphas[MP_MAX_PLANES]; | |
87 struct dirty_rows_extent { | |
88 int xmin, xmax; | |
89 } *dirty_rows; | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
90 |
35244 | 91 // called for every eosd image when subtitle is changed |
92 void (*draw_image)(vf_instance_t *, struct mp_eosd_image *); | |
93 // called for every time subtitle is changed | |
94 void (*prepare_buffer)(vf_instance_t *); | |
95 // called for every frame | |
96 void (*render_frame)(vf_instance_t *); | |
24969
c2b7ba444ade
begin moving const filter data to .text/.rodata sections
rfelker
parents:
24545
diff
changeset
|
97 } vf_priv_dflt; |
18937 | 98 |
35244 | 99 static void draw_image_yuv(vf_instance_t *vf, struct mp_eosd_image *img) |
100 { | |
101 uint32_t color = img->color; | |
102 uint32_t opacity = 0xFF - _a(color); | |
103 uint8_t y = rgba2y(color), | |
104 u = rgba2u(color), | |
105 v = rgba2v(color); | |
35268 | 106 int outw = vf->priv->outw; |
35244 | 107 uint8_t *alpha = vf->priv->alphas[0], |
108 *dst_y = vf->priv->planes[0], | |
109 *dst_u = vf->priv->planes[1], | |
110 *dst_v = vf->priv->planes[2]; | |
111 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
112 int src_x = img->dst_x, src_w = img->w, | |
113 src_y = img->dst_y, src_h = img->h, | |
114 stride = img->stride; | |
115 uint8_t *src = img->bitmap; | |
116 int i, j; | |
117 | |
118 opacity = MAP_24BIT(opacity); | |
119 for (i = 0; i < src_h; i++) { | |
120 struct dirty_rows_extent *dirty_row = &dirty_rows[src_y + i]; | |
121 dirty_row->xmin = FFMIN(dirty_row->xmin, src_x); | |
122 dirty_row->xmax = FFMAX(dirty_row->xmax, src_x + src_w); | |
123 | |
124 for (j = 0; j < src_w; j++) { | |
125 uint32_t k = src[i * stride + j]; | |
126 if (k) { | |
127 size_t p = (src_y + i) * outw + src_x + j; | |
128 k *= opacity; | |
129 alpha[p] = RSHIFT((0xFFFFFF - k) * alpha[p], 24); | |
130 dst_y[p] = RSHIFT((0xFFFFFF - k) * dst_y[p] + k * y, 24); | |
131 dst_u[p] = RSHIFT((0xFFFFFF - k) * dst_u[p] + k * u, 24); | |
132 dst_v[p] = RSHIFT((0xFFFFFF - k) * dst_v[p] + k * v, 24); | |
133 } | |
134 } | |
135 } | |
136 } | |
137 | |
138 static void prepare_buffer_422(vf_instance_t *vf) | |
139 { | |
140 uint8_t *dst_u = vf->priv->planes[1], | |
141 *dst_v = vf->priv->planes[2]; | |
142 int outw = vf->priv->outw, | |
143 outh = vf->priv->outh; | |
144 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
145 int i, j; | |
146 | |
147 for (i = 0; i < outh; i++) { | |
148 int xmin = dirty_rows[i].xmin & ~1, | |
149 xmax = dirty_rows[i].xmax; | |
150 for (j = xmin; j < xmax; j += 2) { | |
151 size_t p = i * outw + j; | |
152 dst_u[p] = (dst_u[p] + dst_u[p + 1]) / 2; | |
153 dst_v[p] = (dst_v[p] + dst_v[p + 1]) / 2; | |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
154 dst_u[p + 1] = dst_v[p + 1] = 0; |
35244 | 155 } |
156 } | |
157 } | |
158 | |
159 static void render_frame_yuv422(vf_instance_t *vf) | |
160 { | |
161 uint8_t *alpha = vf->priv->alphas[0]; | |
162 uint8_t *src_y = vf->priv->planes[0], | |
163 *src_u = vf->priv->planes[1], | |
164 *src_v = vf->priv->planes[2]; | |
165 int outw = vf->priv->outw, | |
166 outh = vf->priv->outh; | |
167 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
168 uint8_t *dest = vf->dmpi->planes[0]; | |
169 int stride = vf->dmpi->stride[0]; | |
170 int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY; | |
171 int i, j; | |
172 | |
173 for (i = 0; i < outh; i++) { | |
174 int xmin = dirty_rows[i].xmin & ~1, | |
175 xmax = dirty_rows[i].xmax; | |
176 for (j = xmin; j < xmax; j += 2) { | |
177 size_t src = i * outw + j, | |
178 dst = i * stride + j * 2; | |
179 uint_fast16_t a0 = alpha[src], | |
180 a1 = alpha[src + 1]; | |
181 uint8_t y0, y1, u, v; | |
182 | |
183 if (a0 == 0xFF && a1 == 0xFF) | |
184 continue; | |
185 | |
186 y0 = dest[dst + is_uyvy + 0]; | |
187 y1 = dest[dst + is_uyvy + 2]; | |
188 u = dest[dst - is_uyvy + 1]; | |
189 v = dest[dst - is_uyvy + 3]; | |
190 | |
191 a0 = MAP_16BIT(a0); | |
192 a1 = MAP_16BIT(a1); | |
193 y0 = ((a0 * y0) >> 8) + src_y[src]; | |
194 y1 = ((a1 * y1) >> 8) + src_y[src + 1]; | |
195 | |
196 a0 = (a0 + a1) / 2; | |
197 u = ((a0 * u) >> 8) + src_u[src]; | |
198 v = ((a0 * v) >> 8) + src_v[src]; | |
199 | |
200 dest[dst + is_uyvy + 0] = y0; | |
201 dest[dst + is_uyvy + 2] = y1; | |
202 dest[dst - is_uyvy + 1] = u; | |
203 dest[dst - is_uyvy + 3] = v; | |
204 } | |
205 } | |
206 } | |
207 | |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
208 #if HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
209 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
210 static void render_frame_yuv422_sse4(vf_instance_t *vf) |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
211 { |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
212 uint8_t *alpha = vf->priv->alphas[0]; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
213 uint8_t *src_y = vf->priv->planes[0], |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
214 *src_u = vf->priv->planes[1], |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
215 *src_v = vf->priv->planes[2]; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
216 int outw = vf->priv->outw, |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
217 outh = vf->priv->outh; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
218 struct dirty_rows_extent *dr = vf->priv->dirty_rows; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
219 uint8_t *dst = vf->dmpi->planes[0]; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
220 int stride = vf->dmpi->stride[0]; |
35579
6b169870ae30
Reduce register usage to fix the compilation in x86.
upsuper
parents:
35576
diff
changeset
|
221 int32_t is_uyvy = vf->priv->outfmt == IMGFMT_UYVY; |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
222 int i; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
223 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
224 for (i = 0; i < outh; i++) { |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
225 size_t xmin = dr[i].xmin & ~7, |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
226 xmax = dr[i].xmax; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
227 __asm__ volatile ( |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
228 "pxor %%xmm7, %%xmm7 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
229 "jmp 4f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
230 "1: \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
231 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
232 "cmpl $-1, 0(%[alpha], %[j], 1) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
233 "jne 2f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
234 "cmpl $-1, 4(%[alpha], %[j], 1) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
235 "jne 2f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
236 "jmp 3f \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
237 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
238 "2: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
239 "movq (%[alpha], %[j], 1), %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
240 "punpcklbw %%xmm7, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
241 "movdqa %%xmm0, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
242 "punpcklwd %%xmm7, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
243 "punpckhwd %%xmm7, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
244 "pmulld "MANGLE(sse_int32_map_factor)", %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
245 "pmulld "MANGLE(sse_int32_map_factor)", %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
246 "paddd "MANGLE(sse_int32_80h)", %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
247 "paddd "MANGLE(sse_int32_80h)", %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
248 "psrld $8, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
249 "psrld $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
250 "movdqa %%xmm0, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
251 "movdqa %%xmm1, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
252 "packssdw %%xmm1, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
253 "phaddd %%xmm3, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
254 "psrld $1, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
255 "packssdw %%xmm7, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
256 "punpcklwd %%xmm2, %%xmm2 \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
257 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
258 "movdqu (%[dst], %[j], 2), %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
259 "movdqa %%xmm1, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
260 "cmpl $0, %[f] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
261 "je 11f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
262 "psrlw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
263 "psllw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
264 "psrlw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
265 "jmp 12f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
266 "11: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
267 "psllw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
268 "psrlw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
269 "psrlw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
270 "12: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
271 "pmullw %%xmm0, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
272 "pmullw %%xmm2, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
273 "psrlw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
274 "psrlw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
275 "packuswb %%xmm7, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
276 "packuswb %%xmm7, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
277 "mov %[src_y], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
278 "movq (%%"REG_S", %[j], 1), %%xmm4 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
279 "mov %[src_u], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
280 "movq (%%"REG_S", %[j], 1), %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
281 "mov %[src_v], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
282 "movq (%%"REG_S", %[j], 1), %%xmm6 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
283 "packuswb %%xmm7, %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
284 "packuswb %%xmm7, %%xmm6 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
285 "punpcklbw %%xmm6, %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
286 "cmpl $0, %[f] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
287 "je 21f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
288 "punpcklbw %%xmm1, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
289 "punpcklbw %%xmm4, %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
290 "paddb %%xmm5, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
291 "movdqu %%xmm3, (%[dst], %[j], 2) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
292 "jmp 22f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
293 "21: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
294 "punpcklbw %%xmm3, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
295 "punpcklbw %%xmm5, %%xmm4 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
296 "paddb %%xmm4, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
297 "movdqu %%xmm1, (%[dst], %[j], 2) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
298 "22: \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
299 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
300 "3: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
301 "add $8, %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
302 "4: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
303 "cmp %[xmax], %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
304 "jl 1b \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
305 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
306 : : [dst] "r" (dst + i * stride), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
307 [alpha] "r" (alpha + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
308 [src_y] "g" (src_y + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
309 [src_u] "g" (src_u + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
310 [src_v] "g" (src_v + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
311 [j] "r" (xmin), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
312 [xmax] "g" (xmax), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
313 [f] "g" (is_uyvy) |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
314 : REG_S |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
315 ); |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
316 } |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
317 } |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
318 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
319 #endif // HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
320 |
35244 | 321 static void prepare_buffer_420p(vf_instance_t *vf) |
322 { | |
323 int outw = vf->priv->outw, | |
324 outh = vf->priv->outh; | |
325 uint8_t *dst_u = vf->priv->planes[1], | |
326 *dst_v = vf->priv->planes[2]; | |
327 uint8_t *src_a = vf->priv->alphas[0], | |
328 *dst_a = vf->priv->alphas[1]; | |
329 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
330 int i, j; | |
331 | |
332 for (i = 0; i < outh; i += 2) { | |
333 int xmin = FFMIN(dirty_rows[i].xmin, dirty_rows[i + 1].xmin) & ~1, | |
334 xmax = FFMAX(dirty_rows[i].xmax, dirty_rows[i + 1].xmax); | |
335 for (j = xmin; j < xmax; j += 2) { | |
336 size_t p = i * outw / 4 + j / 2, | |
337 q1 = i * outw + j, | |
338 q2 = q1 + outw; | |
339 dst_a[p] = (src_a[q1] + src_a[q1 + 1] + | |
340 src_a[q2] + src_a[q2 + 1] + 2) / 4; | |
341 dst_u[p] = (dst_u[q1] + dst_u[q1 + 1] + | |
342 dst_u[q2] + dst_u[q2 + 1] + 2) / 4; | |
343 dst_v[p] = (dst_v[q1] + dst_v[q1 + 1] + | |
344 dst_v[q2] + dst_v[q2 + 1] + 2) / 4; | |
345 } | |
346 } | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
347 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
348 #if HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
349 // for render_frame_yuv420p_sse4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
350 if (gCpuCaps.hasSSE4 && outw % 32 == 0) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
351 for (i = 0; i < outh; i += 2) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
352 int xmin = FFMIN(dirty_rows[i].xmin, dirty_rows[i + 1].xmin) & ~1, |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
353 xmax = FFMAX(dirty_rows[i].xmax, dirty_rows[i + 1].xmax); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
354 if (xmin >= xmax) |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
355 continue; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
356 for (j = xmin & ~31; j < xmin; j += 2) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
357 size_t p = i * outw / 4 + j / 2; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
358 dst_a[p] = 0xFF; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
359 dst_u[p] = dst_v[p] = 0; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
360 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
361 for (j = xmax; j < FFALIGN(xmax, 32); j += 2) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
362 size_t p = i * outw / 4 + j / 2; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
363 dst_a[p] = 0xFF; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
364 dst_u[p] = dst_v[p] = 0; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
365 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
366 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
367 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
368 #endif // HAVE_SSE4 |
35244 | 369 } |
370 | |
371 static void render_frame_yuv420p(vf_instance_t *vf) | |
372 { | |
373 uint8_t **planes = vf->priv->planes; | |
374 uint8_t **dest = vf->dmpi->planes; | |
375 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
376 uint8_t *alpha; | |
35268 | 377 uint8_t *src_y = planes[0], |
378 *src_u = planes[1], | |
379 *src_v = planes[2]; | |
380 uint8_t *dst_y = dest[0], | |
381 *dst_u = dest[1], | |
382 *dst_v = dest[2]; | |
35244 | 383 int stride; |
384 int outw = vf->priv->outw, | |
385 outh = vf->priv->outh; | |
386 int i, j; | |
387 | |
388 // y | |
389 alpha = vf->priv->alphas[0]; | |
390 stride = vf->dmpi->stride[0]; | |
391 for (i = 0; i < outh; i++) { | |
392 int xmin = dirty_rows[i].xmin, | |
393 xmax = dirty_rows[i].xmax; | |
394 for (j = xmin; j < xmax; j++) { | |
395 size_t s = i * outw + j, | |
396 d = i * stride + j; | |
397 if (alpha[s] != 0xFF) | |
398 dst_y[d] = ((MAP_16BIT(alpha[s]) * dst_y[d]) >> 8) + src_y[s]; | |
399 } | |
400 } | |
401 | |
402 // u & v | |
403 alpha = vf->priv->alphas[1]; | |
404 stride = vf->dmpi->stride[1]; | |
405 for (i = 0; i < outh / 2; i++) { | |
406 int xmin = FFMIN(dirty_rows[i * 2].xmin, dirty_rows[i * 2 + 1].xmin), | |
407 xmax = FFMAX(dirty_rows[i * 2].xmax, dirty_rows[i * 2 + 1].xmax); | |
408 for (j = xmin / 2; j < (xmax + 1) / 2; j++) { | |
409 size_t s = i * outw / 2 + j, | |
410 d = i * stride + j; | |
411 if (alpha[s] != 0xFF) { | |
412 uint_fast16_t a = MAP_16BIT(alpha[s]); | |
413 dst_u[d] = ((a * dst_u[d]) >> 8) + src_u[s]; | |
414 dst_v[d] = ((a * dst_v[d]) >> 8) + src_v[s]; | |
415 } | |
416 } | |
417 } | |
418 } | |
419 | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
420 #if HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
421 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
/*
 * Asm fragment: compare the four 32-bit words (16 bytes) at alpha + j
 * against -1, i.e. all bytes 0xFF.  Jumps forward to local label 2 as soon
 * as one word differs, otherwise jumps forward to local label 3.
 * Needs the asm operands [alpha] and [j] and the labels 2 and 3 to be
 * provided by the code that uses the macro.
 */
422 #define CHECK_16_ALPHA \ |
35602 | 423 "cmpl $-1, 0(%[alpha], %[j], 1) \n\t" \ |
424 "jne 2f \n\t" \ | |
425 "cmpl $-1, 4(%[alpha], %[j], 1) \n\t" \ | |
426 "jne 2f \n\t" \ | |
427 "cmpl $-1, 8(%[alpha], %[j], 1) \n\t" \ | |
428 "jne 2f \n\t" \ | |
429 "cmpl $-1, 12(%[alpha], %[j], 1) \n\t" \ | |
430 "jne 2f \n\t" \ | |
431 "jmp 3f \n\t" | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
432 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
/*
 * Asm fragment: load 16 bytes from alpha + j, widen them to 32-bit lanes,
 * compute (a * 0x102 + 0x80) >> 8 for each and pack the results to 16-bit
 * words.  On exit xmm0 holds the words for bytes 0-7 and xmm2 those for
 * bytes 8-15; xmm1 and xmm3 are used as scratch.
 * xmm7 is used as the second operand of the unpack instructions and is not
 * set here -- presumably the surrounding code zeroes it first.
 */
433 #define MAP_16_ALPHA \ |
35602 | 434 "movq 0(%[alpha], %[j], 1), %%xmm0 \n\t" \ |
435 "movq 8(%[alpha], %[j], 1), %%xmm2 \n\t" \ | |
436 "punpcklbw %%xmm7, %%xmm0 \n\t" \ | |
437 "punpcklbw %%xmm7, %%xmm2 \n\t" \ | |
438 "movdqa %%xmm0, %%xmm1 \n\t" \ | |
439 "movdqa %%xmm2, %%xmm3 \n\t" \ | |
440 "punpcklwd %%xmm7, %%xmm0 \n\t" \ | |
441 "punpckhwd %%xmm7, %%xmm1 \n\t" \ | |
442 "punpcklwd %%xmm7, %%xmm2 \n\t" \ | |
443 "punpckhwd %%xmm7, %%xmm3 \n\t" \ | |
444 "pmulld "MANGLE(sse_int32_map_factor)", %%xmm0 \n\t"\ | |
445 "pmulld "MANGLE(sse_int32_map_factor)", %%xmm1 \n\t"\ | |
446 "pmulld "MANGLE(sse_int32_map_factor)", %%xmm2 \n\t"\ | |
447 "pmulld "MANGLE(sse_int32_map_factor)", %%xmm3 \n\t"\ | |
448 "paddd "MANGLE(sse_int32_80h)", %%xmm0 \n\t" \ | |
449 "paddd "MANGLE(sse_int32_80h)", %%xmm1 \n\t" \ | |
450 "paddd "MANGLE(sse_int32_80h)", %%xmm2 \n\t" \ | |
451 "paddd "MANGLE(sse_int32_80h)", %%xmm3 \n\t" \ | |
452 "psrld $8, %%xmm0 \n\t" \ | |
453 "psrld $8, %%xmm1 \n\t" \ | |
454 "psrld $8, %%xmm2 \n\t" \ | |
455 "psrld $8, %%xmm3 \n\t" \ | |
456 "packssdw %%xmm1, %%xmm0 \n\t" \ | |
457 "packssdw %%xmm3, %%xmm2 \n\t" | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
458 |
/*
 * Inline-asm fragment: blend 16 bytes of one plane.
 *
 * Expects the destination row in REG_D, the overlay row in REG_S, the
 * byte offset in [j], xmm7 == 0 and the word multipliers produced by
 * MAP_16_ALPHA in xmm0 / xmm2. Per byte it computes
 *     dst = ((multiplier * dst) >> 8) + src
 * using a wrapping byte add (paddb). The overlay is read with an
 * aligned load (movdqa); the destination is written with an unaligned
 * store (movdqu). xmm1, xmm3 and xmm4 are used as scratch registers.
 */
#define DO_RENDER \
    "movq 0(%%"REG_D", %[j], 1), %%xmm1 \n\t" \
    "movq 8(%%"REG_D", %[j], 1), %%xmm3 \n\t" \
    "punpcklbw %%xmm7, %%xmm1 \n\t" \
    "punpcklbw %%xmm7, %%xmm3 \n\t" \
    "pmullw %%xmm0, %%xmm1 \n\t" \
    "pmullw %%xmm2, %%xmm3 \n\t" \
    "psrlw $8, %%xmm1 \n\t" \
    "psrlw $8, %%xmm3 \n\t" \
    "packuswb %%xmm3, %%xmm1 \n\t" \
    "movdqa (%%"REG_S", %[j], 1), %%xmm4 \n\t" \
    "paddb %%xmm4, %%xmm1 \n\t" \
    "movdqu %%xmm1, (%%"REG_D", %[j], 1) \n\t"
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
472 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
473 static void render_frame_yuv420p_sse4(vf_instance_t *vf) |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
474 { |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
475 struct dirty_rows_extent *dr = vf->priv->dirty_rows; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
476 uint8_t *alpha; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
477 uint8_t *src_y = vf->priv->planes[0], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
478 *src_u = vf->priv->planes[1], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
479 *src_v = vf->priv->planes[2]; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
480 uint8_t *dst_y = vf->dmpi->planes[0], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
481 *dst_u = vf->dmpi->planes[1], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
482 *dst_v = vf->dmpi->planes[2]; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
483 int stride; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
484 int outw = vf->priv->outw, |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
485 outh = vf->priv->outh; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
486 int i; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
487 |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
488 // y |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
489 alpha = vf->priv->alphas[0]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
490 stride = vf->dmpi->stride[0]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
491 for (i = 0; i < outh; i++) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
492 size_t xmin = dr[i].xmin & ~15, |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
493 xmax = dr[i].xmax; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
494 __asm__ volatile ( |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
495 "pxor %%xmm7, %%xmm7 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
496 "jmp 4f \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
497 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
498 "1: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
499 CHECK_16_ALPHA |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
500 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
501 "2: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
502 MAP_16_ALPHA |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
503 DO_RENDER |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
504 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
505 "3: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
506 "add $16, %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
507 "4: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
508 "cmp %[xmax], %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
509 "jl 1b \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
510 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
511 : : [j] "r" (xmin), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
512 [xmax] "g" (xmax), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
513 [alpha] "r" (alpha + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
514 [src] "S" (src_y + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
515 [dst] "D" (dst_y + i * stride) |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
516 ); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
517 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
518 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
519 // u & v |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
520 alpha = vf->priv->alphas[1]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
521 stride = vf->dmpi->stride[1]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
522 for (i = 0; i < outh / 2; i++) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
523 size_t xmin = FFMIN(dr[i * 2].xmin, dr[i * 2 + 1].xmin) & ~31, |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
524 xmax = FFMAX(dr[i * 2].xmax, dr[i * 2 + 1].xmax); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
525 __asm__ volatile ( |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
526 "pxor %%xmm7, %%xmm7 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
527 "jmp 4f \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
528 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
529 "1: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
530 CHECK_16_ALPHA |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
531 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
532 "2: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
533 MAP_16_ALPHA |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
534 "mov %[src_u], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
535 "mov %[dst_u], %%"REG_D" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
536 DO_RENDER |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
537 "mov %[src_v], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
538 "mov %[dst_v], %%"REG_D" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
539 DO_RENDER |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
540 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
541 "3: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
542 "add $16, %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
543 "4: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
544 "cmp %[xmax], %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
545 "jl 1b \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
546 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
547 : : [j] "r" (xmin / 2), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
548 [xmax] "g" ((xmax + 1) / 2), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
549 [alpha] "r" (alpha + i * outw / 2), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
550 [src_u] "g" (src_u + i * outw / 2), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
551 [src_v] "g" (src_v + i * outw / 2), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
552 [dst_u] "g" (dst_u + i * stride), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
553 [dst_v] "g" (dst_v + i * stride) |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
554 : REG_S, REG_D |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
555 ); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
556 } |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
557 } |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
558 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
/* Drop the asm helper macros so they do not leak past the SSE4 section. */
#undef CHECK_16_ALPHA
#undef MAP_16_ALPHA
#undef DO_RENDER
/* Kept from the original list; no visible line in this section defines it. */
#undef MUL_ALPHA
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
562 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
563 #endif // HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
564 |
35244 | 565 static void clean_buffer(vf_instance_t *vf) |
566 { | |
567 int outw = vf->priv->outw, | |
568 outh = vf->priv->outh; | |
569 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
570 uint8_t **planes = vf->priv->planes; | |
571 uint8_t *alpha = vf->priv->alphas[0]; | |
572 int i, j; | |
573 | |
574 for (i = 0; i < MP_MAX_PLANES; i++) { | |
575 uint8_t *plane = planes[i]; | |
576 if (!plane) | |
577 break; | |
578 for (j = 0; j < outh; j++) { | |
579 int xmin = dirty_rows[j].xmin; | |
580 int width = dirty_rows[j].xmax - xmin; | |
581 if (width > 0) | |
582 memset(plane + j * outw + xmin, 0, width); | |
583 } | |
584 } | |
585 for (i = 0; i < outh; i++) { | |
586 int xmin = dirty_rows[i].xmin; | |
587 int width = dirty_rows[i].xmax - xmin; | |
588 if (width > 0) | |
589 memset(alpha + i * outw + xmin, -1, width); | |
590 } | |
591 for (i = 0; i < outh; i++) { | |
592 dirty_rows[i].xmin = outw; | |
593 dirty_rows[i].xmax = 0; | |
594 } | |
595 } | |
18937 | 596 |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
597 static int config(struct vf_instance *vf, |
32096 | 598 int width, int height, int d_width, int d_height, |
599 unsigned int flags, unsigned int outfmt) | |
18937 | 600 { |
32391
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
601 struct mp_eosd_settings res = {0}; |
35244 | 602 struct dirty_rows_extent *dirty_rows; |
603 int outw, outh; | |
604 int planes, alphas; | |
605 int i; | |
31927 | 606 |
35574 | 607 vf->priv->outfmt = outfmt; |
608 vf->priv->outh = outh = height + ass_top_margin + ass_bottom_margin; | |
609 vf->priv->outw = outw = width; | |
610 | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
611 switch (outfmt) { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
612 case IMGFMT_YV12: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
613 case IMGFMT_I420: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
614 case IMGFMT_IYUV: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
615 vf->priv->is_planar = 1; |
35244 | 616 planes = 3; |
617 alphas = 2; | |
618 vf->priv->draw_image = draw_image_yuv; | |
619 vf->priv->render_frame = render_frame_yuv420p; | |
620 vf->priv->prepare_buffer = prepare_buffer_420p; | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
621 #if HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
622 if (gCpuCaps.hasSSE4 && outw % 32 == 0) |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
623 vf->priv->render_frame = render_frame_yuv420p_sse4; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
624 #endif |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
625 break; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
626 case IMGFMT_UYVY: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
627 case IMGFMT_YUY2: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
628 vf->priv->is_planar = 0; |
35244 | 629 planes = 3; |
630 alphas = 1; | |
631 vf->priv->draw_image = draw_image_yuv; | |
632 vf->priv->render_frame = render_frame_yuv422; | |
633 vf->priv->prepare_buffer = prepare_buffer_422; | |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
634 #if HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
635 if (gCpuCaps.hasSSE4 && outw % 8 == 0) |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
636 vf->priv->render_frame = render_frame_yuv422_sse4; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
637 #endif |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
638 break; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
639 default: |
32096 | 640 return 0; |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
641 } |
18937 | 642 |
32096 | 643 if (!opt_screen_size_x && !opt_screen_size_y) { |
644 d_width = d_width * vf->priv->outw / width; | |
645 d_height = d_height * vf->priv->outh / height; | |
646 } | |
18937 | 647 |
35244 | 648 for (i = 0; i < planes; i++) |
649 vf->priv->planes[i] = av_malloc(outw * outh); | |
650 for (i = 0; i < alphas; i++) | |
651 vf->priv->alphas[i] = av_malloc(outw * outh); | |
652 dirty_rows = av_malloc(outh * sizeof(*dirty_rows)); | |
653 // mark all rows dirty here | |
654 // so that they can be properly cleaned in clear_buffer() | |
655 for (i = 0; i < outh; i++) { | |
656 dirty_rows[i].xmin = 0; | |
657 dirty_rows[i].xmax = outw; | |
658 } | |
659 vf->priv->dirty_rows = dirty_rows; | |
660 clean_buffer(vf); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
661 |
32096 | 662 res.w = vf->priv->outw; |
663 res.h = vf->priv->outh; | |
664 res.srcw = width; | |
665 res.srch = height; | |
666 res.mt = ass_top_margin; | |
667 res.mb = ass_bottom_margin; | |
32391
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
668 eosd_configure(&res); |
18937 | 669 |
32096 | 670 return vf_next_config(vf, vf->priv->outw, vf->priv->outh, d_width, |
671 d_height, flags, outfmt); | |
18937 | 672 } |
673 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
674 static void get_image(struct vf_instance *vf, mp_image_t *mpi) |
18937 | 675 { |
32096 | 676 if (mpi->type == MP_IMGTYPE_IPB) |
677 return; | |
678 if (mpi->flags & MP_IMGFLAG_PRESERVE) | |
679 return; | |
680 if (mpi->imgfmt != vf->priv->outfmt) | |
681 return; // colorspace differ | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
682 |
32096 | 683 // width never changes, always try full DR |
684 mpi->priv = vf->dmpi = vf_get_image(vf->next, mpi->imgfmt, mpi->type, | |
685 mpi->flags | MP_IMGFLAG_READABLE, | |
34882
649d4cad4619
Request a sufficiently large image for direct rendering.
reimar
parents:
34863
diff
changeset
|
686 FFMAX(mpi->width, vf->priv->outw), |
649d4cad4619
Request a sufficiently large image for direct rendering.
reimar
parents:
34863
diff
changeset
|
687 FFMAX(mpi->height, vf->priv->outh)); |
18937 | 688 |
32096 | 689 if ( (vf->dmpi->flags & MP_IMGFLAG_DRAW_CALLBACK) && |
690 !(vf->dmpi->flags & MP_IMGFLAG_DIRECT)) { | |
691 mp_msg(MSGT_ASS, MSGL_INFO, MSGTR_MPCODECS_FullDRNotPossible); | |
692 return; | |
693 } | |
694 // set up mpi as a cropped-down image of dmpi: | |
695 if (mpi->flags & MP_IMGFLAG_PLANAR) { | |
696 mpi->planes[0] = vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0]; | |
697 mpi->planes[1] = vf->dmpi->planes[1] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[1]; | |
698 mpi->planes[2] = vf->dmpi->planes[2] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[2]; | |
699 mpi->stride[1] = vf->dmpi->stride[1]; | |
700 mpi->stride[2] = vf->dmpi->stride[2]; | |
701 } else { | |
702 mpi->planes[0] = vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0]; | |
703 } | |
704 mpi->stride[0] = vf->dmpi->stride[0]; | |
705 mpi->width = vf->dmpi->width; | |
706 mpi->flags |= MP_IMGFLAG_DIRECT; | |
707 mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK; | |
708 // vf->dmpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK; | |
18937 | 709 } |
710 | |
711 static void blank(mp_image_t *mpi, int y1, int y2) | |
712 { | |
32096 | 713 int color[3] = { 16, 128, 128 }; // black (YUV) |
714 int y; | |
715 unsigned char *dst; | |
716 int chroma_rows = (y2 - y1) >> mpi->chroma_y_shift; | |
18937 | 717 |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
718 if (mpi->flags & MP_IMGFLAG_PLANAR) { |
35436 | 719 dst = mpi->planes[0] + y1 * mpi->stride[0]; |
720 for (y = 0; y < y2 - y1; ++y) { | |
721 memset(dst, color[0], mpi->w); | |
722 dst += mpi->stride[0]; | |
723 } | |
724 dst = mpi->planes[1] + (y1 >> mpi->chroma_y_shift) * mpi->stride[1]; | |
725 for (y = 0; y < chroma_rows; ++y) { | |
726 memset(dst, color[1], mpi->chroma_width); | |
727 dst += mpi->stride[1]; | |
728 } | |
729 dst = mpi->planes[2] + (y1 >> mpi->chroma_y_shift) * mpi->stride[2]; | |
730 for (y = 0; y < chroma_rows; ++y) { | |
731 memset(dst, color[2], mpi->chroma_width); | |
732 dst += mpi->stride[2]; | |
733 } | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
734 } else { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
735 unsigned char packed_color[4]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
736 int x; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
737 if (mpi->imgfmt == IMGFMT_UYVY) { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
738 packed_color[0] = color[1]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
739 packed_color[1] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
740 packed_color[2] = color[2]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
741 packed_color[3] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
742 } else { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
743 packed_color[0] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
744 packed_color[1] = color[1]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
745 packed_color[2] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
746 packed_color[3] = color[2]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
747 } |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
748 dst = mpi->planes[0] + y1 * mpi->stride[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
749 for (y = y1; y < y2; ++y) { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
750 for (x = 0; x < mpi->w / 2; ++x) |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
751 AV_COPY32(dst + 4 * x, packed_color); |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
752 dst += mpi->stride[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
753 } |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
754 } |
18937 | 755 } |
756 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
757 static int prepare_image(struct vf_instance *vf, mp_image_t *mpi) |
18937 | 758 { |
32096 | 759 if (mpi->flags & MP_IMGFLAG_DIRECT || |
760 mpi->flags & MP_IMGFLAG_DRAW_CALLBACK) { | |
761 vf->dmpi = mpi->priv; | |
762 if (!vf->dmpi) { | |
763 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_MPCODECS_FunWhydowegetNULL); | |
764 return 0; | |
765 } | |
766 mpi->priv = NULL; | |
767 // we've used DR, so we're ready... | |
768 if (ass_top_margin) | |
769 blank(vf->dmpi, 0, ass_top_margin); | |
770 if (ass_bottom_margin) | |
771 blank(vf->dmpi, vf->priv->outh - ass_bottom_margin, vf->priv->outh); | |
772 if (!(mpi->flags & MP_IMGFLAG_PLANAR)) | |
773 vf->dmpi->planes[1] = mpi->planes[1]; // passthrough rgb8 palette | |
774 return 0; | |
775 } | |
776 // hope we'll get DR buffer: | |
777 vf->dmpi = vf_get_image(vf->next, vf->priv->outfmt, MP_IMGTYPE_TEMP, | |
778 MP_IMGFLAG_ACCEPT_STRIDE | MP_IMGFLAG_READABLE, | |
779 vf->priv->outw, vf->priv->outh); | |
18937 | 780 |
32096 | 781 // copy mpi->dmpi... |
782 if (mpi->flags & MP_IMGFLAG_PLANAR) { | |
783 memcpy_pic(vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0], | |
35436 | 784 mpi->planes[0], |
785 mpi->w, | |
786 mpi->h, | |
787 vf->dmpi->stride[0], | |
788 mpi->stride[0]); | |
32096 | 789 memcpy_pic(vf->dmpi->planes[1] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[1], |
35436 | 790 mpi->planes[1], |
791 mpi->w >> mpi->chroma_x_shift, | |
32096 | 792 mpi->h >> mpi->chroma_y_shift, |
35436 | 793 vf->dmpi->stride[1], |
32096 | 794 mpi->stride[1]); |
795 memcpy_pic(vf->dmpi->planes[2] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[2], | |
35436 | 796 mpi->planes[2], |
32096 | 797 mpi->w >> mpi->chroma_x_shift, |
798 mpi->h >> mpi->chroma_y_shift, | |
35436 | 799 vf->dmpi->stride[2], |
32096 | 800 mpi->stride[2]); |
801 } else { | |
802 memcpy_pic(vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0], | |
35436 | 803 mpi->planes[0], |
32096 | 804 mpi->w * (vf->dmpi->bpp / 8), |
35436 | 805 mpi->h, |
32096 | 806 vf->dmpi->stride[0], |
35436 | 807 mpi->stride[0]); |
32096 | 808 vf->dmpi->planes[1] = mpi->planes[1]; // passthrough rgb8 palette |
809 } | |
810 if (ass_top_margin) | |
811 blank(vf->dmpi, 0, ass_top_margin); | |
812 if (ass_bottom_margin) | |
813 blank(vf->dmpi, vf->priv->outh - ass_bottom_margin, vf->priv->outh); | |
814 return 0; | |
18937 | 815 } |
816 | |
35244 | 817 static void prepare_eosd(vf_instance_t *vf, struct mp_eosd_image_list *imgs) |
18937 | 818 { |
35244 | 819 struct mp_eosd_image *img = eosd_image_first(imgs); |
820 void (*draw_image)(vf_instance_t *, struct mp_eosd_image *); | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
821 |
35244 | 822 clean_buffer(vf); |
823 draw_image = vf->priv->draw_image; | |
824 for (; img; img = eosd_image_next(imgs)) | |
825 draw_image(vf, img); | |
826 vf->priv->prepare_buffer(vf); | |
18937 | 827 } |
828 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
829 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts) |
18937 | 830 { |
32391
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
831 struct mp_eosd_image_list images; |
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
832 eosd_render_frame(pts, &images); |
32096 | 833 prepare_image(vf, mpi); |
35244 | 834 if (images.changed) |
835 prepare_eosd(vf, &images); | |
836 vf->priv->render_frame(vf); | |
32096 | 837 return vf_next_put_image(vf, vf->dmpi, pts); |
18937 | 838 } |
839 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
840 static int query_format(struct vf_instance *vf, unsigned int fmt) |
18937 | 841 { |
32096 | 842 switch (fmt) { |
843 case IMGFMT_YV12: | |
844 case IMGFMT_I420: | |
845 case IMGFMT_IYUV: | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
846 case IMGFMT_UYVY: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
847 case IMGFMT_YUY2: |
35033 | 848 return vf_next_query_format(vf, fmt) | VFCAP_EOSD; |
32096 | 849 } |
850 return 0; | |
18937 | 851 } |
852 | |
853 static int control(vf_instance_t *vf, int request, void *data) | |
854 { | |
32096 | 855 switch (request) { |
856 case VFCTRL_INIT_EOSD: | |
857 return CONTROL_TRUE; | |
858 case VFCTRL_DRAW_EOSD: | |
859 return CONTROL_TRUE; | |
860 } | |
861 return vf_next_control(vf, request, data); | |
18937 | 862 } |
863 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
864 static void uninit(struct vf_instance *vf) |
18937 | 865 { |
35244 | 866 int i; |
867 for (i = 0; i < MP_MAX_PLANES; i++) | |
868 av_free(vf->priv->planes[i]); | |
869 for (i = 0; i < MP_MAX_PLANES; i++) | |
870 av_free(vf->priv->alphas[i]); | |
871 av_free(vf->priv->dirty_rows); | |
18937 | 872 } |
873 | |
32096 | 874 static const unsigned int fmt_list[] = { |
875 IMGFMT_YV12, | |
876 IMGFMT_I420, | |
877 IMGFMT_IYUV, | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
878 IMGFMT_UYVY, |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
879 IMGFMT_YUY2, |
32096 | 880 0 |
18937 | 881 }; |
882 | |
30638
a7b908875c14
Rename open() vf initialization function to vf_open().
diego
parents:
30633
diff
changeset
|
883 static int vf_open(vf_instance_t *vf, char *args) |
18937 | 884 { |
32096 | 885 int flags; |
35033 | 886 unsigned outfmt = vf_match_csp(&vf->next, fmt_list, IMGFMT_YV12); |
887 if (outfmt) | |
888 flags = vf_next_query_format(vf, outfmt); | |
889 if (!outfmt || (vf->priv->auto_insert && flags & VFCAP_EOSD)) { | |
32096 | 890 uninit(vf); |
891 return 0; | |
892 } | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
893 |
32096 | 894 if (vf->priv->auto_insert) |
895 mp_msg(MSGT_ASS, MSGL_INFO, "[ass] auto-open\n"); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
896 |
32096 | 897 vf->config = config; |
898 vf->query_format = query_format; | |
899 vf->uninit = uninit; | |
900 vf->control = control; | |
901 vf->get_image = get_image; | |
902 vf->put_image = put_image; | |
903 vf->default_caps = VFCAP_EOSD; | |
904 return 1; | |
18937 | 905 } |
906 | |
907 #define ST_OFF(f) M_ST_OFF(struct vf_priv_s,f) | |
24969
c2b7ba444ade
begin moving const filter data to .text/.rodata sections
rfelker
parents:
24545
diff
changeset
|
908 static const m_option_t vf_opts_fields[] = { |
32096 | 909 {"auto", ST_OFF(auto_insert), CONF_TYPE_FLAG, 0, 0, 1, NULL}, |
910 {NULL, NULL, 0, 0, 0, 0, NULL} | |
18937 | 911 }; |
912 | |
24969
c2b7ba444ade
begin moving const filter data to .text/.rodata sections
rfelker
parents:
24545
diff
changeset
|
913 static const m_struct_t vf_opts = { |
32096 | 914 "ass", |
915 sizeof(struct vf_priv_s), | |
916 &vf_priv_dflt, | |
917 vf_opts_fields | |
18937 | 918 }; |
919 | |
24969
c2b7ba444ade
begin moving const filter data to .text/.rodata sections
rfelker
parents:
24545
diff
changeset
|
920 const vf_info_t vf_info_ass = { |
32096 | 921 "Render ASS/SSA subtitles", |
922 "ass", | |
35244 | 923 "Evgeniy Stepanov, Xidorn Quan", |
32096 | 924 "", |
925 vf_open, | |
926 &vf_opts | |
18937 | 927 }; |