Mercurial > mplayer.hg
annotate libmpcodecs/vf_ass.c @ 37064:85499766333b
Add decoding support for On2 AVC audio codec.
author | cehoyos |
---|---|
date | Thu, 24 Apr 2014 06:42:02 +0000 |
parents | b4ce15212bfc |
children |
rev | line source |
---|---|
20008
fa122b7c71c6
Add copyright notice and vim/emacs comments to libass and vf_ass.c.
eugeni
parents:
19563
diff
changeset
|
1 /* |
26727 | 2 * Copyright (C) 2006 Evgeniy Stepanov <eugeni.stepanov@gmail.com> |
35244 | 3 * Copyright (C) 2012 Xidorn Quan <quanxunzhen@gmail.com> |
26727 | 4 * |
5 * This file is part of MPlayer. | |
6 * | |
7 * MPlayer is free software; you can redistribute it and/or modify | |
8 * it under the terms of the GNU General Public License as published by | |
9 * the Free Software Foundation; either version 2 of the License, or | |
10 * (at your option) any later version. | |
11 * | |
12 * MPlayer is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 * GNU General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU General Public License along | |
18 * with MPlayer; if not, write to the Free Software Foundation, Inc., | |
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
20 */ | |
20008
fa122b7c71c6
Add copyright notice and vim/emacs comments to libass and vf_ass.c.
eugeni
parents:
19563
diff
changeset
|
21 |
18937 | 22 #include "config.h" |
23 | |
24 #include <stdio.h> | |
25 #include <stdlib.h> | |
26 #include <string.h> | |
24545
9e5126679d44
Replace stdint.h #include by functionally equivalent inttypes.h.
diego
parents:
23134
diff
changeset
|
27 #include <inttypes.h> |
18937 | 28 #include <assert.h> |
29 | |
30 #include "config.h" | |
31 #include "mp_msg.h" | |
32 #include "help_mp.h" | |
31489 | 33 #include "mpcommon.h" |
18937 | 34 #include "img_format.h" |
35 #include "mp_image.h" | |
30653
3d23e24c5c60
Declare externally used variables from vd.c as extern in vd.h.
diego
parents:
30642
diff
changeset
|
36 #include "vd.h" |
18937 | 37 #include "vf.h" |
38 | |
39 #include "libvo/fastmemcpy.h" | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
40 #include "libavutil/intreadwrite.h" |
32467 | 41 #include "sub/sub.h" |
18937 | 42 #include "m_option.h" |
43 #include "m_struct.h" | |
44 | |
32461 | 45 #include "sub/ass_mp.h" |
32460 | 46 #include "sub/eosd.h" |
18937 | 47 |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
48 #include "cpudetect.h" |
35705
b4ce15212bfc
Replace obsolete x86_cpu.h #includes by the correct header.
diego
parents:
35618
diff
changeset
|
49 #include "libavutil/x86/asm.h" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
50 |
/*
 * Channel extraction from a packed 32-bit colour laid out as 0xRRGGBBAA.
 * Every extractor masks to 8 bits so the result is always in 0..0xFF, even
 * when the argument is a signed or wider-than-32-bit expression.
 */
#define _r(c) (((c) >> 24) & 0xFF)
#define _g(c) (((c) >> 16) & 0xFF)
#define _b(c) (((c) >>  8) & 0xFF)
#define _a(c) ((c) & 0xFF)

/*
 * RGB -> Y/U/V conversion using integer coefficients scaled by 1024
 * (hence the >> 10), with an offset of 16 for luma and 128 for chroma.
 * The chroma sums can be negative; callers in this file store the result
 * in a uint8_t, which keeps only the low 8 bits.
 */
#define rgba2y(c) ( (( 263*_r(c) + 516*_g(c) + 100*_b(c)) >> 10) + 16 )
#define rgba2u(c) ( ((-152*_r(c) - 298*_g(c) + 450*_b(c)) >> 10) + 128 )
#define rgba2v(c) ( (( 450*_r(c) - 376*_g(c) -  73*_b(c)) >> 10) + 128 )

/*
 * RSHIFT is not defined in this part of the file; the ranges quoted below
 * hold if it is a round-to-nearest right shift -- TODO confirm.
 */
/* map 0 - 0xFF -> 0 - 0x101 */
#define MAP_16BIT(v) RSHIFT(0x102 * (v), 8)
/* map 0 - 0xFF -> 0 - 0x10101 */
#define MAP_24BIT(v) RSHIFT(0x10203 * (v), 8)
18937 | 63 |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
#if HAVE_SSE4

/*
 * Constants referenced from the SSE4 inline assembly through MANGLE().
 * Both hold the same value in each of their four 32-bit lanes.
 */

/* 0x80 per lane: added to the product right before the ">> 8". */
DECLARE_ASM_CONST(16, uint32_t, sse_int32_80h[4]) =
    { 0x80, 0x80, 0x80, 0x80 };

/* 0x102 per lane: the same multiplier that MAP_16BIT() uses. */
DECLARE_ASM_CONST(16, uint32_t, sse_int32_map_factor[4]) =
    { 0x102, 0x102, 0x102, 0x102 };

#endif // HAVE_SSE4
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
72 |
24969
c2b7ba444ade
begin moving const filter data to .text/.rodata sections
rfelker
parents:
24545
diff
changeset
|
73 static const struct vf_priv_s { |
32096 | 74 int outh, outw; |
18937 | 75 |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
76 int is_planar; |
32096 | 77 unsigned int outfmt; |
18937 | 78 |
32096 | 79 // 1 = auto-added filter: insert only if chain does not support EOSD already |
80 // 0 = insert always | |
81 int auto_insert; | |
18937 | 82 |
35244 | 83 // planar data to be directly rendered on frames |
84 uint8_t *planes[MP_MAX_PLANES]; | |
85 // alpha here is actually transparency, not opacity | |
86 uint8_t *alphas[MP_MAX_PLANES]; | |
87 struct dirty_rows_extent { | |
88 int xmin, xmax; | |
89 } *dirty_rows; | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
90 |
35244 | 91 // called for every eosd image when subtitle is changed |
92 void (*draw_image)(vf_instance_t *, struct mp_eosd_image *); | |
93 // called for every time subtitle is changed | |
94 void (*prepare_buffer)(vf_instance_t *); | |
95 // called for every frame | |
96 void (*render_frame)(vf_instance_t *); | |
24969
c2b7ba444ade
begin moving const filter data to .text/.rodata sections
rfelker
parents:
24545
diff
changeset
|
97 } vf_priv_dflt; |
18937 | 98 |
35244 | 99 static void draw_image_yuv(vf_instance_t *vf, struct mp_eosd_image *img) |
100 { | |
101 uint32_t color = img->color; | |
102 uint32_t opacity = 0xFF - _a(color); | |
103 uint8_t y = rgba2y(color), | |
104 u = rgba2u(color), | |
105 v = rgba2v(color); | |
35268 | 106 int outw = vf->priv->outw; |
35244 | 107 uint8_t *alpha = vf->priv->alphas[0], |
108 *dst_y = vf->priv->planes[0], | |
109 *dst_u = vf->priv->planes[1], | |
110 *dst_v = vf->priv->planes[2]; | |
111 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
112 int src_x = img->dst_x, src_w = img->w, | |
113 src_y = img->dst_y, src_h = img->h, | |
114 stride = img->stride; | |
115 uint8_t *src = img->bitmap; | |
116 int i, j; | |
117 | |
118 opacity = MAP_24BIT(opacity); | |
119 for (i = 0; i < src_h; i++) { | |
120 struct dirty_rows_extent *dirty_row = &dirty_rows[src_y + i]; | |
121 dirty_row->xmin = FFMIN(dirty_row->xmin, src_x); | |
122 dirty_row->xmax = FFMAX(dirty_row->xmax, src_x + src_w); | |
123 | |
124 for (j = 0; j < src_w; j++) { | |
125 uint32_t k = src[i * stride + j]; | |
126 if (k) { | |
127 size_t p = (src_y + i) * outw + src_x + j; | |
128 k *= opacity; | |
129 alpha[p] = RSHIFT((0xFFFFFF - k) * alpha[p], 24); | |
130 dst_y[p] = RSHIFT((0xFFFFFF - k) * dst_y[p] + k * y, 24); | |
131 dst_u[p] = RSHIFT((0xFFFFFF - k) * dst_u[p] + k * u, 24); | |
132 dst_v[p] = RSHIFT((0xFFFFFF - k) * dst_v[p] + k * v, 24); | |
133 } | |
134 } | |
135 } | |
136 } | |
137 | |
138 static void prepare_buffer_422(vf_instance_t *vf) | |
139 { | |
140 uint8_t *dst_u = vf->priv->planes[1], | |
141 *dst_v = vf->priv->planes[2]; | |
142 int outw = vf->priv->outw, | |
143 outh = vf->priv->outh; | |
144 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
145 int i, j; | |
146 | |
147 for (i = 0; i < outh; i++) { | |
148 int xmin = dirty_rows[i].xmin & ~1, | |
149 xmax = dirty_rows[i].xmax; | |
150 for (j = xmin; j < xmax; j += 2) { | |
151 size_t p = i * outw + j; | |
152 dst_u[p] = (dst_u[p] + dst_u[p + 1]) / 2; | |
153 dst_v[p] = (dst_v[p] + dst_v[p + 1]) / 2; | |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
154 dst_u[p + 1] = dst_v[p + 1] = 0; |
35244 | 155 } |
156 } | |
157 } | |
158 | |
159 static void render_frame_yuv422(vf_instance_t *vf) | |
160 { | |
161 uint8_t *alpha = vf->priv->alphas[0]; | |
162 uint8_t *src_y = vf->priv->planes[0], | |
163 *src_u = vf->priv->planes[1], | |
164 *src_v = vf->priv->planes[2]; | |
165 int outw = vf->priv->outw, | |
166 outh = vf->priv->outh; | |
167 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
168 uint8_t *dest = vf->dmpi->planes[0]; | |
169 int stride = vf->dmpi->stride[0]; | |
170 int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY; | |
171 int i, j; | |
172 | |
173 for (i = 0; i < outh; i++) { | |
174 int xmin = dirty_rows[i].xmin & ~1, | |
175 xmax = dirty_rows[i].xmax; | |
176 for (j = xmin; j < xmax; j += 2) { | |
177 size_t src = i * outw + j, | |
178 dst = i * stride + j * 2; | |
179 uint_fast16_t a0 = alpha[src], | |
180 a1 = alpha[src + 1]; | |
181 uint8_t y0, y1, u, v; | |
182 | |
183 if (a0 == 0xFF && a1 == 0xFF) | |
184 continue; | |
185 | |
186 y0 = dest[dst + is_uyvy + 0]; | |
187 y1 = dest[dst + is_uyvy + 2]; | |
188 u = dest[dst - is_uyvy + 1]; | |
189 v = dest[dst - is_uyvy + 3]; | |
190 | |
191 a0 = MAP_16BIT(a0); | |
192 a1 = MAP_16BIT(a1); | |
193 y0 = ((a0 * y0) >> 8) + src_y[src]; | |
194 y1 = ((a1 * y1) >> 8) + src_y[src + 1]; | |
195 | |
196 a0 = (a0 + a1) / 2; | |
197 u = ((a0 * u) >> 8) + src_u[src]; | |
198 v = ((a0 * v) >> 8) + src_v[src]; | |
199 | |
200 dest[dst + is_uyvy + 0] = y0; | |
201 dest[dst + is_uyvy + 2] = y1; | |
202 dest[dst - is_uyvy + 1] = u; | |
203 dest[dst - is_uyvy + 3] = v; | |
204 } | |
205 } | |
206 } | |
207 | |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
208 #if HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
209 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
210 static void render_frame_yuv422_sse4(vf_instance_t *vf) |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
211 { |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
212 uint8_t *alpha = vf->priv->alphas[0]; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
213 uint8_t *src_y = vf->priv->planes[0], |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
214 *src_u = vf->priv->planes[1], |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
215 *src_v = vf->priv->planes[2]; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
216 int outw = vf->priv->outw, |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
217 outh = vf->priv->outh; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
218 struct dirty_rows_extent *dr = vf->priv->dirty_rows; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
219 uint8_t *dst = vf->dmpi->planes[0]; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
220 int stride = vf->dmpi->stride[0]; |
35579
6b169870ae30
Reduce register usage to fix the compilation in x86.
upsuper
parents:
35576
diff
changeset
|
221 int32_t is_uyvy = vf->priv->outfmt == IMGFMT_UYVY; |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
222 int i; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
223 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
224 for (i = 0; i < outh; i++) { |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
225 size_t xmin = dr[i].xmin & ~7, |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
226 xmax = dr[i].xmax; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
227 __asm__ volatile ( |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
228 "pxor %%xmm7, %%xmm7 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
229 "jmp 4f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
230 "1: \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
231 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
232 "cmpl $-1, 0(%[alpha], %[j], 1) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
233 "jne 2f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
234 "cmpl $-1, 4(%[alpha], %[j], 1) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
235 "jne 2f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
236 "jmp 3f \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
237 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
238 "2: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
239 "movq (%[alpha], %[j], 1), %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
240 "punpcklbw %%xmm7, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
241 "movdqa %%xmm0, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
242 "punpcklwd %%xmm7, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
243 "punpckhwd %%xmm7, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
244 "pmulld "MANGLE(sse_int32_map_factor)", %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
245 "pmulld "MANGLE(sse_int32_map_factor)", %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
246 "paddd "MANGLE(sse_int32_80h)", %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
247 "paddd "MANGLE(sse_int32_80h)", %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
248 "psrld $8, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
249 "psrld $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
250 "movdqa %%xmm0, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
251 "movdqa %%xmm1, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
252 "packssdw %%xmm1, %%xmm0 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
253 "phaddd %%xmm3, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
254 "psrld $1, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
255 "packssdw %%xmm7, %%xmm2 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
256 "punpcklwd %%xmm2, %%xmm2 \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
257 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
258 "movdqu (%[dst], %[j], 2), %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
259 "movdqa %%xmm1, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
260 "cmpl $0, %[f] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
261 "je 11f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
262 "psrlw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
263 "psllw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
264 "psrlw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
265 "jmp 12f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
266 "11: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
267 "psllw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
268 "psrlw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
269 "psrlw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
270 "12: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
271 "pmullw %%xmm0, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
272 "pmullw %%xmm2, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
273 "psrlw $8, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
274 "psrlw $8, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
275 "packuswb %%xmm7, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
276 "packuswb %%xmm7, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
277 "mov %[src_y], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
278 "movq (%%"REG_S", %[j], 1), %%xmm4 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
279 "mov %[src_u], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
280 "movq (%%"REG_S", %[j], 1), %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
281 "mov %[src_v], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
282 "movq (%%"REG_S", %[j], 1), %%xmm6 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
283 "packuswb %%xmm7, %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
284 "packuswb %%xmm7, %%xmm6 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
285 "punpcklbw %%xmm6, %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
286 "cmpl $0, %[f] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
287 "je 21f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
288 "punpcklbw %%xmm1, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
289 "punpcklbw %%xmm4, %%xmm5 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
290 "paddb %%xmm5, %%xmm3 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
291 "movdqu %%xmm3, (%[dst], %[j], 2) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
292 "jmp 22f \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
293 "21: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
294 "punpcklbw %%xmm3, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
295 "punpcklbw %%xmm5, %%xmm4 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
296 "paddb %%xmm4, %%xmm1 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
297 "movdqu %%xmm1, (%[dst], %[j], 2) \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
298 "22: \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
299 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
300 "3: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
301 "add $8, %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
302 "4: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
303 "cmp %[xmax], %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
304 "jl 1b \n\t" |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
305 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
306 : : [dst] "r" (dst + i * stride), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
307 [alpha] "r" (alpha + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
308 [src_y] "g" (src_y + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
309 [src_u] "g" (src_u + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
310 [src_v] "g" (src_v + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
311 [j] "r" (xmin), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
312 [xmax] "g" (xmax), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
313 [f] "g" (is_uyvy) |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
314 : REG_S |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
315 ); |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
316 } |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
317 } |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
318 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
319 #endif // HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
320 |
35244 | 321 static void prepare_buffer_420p(vf_instance_t *vf) |
322 { | |
323 int outw = vf->priv->outw, | |
324 outh = vf->priv->outh; | |
325 uint8_t *dst_u = vf->priv->planes[1], | |
326 *dst_v = vf->priv->planes[2]; | |
327 uint8_t *src_a = vf->priv->alphas[0], | |
328 *dst_a = vf->priv->alphas[1]; | |
329 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
330 int i, j; | |
331 | |
332 for (i = 0; i < outh; i += 2) { | |
333 int xmin = FFMIN(dirty_rows[i].xmin, dirty_rows[i + 1].xmin) & ~1, | |
334 xmax = FFMAX(dirty_rows[i].xmax, dirty_rows[i + 1].xmax); | |
335 for (j = xmin; j < xmax; j += 2) { | |
35618 | 336 size_t p = i * outw / 2 + j / 2, |
35244 | 337 q1 = i * outw + j, |
338 q2 = q1 + outw; | |
339 dst_a[p] = (src_a[q1] + src_a[q1 + 1] + | |
340 src_a[q2] + src_a[q2 + 1] + 2) / 4; | |
341 dst_u[p] = (dst_u[q1] + dst_u[q1 + 1] + | |
342 dst_u[q2] + dst_u[q2 + 1] + 2) / 4; | |
343 dst_v[p] = (dst_v[q1] + dst_v[q1 + 1] + | |
344 dst_v[q2] + dst_v[q2 + 1] + 2) / 4; | |
345 } | |
346 } | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
347 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
348 #if HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
349 // for render_frame_yuv420p_sse4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
350 if (gCpuCaps.hasSSE4 && outw % 32 == 0) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
351 for (i = 0; i < outh; i += 2) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
352 int xmin = FFMIN(dirty_rows[i].xmin, dirty_rows[i + 1].xmin) & ~1, |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
353 xmax = FFMAX(dirty_rows[i].xmax, dirty_rows[i + 1].xmax); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
354 if (xmin >= xmax) |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
355 continue; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
356 for (j = xmin & ~31; j < xmin; j += 2) { |
35618 | 357 size_t p = i * outw / 2 + j / 2; |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
358 dst_a[p] = 0xFF; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
359 dst_u[p] = dst_v[p] = 0; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
360 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
361 for (j = xmax; j < FFALIGN(xmax, 32); j += 2) { |
35618 | 362 size_t p = i * outw / 2 + j / 2; |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
363 dst_a[p] = 0xFF; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
364 dst_u[p] = dst_v[p] = 0; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
365 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
366 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
367 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
368 #endif // HAVE_SSE4 |
35244 | 369 } |
370 | |
371 static void render_frame_yuv420p(vf_instance_t *vf) | |
372 { | |
373 uint8_t **planes = vf->priv->planes; | |
374 uint8_t **dest = vf->dmpi->planes; | |
375 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
376 uint8_t *alpha; | |
35268 | 377 uint8_t *src_y = planes[0], |
378 *src_u = planes[1], | |
379 *src_v = planes[2]; | |
380 uint8_t *dst_y = dest[0], | |
381 *dst_u = dest[1], | |
382 *dst_v = dest[2]; | |
35244 | 383 int stride; |
384 int outw = vf->priv->outw, | |
385 outh = vf->priv->outh; | |
386 int i, j; | |
387 | |
388 // y | |
389 alpha = vf->priv->alphas[0]; | |
390 stride = vf->dmpi->stride[0]; | |
391 for (i = 0; i < outh; i++) { | |
392 int xmin = dirty_rows[i].xmin, | |
393 xmax = dirty_rows[i].xmax; | |
394 for (j = xmin; j < xmax; j++) { | |
395 size_t s = i * outw + j, | |
396 d = i * stride + j; | |
397 if (alpha[s] != 0xFF) | |
398 dst_y[d] = ((MAP_16BIT(alpha[s]) * dst_y[d]) >> 8) + src_y[s]; | |
399 } | |
400 } | |
401 | |
402 // u & v | |
403 alpha = vf->priv->alphas[1]; | |
404 stride = vf->dmpi->stride[1]; | |
405 for (i = 0; i < outh / 2; i++) { | |
406 int xmin = FFMIN(dirty_rows[i * 2].xmin, dirty_rows[i * 2 + 1].xmin), | |
407 xmax = FFMAX(dirty_rows[i * 2].xmax, dirty_rows[i * 2 + 1].xmax); | |
408 for (j = xmin / 2; j < (xmax + 1) / 2; j++) { | |
35618 | 409 size_t s = i * outw + j, |
35244 | 410 d = i * stride + j; |
411 if (alpha[s] != 0xFF) { | |
412 uint_fast16_t a = MAP_16BIT(alpha[s]); | |
413 dst_u[d] = ((a * dst_u[d]) >> 8) + src_u[s]; | |
414 dst_v[d] = ((a * dst_v[d]) >> 8) + src_v[s]; | |
415 } | |
416 } | |
417 } | |
418 } | |
419 | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
420 #if HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
421 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
/*
 * Inline-asm fragment: test the 16 transparency bytes at alpha + j.
 * Jumps to local label 3 when all of them are 0xFF and to local label 2
 * otherwise. The asm statement using it must define both labels and the
 * [alpha] and [j] operands.
 */
#define CHECK_16_ALPHA                                  \
    "cmpl $-1, 0(%[alpha], %[j], 1) \n\t"               \
    "jne 2f \n\t"                                       \
    "cmpl $-1, 4(%[alpha], %[j], 1) \n\t"               \
    "jne 2f \n\t"                                       \
    "cmpl $-1, 8(%[alpha], %[j], 1) \n\t"               \
    "jne 2f \n\t"                                       \
    "cmpl $-1, 12(%[alpha], %[j], 1) \n\t"              \
    "jne 2f \n\t"                                       \
    "jmp 3f \n\t"
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
432 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
/*
 * Inline-asm fragment: load the 16 transparency bytes at alpha + j and
 * turn each into a 16-bit factor computed as (a * 0x102 + 0x80) >> 8.
 * Expects xmm7 to be zero. Leaves factors 0-7 in xmm0 and 8-15 in xmm2;
 * xmm1 and xmm3 are used as temporaries.
 */
#define MAP_16_ALPHA                                        \
    "movq 0(%[alpha], %[j], 1), %%xmm0 \n\t"                \
    "movq 8(%[alpha], %[j], 1), %%xmm2 \n\t"                \
    "punpcklbw %%xmm7, %%xmm0 \n\t"                         \
    "punpcklbw %%xmm7, %%xmm2 \n\t"                         \
    "movdqa %%xmm0, %%xmm1 \n\t"                            \
    "movdqa %%xmm2, %%xmm3 \n\t"                            \
    "punpcklwd %%xmm7, %%xmm0 \n\t"                         \
    "punpckhwd %%xmm7, %%xmm1 \n\t"                         \
    "punpcklwd %%xmm7, %%xmm2 \n\t"                         \
    "punpckhwd %%xmm7, %%xmm3 \n\t"                         \
    "pmulld "MANGLE(sse_int32_map_factor)", %%xmm0 \n\t"    \
    "pmulld "MANGLE(sse_int32_map_factor)", %%xmm1 \n\t"    \
    "pmulld "MANGLE(sse_int32_map_factor)", %%xmm2 \n\t"    \
    "pmulld "MANGLE(sse_int32_map_factor)", %%xmm3 \n\t"    \
    "paddd "MANGLE(sse_int32_80h)", %%xmm0 \n\t"            \
    "paddd "MANGLE(sse_int32_80h)", %%xmm1 \n\t"            \
    "paddd "MANGLE(sse_int32_80h)", %%xmm2 \n\t"            \
    "paddd "MANGLE(sse_int32_80h)", %%xmm3 \n\t"            \
    "psrld $8, %%xmm0 \n\t"                                 \
    "psrld $8, %%xmm1 \n\t"                                 \
    "psrld $8, %%xmm2 \n\t"                                 \
    "psrld $8, %%xmm3 \n\t"                                 \
    "packssdw %%xmm1, %%xmm0 \n\t"                          \
    "packssdw %%xmm3, %%xmm2 \n\t"
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
458 |
35584 | 459 #define DO_RENDER \ |
35602 | 460 "movq 0(%%"REG_D", %[j], 1), %%xmm1 \n\t" \ |
461 "movq 8(%%"REG_D", %[j], 1), %%xmm3 \n\t" \ | |
462 "punpcklbw %%xmm7, %%xmm1 \n\t" \ | |
463 "punpcklbw %%xmm7, %%xmm3 \n\t" \ | |
464 "pmullw %%xmm0, %%xmm1 \n\t" \ | |
465 "pmullw %%xmm2, %%xmm3 \n\t" \ | |
466 "psrlw $8, %%xmm1 \n\t" \ | |
467 "psrlw $8, %%xmm3 \n\t" \ | |
468 "packuswb %%xmm3, %%xmm1 \n\t" \ | |
469 "movdqa (%%"REG_S", %[j], 1), %%xmm4 \n\t" \ | |
470 "paddb %%xmm4, %%xmm1 \n\t" \ | |
471 "movdqu %%xmm1, (%%"REG_D", %[j], 1) \n\t" | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
472 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
473 static void render_frame_yuv420p_sse4(vf_instance_t *vf) |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
474 { |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
475 struct dirty_rows_extent *dr = vf->priv->dirty_rows; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
476 uint8_t *alpha; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
477 uint8_t *src_y = vf->priv->planes[0], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
478 *src_u = vf->priv->planes[1], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
479 *src_v = vf->priv->planes[2]; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
480 uint8_t *dst_y = vf->dmpi->planes[0], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
481 *dst_u = vf->dmpi->planes[1], |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
482 *dst_v = vf->dmpi->planes[2]; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
483 int stride; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
484 int outw = vf->priv->outw, |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
485 outh = vf->priv->outh; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
486 int i; |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
487 |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
488 // y |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
489 alpha = vf->priv->alphas[0]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
490 stride = vf->dmpi->stride[0]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
491 for (i = 0; i < outh; i++) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
492 size_t xmin = dr[i].xmin & ~15, |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
493 xmax = dr[i].xmax; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
494 __asm__ volatile ( |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
495 "pxor %%xmm7, %%xmm7 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
496 "jmp 4f \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
497 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
498 "1: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
499 CHECK_16_ALPHA |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
500 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
501 "2: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
502 MAP_16_ALPHA |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
503 DO_RENDER |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
504 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
505 "3: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
506 "add $16, %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
507 "4: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
508 "cmp %[xmax], %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
509 "jl 1b \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
510 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
511 : : [j] "r" (xmin), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
512 [xmax] "g" (xmax), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
513 [alpha] "r" (alpha + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
514 [src] "S" (src_y + i * outw), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
515 [dst] "D" (dst_y + i * stride) |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
516 ); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
517 } |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
518 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
519 // u & v |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
520 alpha = vf->priv->alphas[1]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
521 stride = vf->dmpi->stride[1]; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
522 for (i = 0; i < outh / 2; i++) { |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
523 size_t xmin = FFMIN(dr[i * 2].xmin, dr[i * 2 + 1].xmin) & ~31, |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
524 xmax = FFMAX(dr[i * 2].xmax, dr[i * 2 + 1].xmax); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
525 __asm__ volatile ( |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
526 "pxor %%xmm7, %%xmm7 \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
527 "jmp 4f \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
528 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
529 "1: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
530 CHECK_16_ALPHA |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
531 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
532 "2: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
533 MAP_16_ALPHA |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
534 "mov %[src_u], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
535 "mov %[dst_u], %%"REG_D" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
536 DO_RENDER |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
537 "mov %[src_v], %%"REG_S" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
538 "mov %[dst_v], %%"REG_D" \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
539 DO_RENDER |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
540 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
541 "3: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
542 "add $16, %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
543 "4: \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
544 "cmp %[xmax], %[j] \n\t" |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
545 "jl 1b \n\t" |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
546 |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
547 : : [j] "r" (xmin / 2), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
548 [xmax] "g" ((xmax + 1) / 2), |
35618 | 549 [alpha] "r" (alpha + i * outw), |
550 [src_u] "g" (src_u + i * outw), | |
551 [src_v] "g" (src_v + i * outw), | |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
552 [dst_u] "g" (dst_u + i * stride), |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
553 [dst_v] "g" (dst_v + i * stride) |
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
554 : REG_S, REG_D |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
555 ); |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
556 } |
35597
6063e2930092
Cosmetic: reindent & move defines out of function
upsuper
parents:
35585
diff
changeset
|
557 } |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
558 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
559 #undef CHECK_16_ALPHA |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
560 #undef MAP_16_ALPHA |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
561 #undef MUL_ALPHA |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
562 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
563 #endif // HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
564 |
35244 | 565 static void clean_buffer(vf_instance_t *vf) |
566 { | |
567 int outw = vf->priv->outw, | |
568 outh = vf->priv->outh; | |
569 struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows; | |
570 uint8_t **planes = vf->priv->planes; | |
571 uint8_t *alpha = vf->priv->alphas[0]; | |
572 int i, j; | |
573 | |
35618 | 574 if (vf->priv->prepare_buffer == prepare_buffer_420p) { |
575 // HACK: prepare_buffer_420p touched u & v planes | |
576 // so we want to clean them here. | |
577 for (i = 0; i < outh; i += 2) { | |
578 int xmin = FFMIN(dirty_rows[i].xmin, dirty_rows[i + 1].xmin) & ~1, | |
579 xmax = FFMAX(dirty_rows[i].xmax, dirty_rows[i + 1].xmax); | |
580 dirty_rows[i / 2].xmin = FFMIN(dirty_rows[i / 2].xmin, xmin / 2); | |
581 dirty_rows[i / 2].xmax = FFMAX(dirty_rows[i / 2].xmax, xmax / 2); | |
582 } | |
583 } | |
35244 | 584 for (i = 0; i < MP_MAX_PLANES; i++) { |
585 uint8_t *plane = planes[i]; | |
586 if (!plane) | |
587 break; | |
588 for (j = 0; j < outh; j++) { | |
589 int xmin = dirty_rows[j].xmin; | |
590 int width = dirty_rows[j].xmax - xmin; | |
591 if (width > 0) | |
592 memset(plane + j * outw + xmin, 0, width); | |
593 } | |
594 } | |
595 for (i = 0; i < outh; i++) { | |
596 int xmin = dirty_rows[i].xmin; | |
597 int width = dirty_rows[i].xmax - xmin; | |
598 if (width > 0) | |
599 memset(alpha + i * outw + xmin, -1, width); | |
600 } | |
601 for (i = 0; i < outh; i++) { | |
602 dirty_rows[i].xmin = outw; | |
603 dirty_rows[i].xmax = 0; | |
604 } | |
605 } | |
18937 | 606 |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
607 static int config(struct vf_instance *vf, |
32096 | 608 int width, int height, int d_width, int d_height, |
609 unsigned int flags, unsigned int outfmt) | |
18937 | 610 { |
32391
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
611 struct mp_eosd_settings res = {0}; |
35244 | 612 struct dirty_rows_extent *dirty_rows; |
613 int outw, outh; | |
614 int planes, alphas; | |
615 int i; | |
31927 | 616 |
35574 | 617 vf->priv->outfmt = outfmt; |
618 vf->priv->outh = outh = height + ass_top_margin + ass_bottom_margin; | |
619 vf->priv->outw = outw = width; | |
620 | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
621 switch (outfmt) { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
622 case IMGFMT_YV12: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
623 case IMGFMT_I420: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
624 case IMGFMT_IYUV: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
625 vf->priv->is_planar = 1; |
35244 | 626 planes = 3; |
627 alphas = 2; | |
628 vf->priv->draw_image = draw_image_yuv; | |
629 vf->priv->render_frame = render_frame_yuv420p; | |
630 vf->priv->prepare_buffer = prepare_buffer_420p; | |
35576
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
631 #if HAVE_SSE4 |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
632 if (gCpuCaps.hasSSE4 && outw % 32 == 0) |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
633 vf->priv->render_frame = render_frame_yuv420p_sse4; |
2508973357da
Accelerate ass rendering by using SSE4 for yuv420p.
upsuper
parents:
35575
diff
changeset
|
634 #endif |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
635 break; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
636 case IMGFMT_UYVY: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
637 case IMGFMT_YUY2: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
638 vf->priv->is_planar = 0; |
35244 | 639 planes = 3; |
640 alphas = 1; | |
641 vf->priv->draw_image = draw_image_yuv; | |
642 vf->priv->render_frame = render_frame_yuv422; | |
643 vf->priv->prepare_buffer = prepare_buffer_422; | |
35575
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
644 #if HAVE_SSE4 |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
645 if (gCpuCaps.hasSSE4 && outw % 8 == 0) |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
646 vf->priv->render_frame = render_frame_yuv422_sse4; |
db12239148e9
Accelerate ass rendering by using SSE4 for yuv422.
upsuper
parents:
35574
diff
changeset
|
647 #endif |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
648 break; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
649 default: |
32096 | 650 return 0; |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
651 } |
18937 | 652 |
32096 | 653 if (!opt_screen_size_x && !opt_screen_size_y) { |
654 d_width = d_width * vf->priv->outw / width; | |
655 d_height = d_height * vf->priv->outh / height; | |
656 } | |
18937 | 657 |
35244 | 658 for (i = 0; i < planes; i++) |
659 vf->priv->planes[i] = av_malloc(outw * outh); | |
660 for (i = 0; i < alphas; i++) | |
661 vf->priv->alphas[i] = av_malloc(outw * outh); | |
662 dirty_rows = av_malloc(outh * sizeof(*dirty_rows)); | |
663 // mark all rows dirty here | |
664 // so that they can be properly cleaned in clear_buffer() | |
665 for (i = 0; i < outh; i++) { | |
666 dirty_rows[i].xmin = 0; | |
667 dirty_rows[i].xmax = outw; | |
668 } | |
669 vf->priv->dirty_rows = dirty_rows; | |
670 clean_buffer(vf); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
671 |
32096 | 672 res.w = vf->priv->outw; |
673 res.h = vf->priv->outh; | |
674 res.srcw = width; | |
675 res.srch = height; | |
676 res.mt = ass_top_margin; | |
677 res.mb = ass_bottom_margin; | |
32391
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
678 eosd_configure(&res); |
18937 | 679 |
32096 | 680 return vf_next_config(vf, vf->priv->outw, vf->priv->outh, d_width, |
681 d_height, flags, outfmt); | |
18937 | 682 } |
683 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
684 static void get_image(struct vf_instance *vf, mp_image_t *mpi) |
18937 | 685 { |
32096 | 686 if (mpi->type == MP_IMGTYPE_IPB) |
687 return; | |
688 if (mpi->flags & MP_IMGFLAG_PRESERVE) | |
689 return; | |
690 if (mpi->imgfmt != vf->priv->outfmt) | |
691 return; // colorspace differ | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
692 |
32096 | 693 // width never changes, always try full DR |
694 mpi->priv = vf->dmpi = vf_get_image(vf->next, mpi->imgfmt, mpi->type, | |
695 mpi->flags | MP_IMGFLAG_READABLE, | |
34882
649d4cad4619
Request a sufficiently large image for direct rendering.
reimar
parents:
34863
diff
changeset
|
696 FFMAX(mpi->width, vf->priv->outw), |
649d4cad4619
Request a sufficiently large image for direct rendering.
reimar
parents:
34863
diff
changeset
|
697 FFMAX(mpi->height, vf->priv->outh)); |
18937 | 698 |
32096 | 699 if ( (vf->dmpi->flags & MP_IMGFLAG_DRAW_CALLBACK) && |
700 !(vf->dmpi->flags & MP_IMGFLAG_DIRECT)) { | |
701 mp_msg(MSGT_ASS, MSGL_INFO, MSGTR_MPCODECS_FullDRNotPossible); | |
702 return; | |
703 } | |
704 // set up mpi as a cropped-down image of dmpi: | |
705 if (mpi->flags & MP_IMGFLAG_PLANAR) { | |
706 mpi->planes[0] = vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0]; | |
707 mpi->planes[1] = vf->dmpi->planes[1] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[1]; | |
708 mpi->planes[2] = vf->dmpi->planes[2] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[2]; | |
709 mpi->stride[1] = vf->dmpi->stride[1]; | |
710 mpi->stride[2] = vf->dmpi->stride[2]; | |
711 } else { | |
712 mpi->planes[0] = vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0]; | |
713 } | |
714 mpi->stride[0] = vf->dmpi->stride[0]; | |
715 mpi->width = vf->dmpi->width; | |
716 mpi->flags |= MP_IMGFLAG_DIRECT; | |
717 mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK; | |
718 // vf->dmpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK; | |
18937 | 719 } |
720 | |
721 static void blank(mp_image_t *mpi, int y1, int y2) | |
722 { | |
32096 | 723 int color[3] = { 16, 128, 128 }; // black (YUV) |
724 int y; | |
725 unsigned char *dst; | |
726 int chroma_rows = (y2 - y1) >> mpi->chroma_y_shift; | |
18937 | 727 |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
728 if (mpi->flags & MP_IMGFLAG_PLANAR) { |
35436 | 729 dst = mpi->planes[0] + y1 * mpi->stride[0]; |
730 for (y = 0; y < y2 - y1; ++y) { | |
731 memset(dst, color[0], mpi->w); | |
732 dst += mpi->stride[0]; | |
733 } | |
734 dst = mpi->planes[1] + (y1 >> mpi->chroma_y_shift) * mpi->stride[1]; | |
735 for (y = 0; y < chroma_rows; ++y) { | |
736 memset(dst, color[1], mpi->chroma_width); | |
737 dst += mpi->stride[1]; | |
738 } | |
739 dst = mpi->planes[2] + (y1 >> mpi->chroma_y_shift) * mpi->stride[2]; | |
740 for (y = 0; y < chroma_rows; ++y) { | |
741 memset(dst, color[2], mpi->chroma_width); | |
742 dst += mpi->stride[2]; | |
743 } | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
744 } else { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
745 unsigned char packed_color[4]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
746 int x; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
747 if (mpi->imgfmt == IMGFMT_UYVY) { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
748 packed_color[0] = color[1]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
749 packed_color[1] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
750 packed_color[2] = color[2]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
751 packed_color[3] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
752 } else { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
753 packed_color[0] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
754 packed_color[1] = color[1]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
755 packed_color[2] = color[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
756 packed_color[3] = color[2]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
757 } |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
758 dst = mpi->planes[0] + y1 * mpi->stride[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
759 for (y = y1; y < y2; ++y) { |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
760 for (x = 0; x < mpi->w / 2; ++x) |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
761 AV_COPY32(dst + 4 * x, packed_color); |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
762 dst += mpi->stride[0]; |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
763 } |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
764 } |
18937 | 765 } |
766 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
767 static int prepare_image(struct vf_instance *vf, mp_image_t *mpi) |
18937 | 768 { |
32096 | 769 if (mpi->flags & MP_IMGFLAG_DIRECT || |
770 mpi->flags & MP_IMGFLAG_DRAW_CALLBACK) { | |
771 vf->dmpi = mpi->priv; | |
772 if (!vf->dmpi) { | |
773 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_MPCODECS_FunWhydowegetNULL); | |
774 return 0; | |
775 } | |
776 mpi->priv = NULL; | |
777 // we've used DR, so we're ready... | |
778 if (ass_top_margin) | |
779 blank(vf->dmpi, 0, ass_top_margin); | |
780 if (ass_bottom_margin) | |
781 blank(vf->dmpi, vf->priv->outh - ass_bottom_margin, vf->priv->outh); | |
782 if (!(mpi->flags & MP_IMGFLAG_PLANAR)) | |
783 vf->dmpi->planes[1] = mpi->planes[1]; // passthrough rgb8 palette | |
784 return 0; | |
785 } | |
786 // hope we'll get DR buffer: | |
787 vf->dmpi = vf_get_image(vf->next, vf->priv->outfmt, MP_IMGTYPE_TEMP, | |
788 MP_IMGFLAG_ACCEPT_STRIDE | MP_IMGFLAG_READABLE, | |
789 vf->priv->outw, vf->priv->outh); | |
18937 | 790 |
32096 | 791 // copy mpi->dmpi... |
792 if (mpi->flags & MP_IMGFLAG_PLANAR) { | |
793 memcpy_pic(vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0], | |
35436 | 794 mpi->planes[0], |
795 mpi->w, | |
796 mpi->h, | |
797 vf->dmpi->stride[0], | |
798 mpi->stride[0]); | |
32096 | 799 memcpy_pic(vf->dmpi->planes[1] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[1], |
35436 | 800 mpi->planes[1], |
801 mpi->w >> mpi->chroma_x_shift, | |
32096 | 802 mpi->h >> mpi->chroma_y_shift, |
35436 | 803 vf->dmpi->stride[1], |
32096 | 804 mpi->stride[1]); |
805 memcpy_pic(vf->dmpi->planes[2] + (ass_top_margin >> mpi->chroma_y_shift) * vf->dmpi->stride[2], | |
35436 | 806 mpi->planes[2], |
32096 | 807 mpi->w >> mpi->chroma_x_shift, |
808 mpi->h >> mpi->chroma_y_shift, | |
35436 | 809 vf->dmpi->stride[2], |
32096 | 810 mpi->stride[2]); |
811 } else { | |
812 memcpy_pic(vf->dmpi->planes[0] + ass_top_margin * vf->dmpi->stride[0], | |
35436 | 813 mpi->planes[0], |
32096 | 814 mpi->w * (vf->dmpi->bpp / 8), |
35436 | 815 mpi->h, |
32096 | 816 vf->dmpi->stride[0], |
35436 | 817 mpi->stride[0]); |
32096 | 818 vf->dmpi->planes[1] = mpi->planes[1]; // passthrough rgb8 palette |
819 } | |
820 if (ass_top_margin) | |
821 blank(vf->dmpi, 0, ass_top_margin); | |
822 if (ass_bottom_margin) | |
823 blank(vf->dmpi, vf->priv->outh - ass_bottom_margin, vf->priv->outh); | |
824 return 0; | |
18937 | 825 } |
826 | |
35244 | 827 static void prepare_eosd(vf_instance_t *vf, struct mp_eosd_image_list *imgs) |
18937 | 828 { |
35244 | 829 struct mp_eosd_image *img = eosd_image_first(imgs); |
830 void (*draw_image)(vf_instance_t *, struct mp_eosd_image *); | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
831 |
35244 | 832 clean_buffer(vf); |
833 draw_image = vf->priv->draw_image; | |
834 for (; img; img = eosd_image_next(imgs)) | |
835 draw_image(vf, img); | |
836 vf->priv->prepare_buffer(vf); | |
18937 | 837 } |
838 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
839 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts) |
18937 | 840 { |
32391
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
841 struct mp_eosd_image_list images; |
b4c3659d16b1
Use a dynamic list for the sources of EOSD elements.
cigaes
parents:
32261
diff
changeset
|
842 eosd_render_frame(pts, &images); |
32096 | 843 prepare_image(vf, mpi); |
35244 | 844 if (images.changed) |
845 prepare_eosd(vf, &images); | |
846 vf->priv->render_frame(vf); | |
32096 | 847 return vf_next_put_image(vf, vf->dmpi, pts); |
18937 | 848 } |
849 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
850 static int query_format(struct vf_instance *vf, unsigned int fmt) |
18937 | 851 { |
32096 | 852 switch (fmt) { |
853 case IMGFMT_YV12: | |
854 case IMGFMT_I420: | |
855 case IMGFMT_IYUV: | |
35039
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
856 case IMGFMT_UYVY: |
848835e1b053
vf_ass: add support for rendering on YUY2 and UYVY images.
reimar
parents:
35033
diff
changeset
|
857 case IMGFMT_YUY2: |
35033 | 858 return vf_next_query_format(vf, fmt) | VFCAP_EOSD; |
32096 | 859 } |
860 return 0; | |
18937 | 861 } |
862 | |
863 static int control(vf_instance_t *vf, int request, void *data) | |
864 { | |
32096 | 865 switch (request) { |
866 case VFCTRL_INIT_EOSD: | |
867 return CONTROL_TRUE; | |
868 case VFCTRL_DRAW_EOSD: | |
869 return CONTROL_TRUE; | |
870 } | |
871 return vf_next_control(vf, request, data); | |
18937 | 872 } |
873 | |
30642
a972c1a4a012
cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents:
30638
diff
changeset
|
874 static void uninit(struct vf_instance *vf) |
18937 | 875 { |
35244 | 876 int i; |
877 for (i = 0; i < MP_MAX_PLANES; i++) | |
878 av_free(vf->priv->planes[i]); | |
879 for (i = 0; i < MP_MAX_PLANES; i++) | |
880 av_free(vf->priv->alphas[i]); | |
881 av_free(vf->priv->dirty_rows); | |
18937 | 882 } |
883 | |
/* Zero-terminated list of the image formats this filter can render on;
 * handed to vf_match_csp() in vf_open(). */
static const unsigned int fmt_list[] = {
    IMGFMT_YV12,
    IMGFMT_I420,
    IMGFMT_IYUV,
    IMGFMT_UYVY,
    IMGFMT_YUY2,
    0
};
892 | |
30638
a7b908875c14
Rename open() vf initialization function to vf_open().
diego
parents:
30633
diff
changeset
|
893 static int vf_open(vf_instance_t *vf, char *args) |
18937 | 894 { |
32096 | 895 int flags; |
35033 | 896 unsigned outfmt = vf_match_csp(&vf->next, fmt_list, IMGFMT_YV12); |
897 if (outfmt) | |
898 flags = vf_next_query_format(vf, outfmt); | |
899 if (!outfmt || (vf->priv->auto_insert && flags & VFCAP_EOSD)) { | |
32096 | 900 uninit(vf); |
901 return 0; | |
902 } | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
903 |
32096 | 904 if (vf->priv->auto_insert) |
905 mp_msg(MSGT_ASS, MSGL_INFO, "[ass] auto-open\n"); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
26727
diff
changeset
|
906 |
32096 | 907 vf->config = config; |
908 vf->query_format = query_format; | |
909 vf->uninit = uninit; | |
910 vf->control = control; | |
911 vf->get_image = get_image; | |
912 vf->put_image = put_image; | |
913 vf->default_caps = VFCAP_EOSD; | |
914 return 1; | |
18937 | 915 } |
916 | |
/* Shorthand for referring to a member of struct vf_priv_s through M_ST_OFF. */
#define ST_OFF(f) M_ST_OFF(struct vf_priv_s,f)
/* Option table of the filter: a single flag option "auto" bound to the
 * auto_insert member, followed by the all-NULL terminator entry. */
static const m_option_t vf_opts_fields[] = {
    {"auto", ST_OFF(auto_insert), CONF_TYPE_FLAG, 0, 0, 1, NULL},
    {NULL, NULL, 0, 0, 0, 0, NULL}
};
922 | |
/* Ties the option table to the private data: the name "ass", the size of
 * struct vf_priv_s, the vf_priv_dflt instance and the option table above.
 * NOTE(review): the meaning of each m_struct_t slot is inferred from the
 * values placed in it - confirm against the m_struct_t declaration. */
static const m_struct_t vf_opts = {
    "ass",
    sizeof(struct vf_priv_s),
    &vf_priv_dflt,
    vf_opts_fields
};
929 | |
/* Public descriptor of the filter (the only non-static symbol in this part
 * of the file): a description string, the name "ass", the authors, an empty
 * string, the vf_open entry point and the option description.
 * NOTE(review): the meaning of each vf_info_t slot is inferred from the
 * values placed in it - confirm against the vf_info_t declaration. */
const vf_info_t vf_info_ass = {
    "Render ASS/SSA subtitles",
    "ass",
    "Evgeniy Stepanov, Xidorn Quan",
    "",
    vf_open,
    &vf_opts
};