Mercurial > mplayer.hg
annotate postproc/yuv2rgb_altivec.c @ 17588:79081ba52e00
Move the v{Y,C}CoeffsBank vectors into the SwsContext, filling them in just
once when the scaler is initialized, instead of building them and freeing
them over and over. This gives massive performance improvements.
patch by Alan Curry, pacman*at*TheWorld*dot*com
author | diego |
---|---|
date | Sat, 11 Feb 2006 14:16:10 +0000 |
parents | 8084bcdb4898 |
children | ee8d71a1b7ba |
rev | line source |
---|---|
12698 | 1 /* |
2 marc.hoffman@analog.com March 8, 2004 | |
3 | |
4 Altivec Acceleration for Color Space Conversion revision 0.2 | |
5 | |
6 convert I420 YV12 to RGB in various formats, | |
7 it rejects images that are not in 420 formats | |
8 it rejects images that don't have widths of multiples of 16 | |
9 it rejects images that don't have heights of multiples of 2 | |
10 reject defers to C simulation codes. | |
11 | |
12 lots of optimizations to be done here | |
13 | |
14 1. need to fix saturation code, I just couldn't get it to fly with packs and adds. | |
15 so we currently use max min to clip | |
16 | |
17 2. the inefficient use of chroma loading needs a bit of brushing up | |
18 | |
19 3. analysis of pipeline stalls needs to be done, use shark to identify pipeline stalls | |
20 | |
21 | |
22 MODIFIED to calculate coeffs from currently selected color space. | |
23 MODIFIED core to be a macro which you spec the output format. | |
24 ADDED UYVY conversion which is never called due to some thing in SWSCALE. | |
25 CORRECTED algorithm selection to be strict on input formats. | |
26 ADDED runtime detection of altivec. | |
27 | |
28 ADDED altivec_yuv2packedX vertical scl + RGB converter | |
29 | |
30 March 27,2004 | |
31 PERFORMANCE ANALYSIS | |
32 | |
33 The C version use 25% of the processor or ~250Mips for D1 video rawvideo used as test | |
34 The ALTIVEC version uses 10% of the processor or ~100Mips for D1 video same sequence | |
35 | |
36 720*480*30 ~10MPS | |
37 | |
38 so we have roughly 10clocks per pixel this is too high something has to be wrong. | |
39 | |
40 OPTIMIZED clip codes to utilize vec_max and vec_packs removing the need for vec_min. | |
41 | |
42 OPTIMIZED DST OUTPUT cache/dma controls. we are pretty much | |
43 guaranteed to have the input video frame it was just decompressed so | |
44 it probably resides in L1 caches. However we are creating the | |
45 output video stream this needs to use the DSTST instruction to | |
46 optimize for the cache. We couple this with the fact that we are | |
47 not going to be visiting the input buffer again so we mark it Least | |
48 Recently Used. This shaves 25% of the processor cycles off. | |
49 | |
50 Now MEMCPY is the largest mips consumer in the system, probably due | |
51 to the inefficient X11 stuff. | |
52 | |
53 GL libraries seem to be very slow on this machine 1.33Ghz PB running | |
54 Jaguar, this is not the case for my 1Ghz PB. I thought it might be | |
55 a versioning issues, however i have libGL.1.2.dylib for both | |
56 machines. ((We need to figure this out now)) | |
57 | |
58 GL2 libraries work now with patch for RGB32 | |
59 | |
60 NOTE quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor | |
61 | |
62 Integrated luma prescaling adjustment for saturation/contrast/brightness adjustment. | |
63 | |
64 */ | |
65 #include <stdio.h> | |
66 #include <stdlib.h> | |
12836 | 67 #include <string.h> |
12698 | 68 #include <inttypes.h> |
69 #include <assert.h> | |
70 #include "config.h" | |
17558
ad90899eeee6
AltiVec operations need to have memory aligned on 16-byte boundaries.
diego
parents:
17557
diff
changeset
|
71 #ifdef HAVE_MALLOC_H |
ad90899eeee6
AltiVec operations need to have memory aligned on 16-byte boundaries.
diego
parents:
17557
diff
changeset
|
72 #include <malloc.h> |
ad90899eeee6
AltiVec operations need to have memory aligned on 16-byte boundaries.
diego
parents:
17557
diff
changeset
|
73 #endif |
12698 | 74 #include "rgb2rgb.h" |
75 #include "swscale.h" | |
76 #include "swscale_internal.h" | |
16985 | 77 #include "mangle.h" |
78 #include "libvo/img_format.h" //FIXME try to reduce dependency of such stuff | |
12698 | 79 |
80 #undef PROFILE_THE_BEAST | |
81 #undef INC_SCALING | |
82 | |
83 typedef unsigned char ubyte; | |
84 typedef signed char sbyte; | |
85 | |
86 | |
87 /* RGB interleaver, 16 planar pels 8-bit samples per channel in | |
88 homogeneous vector registers x0,x1,x2 are interleaved with the | |
89 following technique: | |
90 | |
91 o0 = vec_mergeh (x0,x1); | |
92 o1 = vec_perm (o0, x2, perm_rgb_0); | |
93 o2 = vec_perm (o0, x2, perm_rgb_1); | |
94 o3 = vec_mergel (x0,x1); | |
95 o4 = vec_perm (o3,o2,perm_rgb_2); | |
96 o5 = vec_perm (o3,o2,perm_rgb_3); | |
97 | |
98 perm_rgb_0: o0(RG).h v1(B) --> o1* | |
99 0 1 2 3 4 | |
100 rgbr|gbrg|brgb|rgbr | |
101 0010 0100 1001 0010 | |
102 0102 3145 2673 894A | |
103 | |
104 perm_rgb_1: o0(RG).h v1(B) --> o2 | |
105 0 1 2 3 4 | |
106 gbrg|brgb|bbbb|bbbb | |
107 0100 1001 1111 1111 | |
108 B5CD 6EF7 89AB CDEF | |
109 | |
110 perm_rgb_2: o3(RG).l o2(rgbB.l) --> o4* | |
111 0 1 2 3 4 | |
112 gbrg|brgb|rgbr|gbrg | |
113 1111 1111 0010 0100 | |
114 89AB CDEF 0182 3945 | |
115 | |
116 perm_rgb_3: o3(RG).l o2(rgbB.l) ---> o5* | |
117 0 1 2 3 4 | |
118 brgb|rgbr|gbrg|brgb | |
119 1001 0010 0100 1001 | |
120 a67b 89cA BdCD eEFf | |
121 | |
122 */ | |
123 static | |
124 const vector unsigned char | |
13564 | 125 perm_rgb_0 = (const vector unsigned char)AVV(0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05, |
12698 | 126 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a), |
13564 | 127 perm_rgb_1 = (const vector unsigned char)AVV(0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17, |
12698 | 128 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f), |
13564 | 129 perm_rgb_2 = (const vector unsigned char)AVV(0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, |
12698 | 130 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05), |
13564 | 131 perm_rgb_3 = (const vector unsigned char)AVV(0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a, |
12698 | 132 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f); |
133 | |
/* Interleave three 16-byte channel vectors into three consecutive output
   vectors y0,y1,y2 (48 bytes, 16 triples).  Within each triple the bytes
   come from x0, x1, x2 in that order -- note that x2 is the FIRST macro
   parameter, so the argument list reads in reverse of the byte order. */
#define vec_merge3(x2,x1,x0,y0,y1,y2)               \
do {                                                \
    typeof(x0) pair_hi, carry, pair_lo;             \
    pair_hi = vec_mergeh (x0,x1);                   \
    y0      = vec_perm (pair_hi, x2, perm_rgb_0);   \
    carry   = vec_perm (pair_hi, x2, perm_rgb_1);   \
    pair_lo = vec_mergel (x0,x1);                   \
    y1      = vec_perm (pair_lo, carry, perm_rgb_2);\
    y2      = vec_perm (pair_lo, carry, perm_rgb_3);\
} while(0)
144 | |
17563
8084bcdb4898
Correct RGB vs. BGR confusion, the macros vec_mstrgb24 and vec_mstbgr24 each
diego
parents:
17560
diff
changeset
|
/* Store 16 pixels as 48 packed bytes, three bytes per pixel in the byte
   order x2,x1,x0 (vec_merge3 emits its third argument first).
   ptr must be an lvalue of vector-pointer type; it is post-incremented
   three times, i.e. advanced by 48 bytes.
   The expansion deliberately has no trailing semicolon so that the macro
   behaves like a single statement (safe inside un-braced if/else). */
#define vec_mstbgr24(x0,x1,x2,ptr)      \
do {                                    \
    typeof(x0) _0,_1,_2;                \
    vec_merge3 (x0,x1,x2,_0,_1,_2);     \
    vec_st (_0, 0, ptr++);              \
    vec_st (_1, 0, ptr++);              \
    vec_st (_2, 0, ptr++);              \
} while (0)
153 | |
17563
8084bcdb4898
Correct RGB vs. BGR confusion, the macros vec_mstrgb24 and vec_mstbgr24 each
diego
parents:
17560
diff
changeset
|
/* Store 16 pixels as 48 packed bytes, three bytes per pixel in the byte
   order x0,x1,x2.
   ptr must be an lvalue of vector-pointer type; it is post-incremented
   three times, i.e. advanced by 48 bytes.
   The expansion deliberately has no trailing semicolon so that the macro
   behaves like a single statement (safe inside un-braced if/else). */
#define vec_mstrgb24(x0,x1,x2,ptr)      \
do {                                    \
    typeof(x0) _0,_1,_2;                \
    vec_merge3 (x2,x1,x0,_0,_1,_2);     \
    vec_st (_0, 0, ptr++);              \
    vec_st (_1, 0, ptr++);              \
    vec_st (_2, 0, ptr++);              \
} while (0)
162 | |
/* Store 16 pixels as 64 packed bytes, four bytes per pixel in the byte
   order x0,x1,x2,x3 (x0 is the first byte in memory).
   T is the vector type of the four channel arguments.  ptr must be an
   lvalue of vector-pointer type; it is advanced by four vectors (64 bytes).
   The expansion deliberately has no trailing semicolon so that the macro
   behaves like a single statement (safe inside un-braced if/else). */
#define vec_mstrgb32(T,x0,x1,x2,x3,ptr)                                        \
do {                                                                           \
    T _0,_1,_2,_3;                                                             \
    /* pixels 0..7: byte pairs (x0,x1) and (x2,x3), then merged as shorts */   \
    _0 = vec_mergeh (x0,x1);                                                   \
    _1 = vec_mergeh (x2,x3);                                                   \
    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1);  \
    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1);  \
    vec_st (_2, 0*16, (T *)ptr);                                               \
    vec_st (_3, 1*16, (T *)ptr);                                               \
    /* pixels 8..15 */                                                         \
    _0 = vec_mergel (x0,x1);                                                   \
    _1 = vec_mergel (x2,x3);                                                   \
    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1);  \
    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1);  \
    vec_st (_2, 2*16, (T *)ptr);                                               \
    vec_st (_3, 3*16, (T *)ptr);                                               \
    ptr += 4;                                                                  \
} while (0)
184 | |
185 /* | |
186 | |
187 | 1 0 1.4021 | | Y | | |
188 | 1 -0.3441 -0.7142 |x| Cb| | |
189 | 1 1.7718 0 | | Cr| | |
190 | |
191 | |
192 Y: [-128 127] | |
193 Cb/Cr : [-128 127] | |
194 | |
195 typical yuv conversion work on Y: 0-255 this version has been optimized for jpeg decode. | |
196 | |
197 */ | |
198 | |
199 | |
200 | |
201 | |
/* Zero-extend the first (vec_unh) or last (vec_unl) eight bytes of x to
   eight 16-bit lanes: each output pair is byte 0 of an all-zero vector
   (selector 0x10) followed by one byte of x.
   The whole expansion is parenthesized so it can be used safely inside a
   larger expression. */
#define vec_unh(x) \
  ((vector signed short) \
   vec_perm((x),(typeof(x))AVV(0),\
            (vector unsigned char)AVV(0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\
                                      0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07)))
#define vec_unl(x) \
  ((vector signed short) \
   vec_perm((x),(typeof(x))AVV(0),\
            (vector unsigned char)AVV(0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\
                                      0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F)))
212 | |
/* Clamp every lane of x to the range [16, 235]. */
#define vec_clip(x) \
  (vec_max (vec_min ((x), (typeof(x))AVV(235)), (typeof(x))AVV(16)))

/* Clamp x and y to [16, 235] and pack the two vectors into one byte vector
   (vec_pack keeps the low byte of every lane). */
#define vec_packclp_a(x,y) \
  ((vector unsigned char)vec_pack (vec_clip (x), vec_clip (y)))

/* Pack two signed 16-bit vectors into one unsigned byte vector: negative
   lanes are raised to 0 by vec_max and the saturating vec_packs handles
   the upper bound. */
#define vec_packclp(x,y) \
  ((vector unsigned char)vec_packs \
      ((vector unsigned short)vec_max ((x),(vector signed short) AVV(0)), \
       (vector unsigned short)vec_max ((y),(vector signed short) AVV(0))))
12698 | 223 |
13564 | 224 //#define out_pixels(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))AVV(0)),a,a,a,ptr) |
12698 | 225 |
226 | |
12836 | 227 static inline void cvtyuvtoRGB (SwsContext *c, |
12698 | 228 vector signed short Y, vector signed short U, vector signed short V, |
229 vector signed short *R, vector signed short *G, vector signed short *B) | |
230 { | |
231 vector signed short vx,ux,uvx; | |
232 | |
233 Y = vec_mradds (Y, c->CY, c->OY); | |
13564 | 234 U = vec_sub (U,(vector signed short) |
235 vec_splat((vector signed short)AVV(128),0)); | |
236 V = vec_sub (V,(vector signed short) | |
237 vec_splat((vector signed short)AVV(128),0)); | |
12698 | 238 |
239 // ux = (CBU*(u<<c->CSHIFT)+0x4000)>>15; | |
240 ux = vec_sl (U, c->CSHIFT); | |
241 *B = vec_mradds (ux, c->CBU, Y); | |
242 | |
243 // vx = (CRV*(v<<c->CSHIFT)+0x4000)>>15; | |
244 vx = vec_sl (V, c->CSHIFT); | |
245 *R = vec_mradds (vx, c->CRV, Y); | |
246 | |
247 // uvx = ((CGU*u) + (CGV*v))>>15; | |
248 uvx = vec_mradds (U, c->CGU, Y); | |
249 *G = vec_mradds (V, c->CGV, uvx); | |
250 } | |
251 | |
252 | |
253 /* | |
254 ------------------------------------------------------------------------------ | |
255 CS converters | |
256 ------------------------------------------------------------------------------ | |
257 */ | |
258 | |
259 | |
/*
  DEFCSP420_CVT(name, out_pixels) defines
      static int altivec_<name>(SwsContext *c, in, instrides,
                                srcSliceY, srcSliceH, oplanes, outstrides)
  which converts a slice of planar YUV with 2x2 subsampled chroma
  (in[0]=Y, in[1]=U, in[2]=V) to packed RGB in oplanes[0].

  out_pixels(R,G,B,ptr) must store 16 pixels and advance ptr past them.

  Each inner iteration handles 16 pixels of two adjacent rows, which share
  one row of chroma.  c->srcW is consumed in whole groups of 16 and
  srcSliceH in whole pairs of rows.  Returns srcSliceH.

  Row addressing honours the strides: the second luma row starts at
  in[0]+instrides[0], luma advances two full strides per row pair, and the
  output pointers are recomputed from outstrides[0] for every row pair.
  (Earlier code assumed stride == width for luma and stride == packed row
  size for the output.)
*/
#define DEFCSP420_CVT(name,out_pixels)                                       \
static int altivec_##name (SwsContext *c,                                    \
                           unsigned char **in, int *instrides,               \
                           int srcSliceY, int srcSliceH,                     \
                           unsigned char **oplanes, int *outstrides)         \
{                                                                            \
    int w = c->srcW;                                                         \
    int h = srcSliceH;                                                       \
    int i,j;                                                                 \
    int instrides_scl[3];                                                    \
    vector unsigned char y0,y1;                                              \
                                                                             \
    vector signed char  u,v;                                                 \
                                                                             \
    vector signed short Y0,Y1,Y2,Y3;                                         \
    vector signed short U,V;                                                 \
    vector signed short vx,ux,uvx;                                           \
    vector signed short vx0,ux0,uvx0;                                        \
    vector signed short vx1,ux1,uvx1;                                        \
    vector signed short R0,G0,B0;                                            \
    vector signed short R1,G1,B1;                                            \
    vector unsigned char R,G,B;                                              \
                                                                             \
    vector unsigned char *uivP, *vivP;                                       \
    vector unsigned char align_perm;                                         \
                                                                             \
    vector signed short                                                      \
        lCY  = c->CY,                                                        \
        lOY  = c->OY,                                                        \
        lCRV = c->CRV,                                                       \
        lCBU = c->CBU,                                                       \
        lCGU = c->CGU,                                                       \
        lCGV = c->CGV;                                                       \
                                                                             \
    vector unsigned short lCSHIFT = c->CSHIFT;                               \
                                                                             \
    ubyte *y1i = in[0];                                                      \
    ubyte *y2i = in[0]+instrides[0];   /* second row of the pair */          \
    ubyte *ui  = in[1];                                                      \
    ubyte *vi  = in[2];                                                      \
                                                                             \
    ubyte *outbase = oplanes[0]+srcSliceY*outstrides[0];                     \
    vector unsigned char *oute, *outo;                                       \
                                                                             \
    /* the inner loop moves y1i/y2i by w, then two rows are skipped */       \
    instrides_scl[0] = instrides[0]*2-w;                                     \
    instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */      \
    instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */      \
                                                                             \
    for (i=0;i<h/2;i++) {                                                    \
        /* even and odd output row of this pair */                           \
        oute = (vector unsigned char *)(outbase+(2*i)*outstrides[0]);        \
        outo = (vector unsigned char *)(outbase+(2*i+1)*outstrides[0]);      \
                                                                             \
        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);               \
        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);               \
                                                                             \
        for (j=0;j<w/16;j++) {                                               \
                                                                             \
            y0 = vec_ldl (0,y1i);                                            \
            y1 = vec_ldl (0,y2i);                                            \
            uivP = (vector unsigned char *)ui;                               \
            vivP = (vector unsigned char *)vi;                               \
                                                                             \
            /* chroma pointers advance by 8, so realign on every load */     \
            align_perm = vec_lvsl (0, ui);                                   \
            u = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm); \
                                                                             \
            align_perm = vec_lvsl (0, vi);                                   \
            v = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm); \
                                                                             \
            u = (vector signed char)                                         \
                vec_sub (u,(vector signed char)                              \
                         vec_splat((vector signed char)AVV(128),0));         \
            v = (vector signed char)                                         \
                vec_sub (v,(vector signed char)                              \
                         vec_splat((vector signed char)AVV(128),0));         \
                                                                             \
            U = vec_unpackh (u);                                             \
            V = vec_unpackh (v);                                             \
                                                                             \
            Y0 = vec_unh (y0);                                               \
            Y1 = vec_unl (y0);                                               \
            Y2 = vec_unh (y1);                                               \
            Y3 = vec_unl (y1);                                               \
                                                                             \
            Y0 = vec_mradds (Y0, lCY, lOY);                                  \
            Y1 = vec_mradds (Y1, lCY, lOY);                                  \
            Y2 = vec_mradds (Y2, lCY, lOY);                                  \
            Y3 = vec_mradds (Y3, lCY, lOY);                                  \
                                                                             \
            /* ux = (CBU*(u<<CSHIFT)+0x4000)>>15 */                          \
            ux = vec_sl (U, lCSHIFT);                                        \
            ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0));         \
            ux0 = vec_mergeh (ux,ux);                                        \
            ux1 = vec_mergel (ux,ux);                                        \
                                                                             \
            /* vx = (CRV*(v<<CSHIFT)+0x4000)>>15 */                          \
            vx = vec_sl (V, lCSHIFT);                                        \
            vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0));         \
            vx0 = vec_mergeh (vx,vx);                                        \
            vx1 = vec_mergel (vx,vx);                                        \
                                                                             \
            /* uvx = ((CGU*u) + (CGV*v))>>15 */                              \
            uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0));         \
            uvx = vec_mradds (V, lCGV, uvx);                                 \
            uvx0 = vec_mergeh (uvx,uvx);                                     \
            uvx1 = vec_mergel (uvx,uvx);                                     \
                                                                             \
            /* first row of the pair */                                      \
            R0 = vec_add (Y0,vx0);                                           \
            G0 = vec_add (Y0,uvx0);                                          \
            B0 = vec_add (Y0,ux0);                                           \
            R1 = vec_add (Y1,vx1);                                           \
            G1 = vec_add (Y1,uvx1);                                          \
            B1 = vec_add (Y1,ux1);                                           \
                                                                             \
            R = vec_packclp (R0,R1);                                         \
            G = vec_packclp (G0,G1);                                         \
            B = vec_packclp (B0,B1);                                         \
                                                                             \
            out_pixels(R,G,B,oute);                                          \
                                                                             \
            /* second row of the pair, same chroma terms */                  \
            R0 = vec_add (Y2,vx0);                                           \
            G0 = vec_add (Y2,uvx0);                                          \
            B0 = vec_add (Y2,ux0);                                           \
            R1 = vec_add (Y3,vx1);                                           \
            G1 = vec_add (Y3,uvx1);                                          \
            B1 = vec_add (Y3,ux1);                                           \
            R = vec_packclp (R0,R1);                                         \
            G = vec_packclp (G0,G1);                                         \
            B = vec_packclp (B0,B1);                                         \
                                                                             \
            out_pixels(R,G,B,outo);                                          \
                                                                             \
            y1i += 16;                                                       \
            y2i += 16;                                                       \
            ui  += 8;                                                        \
            vi  += 8;                                                        \
                                                                             \
        }                                                                    \
                                                                             \
        ui  += instrides_scl[1];                                             \
        vi  += instrides_scl[2];                                             \
        y1i += instrides_scl[0];                                             \
        y2i += instrides_scl[0];                                             \
    }                                                                        \
    return srcSliceH;                                                        \
}
413 | |
414 | |
/* Pixel-store aliases with a common (R,G,B,ptr) signature so a converter
   can be parameterised on the output layout.  The name gives the byte
   order written to memory; the 32-bit variants write a zero byte in the
   "a" position. */
#define out_abgr(r,g,b,ptr)  vec_mstrgb32(typeof(r),((typeof (r))AVV(0)),b,g,r,ptr)
#define out_bgra(r,g,b,ptr)  vec_mstrgb32(typeof(r),b,g,r,((typeof (r))AVV(0)),ptr)
#define out_rgba(r,g,b,ptr)  vec_mstrgb32(typeof(r),r,g,b,((typeof (r))AVV(0)),ptr)
#define out_argb(r,g,b,ptr)  vec_mstrgb32(typeof(r),((typeof (r))AVV(0)),r,g,b,ptr)
#define out_rgb24(r,g,b,ptr) vec_mstrgb24(r,g,b,ptr)
#define out_bgr24(r,g,b,ptr) vec_mstbgr24(r,g,b,ptr)
12698 | 421 |
422 DEFCSP420_CVT (yuv2_abgr32, out_abgr) | |
13564 | 423 #if 1 |
12698 | 424 DEFCSP420_CVT (yuv2_bgra32, out_argb) |
13564 | 425 #else |
426 static int altivec_yuv2_bgra32 (SwsContext *c, | |
427 unsigned char **in, int *instrides, | |
428 int srcSliceY, int srcSliceH, | |
429 unsigned char **oplanes, int *outstrides) | |
430 { | |
431 int w = c->srcW; | |
432 int h = srcSliceH; | |
433 int i,j; | |
434 int instrides_scl[3]; | |
435 vector unsigned char y0,y1; | |
436 | |
437 vector signed char u,v; | |
438 | |
439 vector signed short Y0,Y1,Y2,Y3; | |
440 vector signed short U,V; | |
441 vector signed short vx,ux,uvx; | |
442 vector signed short vx0,ux0,uvx0; | |
443 vector signed short vx1,ux1,uvx1; | |
444 vector signed short R0,G0,B0; | |
445 vector signed short R1,G1,B1; | |
446 vector unsigned char R,G,B; | |
447 | |
448 vector unsigned char *uivP, *vivP; | |
449 vector unsigned char align_perm; | |
450 | |
451 vector signed short | |
452 lCY = c->CY, | |
453 lOY = c->OY, | |
454 lCRV = c->CRV, | |
455 lCBU = c->CBU, | |
456 lCGU = c->CGU, | |
457 lCGV = c->CGV; | |
458 | |
459 vector unsigned short lCSHIFT = c->CSHIFT; | |
460 | |
461 ubyte *y1i = in[0]; | |
462 ubyte *y2i = in[0]+w; | |
463 ubyte *ui = in[1]; | |
464 ubyte *vi = in[2]; | |
465 | |
466 vector unsigned char *oute | |
467 = (vector unsigned char *) | |
468 (oplanes[0]+srcSliceY*outstrides[0]); | |
469 vector unsigned char *outo | |
470 = (vector unsigned char *) | |
471 (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]); | |
472 | |
473 | |
474 instrides_scl[0] = instrides[0]; | |
475 instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */ | |
476 instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */ | |
477 | |
478 | |
479 for (i=0;i<h/2;i++) { | |
480 vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0); | |
481 vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1); | |
482 | |
483 for (j=0;j<w/16;j++) { | |
484 | |
485 y0 = vec_ldl (0,y1i); | |
486 y1 = vec_ldl (0,y2i); | |
487 uivP = (vector unsigned char *)ui; | |
488 vivP = (vector unsigned char *)vi; | |
489 | |
490 align_perm = vec_lvsl (0, ui); | |
491 u = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm); | |
492 | |
493 align_perm = vec_lvsl (0, vi); | |
494 v = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm); | |
495 u = (vector signed char) | |
496 vec_sub (u,(vector signed char) | |
497 vec_splat((vector signed char)AVV(128),0)); | |
498 | |
499 v = (vector signed char) | |
500 vec_sub (v, (vector signed char) | |
501 vec_splat((vector signed char)AVV(128),0)); | |
502 | |
503 U = vec_unpackh (u); | |
504 V = vec_unpackh (v); | |
505 | |
506 | |
507 Y0 = vec_unh (y0); | |
508 Y1 = vec_unl (y0); | |
509 Y2 = vec_unh (y1); | |
510 Y3 = vec_unl (y1); | |
511 | |
512 Y0 = vec_mradds (Y0, lCY, lOY); | |
513 Y1 = vec_mradds (Y1, lCY, lOY); | |
514 Y2 = vec_mradds (Y2, lCY, lOY); | |
515 Y3 = vec_mradds (Y3, lCY, lOY); | |
516 | |
517 /* ux = (CBU*(u<<CSHIFT)+0x4000)>>15 */ | |
518 ux = vec_sl (U, lCSHIFT); | |
519 ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0)); | |
520 ux0 = vec_mergeh (ux,ux); | |
521 ux1 = vec_mergel (ux,ux); | |
522 | |
523 /* vx = (CRV*(v<<CSHIFT)+0x4000)>>15; */ | |
524 vx = vec_sl (V, lCSHIFT); | |
525 vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0)); | |
526 vx0 = vec_mergeh (vx,vx); | |
527 vx1 = vec_mergel (vx,vx); | |
528 /* uvx = ((CGU*u) + (CGV*v))>>15 */ | |
529 uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0)); | |
530 uvx = vec_mradds (V, lCGV, uvx); | |
531 uvx0 = vec_mergeh (uvx,uvx); | |
532 uvx1 = vec_mergel (uvx,uvx); | |
533 R0 = vec_add (Y0,vx0); | |
534 G0 = vec_add (Y0,uvx0); | |
535 B0 = vec_add (Y0,ux0); | |
536 R1 = vec_add (Y1,vx1); | |
537 G1 = vec_add (Y1,uvx1); | |
538 B1 = vec_add (Y1,ux1); | |
539 R = vec_packclp (R0,R1); | |
540 G = vec_packclp (G0,G1); | |
541 B = vec_packclp (B0,B1); | |
542 | |
543 out_argb(R,G,B,oute); | |
544 R0 = vec_add (Y2,vx0); | |
545 G0 = vec_add (Y2,uvx0); | |
546 B0 = vec_add (Y2,ux0); | |
547 R1 = vec_add (Y3,vx1); | |
548 G1 = vec_add (Y3,uvx1); | |
549 B1 = vec_add (Y3,ux1); | |
550 R = vec_packclp (R0,R1); | |
551 G = vec_packclp (G0,G1); | |
552 B = vec_packclp (B0,B1); | |
553 | |
554 out_argb(R,G,B,outo); | |
555 y1i += 16; | |
556 y2i += 16; | |
557 ui += 8; | |
558 vi += 8; | |
559 | |
560 } | |
561 | |
562 outo += (outstrides[0])>>4; | |
563 oute += (outstrides[0])>>4; | |
564 | |
565 ui += instrides_scl[1]; | |
566 vi += instrides_scl[2]; | |
567 y1i += instrides_scl[0]; | |
568 y2i += instrides_scl[0]; | |
569 } | |
570 return srcSliceH; | |
571 } | |
572 | |
573 #endif | |
574 | |
575 | |
12698 | 576 DEFCSP420_CVT (yuv2_rgba32, out_rgba) |
577 DEFCSP420_CVT (yuv2_argb32, out_argb) | |
578 DEFCSP420_CVT (yuv2_rgb24, out_rgb24) | |
579 DEFCSP420_CVT (yuv2_bgr24, out_bgr24) | |
580 | |
581 | |
582 // uyvy|uyvy|uyvy|uyvy | |
583 // 0123 4567 89ab cdef | |
584 static | |
585 const vector unsigned char | |
13564 | 586 demux_u = (const vector unsigned char)AVV(0x10,0x00,0x10,0x00, |
12698 | 587 0x10,0x04,0x10,0x04, |
588 0x10,0x08,0x10,0x08, | |
589 0x10,0x0c,0x10,0x0c), | |
13564 | 590 demux_v = (const vector unsigned char)AVV(0x10,0x02,0x10,0x02, |
12698 | 591 0x10,0x06,0x10,0x06, |
592 0x10,0x0A,0x10,0x0A, | |
593 0x10,0x0E,0x10,0x0E), | |
13564 | 594 demux_y = (const vector unsigned char)AVV(0x10,0x01,0x10,0x03, |
12698 | 595 0x10,0x05,0x10,0x07, |
596 0x10,0x09,0x10,0x0B, | |
597 0x10,0x0D,0x10,0x0F); | |
598 | |
599 /* | |
600 this is so I can play live CCIR raw video | |
601 */ | |
602 static int altivec_uyvy_rgb32 (SwsContext *c, | |
603 unsigned char **in, int *instrides, | |
604 int srcSliceY, int srcSliceH, | |
605 unsigned char **oplanes, int *outstrides) | |
606 { | |
607 int w = c->srcW; | |
608 int h = srcSliceH; | |
609 int i,j; | |
610 vector unsigned char uyvy; | |
611 vector signed short Y,U,V; | |
612 vector signed short vx,ux,uvx; | |
613 vector signed short R0,G0,B0,R1,G1,B1; | |
614 vector unsigned char R,G,B; | |
615 vector unsigned char *out; | |
616 ubyte *img; | |
617 | |
618 img = in[0]; | |
619 out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]); | |
620 | |
621 for (i=0;i<h;i++) { | |
622 for (j=0;j<w/16;j++) { | |
623 uyvy = vec_ld (0, img); | |
624 U = (vector signed short) | |
13564 | 625 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u); |
12698 | 626 |
627 V = (vector signed short) | |
13564 | 628 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v); |
12698 | 629 |
630 Y = (vector signed short) | |
13564 | 631 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y); |
12698 | 632 |
633 cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0); | |
634 | |
635 uyvy = vec_ld (16, img); | |
636 U = (vector signed short) | |
13564 | 637 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u); |
12698 | 638 |
639 V = (vector signed short) | |
13564 | 640 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v); |
12698 | 641 |
642 Y = (vector signed short) | |
13564 | 643 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y); |
12698 | 644 |
645 cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1); | |
646 | |
647 R = vec_packclp (R0,R1); | |
648 G = vec_packclp (G0,G1); | |
649 B = vec_packclp (B0,B1); | |
650 | |
651 // vec_mstbgr24 (R,G,B, out); | |
652 out_rgba (R,G,B,out); | |
653 | |
654 img += 32; | |
655 } | |
656 } | |
12836 | 657 return srcSliceH; |
12698 | 658 } |
659 | |
660 | |
661 | |
662 /* Ok currently the acceleration routine only supports | |
663 inputs of widths a multiple of 16 | |
664 and heights a multiple 2 | |
665 | |
666 So we just fall back to the C codes for this. | |
667 */ | |
668 SwsFunc yuv2rgb_init_altivec (SwsContext *c) | |
669 { | |
670 if (!(c->flags & SWS_CPU_CAPS_ALTIVEC)) | |
671 return NULL; | |
672 | |
673 /* | |
674 and this seems not to matter too much I tried a bunch of | |
675 videos with abnormal widths and mplayer crashes else where. | |
676 mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv | |
677 boom with X11 bad match. | |
678 | |
679 */ | |
680 if ((c->srcW & 0xf) != 0) return NULL; | |
681 | |
682 switch (c->srcFormat) { | |
683 case IMGFMT_YVU9: | |
684 case IMGFMT_IF09: | |
685 case IMGFMT_YV12: | |
686 case IMGFMT_I420: | |
687 case IMGFMT_IYUV: | |
688 case IMGFMT_CLPL: | |
689 case IMGFMT_Y800: | |
690 case IMGFMT_Y8: | |
691 case IMGFMT_NV12: | |
692 case IMGFMT_NV21: | |
693 if ((c->srcH & 0x1) != 0) | |
694 return NULL; | |
695 | |
696 switch(c->dstFormat){ | |
697 case IMGFMT_RGB24: | |
698 MSG_WARN("ALTIVEC: Color Space RGB24\n"); | |
699 return altivec_yuv2_rgb24; | |
700 case IMGFMT_BGR24: | |
701 MSG_WARN("ALTIVEC: Color Space BGR24\n"); | |
702 return altivec_yuv2_bgr24; | |
703 case IMGFMT_RGB32: | |
704 MSG_WARN("ALTIVEC: Color Space ARGB32\n"); | |
705 return altivec_yuv2_argb32; | |
706 case IMGFMT_BGR32: | |
707 MSG_WARN("ALTIVEC: Color Space BGRA32\n"); | |
708 // return profile_altivec_bgra32; | |
709 | |
710 return altivec_yuv2_bgra32; | |
711 default: return NULL; | |
712 } | |
713 break; | |
714 | |
715 case IMGFMT_UYVY: | |
716 switch(c->dstFormat){ | |
717 case IMGFMT_RGB32: | |
718 MSG_WARN("ALTIVEC: Color Space UYVY -> RGB32\n"); | |
719 return altivec_uyvy_rgb32; | |
720 default: return NULL; | |
721 } | |
722 break; | |
723 | |
724 } | |
725 return NULL; | |
726 } | |
727 | |
/* Round a 48.16 fixed-point value to the nearest integer (half rounds
   towards +infinity) and saturate it to the signed 16-bit range.
   The result is returned as the two's-complement bit pattern in a
   uint16_t: 0x7FFF on positive saturation, 0x8000 on negative saturation.

   The rounded value is kept in 64 bits until after the range check;
   narrowing it to int first made large magnitudes wrap around instead of
   saturating. */
static uint16_t roundToInt16(int64_t f){
    int64_t r;

    /* adding the rounding term would overflow; the result saturates anyway */
    if (f > INT64_MAX - (1<<15))
        return 0x7FFF;

    r = (f + (1<<15))>>16;
    if (r < -0x7FFF)
        return 0x8000;
    if (r > 0x7FFF)
        return 0x7FFF;
    return (uint16_t)r;
}
12698 | 734 |
13564 | 735 void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation) |
736 { | |
737 union { | |
738 signed short tmp[8] __attribute__ ((aligned(16))); | |
739 vector signed short vec; | |
740 } buf; | |
12698 | 741 |
13564 | 742 buf.tmp[0] = ( (0xffffLL) * contrast>>8 )>>9; //cy |
743 buf.tmp[1] = -256*brightness; //oy | |
744 buf.tmp[2] = (inv_table[0]>>3) *(contrast>>16)*(saturation>>16); //crv | |
745 buf.tmp[3] = (inv_table[1]>>3) *(contrast>>16)*(saturation>>16); //cbu | |
746 buf.tmp[4] = -((inv_table[2]>>1)*(contrast>>16)*(saturation>>16)); //cgu | |
747 buf.tmp[5] = -((inv_table[3]>>1)*(contrast>>16)*(saturation>>16)); //cgv | |
12698 | 748 |
749 | |
13564 | 750 c->CSHIFT = (vector unsigned short)vec_splat((vector unsigned short)AVV(2),0); |
751 c->CY = vec_splat ((vector signed short)buf.vec, 0); | |
752 c->OY = vec_splat ((vector signed short)buf.vec, 1); | |
753 c->CRV = vec_splat ((vector signed short)buf.vec, 2); | |
754 c->CBU = vec_splat ((vector signed short)buf.vec, 3); | |
755 c->CGU = vec_splat ((vector signed short)buf.vec, 4); | |
756 c->CGV = vec_splat ((vector signed short)buf.vec, 5); | |
12836 | 757 #if 0 |
13564 | 758 { |
759 int i; | |
760 char *v[6]={"cy","oy","crv","cbu","cgu","cgv"}; | |
761 for (i=0; i<6;i++) | |
762 printf("%s %d ", v[i],buf.tmp[i] ); | |
763 printf("\n"); | |
764 } | |
12698 | 765 #endif |
12836 | 766 return; |
12698 | 767 } |
768 | |
769 | |
770 void | |
771 altivec_yuv2packedX (SwsContext *c, | |
772 int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | |
773 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | |
774 uint8_t *dest, int dstW, int dstY) | |
775 { | |
776 int i,j; | |
777 short *f; | |
778 vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; | |
779 vector signed short R0,G0,B0,R1,G1,B1; | |
780 | |
781 vector unsigned char R,G,B,pels[3]; | |
782 vector unsigned char *out,*nout; | |
13564 | 783 |
784 vector signed short RND = vec_splat((vector signed short)AVV(1<<3),0); | |
785 vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0); | |
12698 | 786 unsigned long scratch[16] __attribute__ ((aligned (16))); |
787 | |
788 vector signed short *YCoeffs, *CCoeffs; | |
789 | |
17588
79081ba52e00
Move the v{Y,C}CoeffsBank vectors into the SwsContext, filling them in just
diego
parents:
17563
diff
changeset
|
790 YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize; |
79081ba52e00
Move the v{Y,C}CoeffsBank vectors into the SwsContext, filling them in just
diego
parents:
17563
diff
changeset
|
791 CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize; |
12698 | 792 |
793 out = (vector unsigned char *)dest; | |
794 | |
795 for(i=0; i<dstW; i+=16){ | |
796 Y0 = RND; | |
797 Y1 = RND; | |
798 /* extract 16 coeffs from lumSrc */ | |
799 for(j=0; j<lumFilterSize; j++) { | |
800 X0 = vec_ld (0, &lumSrc[j][i]); | |
801 X1 = vec_ld (16, &lumSrc[j][i]); | |
802 Y0 = vec_mradds (X0, YCoeffs[j], Y0); | |
803 Y1 = vec_mradds (X1, YCoeffs[j], Y1); | |
804 } | |
805 | |
806 U = RND; | |
807 V = RND; | |
808 /* extract 8 coeffs from U,V */ | |
809 for(j=0; j<chrFilterSize; j++) { | |
810 X = vec_ld (0, &chrSrc[j][i/2]); | |
811 U = vec_mradds (X, CCoeffs[j], U); | |
812 X = vec_ld (0, &chrSrc[j][i/2+2048]); | |
813 V = vec_mradds (X, CCoeffs[j], V); | |
814 } | |
815 | |
816 /* scale and clip signals */ | |
817 Y0 = vec_sra (Y0, SCL); | |
818 Y1 = vec_sra (Y1, SCL); | |
819 U = vec_sra (U, SCL); | |
820 V = vec_sra (V, SCL); | |
821 | |
822 Y0 = vec_clip (Y0); | |
823 Y1 = vec_clip (Y1); | |
824 U = vec_clip (U); | |
825 V = vec_clip (V); | |
826 | |
827 /* now we have | |
828 Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 | |
829 U= u0 u1 u2 u3 u4 u5 u6 u7 V= v0 v1 v2 v3 v4 v5 v6 v7 | |
830 | |
831 Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 | |
832 U0= u0 u0 u1 u1 u2 u2 u3 u3 U1= u4 u4 u5 u5 u6 u6 u7 u7 | |
833 V0= v0 v0 v1 v1 v2 v2 v3 v3 V1= v4 v4 v5 v5 v6 v6 v7 v7 | |
834 */ | |
835 | |
836 U0 = vec_mergeh (U,U); | |
837 V0 = vec_mergeh (V,V); | |
838 | |
839 U1 = vec_mergel (U,U); | |
840 V1 = vec_mergel (V,V); | |
841 | |
842 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0); | |
843 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1); | |
844 | |
845 R = vec_packclp (R0,R1); | |
846 G = vec_packclp (G0,G1); | |
847 B = vec_packclp (B0,B1); | |
848 | |
17560
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
849 switch(c->dstFormat) { |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
850 case IMGFMT_ABGR: out_abgr (R,G,B,out); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
851 case IMGFMT_BGRA: out_bgra (R,G,B,out); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
852 case IMGFMT_RGBA: out_rgba (R,G,B,out); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
853 case IMGFMT_ARGB: out_argb (R,G,B,out); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
854 case IMGFMT_RGB24: out_rgb24 (R,G,B,out); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
855 case IMGFMT_BGR24: out_bgr24 (R,G,B,out); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
856 default: |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
857 { |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
858 /* FIXME: either write more out_* macros or punt to yuv2packedXinC */ |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
859 static int printed_error_message; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
860 if(!printed_error_message) { |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
861 MSG_ERR("altivec_yuv2packedX doesn't support %s output\n", |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
862 vo_format_name(c->dstFormat)); |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
863 printed_error_message=1; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
864 } |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
865 return; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
866 } |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
867 } |
12698 | 868 } |
869 | |
870 if (i < dstW) { | |
871 i -= 16; | |
872 | |
873 Y0 = RND; | |
874 Y1 = RND; | |
875 /* extract 16 coeffs from lumSrc */ | |
876 for(j=0; j<lumFilterSize; j++) { | |
877 X0 = vec_ld (0, &lumSrc[j][i]); | |
878 X1 = vec_ld (16, &lumSrc[j][i]); | |
879 Y0 = vec_mradds (X0, YCoeffs[j], Y0); | |
880 Y1 = vec_mradds (X1, YCoeffs[j], Y1); | |
881 } | |
882 | |
883 U = RND; | |
884 V = RND; | |
885 /* extract 8 coeffs from U,V */ | |
886 for(j=0; j<chrFilterSize; j++) { | |
887 X = vec_ld (0, &chrSrc[j][i/2]); | |
888 U = vec_mradds (X, CCoeffs[j], U); | |
889 X = vec_ld (0, &chrSrc[j][i/2+2048]); | |
890 V = vec_mradds (X, CCoeffs[j], V); | |
891 } | |
892 | |
893 /* scale and clip signals */ | |
894 Y0 = vec_sra (Y0, SCL); | |
895 Y1 = vec_sra (Y1, SCL); | |
896 U = vec_sra (U, SCL); | |
897 V = vec_sra (V, SCL); | |
898 | |
899 Y0 = vec_clip (Y0); | |
900 Y1 = vec_clip (Y1); | |
901 U = vec_clip (U); | |
902 V = vec_clip (V); | |
903 | |
904 /* now we have | |
905 Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 | |
906 U= u0 u1 u2 u3 u4 u5 u6 u7 V= v0 v1 v2 v3 v4 v5 v6 v7 | |
907 | |
908 Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 | |
909 U0= u0 u0 u1 u1 u2 u2 u3 u3 U1= u4 u4 u5 u5 u6 u6 u7 u7 | |
910 V0= v0 v0 v1 v1 v2 v2 v3 v3 V1= v4 v4 v5 v5 v6 v6 v7 v7 | |
911 */ | |
912 | |
913 U0 = vec_mergeh (U,U); | |
914 V0 = vec_mergeh (V,V); | |
915 | |
916 U1 = vec_mergel (U,U); | |
917 V1 = vec_mergel (V,V); | |
918 | |
919 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0); | |
920 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1); | |
921 | |
922 R = vec_packclp (R0,R1); | |
923 G = vec_packclp (G0,G1); | |
924 B = vec_packclp (B0,B1); | |
925 | |
926 nout = (vector unsigned char *)scratch; | |
17560
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
927 switch(c->dstFormat) { |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
928 case IMGFMT_ABGR: out_abgr (R,G,B,nout); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
929 case IMGFMT_BGRA: out_bgra (R,G,B,nout); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
930 case IMGFMT_RGBA: out_rgba (R,G,B,nout); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
931 case IMGFMT_ARGB: out_argb (R,G,B,nout); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
932 case IMGFMT_RGB24: out_rgb24 (R,G,B,nout); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
933 case IMGFMT_BGR24: out_bgr24 (R,G,B,nout); break; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
934 default: |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
935 /* Unreachable, I think. */ |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
936 MSG_ERR("altivec_yuv2packedX doesn't support %s output\n", |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
937 vo_format_name(c->dstFormat)); |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
938 return; |
1a54f39404b9
altivec_yuv2packedX() ignores the requested output format and unconditionally
diego
parents:
17558
diff
changeset
|
939 } |
12698 | 940 |
941 memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4); | |
942 } | |
943 | |
944 } |