Mercurial > mplayer.hg
annotate libswscale/yuv2rgb_vis.c @ 27815:f92271dc5f17
Remove X11 backing store: this is now a useless flag.
Also, it is mandatory for Xserver 1.5.x (part of Xorg 7.4, shipped on all
Linux distributions starting from Oct. 08) and will be removed
from Xserver 1.6 anyhow ...
Patch by Stephane Marchesin (marchesin at icps dot u dash strasbg dot fr).
For more info, see long flame thread at:
http://lists.mplayerhq.hu/pipermail/mplayer-dev-eng/2008-August/058323.html
author | ben |
---|---|
date | Wed, 29 Oct 2008 22:03:36 +0000 |
parents | 7b83cbade239 |
children | 75586eb0750d |
rev | line source |
---|---|
23805 | 1 /* |
2 * VIS optimized software YUV to RGB converter | |
3 * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 #include <inttypes.h> | |
23 #include <stdlib.h> | |
24 | |
25 #include "swscale.h" | |
27001
afcf4c4d2505
Add missing #include, patch by Jan Knutar, jknutar nic fi.
diego
parents:
25750
diff
changeset
|
26 #include "swscale_internal.h" |
23805 | 27 |
/*
 * YUV2RGB_INIT: asm prologue shared by the VIS converters in this file.
 *
 * Writes 0x10 to %gsr (presumably selecting the scale factor used by the
 * fpack16 instructions in YUV2RGB_KERNEL -- confirm against the VIS manual),
 * then loads ten 64-bit words from the table addressed by asm operand %5
 * into %f32..%f50.  With the table filled in by yuv2rgb_init_vis() this gives:
 *
 *   %f32 = [0] yOffset*yCoeff  term      %f42 = [5] yCoeff
 *   %f34 = [1] uOffset*ubCoeff term      %f44 = [6] vgCoeff
 *   %f36 = [2] uOffset*ugCoeff term      %f46 = [7] vrCoeff
 *   %f38 = [3] vOffset*vgCoeff term      %f48 = [8] ubCoeff
 *   %f40 = [4] vOffset*vrCoeff term      %f50 = [9] ugCoeff
 *
 * The "%%" escapes and the "%5" reference mean this macro is only usable
 * inside an extended asm statement whose operand 5 is the coefficient table.
 */
28 #define YUV2RGB_INIT \ | |
29 "wr %%g0, 0x10, %%gsr \n\t" \ | |
30 "ldd [%5], %%f32 \n\t" \ | |
31 "ldd [%5+8], %%f34 \n\t" \ | |
32 "ldd [%5+16], %%f36 \n\t" \ | |
33 "ldd [%5+24], %%f38 \n\t" \ | |
34 "ldd [%5+32], %%f40 \n\t" \ | |
35 "ldd [%5+40], %%f42 \n\t" \ | |
36 "ldd [%5+48], %%f44 \n\t" \ | |
37 "ldd [%5+56], %%f46 \n\t" \ | |
38 "ldd [%5+64], %%f48 \n\t" \ | |
39 "ldd [%5+72], %%f50 \n\t" | |
40 | |
/*
 * YUV2RGB_KERNEL: colour-space arithmetic for two pixel groups at once.
 *
 * Inputs (first group / second group):
 *   Y in %f0 / %f16, u in %f3 / %f19, v in %f5 / %f21,
 *   plus the constants loaded into %f32..%f50 by YUV2RGB_INIT.
 *
 * Steps, in the order they appear below:
 *   1. fmul8x16: multiply the samples by their coefficients
 *        u*%f48 -> %f6/%f22, v*%f44 -> %f8/%f24, Y*%f42 -> %f0/%f16,
 *        u*%f50 -> %f2/%f18, v*%f46 -> %f4/%f20.
 *   2. fpsub16: subtract the matching precomputed offset term
 *        (the trailing digit in each comment is the sparc_coeffs[] index
 *        held in the register being subtracted).
 *   3. fpadd16: combine the Y term with the chroma terms to form
 *        R (%f4/%f20), B (%f6/%f22) and G (%f2/%f18); %f8/%f24 hold the
 *        intermediate "Gt" sum.
 *   4. fpack16: pack R, B and G back down in place.
 *
 * Outputs: R in %f4/%f20, B in %f6/%f22, G in %f2/%f18.
 * %f0, %f8, %f16 and %f24 are overwritten as scratch.
 */
41 #define YUV2RGB_KERNEL \ | |
42 /* ^^^^ f0=Y f3=u f5=v */ \ | |
43 "fmul8x16 %%f3, %%f48, %%f6 \n\t" \ | |
44 "fmul8x16 %%f19, %%f48, %%f22 \n\t" \ | |
45 "fmul8x16 %%f5, %%f44, %%f8 \n\t" \ | |
46 "fmul8x16 %%f21, %%f44, %%f24 \n\t" \ | |
47 "fmul8x16 %%f0, %%f42, %%f0 \n\t" \ | |
48 "fmul8x16 %%f16, %%f42, %%f16 \n\t" \ | |
49 "fmul8x16 %%f3, %%f50, %%f2 \n\t" \ | |
50 "fmul8x16 %%f19, %%f50, %%f18 \n\t" \ | |
51 "fmul8x16 %%f5, %%f46, %%f4 \n\t" \ | |
52 "fmul8x16 %%f21, %%f46, %%f20 \n\t" \ | |
53 \ | |
54 "fpsub16 %%f6, %%f34, %%f6 \n\t" /* 1 */ \ | |
55 "fpsub16 %%f22, %%f34, %%f22 \n\t" /* 1 */ \ | |
56 "fpsub16 %%f8, %%f38, %%f8 \n\t" /* 3 */ \ | |
57 "fpsub16 %%f24, %%f38, %%f24 \n\t" /* 3 */ \ | |
58 "fpsub16 %%f0, %%f32, %%f0 \n\t" /* 0 */ \ | |
59 "fpsub16 %%f16, %%f32, %%f16 \n\t" /* 0 */ \ | |
60 "fpsub16 %%f2, %%f36, %%f2 \n\t" /* 2 */ \ | |
61 "fpsub16 %%f18, %%f36, %%f18 \n\t" /* 2 */ \ | |
62 "fpsub16 %%f4, %%f40, %%f4 \n\t" /* 4 */ \ | |
63 "fpsub16 %%f20, %%f40, %%f20 \n\t" /* 4 */ \ | |
64 \ | |
65 "fpadd16 %%f0, %%f8, %%f8 \n\t" /* Gt */ \ | |
66 "fpadd16 %%f16, %%f24, %%f24 \n\t" /* Gt */ \ | |
67 "fpadd16 %%f0, %%f4, %%f4 \n\t" /* R */ \ | |
68 "fpadd16 %%f16, %%f20, %%f20 \n\t" /* R */ \ | |
69 "fpadd16 %%f0, %%f6, %%f6 \n\t" /* B */ \ | |
70 "fpadd16 %%f16, %%f22, %%f22 \n\t" /* B */ \ | |
71 "fpadd16 %%f8, %%f2, %%f2 \n\t" /* G */ \ | |
72 "fpadd16 %%f24, %%f18, %%f18 \n\t" /* G */ \ | |
73 \ | |
74 "fpack16 %%f4, %%f4 \n\t" \ | |
75 "fpack16 %%f20, %%f20 \n\t" \ | |
76 "fpack16 %%f6, %%f6 \n\t" \ | |
77 "fpack16 %%f22, %%f22 \n\t" \ | |
78 "fpack16 %%f2, %%f2 \n\t" \ | |
79 "fpack16 %%f18, %%f18 \n\t" | |
80 | |
81 | |
82 | |
83 static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |
84 int srcSliceH, uint8_t* dst[], int dstStride[]){ | |
85 int y, out1, out2, out3, out4, out5, out6; | |
86 | |
87 for(y=0;y < srcSliceH;++y) { | |
27744 | 88 __asm__ volatile ( |
23805 | 89 YUV2RGB_INIT |
90 "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ | |
91 "1: \n\t" | |
92 "ldda [%1] %%asi, %%f2 \n\t" | |
93 "ldda [%1+2] %%asi, %%f18 \n\t" | |
94 "ldda [%2] %%asi, %%f4 \n\t" | |
95 "ldda [%2+2] %%asi, %%f20 \n\t" | |
96 "ld [%0], %%f0 \n\t" | |
97 "ld [%0+4], %%f16 \n\t" | |
98 "fpmerge %%f3, %%f3, %%f2 \n\t" | |
99 "fpmerge %%f19, %%f19, %%f18 \n\t" | |
100 "fpmerge %%f5, %%f5, %%f4 \n\t" | |
101 "fpmerge %%f21, %%f21, %%f20 \n\t" | |
102 YUV2RGB_KERNEL | |
103 "fzero %%f0 \n\t" | |
104 "fpmerge %%f4, %%f6, %%f8 \n\t" // r,b,t1 | |
105 "fpmerge %%f20, %%f22, %%f24 \n\t" // r,b,t1 | |
106 "fpmerge %%f0, %%f2, %%f10 \n\t" // 0,g,t2 | |
107 "fpmerge %%f0, %%f18, %%f26 \n\t" // 0,g,t2 | |
108 "fpmerge %%f10, %%f8, %%f4 \n\t" // t2,t1,msb | |
109 "fpmerge %%f26, %%f24, %%f20 \n\t" // t2,t1,msb | |
110 "fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb | |
111 "fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb | |
112 "std %%f4, [%3] \n\t" | |
113 "std %%f20, [%3+16] \n\t" | |
114 "std %%f6, [%3+8] \n\t" | |
115 "std %%f22, [%3+24] \n\t" | |
116 | |
117 "add %0, 8, %0 \n\t" | |
118 "add %1, 4, %1 \n\t" | |
119 "add %2, 4, %2 \n\t" | |
120 "subcc %4, 8, %4 \n\t" | |
121 "bne 1b \n\t" | |
122 "add %3, 32, %3 \n\t" //delay slot | |
123 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) | |
124 : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+((y+srcSliceY)>>1)*srcStride[1]), | |
125 "2" (src[2]+((y+srcSliceY)>>1)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), | |
126 "4" (c->dstW), | |
127 "5" (c->sparc_coeffs) | |
128 ); | |
129 } | |
130 | |
131 return srcSliceH; | |
132 } | |
133 | |
134 static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |
135 int srcSliceH, uint8_t* dst[], int dstStride[]){ | |
136 int y, out1, out2, out3, out4, out5, out6; | |
137 | |
138 for(y=0;y < srcSliceH;++y) { | |
27744 | 139 __asm__ volatile ( |
23805 | 140 YUV2RGB_INIT |
141 "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ | |
142 "1: \n\t" | |
143 "ldda [%1] %%asi, %%f2 \n\t" | |
144 "ldda [%1+2] %%asi, %%f18 \n\t" | |
145 "ldda [%2] %%asi, %%f4 \n\t" | |
146 "ldda [%2+2] %%asi, %%f20 \n\t" | |
147 "ld [%0], %%f0 \n\t" | |
148 "ld [%0+4], %%f16 \n\t" | |
149 "fpmerge %%f3, %%f3, %%f2 \n\t" | |
150 "fpmerge %%f19, %%f19, %%f18 \n\t" | |
151 "fpmerge %%f5, %%f5, %%f4 \n\t" | |
152 "fpmerge %%f21, %%f21, %%f20 \n\t" | |
153 YUV2RGB_KERNEL | |
154 "fzero %%f0 \n\t" | |
155 "fpmerge %%f4, %%f6, %%f8 \n\t" // r,b,t1 | |
156 "fpmerge %%f20, %%f22, %%f24 \n\t" // r,b,t1 | |
157 "fpmerge %%f0, %%f2, %%f10 \n\t" // 0,g,t2 | |
158 "fpmerge %%f0, %%f18, %%f26 \n\t" // 0,g,t2 | |
159 "fpmerge %%f10, %%f8, %%f4 \n\t" // t2,t1,msb | |
160 "fpmerge %%f26, %%f24, %%f20 \n\t" // t2,t1,msb | |
161 "fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb | |
162 "fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb | |
163 "std %%f4, [%3] \n\t" | |
164 "std %%f20, [%3+16] \n\t" | |
165 "std %%f6, [%3+8] \n\t" | |
166 "std %%f22, [%3+24] \n\t" | |
167 | |
168 "add %0, 8, %0 \n\t" | |
169 "add %1, 4, %1 \n\t" | |
170 "add %2, 4, %2 \n\t" | |
171 "subcc %4, 8, %4 \n\t" | |
172 "bne 1b \n\t" | |
173 "add %3, 32, %3 \n\t" //delay slot | |
174 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) | |
175 : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+(y+srcSliceY)*srcStride[1]), | |
176 "2" (src[2]+(y+srcSliceY)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), | |
177 "4" (c->dstW), | |
178 "5" (c->sparc_coeffs) | |
179 ); | |
180 } | |
181 | |
182 return srcSliceH; | |
183 } | |
184 | |
185 SwsFunc yuv2rgb_init_vis(SwsContext *c) { | |
186 c->sparc_coeffs[5]=c->yCoeff; | |
187 c->sparc_coeffs[6]=c->vgCoeff; | |
188 c->sparc_coeffs[7]=c->vrCoeff; | |
189 c->sparc_coeffs[8]=c->ubCoeff; | |
190 c->sparc_coeffs[9]=c->ugCoeff; | |
191 | |
25750 | 192 c->sparc_coeffs[0]=(((int16_t)c->yOffset*(int16_t)c->yCoeff >>11) & 0xffff) * 0x0001000100010001ULL; |
23805 | 193 c->sparc_coeffs[1]=(((int16_t)c->uOffset*(int16_t)c->ubCoeff>>11) & 0xffff) * 0x0001000100010001ULL; |
194 c->sparc_coeffs[2]=(((int16_t)c->uOffset*(int16_t)c->ugCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
195 c->sparc_coeffs[3]=(((int16_t)c->vOffset*(int16_t)c->vgCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
196 c->sparc_coeffs[4]=(((int16_t)c->vOffset*(int16_t)c->vrCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
197 | |
198 if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV422P && (c->dstW & 7)==0) { | |
199 av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV422P -> RGB32\n"); | |
200 return vis_422P_ARGB32; | |
201 } | |
202 else if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV420P && (c->dstW & 7)==0) { | |
203 av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV420P -> RGB32\n"); | |
204 return vis_420P_ARGB32; | |
205 } | |
206 return NULL; | |
207 } |