Mercurial > mplayer.hg
annotate libswscale/yuv2rgb_vis.c @ 28872:589e9fc4d4ce
Remove native nuv demuxer, it only needs more code to achieve the same thing
as the libavformat demuxer.
author | reimar |
---|---|
date | Mon, 09 Mar 2009 13:11:37 +0000 |
parents | 28aa6d8a23ba |
children | 1e56ea9937ce |
rev | line source |
---|---|
23805 | 1 /* |
2 * VIS optimized software YUV to RGB converter | |
3 * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 #include <inttypes.h> | |
23 #include <stdlib.h> | |
24 | |
25 #include "swscale.h" | |
27001
afcf4c4d2505
Add missing #include, patch by Jan Knutar, jknutar nic fi.
diego
parents:
25750
diff
changeset
|
26 #include "swscale_internal.h" |
23805 | 27 |
/*
 * YUV2RGB_INIT: asm prologue shared by the VIS converters in this file.
 *
 * Writes 0x10 to %gsr (presumably selecting the scale factor later used by
 * fpack16 -- confirm against the VIS manual), then loads ten 64-bit values
 * from the table addressed by asm operand %5 into %f32..%f50.
 * The converters pass c->sparc_coeffs as operand %5, and
 * sws_yuv2rgb_init_vis() fills that table as follows:
 *   %f32 = [0] Y offset term        %f42 = [5] yCoeff
 *   %f34 = [1] U offset * ubCoeff   %f44 = [6] vgCoeff
 *   %f36 = [2] U offset * ugCoeff   %f46 = [7] vrCoeff
 *   %f38 = [3] V offset * vgCoeff   %f48 = [8] ubCoeff
 *   %f40 = [4] V offset * vrCoeff   %f50 = [9] ugCoeff
 */
28 #define YUV2RGB_INIT \ | |
29 "wr %%g0, 0x10, %%gsr \n\t" \ | |
30 "ldd [%5], %%f32 \n\t" \ | |
31 "ldd [%5+8], %%f34 \n\t" \ | |
32 "ldd [%5+16], %%f36 \n\t" \ | |
33 "ldd [%5+24], %%f38 \n\t" \ | |
34 "ldd [%5+32], %%f40 \n\t" \ | |
35 "ldd [%5+40], %%f42 \n\t" \ | |
36 "ldd [%5+48], %%f44 \n\t" \ | |
37 "ldd [%5+56], %%f46 \n\t" \ | |
38 "ldd [%5+64], %%f48 \n\t" \ | |
39 "ldd [%5+72], %%f50 \n\t" | |
40 | |
/*
 * YUV2RGB_KERNEL: asm body that turns two groups of samples into R, G and B.
 *
 * Inputs (first group / second group): Y in %f0 / %f16, U in %f3 / %f19,
 * V in %f5 / %f21, plus the constants loaded by YUV2RGB_INIT in %f32..%f50.
 *
 * Steps, as performed by the instructions below:
 *   1. fmul8x16: multiply U by ubCoeff (%f48) and ugCoeff (%f50),
 *      V by vgCoeff (%f44) and vrCoeff (%f46), Y by yCoeff (%f42).
 *   2. fpsub16: subtract the matching pre-multiplied offset term
 *      (%f32..%f40; the trailing number is its sparc_coeffs index).
 *   3. fpadd16: Gt = Y + V*vg, R = Y + V*vr, B = Y + U*ub, G = Gt + U*ug.
 *   4. fpack16: pack the 16-bit results in place.
 *
 * Outputs: R in %f4 / %f20, B in %f6 / %f22, G in %f2 / %f18.
 * %f0, %f8, %f16 and %f24 are overwritten as temporaries.
 */
41 #define YUV2RGB_KERNEL \ | |
42 /* ^^^^ f0=Y f3=u f5=v */ \ | |
43 "fmul8x16 %%f3, %%f48, %%f6 \n\t" \ | |
44 "fmul8x16 %%f19, %%f48, %%f22 \n\t" \ | |
45 "fmul8x16 %%f5, %%f44, %%f8 \n\t" \ | |
46 "fmul8x16 %%f21, %%f44, %%f24 \n\t" \ | |
47 "fmul8x16 %%f0, %%f42, %%f0 \n\t" \ | |
48 "fmul8x16 %%f16, %%f42, %%f16 \n\t" \ | |
49 "fmul8x16 %%f3, %%f50, %%f2 \n\t" \ | |
50 "fmul8x16 %%f19, %%f50, %%f18 \n\t" \ | |
51 "fmul8x16 %%f5, %%f46, %%f4 \n\t" \ | |
52 "fmul8x16 %%f21, %%f46, %%f20 \n\t" \ | |
53 \ | |
54 "fpsub16 %%f6, %%f34, %%f6 \n\t" /* 1 */ \ | |
55 "fpsub16 %%f22, %%f34, %%f22 \n\t" /* 1 */ \ | |
56 "fpsub16 %%f8, %%f38, %%f8 \n\t" /* 3 */ \ | |
57 "fpsub16 %%f24, %%f38, %%f24 \n\t" /* 3 */ \ | |
58 "fpsub16 %%f0, %%f32, %%f0 \n\t" /* 0 */ \ | |
59 "fpsub16 %%f16, %%f32, %%f16 \n\t" /* 0 */ \ | |
60 "fpsub16 %%f2, %%f36, %%f2 \n\t" /* 2 */ \ | |
61 "fpsub16 %%f18, %%f36, %%f18 \n\t" /* 2 */ \ | |
62 "fpsub16 %%f4, %%f40, %%f4 \n\t" /* 4 */ \ | |
63 "fpsub16 %%f20, %%f40, %%f20 \n\t" /* 4 */ \ | |
64 \ | |
65 "fpadd16 %%f0, %%f8, %%f8 \n\t" /* Gt */ \ | |
66 "fpadd16 %%f16, %%f24, %%f24 \n\t" /* Gt */ \ | |
67 "fpadd16 %%f0, %%f4, %%f4 \n\t" /* R */ \ | |
68 "fpadd16 %%f16, %%f20, %%f20 \n\t" /* R */ \ | |
69 "fpadd16 %%f0, %%f6, %%f6 \n\t" /* B */ \ | |
70 "fpadd16 %%f16, %%f22, %%f22 \n\t" /* B */ \ | |
71 "fpadd16 %%f8, %%f2, %%f2 \n\t" /* G */ \ | |
72 "fpadd16 %%f24, %%f18, %%f18 \n\t" /* G */ \ | |
73 \ | |
74 "fpack16 %%f4, %%f4 \n\t" \ | |
75 "fpack16 %%f20, %%f20 \n\t" \ | |
76 "fpack16 %%f6, %%f6 \n\t" \ | |
77 "fpack16 %%f22, %%f22 \n\t" \ | |
78 "fpack16 %%f2, %%f2 \n\t" \ | |
79 "fpack16 %%f18, %%f18 \n\t" | |
80 | |
81 | |
82 | |
28742 | 83 // FIXME: must be changed to set alpha to 255 instead of 0 |
23805 | 84 static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, |
85 int srcSliceH, uint8_t* dst[], int dstStride[]){ | |
86 int y, out1, out2, out3, out4, out5, out6; | |
87 | |
88 for(y=0;y < srcSliceH;++y) { | |
27744 | 89 __asm__ volatile ( |
23805 | 90 YUV2RGB_INIT |
91 "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ | |
92 "1: \n\t" | |
93 "ldda [%1] %%asi, %%f2 \n\t" | |
94 "ldda [%1+2] %%asi, %%f18 \n\t" | |
95 "ldda [%2] %%asi, %%f4 \n\t" | |
96 "ldda [%2+2] %%asi, %%f20 \n\t" | |
97 "ld [%0], %%f0 \n\t" | |
98 "ld [%0+4], %%f16 \n\t" | |
99 "fpmerge %%f3, %%f3, %%f2 \n\t" | |
100 "fpmerge %%f19, %%f19, %%f18 \n\t" | |
101 "fpmerge %%f5, %%f5, %%f4 \n\t" | |
102 "fpmerge %%f21, %%f21, %%f20 \n\t" | |
103 YUV2RGB_KERNEL | |
104 "fzero %%f0 \n\t" | |
105 "fpmerge %%f4, %%f6, %%f8 \n\t" // r,b,t1 | |
106 "fpmerge %%f20, %%f22, %%f24 \n\t" // r,b,t1 | |
107 "fpmerge %%f0, %%f2, %%f10 \n\t" // 0,g,t2 | |
108 "fpmerge %%f0, %%f18, %%f26 \n\t" // 0,g,t2 | |
109 "fpmerge %%f10, %%f8, %%f4 \n\t" // t2,t1,msb | |
110 "fpmerge %%f26, %%f24, %%f20 \n\t" // t2,t1,msb | |
111 "fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb | |
112 "fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb | |
113 "std %%f4, [%3] \n\t" | |
114 "std %%f20, [%3+16] \n\t" | |
115 "std %%f6, [%3+8] \n\t" | |
116 "std %%f22, [%3+24] \n\t" | |
117 | |
118 "add %0, 8, %0 \n\t" | |
119 "add %1, 4, %1 \n\t" | |
120 "add %2, 4, %2 \n\t" | |
121 "subcc %4, 8, %4 \n\t" | |
122 "bne 1b \n\t" | |
123 "add %3, 32, %3 \n\t" //delay slot | |
124 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) | |
125 : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+((y+srcSliceY)>>1)*srcStride[1]), | |
126 "2" (src[2]+((y+srcSliceY)>>1)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), | |
127 "4" (c->dstW), | |
128 "5" (c->sparc_coeffs) | |
129 ); | |
130 } | |
131 | |
132 return srcSliceH; | |
133 } | |
134 | |
28742 | 135 // FIXME: must be changed to set alpha to 255 instead of 0 |
23805 | 136 static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, |
137 int srcSliceH, uint8_t* dst[], int dstStride[]){ | |
138 int y, out1, out2, out3, out4, out5, out6; | |
139 | |
140 for(y=0;y < srcSliceH;++y) { | |
27744 | 141 __asm__ volatile ( |
23805 | 142 YUV2RGB_INIT |
143 "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ | |
144 "1: \n\t" | |
145 "ldda [%1] %%asi, %%f2 \n\t" | |
146 "ldda [%1+2] %%asi, %%f18 \n\t" | |
147 "ldda [%2] %%asi, %%f4 \n\t" | |
148 "ldda [%2+2] %%asi, %%f20 \n\t" | |
149 "ld [%0], %%f0 \n\t" | |
150 "ld [%0+4], %%f16 \n\t" | |
151 "fpmerge %%f3, %%f3, %%f2 \n\t" | |
152 "fpmerge %%f19, %%f19, %%f18 \n\t" | |
153 "fpmerge %%f5, %%f5, %%f4 \n\t" | |
154 "fpmerge %%f21, %%f21, %%f20 \n\t" | |
155 YUV2RGB_KERNEL | |
156 "fzero %%f0 \n\t" | |
157 "fpmerge %%f4, %%f6, %%f8 \n\t" // r,b,t1 | |
158 "fpmerge %%f20, %%f22, %%f24 \n\t" // r,b,t1 | |
159 "fpmerge %%f0, %%f2, %%f10 \n\t" // 0,g,t2 | |
160 "fpmerge %%f0, %%f18, %%f26 \n\t" // 0,g,t2 | |
161 "fpmerge %%f10, %%f8, %%f4 \n\t" // t2,t1,msb | |
162 "fpmerge %%f26, %%f24, %%f20 \n\t" // t2,t1,msb | |
163 "fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb | |
164 "fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb | |
165 "std %%f4, [%3] \n\t" | |
166 "std %%f20, [%3+16] \n\t" | |
167 "std %%f6, [%3+8] \n\t" | |
168 "std %%f22, [%3+24] \n\t" | |
169 | |
170 "add %0, 8, %0 \n\t" | |
171 "add %1, 4, %1 \n\t" | |
172 "add %2, 4, %2 \n\t" | |
173 "subcc %4, 8, %4 \n\t" | |
174 "bne 1b \n\t" | |
175 "add %3, 32, %3 \n\t" //delay slot | |
176 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) | |
177 : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+(y+srcSliceY)*srcStride[1]), | |
178 "2" (src[2]+(y+srcSliceY)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), | |
179 "4" (c->dstW), | |
180 "5" (c->sparc_coeffs) | |
181 ); | |
182 } | |
183 | |
184 return srcSliceH; | |
185 } | |
186 | |
28461 | 187 SwsFunc sws_yuv2rgb_init_vis(SwsContext *c) { |
23805 | 188 c->sparc_coeffs[5]=c->yCoeff; |
189 c->sparc_coeffs[6]=c->vgCoeff; | |
190 c->sparc_coeffs[7]=c->vrCoeff; | |
191 c->sparc_coeffs[8]=c->ubCoeff; | |
192 c->sparc_coeffs[9]=c->ugCoeff; | |
193 | |
25750 | 194 c->sparc_coeffs[0]=(((int16_t)c->yOffset*(int16_t)c->yCoeff >>11) & 0xffff) * 0x0001000100010001ULL; |
23805 | 195 c->sparc_coeffs[1]=(((int16_t)c->uOffset*(int16_t)c->ubCoeff>>11) & 0xffff) * 0x0001000100010001ULL; |
196 c->sparc_coeffs[2]=(((int16_t)c->uOffset*(int16_t)c->ugCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
197 c->sparc_coeffs[3]=(((int16_t)c->vOffset*(int16_t)c->vgCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
198 c->sparc_coeffs[4]=(((int16_t)c->vOffset*(int16_t)c->vrCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
199 | |
200 if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV422P && (c->dstW & 7)==0) { | |
28741
b6e1b6af8e99
Add warnings to yuv2rgb_vis.c because alpha is set wrong (0 instead of 255).
reimar
parents:
28461
diff
changeset
|
201 av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV422P -> RGB32 (WARNING: alpha value is wrong)\n"); |
23805 | 202 return vis_422P_ARGB32; |
203 } | |
204 else if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV420P && (c->dstW & 7)==0) { | |
28741
b6e1b6af8e99
Add warnings to yuv2rgb_vis.c because alpha is set wrong (0 instead of 255).
reimar
parents:
28461
diff
changeset
|
205 av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV420P -> RGB32 (WARNING: alpha value is wrong)\n"); |
23805 | 206 return vis_420P_ARGB32; |
207 } | |
208 return NULL; | |
209 } |