Mercurial > mplayer.hg
annotate libswscale/yuv2rgb_vis.c @ 27762:7efce8fe3a04
fixed image format detection for 15 bit color depths
author | faust3 |
---|---|
date | Fri, 17 Oct 2008 10:01:44 +0000 |
parents | 7b83cbade239 |
children | 75586eb0750d |
rev | line source |
---|---|
23805 | 1 /* |
2 * VIS optimized software YUV to RGB converter | |
3 * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 #include <inttypes.h> | |
23 #include <stdlib.h> | |
24 | |
25 #include "swscale.h" | |
27001
afcf4c4d2505
Add missing #include, patch by Jan Knutar, jknutar nic fi.
diego
parents:
25750
diff
changeset
|
26 #include "swscale_internal.h" |
23805 | 27 |
/*
 * Per-row setup shared by the VIS converters in this file.
 *
 * Writes 0x10 to %gsr and preloads the ten 64-bit words found at asm
 * operand %5 (the converters pass c->sparc_coeffs) into %f32..%f50, where
 * YUV2RGB_KERNEL expects them:
 *   words 0..4 -> %f32..%f40 : offset terms subtracted by the kernel
 *   words 5..9 -> %f42..%f50 : multipliers used by the kernel
 *
 * NOTE(review): 0x10 should select GSR.scale = 2 / GSR.align = 0, which is
 * what fpack16 in the kernel relies on -- confirm against the VIS manual.
 */
#define YUV2RGB_INIT \
    "wr %%g0, 0x10, %%gsr \n\t"   /* %gsr = 0x10                     */ \
    "ldd [%5], %%f32 \n\t"        /* word 0: Y offset term           */ \
    "ldd [%5+8], %%f34 \n\t"      /* word 1: U offset term (blue)    */ \
    "ldd [%5+16], %%f36 \n\t"     /* word 2: U offset term (green)   */ \
    "ldd [%5+24], %%f38 \n\t"     /* word 3: V offset term (green)   */ \
    "ldd [%5+32], %%f40 \n\t"     /* word 4: V offset term (red)     */ \
    "ldd [%5+40], %%f42 \n\t"     /* word 5: yCoeff                  */ \
    "ldd [%5+48], %%f44 \n\t"     /* word 6: vgCoeff                 */ \
    "ldd [%5+56], %%f46 \n\t"     /* word 7: vrCoeff                 */ \
    "ldd [%5+64], %%f48 \n\t"     /* word 8: ubCoeff                 */ \
    "ldd [%5+72], %%f50 \n\t"     /* word 9: ugCoeff                 */
40 | |
/*
 * Core YUV -> RGB arithmetic for two groups of four pixels.
 *
 * Expected on entry (set up by the converters and by YUV2RGB_INIT):
 *   %f0 / %f16  : four Y samples each
 *   %f3 / %f19  : four U samples each
 *   %f5 / %f21  : four V samples each
 *   %f32..%f50  : offset terms and multipliers loaded by YUV2RGB_INIT
 * Left on exit, as packed bytes:
 *   %f4 / %f20 = R,  %f6 / %f22 = B,  %f2 / %f18 = G
 *
 * The instruction order matters: %f2/%f18 and %f4/%f20 are overwritten
 * only after the last read of the U/V inputs that share those registers.
 */
#define YUV2RGB_KERNEL \
    /* multiply every sample by its coefficient */ \
    "fmul8x16 %%f3, %%f48, %%f6 \n\t"    /* U * ubCoeff */ \
    "fmul8x16 %%f19, %%f48, %%f22 \n\t" \
    "fmul8x16 %%f5, %%f44, %%f8 \n\t"    /* V * vgCoeff */ \
    "fmul8x16 %%f21, %%f44, %%f24 \n\t" \
    "fmul8x16 %%f0, %%f42, %%f0 \n\t"    /* Y * yCoeff  */ \
    "fmul8x16 %%f16, %%f42, %%f16 \n\t" \
    "fmul8x16 %%f3, %%f50, %%f2 \n\t"    /* U * ugCoeff */ \
    "fmul8x16 %%f19, %%f50, %%f18 \n\t" \
    "fmul8x16 %%f5, %%f46, %%f4 \n\t"    /* V * vrCoeff */ \
    "fmul8x16 %%f21, %%f46, %%f20 \n\t" \
    \
    /* subtract the matching offset term (coefficient word 0..4) */ \
    "fpsub16 %%f6, %%f34, %%f6 \n\t"     /* word 1 */ \
    "fpsub16 %%f22, %%f34, %%f22 \n\t" \
    "fpsub16 %%f8, %%f38, %%f8 \n\t"     /* word 3 */ \
    "fpsub16 %%f24, %%f38, %%f24 \n\t" \
    "fpsub16 %%f0, %%f32, %%f0 \n\t"     /* word 0 */ \
    "fpsub16 %%f16, %%f32, %%f16 \n\t" \
    "fpsub16 %%f2, %%f36, %%f2 \n\t"     /* word 2 */ \
    "fpsub16 %%f18, %%f36, %%f18 \n\t" \
    "fpsub16 %%f4, %%f40, %%f4 \n\t"     /* word 4 */ \
    "fpsub16 %%f20, %%f40, %%f20 \n\t" \
    \
    /* combine the luma term with the chroma terms */ \
    "fpadd16 %%f0, %%f8, %%f8 \n\t"      /* Gt = Y + V part of green */ \
    "fpadd16 %%f16, %%f24, %%f24 \n\t" \
    "fpadd16 %%f0, %%f4, %%f4 \n\t"      /* R  = Y + V part of red   */ \
    "fpadd16 %%f16, %%f20, %%f20 \n\t" \
    "fpadd16 %%f0, %%f6, %%f6 \n\t"      /* B  = Y + U part of blue  */ \
    "fpadd16 %%f16, %%f22, %%f22 \n\t" \
    "fpadd16 %%f8, %%f2, %%f2 \n\t"      /* G  = Gt + U part of green */ \
    "fpadd16 %%f24, %%f18, %%f18 \n\t" \
    \
    /* pack the 16-bit results down to bytes */ \
    "fpack16 %%f4, %%f4 \n\t"            /* R */ \
    "fpack16 %%f20, %%f20 \n\t" \
    "fpack16 %%f6, %%f6 \n\t"            /* B */ \
    "fpack16 %%f22, %%f22 \n\t" \
    "fpack16 %%f2, %%f2 \n\t"            /* G */ \
    "fpack16 %%f18, %%f18 \n\t"
80 | |
81 | |
82 | |
83 static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |
84 int srcSliceH, uint8_t* dst[], int dstStride[]){ | |
85 int y, out1, out2, out3, out4, out5, out6; | |
86 | |
87 for(y=0;y < srcSliceH;++y) { | |
27744 | 88 __asm__ volatile ( |
23805 | 89 YUV2RGB_INIT |
90 "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ | |
91 "1: \n\t" | |
92 "ldda [%1] %%asi, %%f2 \n\t" | |
93 "ldda [%1+2] %%asi, %%f18 \n\t" | |
94 "ldda [%2] %%asi, %%f4 \n\t" | |
95 "ldda [%2+2] %%asi, %%f20 \n\t" | |
96 "ld [%0], %%f0 \n\t" | |
97 "ld [%0+4], %%f16 \n\t" | |
98 "fpmerge %%f3, %%f3, %%f2 \n\t" | |
99 "fpmerge %%f19, %%f19, %%f18 \n\t" | |
100 "fpmerge %%f5, %%f5, %%f4 \n\t" | |
101 "fpmerge %%f21, %%f21, %%f20 \n\t" | |
102 YUV2RGB_KERNEL | |
103 "fzero %%f0 \n\t" | |
104 "fpmerge %%f4, %%f6, %%f8 \n\t" // r,b,t1 | |
105 "fpmerge %%f20, %%f22, %%f24 \n\t" // r,b,t1 | |
106 "fpmerge %%f0, %%f2, %%f10 \n\t" // 0,g,t2 | |
107 "fpmerge %%f0, %%f18, %%f26 \n\t" // 0,g,t2 | |
108 "fpmerge %%f10, %%f8, %%f4 \n\t" // t2,t1,msb | |
109 "fpmerge %%f26, %%f24, %%f20 \n\t" // t2,t1,msb | |
110 "fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb | |
111 "fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb | |
112 "std %%f4, [%3] \n\t" | |
113 "std %%f20, [%3+16] \n\t" | |
114 "std %%f6, [%3+8] \n\t" | |
115 "std %%f22, [%3+24] \n\t" | |
116 | |
117 "add %0, 8, %0 \n\t" | |
118 "add %1, 4, %1 \n\t" | |
119 "add %2, 4, %2 \n\t" | |
120 "subcc %4, 8, %4 \n\t" | |
121 "bne 1b \n\t" | |
122 "add %3, 32, %3 \n\t" //delay slot | |
123 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) | |
124 : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+((y+srcSliceY)>>1)*srcStride[1]), | |
125 "2" (src[2]+((y+srcSliceY)>>1)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), | |
126 "4" (c->dstW), | |
127 "5" (c->sparc_coeffs) | |
128 ); | |
129 } | |
130 | |
131 return srcSliceH; | |
132 } | |
133 | |
134 static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |
135 int srcSliceH, uint8_t* dst[], int dstStride[]){ | |
136 int y, out1, out2, out3, out4, out5, out6; | |
137 | |
138 for(y=0;y < srcSliceH;++y) { | |
27744 | 139 __asm__ volatile ( |
23805 | 140 YUV2RGB_INIT |
141 "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ | |
142 "1: \n\t" | |
143 "ldda [%1] %%asi, %%f2 \n\t" | |
144 "ldda [%1+2] %%asi, %%f18 \n\t" | |
145 "ldda [%2] %%asi, %%f4 \n\t" | |
146 "ldda [%2+2] %%asi, %%f20 \n\t" | |
147 "ld [%0], %%f0 \n\t" | |
148 "ld [%0+4], %%f16 \n\t" | |
149 "fpmerge %%f3, %%f3, %%f2 \n\t" | |
150 "fpmerge %%f19, %%f19, %%f18 \n\t" | |
151 "fpmerge %%f5, %%f5, %%f4 \n\t" | |
152 "fpmerge %%f21, %%f21, %%f20 \n\t" | |
153 YUV2RGB_KERNEL | |
154 "fzero %%f0 \n\t" | |
155 "fpmerge %%f4, %%f6, %%f8 \n\t" // r,b,t1 | |
156 "fpmerge %%f20, %%f22, %%f24 \n\t" // r,b,t1 | |
157 "fpmerge %%f0, %%f2, %%f10 \n\t" // 0,g,t2 | |
158 "fpmerge %%f0, %%f18, %%f26 \n\t" // 0,g,t2 | |
159 "fpmerge %%f10, %%f8, %%f4 \n\t" // t2,t1,msb | |
160 "fpmerge %%f26, %%f24, %%f20 \n\t" // t2,t1,msb | |
161 "fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb | |
162 "fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb | |
163 "std %%f4, [%3] \n\t" | |
164 "std %%f20, [%3+16] \n\t" | |
165 "std %%f6, [%3+8] \n\t" | |
166 "std %%f22, [%3+24] \n\t" | |
167 | |
168 "add %0, 8, %0 \n\t" | |
169 "add %1, 4, %1 \n\t" | |
170 "add %2, 4, %2 \n\t" | |
171 "subcc %4, 8, %4 \n\t" | |
172 "bne 1b \n\t" | |
173 "add %3, 32, %3 \n\t" //delay slot | |
174 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) | |
175 : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+(y+srcSliceY)*srcStride[1]), | |
176 "2" (src[2]+(y+srcSliceY)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), | |
177 "4" (c->dstW), | |
178 "5" (c->sparc_coeffs) | |
179 ); | |
180 } | |
181 | |
182 return srcSliceH; | |
183 } | |
184 | |
185 SwsFunc yuv2rgb_init_vis(SwsContext *c) { | |
186 c->sparc_coeffs[5]=c->yCoeff; | |
187 c->sparc_coeffs[6]=c->vgCoeff; | |
188 c->sparc_coeffs[7]=c->vrCoeff; | |
189 c->sparc_coeffs[8]=c->ubCoeff; | |
190 c->sparc_coeffs[9]=c->ugCoeff; | |
191 | |
25750 | 192 c->sparc_coeffs[0]=(((int16_t)c->yOffset*(int16_t)c->yCoeff >>11) & 0xffff) * 0x0001000100010001ULL; |
23805 | 193 c->sparc_coeffs[1]=(((int16_t)c->uOffset*(int16_t)c->ubCoeff>>11) & 0xffff) * 0x0001000100010001ULL; |
194 c->sparc_coeffs[2]=(((int16_t)c->uOffset*(int16_t)c->ugCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
195 c->sparc_coeffs[3]=(((int16_t)c->vOffset*(int16_t)c->vgCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
196 c->sparc_coeffs[4]=(((int16_t)c->vOffset*(int16_t)c->vrCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
197 | |
198 if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV422P && (c->dstW & 7)==0) { | |
199 av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV422P -> RGB32\n"); | |
200 return vis_422P_ARGB32; | |
201 } | |
202 else if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV420P && (c->dstW & 7)==0) { | |
203 av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV420P -> RGB32\n"); | |
204 return vis_420P_ARGB32; | |
205 } | |
206 return NULL; | |
207 } |