Mercurial > mplayer.hg
annotate libswscale/yuv2rgb_vis.c @ 28615:15e7abed4291
Use the same code to convert fps in float to fraction as used in mencoder,
it ensures all the common frame rates work right.
If this causes issues, it should be changed in the same way in mencoder.c
author | reimar |
---|---|
date | Wed, 18 Feb 2009 16:49:12 +0000 |
parents | 75586eb0750d |
children | b6e1b6af8e99 |
rev | line source |
---|---|
23805 | 1 /* |
2 * VIS optimized software YUV to RGB converter | |
3 * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 #include <inttypes.h> | |
23 #include <stdlib.h> | |
24 | |
25 #include "swscale.h" | |
27001
afcf4c4d2505
Add missing #include, patch by Jan Knutar, jknutar nic fi.
diego
parents:
25750
diff
changeset
|
26 #include "swscale_internal.h" |
23805 | 27 |
/*
 * YUV2RGB_INIT: asm prologue shared by the converters in this file.
 *
 * Writes 0x10 to %gsr, then loads ten doublewords from the table addressed
 * by asm operand %5 (byte offsets 0..72) into %f32, %f34, ... %f50.
 * The converters pass c->sparc_coeffs as operand %5; assuming 8-byte table
 * elements, entries [0..4] (offset terms) land in %f32..%f40 and entries
 * [5..9] (coefficients) land in %f42..%f50.
 */
28 #define YUV2RGB_INIT \ | |
29 "wr %%g0, 0x10, %%gsr \n\t" \ | |
30 "ldd [%5], %%f32 \n\t" \ | |
31 "ldd [%5+8], %%f34 \n\t" \ | |
32 "ldd [%5+16], %%f36 \n\t" \ | |
33 "ldd [%5+24], %%f38 \n\t" \ | |
34 "ldd [%5+32], %%f40 \n\t" \ | |
35 "ldd [%5+40], %%f42 \n\t" \ | |
36 "ldd [%5+48], %%f44 \n\t" \ | |
37 "ldd [%5+56], %%f46 \n\t" \ | |
38 "ldd [%5+64], %%f48 \n\t" \ | |
39 "ldd [%5+72], %%f50 \n\t" | |
40 | |
/*
 * YUV2RGB_KERNEL: asm body that turns already-loaded Y/U/V samples into
 * packed R, G and B bytes. It works on two independent register sets in
 * parallel: the first uses %f0..%f8, the second the same registers + 16.
 *
 * Inputs (first set / second set): Y in %f0 / %f16, U in %f3 / %f19,
 * V in %f5 / %f21. Constants are expected in %f32..%f50 as loaded by
 * YUV2RGB_INIT.
 *
 * Steps:
 *   1. fmul8x16: multiply the samples by the coefficients in %f42..%f50.
 *   2. fpsub16:  subtract the offset terms held in %f32..%f40 (the numbers
 *                in the trailing comments are the sparc_coeffs indices).
 *   3. fpadd16:  add the luma term to each chroma term to form R, B and G
 *                (Gt is the partial green sum before the second chroma term).
 *   4. fpack16:  pack the 16-bit sums to bytes in place.
 *
 * Outputs: R in %f4 / %f20, B in %f6 / %f22, G in %f2 / %f18.
 */
41 #define YUV2RGB_KERNEL \ | |
42 /* ^^^^ f0=Y f3=u f5=v */ \ | |
43 "fmul8x16 %%f3, %%f48, %%f6 \n\t" \ | |
44 "fmul8x16 %%f19, %%f48, %%f22 \n\t" \ | |
45 "fmul8x16 %%f5, %%f44, %%f8 \n\t" \ | |
46 "fmul8x16 %%f21, %%f44, %%f24 \n\t" \ | |
47 "fmul8x16 %%f0, %%f42, %%f0 \n\t" \ | |
48 "fmul8x16 %%f16, %%f42, %%f16 \n\t" \ | |
49 "fmul8x16 %%f3, %%f50, %%f2 \n\t" \ | |
50 "fmul8x16 %%f19, %%f50, %%f18 \n\t" \ | |
51 "fmul8x16 %%f5, %%f46, %%f4 \n\t" \ | |
52 "fmul8x16 %%f21, %%f46, %%f20 \n\t" \ | |
53 \ | |
54 "fpsub16 %%f6, %%f34, %%f6 \n\t" /* 1 */ \ | |
55 "fpsub16 %%f22, %%f34, %%f22 \n\t" /* 1 */ \ | |
56 "fpsub16 %%f8, %%f38, %%f8 \n\t" /* 3 */ \ | |
57 "fpsub16 %%f24, %%f38, %%f24 \n\t" /* 3 */ \ | |
58 "fpsub16 %%f0, %%f32, %%f0 \n\t" /* 0 */ \ | |
59 "fpsub16 %%f16, %%f32, %%f16 \n\t" /* 0 */ \ | |
60 "fpsub16 %%f2, %%f36, %%f2 \n\t" /* 2 */ \ | |
61 "fpsub16 %%f18, %%f36, %%f18 \n\t" /* 2 */ \ | |
62 "fpsub16 %%f4, %%f40, %%f4 \n\t" /* 4 */ \ | |
63 "fpsub16 %%f20, %%f40, %%f20 \n\t" /* 4 */ \ | |
64 \ | |
65 "fpadd16 %%f0, %%f8, %%f8 \n\t" /* Gt */ \ | |
66 "fpadd16 %%f16, %%f24, %%f24 \n\t" /* Gt */ \ | |
67 "fpadd16 %%f0, %%f4, %%f4 \n\t" /* R */ \ | |
68 "fpadd16 %%f16, %%f20, %%f20 \n\t" /* R */ \ | |
69 "fpadd16 %%f0, %%f6, %%f6 \n\t" /* B */ \ | |
70 "fpadd16 %%f16, %%f22, %%f22 \n\t" /* B */ \ | |
71 "fpadd16 %%f8, %%f2, %%f2 \n\t" /* G */ \ | |
72 "fpadd16 %%f24, %%f18, %%f18 \n\t" /* G */ \ | |
73 \ | |
74 "fpack16 %%f4, %%f4 \n\t" \ | |
75 "fpack16 %%f20, %%f20 \n\t" \ | |
76 "fpack16 %%f6, %%f6 \n\t" \ | |
77 "fpack16 %%f22, %%f22 \n\t" \ | |
78 "fpack16 %%f2, %%f2 \n\t" \ | |
79 "fpack16 %%f18, %%f18 \n\t" | |
80 | |
81 | |
82 | |
83 static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |
84 int srcSliceH, uint8_t* dst[], int dstStride[]){ | |
85 int y, out1, out2, out3, out4, out5, out6; | |
86 | |
87 for(y=0;y < srcSliceH;++y) { | |
27744 | 88 __asm__ volatile ( |
23805 | 89 YUV2RGB_INIT |
90 "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ | |
91 "1: \n\t" | |
92 "ldda [%1] %%asi, %%f2 \n\t" | |
93 "ldda [%1+2] %%asi, %%f18 \n\t" | |
94 "ldda [%2] %%asi, %%f4 \n\t" | |
95 "ldda [%2+2] %%asi, %%f20 \n\t" | |
96 "ld [%0], %%f0 \n\t" | |
97 "ld [%0+4], %%f16 \n\t" | |
98 "fpmerge %%f3, %%f3, %%f2 \n\t" | |
99 "fpmerge %%f19, %%f19, %%f18 \n\t" | |
100 "fpmerge %%f5, %%f5, %%f4 \n\t" | |
101 "fpmerge %%f21, %%f21, %%f20 \n\t" | |
102 YUV2RGB_KERNEL | |
103 "fzero %%f0 \n\t" | |
104 "fpmerge %%f4, %%f6, %%f8 \n\t" // r,b,t1 | |
105 "fpmerge %%f20, %%f22, %%f24 \n\t" // r,b,t1 | |
106 "fpmerge %%f0, %%f2, %%f10 \n\t" // 0,g,t2 | |
107 "fpmerge %%f0, %%f18, %%f26 \n\t" // 0,g,t2 | |
108 "fpmerge %%f10, %%f8, %%f4 \n\t" // t2,t1,msb | |
109 "fpmerge %%f26, %%f24, %%f20 \n\t" // t2,t1,msb | |
110 "fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb | |
111 "fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb | |
112 "std %%f4, [%3] \n\t" | |
113 "std %%f20, [%3+16] \n\t" | |
114 "std %%f6, [%3+8] \n\t" | |
115 "std %%f22, [%3+24] \n\t" | |
116 | |
117 "add %0, 8, %0 \n\t" | |
118 "add %1, 4, %1 \n\t" | |
119 "add %2, 4, %2 \n\t" | |
120 "subcc %4, 8, %4 \n\t" | |
121 "bne 1b \n\t" | |
122 "add %3, 32, %3 \n\t" //delay slot | |
123 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) | |
124 : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+((y+srcSliceY)>>1)*srcStride[1]), | |
125 "2" (src[2]+((y+srcSliceY)>>1)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), | |
126 "4" (c->dstW), | |
127 "5" (c->sparc_coeffs) | |
128 ); | |
129 } | |
130 | |
131 return srcSliceH; | |
132 } | |
133 | |
134 static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |
135 int srcSliceH, uint8_t* dst[], int dstStride[]){ | |
136 int y, out1, out2, out3, out4, out5, out6; | |
137 | |
138 for(y=0;y < srcSliceH;++y) { | |
27744 | 139 __asm__ volatile ( |
23805 | 140 YUV2RGB_INIT |
141 "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ | |
142 "1: \n\t" | |
143 "ldda [%1] %%asi, %%f2 \n\t" | |
144 "ldda [%1+2] %%asi, %%f18 \n\t" | |
145 "ldda [%2] %%asi, %%f4 \n\t" | |
146 "ldda [%2+2] %%asi, %%f20 \n\t" | |
147 "ld [%0], %%f0 \n\t" | |
148 "ld [%0+4], %%f16 \n\t" | |
149 "fpmerge %%f3, %%f3, %%f2 \n\t" | |
150 "fpmerge %%f19, %%f19, %%f18 \n\t" | |
151 "fpmerge %%f5, %%f5, %%f4 \n\t" | |
152 "fpmerge %%f21, %%f21, %%f20 \n\t" | |
153 YUV2RGB_KERNEL | |
154 "fzero %%f0 \n\t" | |
155 "fpmerge %%f4, %%f6, %%f8 \n\t" // r,b,t1 | |
156 "fpmerge %%f20, %%f22, %%f24 \n\t" // r,b,t1 | |
157 "fpmerge %%f0, %%f2, %%f10 \n\t" // 0,g,t2 | |
158 "fpmerge %%f0, %%f18, %%f26 \n\t" // 0,g,t2 | |
159 "fpmerge %%f10, %%f8, %%f4 \n\t" // t2,t1,msb | |
160 "fpmerge %%f26, %%f24, %%f20 \n\t" // t2,t1,msb | |
161 "fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb | |
162 "fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb | |
163 "std %%f4, [%3] \n\t" | |
164 "std %%f20, [%3+16] \n\t" | |
165 "std %%f6, [%3+8] \n\t" | |
166 "std %%f22, [%3+24] \n\t" | |
167 | |
168 "add %0, 8, %0 \n\t" | |
169 "add %1, 4, %1 \n\t" | |
170 "add %2, 4, %2 \n\t" | |
171 "subcc %4, 8, %4 \n\t" | |
172 "bne 1b \n\t" | |
173 "add %3, 32, %3 \n\t" //delay slot | |
174 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) | |
175 : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+(y+srcSliceY)*srcStride[1]), | |
176 "2" (src[2]+(y+srcSliceY)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), | |
177 "4" (c->dstW), | |
178 "5" (c->sparc_coeffs) | |
179 ); | |
180 } | |
181 | |
182 return srcSliceH; | |
183 } | |
184 | |
28461 | 185 SwsFunc sws_yuv2rgb_init_vis(SwsContext *c) { |
23805 | 186 c->sparc_coeffs[5]=c->yCoeff; |
187 c->sparc_coeffs[6]=c->vgCoeff; | |
188 c->sparc_coeffs[7]=c->vrCoeff; | |
189 c->sparc_coeffs[8]=c->ubCoeff; | |
190 c->sparc_coeffs[9]=c->ugCoeff; | |
191 | |
25750 | 192 c->sparc_coeffs[0]=(((int16_t)c->yOffset*(int16_t)c->yCoeff >>11) & 0xffff) * 0x0001000100010001ULL; |
23805 | 193 c->sparc_coeffs[1]=(((int16_t)c->uOffset*(int16_t)c->ubCoeff>>11) & 0xffff) * 0x0001000100010001ULL; |
194 c->sparc_coeffs[2]=(((int16_t)c->uOffset*(int16_t)c->ugCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
195 c->sparc_coeffs[3]=(((int16_t)c->vOffset*(int16_t)c->vgCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
196 c->sparc_coeffs[4]=(((int16_t)c->vOffset*(int16_t)c->vrCoeff>>11) & 0xffff) * 0x0001000100010001ULL; | |
197 | |
198 if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV422P && (c->dstW & 7)==0) { | |
199 av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV422P -> RGB32\n"); | |
200 return vis_422P_ARGB32; | |
201 } | |
202 else if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV420P && (c->dstW & 7)==0) { | |
203 av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV420P -> RGB32\n"); | |
204 return vis_420P_ARGB32; | |
205 } | |
206 return NULL; | |
207 } |