Mercurial > mplayer.hg
annotate tremor/asm_arm.h @ 30953:d3f31670562d
Share more code between the two ATI fragment shader YUV to RGB
conversion methods and extend them to support more accurate
conversion (though at the cost of some speed).
author | reimar |
---|---|
date | Sun, 04 Apr 2010 11:45:05 +0000 |
parents | e83eef58b30a |
children |
rev | line source |
---|---|
14280 | 1 /******************************************************************** |
2 * * | |
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. * | |
4 * * | |
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * | |
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * | |
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * | |
8 * * | |
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 * | |
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ * | |
11 * * | |
12 ******************************************************************** | |
13 | |
14 function: arm7 and later wide math functions | |
15 | |
16 ********************************************************************/ | |
17 | |
18 #ifdef _ARM_ASSEM_ | |
19 | |
20 #if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_) | |
21 #define _V_WIDE_MATH | |
22 | |
23 static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { | |
24 int lo,hi; | |
25 asm volatile("smull\t%0, %1, %2, %3" | |
26 : "=&r"(lo),"=&r"(hi) | |
27 : "%r"(x),"r"(y) | |
28 : "cc"); | |
29 return(hi); | |
30 } | |
31 | |
32 static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { | |
33 return MULT32(x,y)<<1; | |
34 } | |
35 | |
36 static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { | |
37 int lo,hi; | |
38 asm volatile("smull %0, %1, %2, %3\n\t" | |
39 "movs %0, %0, lsr #15\n\t" | |
40 "adc %1, %0, %1, lsl #17\n\t" | |
41 : "=&r"(lo),"=&r"(hi) | |
42 : "%r"(x),"r"(y) | |
43 : "cc"); | |
44 return(hi); | |
45 } | |
46 | |
/*
 * Compiler-level memory barrier: an empty asm statement with a "memory"
 * clobber.  It emits no instructions; it only stops the compiler from
 * moving memory accesses across it.
 *
 * Spelled with __asm__/__volatile__ because the bare `asm` keyword is a GNU
 * extension that is not recognised in strict ISO modes (-std=c99, -std=c11).
 */
#define MB() __asm__ __volatile__("" : : : "memory")
48 | |
49 static inline void XPROD32(ogg_int32_t a, ogg_int32_t b, | |
50 ogg_int32_t t, ogg_int32_t v, | |
51 ogg_int32_t *x, ogg_int32_t *y) | |
52 { | |
53 int x1, y1, l; | |
54 asm( "smull %0, %1, %4, %6\n\t" | |
55 "smlal %0, %1, %5, %7\n\t" | |
56 "rsb %3, %4, #0\n\t" | |
57 "smull %0, %2, %5, %6\n\t" | |
58 "smlal %0, %2, %3, %7" | |
59 : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a) | |
60 : "3" (a), "r" (b), "r" (t), "r" (v) | |
61 : "cc" ); | |
62 *x = x1; | |
63 MB(); | |
64 *y = y1; | |
65 } | |
66 | |
67 static inline void XPROD31(ogg_int32_t a, ogg_int32_t b, | |
68 ogg_int32_t t, ogg_int32_t v, | |
69 ogg_int32_t *x, ogg_int32_t *y) | |
70 { | |
71 int x1, y1, l; | |
72 asm( "smull %0, %1, %4, %6\n\t" | |
73 "smlal %0, %1, %5, %7\n\t" | |
74 "rsb %3, %4, #0\n\t" | |
75 "smull %0, %2, %5, %6\n\t" | |
76 "smlal %0, %2, %3, %7" | |
77 : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a) | |
78 : "3" (a), "r" (b), "r" (t), "r" (v) | |
79 : "cc" ); | |
80 *x = x1 << 1; | |
81 MB(); | |
82 *y = y1 << 1; | |
83 } | |
84 | |
85 static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, | |
86 ogg_int32_t t, ogg_int32_t v, | |
87 ogg_int32_t *x, ogg_int32_t *y) | |
88 { | |
89 int x1, y1, l; | |
90 asm( "rsb %2, %4, #0\n\t" | |
91 "smull %0, %1, %3, %5\n\t" | |
92 "smlal %0, %1, %2, %6\n\t" | |
93 "smull %0, %2, %4, %5\n\t" | |
94 "smlal %0, %2, %3, %6" | |
95 : "=&r" (l), "=&r" (x1), "=&r" (y1) | |
96 : "r" (a), "r" (b), "r" (t), "r" (v) | |
97 : "cc" ); | |
98 *x = x1 << 1; | |
99 MB(); | |
100 *y = y1 << 1; | |
101 } | |
102 | |
103 #endif | |
104 | |
105 #ifndef _V_CLIP_MATH | |
106 #define _V_CLIP_MATH | |
107 | |
108 static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) { | |
109 int tmp; | |
110 asm volatile("subs %1, %0, #32768\n\t" | |
111 "movpl %0, #0x7f00\n\t" | |
112 "orrpl %0, %0, #0xff\n" | |
113 "adds %1, %0, #32768\n\t" | |
114 "movmi %0, #0x8000" | |
115 : "+r"(x),"=r"(tmp) | |
116 : | |
117 : "cc"); | |
118 return(x); | |
119 } | |
120 | |
121 #endif | |
122 | |
123 #ifndef _V_LSP_MATH_ASM | |
124 #define _V_LSP_MATH_ASM | |
125 | |
126 static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip, | |
127 ogg_int32_t *qexpp, | |
128 ogg_int32_t *ilsp,ogg_int32_t wi, | |
129 ogg_int32_t m){ | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
130 |
14280 | 131 ogg_uint32_t qi=*qip,pi=*pip; |
132 ogg_int32_t qexp=*qexpp; | |
133 | |
134 asm("mov r0,%3;" | |
135 "mov r1,%5,asr#1;" | |
136 "add r0,r0,r1,lsl#3;" | |
137 "1:" | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
138 |
14280 | 139 "ldmdb r0!,{r1,r3};" |
140 "subs r1,r1,%4;" //ilsp[j]-wi | |
141 "rsbmi r1,r1,#0;" //labs(ilsp[j]-wi) | |
142 "umull %0,r2,r1,%0;" //qi*=labs(ilsp[j]-wi) | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
143 |
14280 | 144 "subs r1,r3,%4;" //ilsp[j+1]-wi |
145 "rsbmi r1,r1,#0;" //labs(ilsp[j+1]-wi) | |
146 "umull %1,r3,r1,%1;" //pi*=labs(ilsp[j+1]-wi) | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
147 |
14280 | 148 "cmn r2,r3;" // shift down 16? |
149 "beq 0f;" | |
150 "add %2,%2,#16;" | |
151 "mov %0,%0,lsr #16;" | |
152 "orr %0,%0,r2,lsl #16;" | |
153 "mov %1,%1,lsr #16;" | |
154 "orr %1,%1,r3,lsl #16;" | |
155 "0:" | |
156 "cmp r0,%3;\n" | |
157 "bhi 1b;\n" | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
158 |
14280 | 159 // odd filter assymetry |
160 "ands r0,%5,#1;\n" | |
161 "beq 2f;\n" | |
162 "add r0,%3,%5,lsl#2;\n" | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
163 |
14280 | 164 "ldr r1,[r0,#-4];\n" |
165 "mov r0,#0x4000;\n" | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
166 |
14280 | 167 "subs r1,r1,%4;\n" //ilsp[j]-wi |
168 "rsbmi r1,r1,#0;\n" //labs(ilsp[j]-wi) | |
169 "umull %0,r2,r1,%0;\n" //qi*=labs(ilsp[j]-wi) | |
170 "umull %1,r3,r0,%1;\n" //pi*=labs(ilsp[j+1]-wi) | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
171 |
14280 | 172 "cmn r2,r3;\n" // shift down 16? |
173 "beq 2f;\n" | |
174 "add %2,%2,#16;\n" | |
175 "mov %0,%0,lsr #16;\n" | |
176 "orr %0,%0,r2,lsl #16;\n" | |
177 "mov %1,%1,lsr #16;\n" | |
178 "orr %1,%1,r3,lsl #16;\n" | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
179 |
14280 | 180 //qi=(pi>>shift)*labs(ilsp[j]-wi); |
181 //pi=(qi>>shift)*labs(ilsp[j+1]-wi); | |
182 //qexp+=shift; | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
183 |
14280 | 184 //} |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
185 |
14280 | 186 /* normalize to max 16 sig figs */ |
187 "2:" | |
188 "mov r2,#0;" | |
189 "orr r1,%0,%1;" | |
190 "tst r1,#0xff000000;" | |
191 "addne r2,r2,#8;" | |
192 "movne r1,r1,lsr #8;" | |
193 "tst r1,#0x00f00000;" | |
194 "addne r2,r2,#4;" | |
195 "movne r1,r1,lsr #4;" | |
196 "tst r1,#0x000c0000;" | |
197 "addne r2,r2,#2;" | |
198 "movne r1,r1,lsr #2;" | |
199 "tst r1,#0x00020000;" | |
200 "addne r2,r2,#1;" | |
201 "movne r1,r1,lsr #1;" | |
202 "tst r1,#0x00010000;" | |
203 "addne r2,r2,#1;" | |
204 "mov %0,%0,lsr r2;" | |
205 "mov %1,%1,lsr r2;" | |
206 "add %2,%2,r2;" | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
207 |
14280 | 208 : "+r"(qi),"+r"(pi),"+r"(qexp) |
209 : "r"(ilsp),"r"(wi),"r"(m) | |
210 : "r0","r1","r2","r3","cc"); | |
29264
e83eef58b30a
Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents:
14280
diff
changeset
|
211 |
14280 | 212 *qip=qi; |
213 *pip=pi; | |
214 *qexpp=qexp; | |
215 } | |
216 | |
217 static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){ | |
218 | |
219 ogg_uint32_t qi=*qip; | |
220 ogg_int32_t qexp=*qexpp; | |
221 | |
222 asm("tst %0,#0x0000ff00;" | |
223 "moveq %0,%0,lsl #8;" | |
224 "subeq %1,%1,#8;" | |
225 "tst %0,#0x0000f000;" | |
226 "moveq %0,%0,lsl #4;" | |
227 "subeq %1,%1,#4;" | |
228 "tst %0,#0x0000c000;" | |
229 "moveq %0,%0,lsl #2;" | |
230 "subeq %1,%1,#2;" | |
231 "tst %0,#0x00008000;" | |
232 "moveq %0,%0,lsl #1;" | |
233 "subeq %1,%1,#1;" | |
234 : "+r"(qi),"+r"(qexp) | |
235 : | |
236 : "cc"); | |
237 *qip=qi; | |
238 *qexpp=qexp; | |
239 } | |
240 | |
241 #endif | |
242 #endif | |
243 |