Mercurial > libavcodec.hg
comparison ppc/dsputil_altivec.c @ 828:ace3ccd18dd2 libavcodec
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
author | michaelni |
---|---|
date | Sat, 02 Nov 2002 11:28:08 +0000 |
parents | 0012f75c92bb |
children | 6ea69518e5f7 |
comparison
equal
deleted
inserted
replaced
827:770578c6c300 | 828:ace3ccd18dd2 |
---|---|
1 /* | |
2 * Copyright (c) 2002 Brian Foley | |
3 * Copyright (c) 2002 Dieter Shirley | |
4 * | |
5 * This library is free software; you can redistribute it and/or | |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
9 * | |
10 * This library is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 * Lesser General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU Lesser General Public | |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 */ | |
19 | |
1 #include "../dsputil.h" | 20 #include "../dsputil.h" |
21 #include "dsputil_altivec.h" | |
2 | 22 |
3 #if CONFIG_DARWIN | 23 #if CONFIG_DARWIN |
4 #include <sys/sysctl.h> | 24 #include <sys/sysctl.h> |
5 #endif | 25 #endif |
6 | |
7 int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); | |
8 int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); | |
9 int pix_sum_altivec(UINT8 * pix, int line_size); | |
10 | |
11 int has_altivec(void); | |
12 | 26 |
13 int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) | 27 int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) |
14 { | 28 { |
15 int i, s; | 29 int i, s; |
16 vector unsigned char perm1, perm2, *pix1v, *pix2v; | 30 vector unsigned char perm1, perm2, *pix1v, *pix2v; |
125 vec_ste(sumdiffs, 0, &s); | 139 vec_ste(sumdiffs, 0, &s); |
126 | 140 |
127 return s; | 141 return s; |
128 } | 142 } |
129 | 143 |
// get_pixels_altivec: copy an 8x8 block of 8-bit pixels into block, widening each pixel to 16 bits. | |
// block:     destination; row i is written with a single 16-byte vec_st at byte offset i*16. | |
// pixels:    first source row; may be unaligned (handled with vec_lvsl/vec_perm below). | |
// line_size: number of bytes added to pixels after each row. | |
// NOTE(review): every row loads pixv[0] and pixv[1] (two 16-byte vectors), so bytes beyond the | |
// 8 wanted pixels are read -- confirm the source buffers tolerate that over-read. | |
// NOTE(review): storing 8 shorts per row presumes DCTELEM is a 16-bit type -- confirm. | |
144 void get_pixels_altivec(DCTELEM *restrict block, const UINT8 *pixels, int line_size) | |
145 { | |
146 int i; | |
147 vector unsigned char perm, bytes, *pixv; | |
148 vector unsigned char zero = (vector unsigned char) (0); | |
149 vector signed short shorts; | |
150 | |
// One iteration per row, 8 rows in total. | |
151 for(i=0;i<8;i++) | |
152 { | |
153 // Read potentially unaligned pixels. | |
154 // We're reading 16 pixels, and actually only want 8, | |
155 // but we simply ignore the extras. | |
// vec_lvsl derives the permute mask from the low address bits of pixels; vec_perm then | |
// selects the 16 bytes starting at pixels out of the two aligned vectors. | |
156 perm = vec_lvsl(0, pixels); | |
157 pixv = (vector unsigned char *) pixels; | |
158 bytes = vec_perm(pixv[0], pixv[1], perm); | |
159 | |
160 // convert the bytes into shorts | |
// vec_mergeh(zero, bytes) interleaves a zero byte ahead of each of the first 8 pixels, | |
// which zero-extends them to 16-bit elements; the upper 8 bytes of bytes are dropped. | |
161 shorts = (vector signed short)vec_mergeh(zero, bytes); | |
162 | |
163 // save the data to the block, we assume the block is 16-byte aligned | |
164 vec_st(shorts, i*16, (vector signed short*)block); | |
165 | |
// Step to the next source row. | |
166 pixels += line_size; | |
167 } | |
168 } | |
169 | |
// diff_pixels_altivec: for 8 rows of 8 pixels, store the per-pixel difference s1 - s2 into block | |
// as 16-bit values. | |
// block:  destination; advanced by 8 elements after each row, one 16-byte vec_st per row. | |
// s1, s2: first rows of the two sources; either may be unaligned. | |
// stride: number of bytes added to both s1 and s2 after each row. | |
// The loop runs 4 times and its body handles two rows (manual unroll), giving 8 rows in total. | |
// NOTE(review): each row loads pixv[0] and pixv[1] from both sources, so bytes beyond the | |
// 8 wanted pixels are read -- confirm the source buffers tolerate that over-read. | |
// NOTE(review): "block += 8" paired with a 16-byte store presumes DCTELEM is 16 bits -- confirm. | |
170 void diff_pixels_altivec(DCTELEM *restrict block, const UINT8 *s1, | |
171 const UINT8 *s2, int stride) | |
172 { | |
173 int i; | |
174 vector unsigned char perm, bytes, *pixv; | |
175 vector unsigned char zero = (vector unsigned char) (0); | |
176 vector signed short shorts1, shorts2; | |
177 | |
178 for(i=0;i<4;i++) | |
179 { | |
180 // Read potentially unaligned pixels | |
181 // We're reading 16 pixels, and actually only want 8, | |
182 // but we simply ignore the extras. | |
183 perm = vec_lvsl(0, s1); | |
184 pixv = (vector unsigned char *) s1; | |
185 bytes = vec_perm(pixv[0], pixv[1], perm); | |
186 | |
187 // convert the bytes into shorts | |
// Interleaving with zero widens the first 8 pixels to 16-bit elements with a zero high byte. | |
188 shorts1 = (vector signed short)vec_mergeh(zero, bytes); | |
189 | |
190 // Do the same for the second block of pixels | |
191 perm = vec_lvsl(0, s2); | |
192 pixv = (vector unsigned char *) s2; | |
193 bytes = vec_perm(pixv[0], pixv[1], perm); | |
194 | |
195 // convert the bytes into shorts | |
196 shorts2 = (vector signed short)vec_mergeh(zero, bytes); | |
197 | |
198 // Do the subtraction | |
// Both operands were zero-extended from 8 bits, so each 16-bit difference lies in [-255, 255]. | |
199 shorts1 = vec_sub(shorts1, shorts2); | |
200 | |
201 // save the data to the block, we assume the block is 16-byte aligned | |
202 vec_st(shorts1, 0, (vector signed short*)block); | |
203 | |
// Advance both sources by one row and the destination by the 8 values just written. | |
204 s1 += stride; | |
205 s2 += stride; | |
206 block += 8; | |
207 | |
208 | |
209 // The code below is a copy of the code above... This is a manual | |
210 // unroll. | |
211 | |
212 // Read potentially unaligned pixels | |
213 // We're reading 16 pixels, and actually only want 8, | |
214 // but we simply ignore the extras. | |
215 perm = vec_lvsl(0, s1); | |
216 pixv = (vector unsigned char *) s1; | |
217 bytes = vec_perm(pixv[0], pixv[1], perm); | |
218 | |
219 // convert the bytes into shorts | |
220 shorts1 = (vector signed short)vec_mergeh(zero, bytes); | |
221 | |
222 // Do the same for the second block of pixels | |
223 perm = vec_lvsl(0, s2); | |
224 pixv = (vector unsigned char *) s2; | |
225 bytes = vec_perm(pixv[0], pixv[1], perm); | |
226 | |
227 // convert the bytes into shorts | |
228 shorts2 = (vector signed short)vec_mergeh(zero, bytes); | |
229 | |
230 // Do the subtraction | |
231 shorts1 = vec_sub(shorts1, shorts2); | |
232 | |
233 // save the data to the block, we assume the block is 16-byte aligned | |
234 vec_st(shorts1, 0, (vector signed short*)block); | |
235 | |
236 s1 += stride; | |
237 s2 += stride; | |
238 block += 8; | |
239 } | |
240 } | |
241 | |
242 | |
130 int has_altivec(void) | 243 int has_altivec(void) |
131 { | 244 { |
132 #if CONFIG_DARWIN | 245 #if CONFIG_DARWIN |
133 int sels[2] = {CTL_HW, HW_VECTORUNIT}; | 246 int sels[2] = {CTL_HW, HW_VECTORUNIT}; |
134 int has_vu = 0; | 247 int has_vu = 0; |
139 | 252 |
140 if (err == 0) return (has_vu != 0); | 253 if (err == 0) return (has_vu != 0); |
141 #endif | 254 #endif |
142 return 0; | 255 return 0; |
143 } | 256 } |
257 |