comparison ppc/dsputil_altivec.c @ 828:ace3ccd18dd2 libavcodec

Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
author michaelni
date Sat, 02 Nov 2002 11:28:08 +0000
parents 0012f75c92bb
children 6ea69518e5f7
comparison
equal deleted inserted replaced
827:770578c6c300 828:ace3ccd18dd2
1 /*
2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
1 #include "../dsputil.h" 20 #include "../dsputil.h"
21 #include "dsputil_altivec.h"
2 22
3 #if CONFIG_DARWIN 23 #if CONFIG_DARWIN
4 #include <sys/sysctl.h> 24 #include <sys/sysctl.h>
5 #endif 25 #endif
6
7 int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
8 int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
9 int pix_sum_altivec(UINT8 * pix, int line_size);
10
11 int has_altivec(void);
12 26
13 int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) 27 int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
14 { 28 {
15 int i, s; 29 int i, s;
16 vector unsigned char perm1, perm2, *pix1v, *pix2v; 30 vector unsigned char perm1, perm2, *pix1v, *pix2v;
125 vec_ste(sumdiffs, 0, &s); 139 vec_ste(sumdiffs, 0, &s);
126 140
127 return s; 141 return s;
128 } 142 }
129 143
/*
 * Copy 8 rows of 8 pixels into block, widening each byte to a 16-bit value.
 *
 * block     - destination; written with vec_st, one 16-byte vector per row
 *             at byte offset i*16, so it is assumed to be 16-byte aligned.
 * pixels    - first source row; may be unaligned.
 * line_size - distance in bytes from one source row to the next.
 *
 * NOTE(review): pixv[1] is loaded on every row, even when pixels is already
 * aligned, so memory beyond the 8 wanted bytes is read -- confirm that the
 * source buffers tolerate this.
 */
144 void get_pixels_altivec(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
145 {
146 int i;
147 vector unsigned char perm, bytes, *pixv;
148 vector unsigned char zero = (vector unsigned char) (0);
149 vector signed short shorts;
150
151 for(i=0;i<8;i++)
152 {
153 // Read potentially unaligned pixels: vec_lvsl builds the permute mask
154 // for this address and vec_perm picks 16 bytes out of the two vectors
155 // around it. Only the first 8 bytes are wanted; the extras are ignored.
156 perm = vec_lvsl(0, pixels);
157 pixv = (vector unsigned char *) pixels;
158 bytes = vec_perm(pixv[0], pixv[1], perm);
159
160 // convert the bytes into shorts by interleaving a zero byte before each of the first 8 bytes
161 shorts = (vector signed short)vec_mergeh(zero, bytes);
162
163 // save row i to the block at byte offset i*16; we assume the block is 16-byte aligned
164 vec_st(shorts, i*16, (vector signed short*)block);
165
166 pixels += line_size;
167 }
168 }
169
/*
 * Store the element-wise difference s1 - s2 of two 8x8 pixel areas into
 * block as 16-bit values.
 *
 * block  - destination; one 16-byte vector is stored per row with vec_st,
 *          so it is assumed to be 16-byte aligned.
 * s1, s2 - first rows of the two sources; may be unaligned.
 * stride - distance in bytes between rows, applied to both s1 and s2.
 *
 * The loop runs 4 times and handles two rows per iteration (manual unroll),
 * for 8 rows in total.
 */
170 void diff_pixels_altivec(DCTELEM *restrict block, const UINT8 *s1,
171 const UINT8 *s2, int stride)
172 {
173 int i;
174 vector unsigned char perm, bytes, *pixv;
175 vector unsigned char zero = (vector unsigned char) (0);
176 vector signed short shorts1, shorts2;
177
178 for(i=0;i<4;i++)
179 {
180 // Read potentially unaligned pixels from s1: vec_lvsl builds the permute
181 // mask and vec_perm picks 16 bytes out of the two vectors around s1.
182 // Only the first 8 bytes are wanted; the extras are ignored.
183 perm = vec_lvsl(0, s1);
184 pixv = (vector unsigned char *) s1;
185 bytes = vec_perm(pixv[0], pixv[1], perm);
186
187 // convert the bytes into shorts by interleaving a zero byte before each of the first 8 bytes
188 shorts1 = (vector signed short)vec_mergeh(zero, bytes);
189
190 // Do the same for the second block of pixels (s2)
191 perm = vec_lvsl(0, s2);
192 pixv = (vector unsigned char *) s2;
193 bytes = vec_perm(pixv[0], pixv[1], perm);
194
195 // convert the bytes into shorts
196 shorts2 = (vector signed short)vec_mergeh(zero, bytes);
197
198 // Do the subtraction: s1 row minus s2 row, per 16-bit element
199 shorts1 = vec_sub(shorts1, shorts2);
200
201 // save the data to the block, we assume the block is 16-byte aligned
202 vec_st(shorts1, 0, (vector signed short*)block);
203
204 s1 += stride;
205 s2 += stride;
206 block += 8; // next output row: 8 DCTELEMs (presumably 16 bytes, matching the vector store -- confirm DCTELEM is 16-bit)
207
208
209 // The code below is a copy of the code above, processing the next row.
210 // This is a manual unroll.
211
212 // Read potentially unaligned pixels from s1.
213 // We're reading 16 pixels, and actually only want 8,
214 // but we simply ignore the extras.
215 perm = vec_lvsl(0, s1);
216 pixv = (vector unsigned char *) s1;
217 bytes = vec_perm(pixv[0], pixv[1], perm);
218
219 // convert the bytes into shorts
220 shorts1 = (vector signed short)vec_mergeh(zero, bytes);
221
222 // Do the same for the second block of pixels (s2)
223 perm = vec_lvsl(0, s2);
224 pixv = (vector unsigned char *) s2;
225 bytes = vec_perm(pixv[0], pixv[1], perm);
226
227 // convert the bytes into shorts
228 shorts2 = (vector signed short)vec_mergeh(zero, bytes);
229
230 // Do the subtraction: s1 row minus s2 row, per 16-bit element
231 shorts1 = vec_sub(shorts1, shorts2);
232
233 // save the data to the block, we assume the block is 16-byte aligned
234 vec_st(shorts1, 0, (vector signed short*)block);
235
236 s1 += stride;
237 s2 += stride;
238 block += 8; // advance to the next output row, as above
239 }
240 }
241
242
130 int has_altivec(void) 243 int has_altivec(void)
131 { 244 {
132 #if CONFIG_DARWIN 245 #if CONFIG_DARWIN
133 int sels[2] = {CTL_HW, HW_VECTORUNIT}; 246 int sels[2] = {CTL_HW, HW_VECTORUNIT};
134 int has_vu = 0; 247 int has_vu = 0;
139 252
140 if (err == 0) return (has_vu != 0); 253 if (err == 0) return (has_vu != 0);
141 #endif 254 #endif
142 return 0; 255 return 0;
143 } 256 }
257