annotate ppc/dsputil_altivec.c @ 995:edc10966b081 libavcodec

altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
author michaelni
date Sat, 11 Jan 2003 20:51:03 +0000
parents 8bec850dc9c7
children 3b7cc8e4b83f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
1 /*
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
2 * Copyright (c) 2002 Brian Foley
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
3 * Copyright (c) 2002 Dieter Shirley
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
4 *
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
5 * This library is free software; you can redistribute it and/or
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
6 * modify it under the terms of the GNU Lesser General Public
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
7 * License as published by the Free Software Foundation; either
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
8 * version 2 of the License, or (at your option) any later version.
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
9 *
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
10 * This library is distributed in the hope that it will be useful,
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
13 * Lesser General Public License for more details.
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
14 *
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
15 * You should have received a copy of the GNU Lesser General Public
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
16 * License along with this library; if not, write to the Free Software
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
18 */
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
19
623
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
20 #include "../dsputil.h"
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
21 #include "dsputil_altivec.h"
623
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
22
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
23 #if CONFIG_DARWIN
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
24 #include <sys/sysctl.h>
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
25 #endif
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
26
878
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
27 int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
28 {
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
29 int i;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
30 int s __attribute__((aligned(16)));
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
31 const vector unsigned char zero = (const vector unsigned char)(0);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
32 vector unsigned char *tv;
878
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
33 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
34 vector unsigned int sad;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
35 vector signed int sumdiffs;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
36
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
37 s = 0;
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
38 sad = (vector unsigned int)(0);
878
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
39 for(i=0;i<16;i++) {
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
40 /*
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
41 Read unaligned pixels into our vectors. The vectors are as follows:
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
42 pix1v: pix1[0]-pix1[15]
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
43 pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
44 */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
45 tv = (vector unsigned char *) pix1;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
46 pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
47
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
48 tv = (vector unsigned char *) &pix2[0];
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
49 pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
50
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
51 tv = (vector unsigned char *) &pix2[1];
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
52 pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1]));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
53
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
54 /* Calculate the average vector */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
55 avgv = vec_avg(pix2v, pix2iv);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
56
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
57 /* Calculate a sum of abs differences vector */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
58 t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
59
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
60 /* Add each 4 pixel group together and put 4 results into sad */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
61 sad = vec_sum4s(t5, sad);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
62
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
63 pix1 += line_size;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
64 pix2 += line_size;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
65 }
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
66 /* Sum up the four partial sums, and put the result into s */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
67 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
68 sumdiffs = vec_splat(sumdiffs, 3);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
69 vec_ste(sumdiffs, 0, &s);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
70
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
71 return s;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
72 }
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
73
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
74 int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
75 {
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
76 int i;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
77 int s __attribute__((aligned(16)));
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
78 const vector unsigned char zero = (const vector unsigned char)(0);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
79 vector unsigned char *tv;
878
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
80 vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
81 vector unsigned int sad;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
82 vector signed int sumdiffs;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
83 uint8_t *pix3 = pix2 + line_size;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
84
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
85 s = 0;
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
86 sad = (vector unsigned int)(0);
878
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
87
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
88 /*
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
89 Due to the fact that pix3 = pix2 + line_size, the pix3 of one
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
90 iteration becomes pix2 in the next iteration. We can use this
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
91 fact to avoid a potentially expensive unaligned read, each
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
92 time around the loop.
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
93 Read unaligned pixels into our vectors. The vectors are as follows:
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
94 pix2v: pix2[0]-pix2[15]
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
95 Split the pixel vectors into shorts
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
96 */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
97 tv = (vector unsigned char *) &pix2[0];
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
98 pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
99
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
100 for(i=0;i<16;i++) {
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
101 /*
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
102 Read unaligned pixels into our vectors. The vectors are as follows:
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
103 pix1v: pix1[0]-pix1[15]
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
104 pix3v: pix3[0]-pix3[15]
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
105 */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
106 tv = (vector unsigned char *) pix1;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
107 pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
108
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
109 tv = (vector unsigned char *) &pix3[0];
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
110 pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0]));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
111
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
112 /* Calculate the average vector */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
113 avgv = vec_avg(pix2v, pix3v);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
114
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
115 /* Calculate a sum of abs differences vector */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
116 t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
117
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
118 /* Add each 4 pixel group together and put 4 results into sad */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
119 sad = vec_sum4s(t5, sad);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
120
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
121 pix1 += line_size;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
122 pix2v = pix3v;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
123 pix3 += line_size;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
124
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
125 }
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
126
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
127 /* Sum up the four partial sums, and put the result into s */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
128 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
129 sumdiffs = vec_splat(sumdiffs, 3);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
130 vec_ste(sumdiffs, 0, &s);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
131 return s;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
132 }
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
133
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
134 int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
135 {
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
136 int i;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
137 int s __attribute__((aligned(16)));
878
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
138 uint8_t *pix3 = pix2 + line_size;
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
139 const vector unsigned char zero = (const vector unsigned char)(0);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
140 const vector unsigned short two = (const vector unsigned short)(2);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
141 vector unsigned char *tv, avgv, t5;
878
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
142 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
143 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
144 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
145 vector unsigned short avghv, avglv;
878
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
146 vector unsigned short t1, t2, t3, t4;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
147 vector unsigned int sad;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
148 vector signed int sumdiffs;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
149
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
150 sad = (vector unsigned int)(0);
878
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
151
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
152 s = 0;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
153
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
154 /*
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
155 Due to the fact that pix3 = pix2 + line_size, the pix3 of one
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
156 iteration becomes pix2 in the next iteration. We can use this
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
157 fact to avoid a potentially expensive unaligned read, as well
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
158 as some splitting, and vector addition each time around the loop.
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
159 Read unaligned pixels into our vectors. The vectors are as follows:
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
160 pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
161 Split the pixel vectors into shorts
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
162 */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
163 tv = (vector unsigned char *) &pix2[0];
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
164 pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
165
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
166 tv = (vector unsigned char *) &pix2[1];
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
167 pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1]));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
168
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
169 pix2hv = (vector unsigned short) vec_mergeh(zero, pix2v);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
170 pix2lv = (vector unsigned short) vec_mergel(zero, pix2v);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
171 pix2ihv = (vector unsigned short) vec_mergeh(zero, pix2iv);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
172 pix2ilv = (vector unsigned short) vec_mergel(zero, pix2iv);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
173 t1 = vec_add(pix2hv, pix2ihv);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
174 t2 = vec_add(pix2lv, pix2ilv);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
175
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
176 for(i=0;i<16;i++) {
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
177 /*
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
178 Read unaligned pixels into our vectors. The vectors are as follows:
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
179 pix1v: pix1[0]-pix1[15]
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
180 pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16]
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
181 */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
182 tv = (vector unsigned char *) pix1;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
183 pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
184
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
185 tv = (vector unsigned char *) &pix3[0];
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
186 pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0]));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
187
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
188 tv = (vector unsigned char *) &pix3[1];
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
189 pix3iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[1]));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
190
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
191 /*
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
192 Note that Altivec does have vec_avg, but this works on vector pairs
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
193 and rounds up. We could do avg(avg(a,b),avg(c,d)), but the rounding
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
194 would mean that, for example, avg(3,0,0,1) = 2, when it should be 1.
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
195 Instead, we have to split the pixel vectors into vectors of shorts,
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
196 and do the averaging by hand.
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
197 */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
198
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
199 /* Split the pixel vectors into shorts */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
200 pix3hv = (vector unsigned short) vec_mergeh(zero, pix3v);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
201 pix3lv = (vector unsigned short) vec_mergel(zero, pix3v);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
202 pix3ihv = (vector unsigned short) vec_mergeh(zero, pix3iv);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
203 pix3ilv = (vector unsigned short) vec_mergel(zero, pix3iv);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
204
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
205 /* Do the averaging on them */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
206 t3 = vec_add(pix3hv, pix3ihv);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
207 t4 = vec_add(pix3lv, pix3ilv);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
208
884
2cef5c4c0ca6 * altivec and pix_norm patch by Brian Foley
kabi
parents: 878
diff changeset
209 avghv = vec_sr(vec_add(vec_add(t1, t3), two), two);
2cef5c4c0ca6 * altivec and pix_norm patch by Brian Foley
kabi
parents: 878
diff changeset
210 avglv = vec_sr(vec_add(vec_add(t2, t4), two), two);
878
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
211
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
212 /* Pack the shorts back into a result */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
213 avgv = vec_pack(avghv, avglv);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
214
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
215 /* Calculate a sum of abs differences vector */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
216 t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
217
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
218 /* Add each 4 pixel group together and put 4 results into sad */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
219 sad = vec_sum4s(t5, sad);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
220
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
221 pix1 += line_size;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
222 pix3 += line_size;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
223 /* Transfer the calculated values for pix3 into pix2 */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
224 t1 = t3;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
225 t2 = t4;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
226 }
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
227 /* Sum up the four partial sums, and put the result into s */
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
228 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
229 sumdiffs = vec_splat(sumdiffs, 3);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
230 vec_ste(sumdiffs, 0, &s);
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
231
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
232 return s;
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
233 }
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
234
623
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
/**
 * Sum of absolute differences (SAD) between two 16x16 pixel blocks.
 * AltiVec-enhanced.
 *
 * @param pix1      first pixel of the first block (any alignment)
 * @param pix2      first pixel of the second block (any alignment)
 * @param line_size offset in bytes from one row to the next, applied to
 *                  both pix1 and pix2
 * @return the sum of |pix1 - pix2| over all 256 pixel pairs
 */
int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int row;
    int s __attribute__((aligned(16)));
    const vector unsigned int zero = (const vector unsigned int)(0);
    vector unsigned int sad = (vector unsigned int)(0);
    vector signed int sumdiffs;

    for (row = 0; row < 16; row++) {
        vector unsigned char a, b, absdiff;

        /* Realign the potentially unaligned rows.  vec_ld() truncates the
           effective address to a 16-byte boundary, so no misaligned vector
           pointer is ever dereferenced.  The second load uses offset 15
           (the last byte we need) rather than 16: for an aligned row it
           simply reloads the first line instead of touching the 16 bytes
           after the row, and for an unaligned row it fetches the same
           line that offset 16 would. */
        a = vec_perm(vec_ld(0, pix1), vec_ld(15, pix1), vec_lvsl(0, pix1));
        b = vec_perm(vec_ld(0, pix2), vec_ld(15, pix2), vec_lvsl(0, pix2));

        /* |a - b| on unsigned bytes, computed as max - min */
        absdiff = vec_sub(vec_max(a, b), vec_min(a, b));

        /* Add each group of 4 differences into one of the 4 partial sums */
        sad = vec_sum4s(absdiff, sad);

        pix1 += line_size;
        pix2 += line_size;
    }

    /* Fold the four partial sums into element 3, then replicate it so that
       whichever element vec_ste() picks for &s holds the total */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
276
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
/**
 * Sum of absolute differences (SAD) between two 8x8 pixel blocks.
 * AltiVec-enhanced.
 *
 * @param pix1      first pixel of the first block (any alignment)
 * @param pix2      first pixel of the second block (any alignment)
 * @param line_size offset in bytes from one row to the next, applied to
 *                  both pix1 and pix2
 * @return the sum of |pix1 - pix2| over all 64 pixel pairs
 */
int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int row;
    int s __attribute__((aligned(16)));
    const vector unsigned int zero = (const vector unsigned int)(0);
    /* Keeps lanes 0..7 (the wanted pixels) and zeroes lanes 8..15 */
    const vector unsigned char left8 =
        (vector unsigned char) (255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
    vector unsigned int sad = (vector unsigned int)(0);
    vector signed int sumdiffs;

    for (row = 0; row < 8; row++) {
        vector unsigned char a, b, absdiff;

        /* Realign the potentially unaligned rows.  vec_ld() truncates the
           effective address to a 16-byte boundary, so no misaligned vector
           pointer is ever dereferenced.  Only 8 pixels per row are wanted,
           so the second load uses offset 7 (the last wanted byte): memory
           past the 16-byte line holding that byte is never read.  Lanes
           8..15 of the permuted vector may then hold unrelated bytes; they
           are cleared by the mask, and the resulting 0s do not change the
           sum. */
        a = vec_and(vec_perm(vec_ld(0, pix1), vec_ld(7, pix1),
                             vec_lvsl(0, pix1)), left8);
        b = vec_and(vec_perm(vec_ld(0, pix2), vec_ld(7, pix2),
                             vec_lvsl(0, pix2)), left8);

        /* |a - b| on unsigned bytes, computed as max - min */
        absdiff = vec_sub(vec_max(a, b), vec_min(a, b));

        /* Add each group of 4 differences into one of the 4 partial sums */
        sad = vec_sum4s(absdiff, sad);

        pix1 += line_size;
        pix2 += line_size;
    }

    /* Fold the four partial sums into element 3, then replicate it so that
       whichever element vec_ste() picks for &s holds the total */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
320
878
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
/**
 * Sum of squared pixel values over a 16x16 block.
 * AltiVec-enhanced.
 *
 * @param pix       first pixel of the block (any alignment)
 * @param line_size offset in bytes from one row to the next
 * @return the sum of pix[x]^2 over all 256 pixels
 */
int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    int row;
    int s __attribute__((aligned(16)));
    const vector unsigned int zero = (const vector unsigned int)(0);
    vector unsigned int sv = (vector unsigned int)(0);
    vector signed int sum;

    for (row = 0; row < 16; row++) {
        /* Realign the potentially unaligned row.  vec_ld() truncates the
           effective address to a 16-byte boundary, so no misaligned vector
           pointer is ever dereferenced.  The second load uses offset 15
           (the last byte we need) rather than 16, so an already aligned
           row does not touch the 16 bytes that follow it. */
        vector unsigned char pixv =
            vec_perm(vec_ld(0, pix), vec_ld(15, pix), vec_lvsl(0, pix));

        /* Square the values, and add them to our sum */
        sv = vec_msum(pixv, pixv, sv);

        pix += line_size;
    }

    /* Fold the four partial sums into element 3, then replicate it so that
       whichever element vec_ste() picks for &s holds the total */
    sum = vec_sums((vector signed int) sv, (vector signed int) zero);
    sum = vec_splat(sum, 3);
    vec_ste(sum, 0, &s);

    return s;
}
6ea69518e5f7 altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents: 828
diff changeset
351
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
/**
 * Sum of Squared Errors for a 8x8 block.
 * AltiVec-enhanced.
 * Same structure as pix_abs8x8_altivec, with the absolute differences
 * squared before they are accumulated.
 *
 * @param v         not used by this function
 * @param pix1      first pixel of the first block (any alignment)
 * @param pix2      first pixel of the second block (any alignment)
 * @param line_size offset in bytes from one row to the next, applied to
 *                  both pix1 and pix2
 * @return the sum of (pix1 - pix2)^2 over all 64 pixel pairs
 */
int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int row;
    int s __attribute__((aligned(16)));
    const vector unsigned int zero = (const vector unsigned int)(0);
    /* Keeps lanes 0..7 (the wanted pixels) and zeroes lanes 8..15 */
    const vector unsigned char left8 =
        (vector unsigned char)(0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00);
    vector unsigned int sum = (vector unsigned int)(0);
    vector signed int sumsqr;

    for (row = 0; row < 8; row++) {
        vector unsigned char a, b, absdiff;

        /* Realign the potentially unaligned rows.  vec_ld() truncates the
           effective address to a 16-byte boundary, so no misaligned vector
           pointer is ever dereferenced.  Only 8 pixels per row are wanted,
           so the second load uses offset 7 (the last wanted byte): memory
           past the 16-byte line holding that byte is never read.  Lanes
           8..15 of the permuted vector may then hold unrelated bytes; they
           are cleared by the mask, and the resulting 0s do not change the
           sum. */
        a = vec_and(vec_perm(vec_ld(0, pix1), vec_ld(7, pix1),
                             vec_lvsl(0, pix1)), left8);
        b = vec_and(vec_perm(vec_ld(0, pix2), vec_ld(7, pix2),
                             vec_lvsl(0, pix2)), left8);

        /* The bytes are unsigned, so take |a - b| as max - min; squaring
           it gives the same result as (a - b)^2. */
        absdiff = vec_sub(vec_max(a, b), vec_min(a, b));

        /* Square the values and add them to our sum */
        sum = vec_msum(absdiff, absdiff, sum);

        pix1 += line_size;
        pix2 += line_size;
    }

    /* Fold the four partial sums into element 3, then replicate it so that
       whichever element vec_ste() picks for &s holds the total */
    sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
    sumsqr = vec_splat(sumsqr, 3);
    vec_ste(sumsqr, 0, &s);

    return s;
}
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
405
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
406 /**
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
407 * Sum of Squared Errors for a 16x16 block.
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
408 * AltiVec-enhanced.
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
409 * It's the pix_abs16x16_altivec code above w/ squaring added.
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
410 */
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
411 int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
412 {
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
413 int i;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
414 int s __attribute__((aligned(16)));
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
415 const vector unsigned int zero = (const vector unsigned int)(0);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
416 vector unsigned char perm1, perm2, *pix1v, *pix2v;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
417 vector unsigned char t1, t2, t3,t4, t5;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
418 vector unsigned int sum;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
419 vector signed int sumsqr;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
420
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
421 sum = (vector unsigned int)(0);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
422
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
423 for(i=0;i<16;i++) {
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
424 /* Read potentially unaligned pixels into t1 and t2 */
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
425 perm1 = vec_lvsl(0, pix1);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
426 pix1v = (vector unsigned char *) pix1;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
427 perm2 = vec_lvsl(0, pix2);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
428 pix2v = (vector unsigned char *) pix2;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
429 t1 = vec_perm(pix1v[0], pix1v[1], perm1);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
430 t2 = vec_perm(pix2v[0], pix2v[1], perm2);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
431
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
432 /*
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
433 Since we want to use unsigned chars, we can take advantage
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
434 of the fact that abs(a-b)^2 = (a-b)^2.
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
435 */
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
436
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
437 /* Calculate abs differences vector */
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
438 t3 = vec_max(t1, t2);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
439 t4 = vec_min(t1, t2);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
440 t5 = vec_sub(t3, t4);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
441
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
442 /* Square the values and add them to our sum */
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
443 sum = vec_msum(t5, t5, sum);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
444
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
445 pix1 += line_size;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
446 pix2 += line_size;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
447 }
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
448
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
449 /* Sum up the four partial sums, and put the result into s */
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
450 sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
451 sumsqr = vec_splat(sumsqr, 3);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
452 vec_ste(sumsqr, 0, &s);
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
453
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
454 return s;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
455 }
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
456
623
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
457 int pix_sum_altivec(UINT8 * pix, int line_size)
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
458 {
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
459 const vector unsigned int zero = (const vector unsigned int)(0);
623
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
460 vector unsigned char perm, *pixv;
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
461 vector unsigned char t1;
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
462 vector unsigned int sad;
623
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
463 vector signed int sumdiffs;
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
464
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
465 int i;
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
466 int s __attribute__((aligned(16)));
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
467
623
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
468 sad = (vector unsigned int) (0);
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
469
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
470 for (i = 0; i < 16; i++) {
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
471 /* Read the potentially unaligned 16 pixels into t1 */
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
472 perm = vec_lvsl(0, pix);
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
473 pixv = (vector unsigned char *) pix;
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
474 t1 = vec_perm(pixv[0], pixv[1], perm);
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
475
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
476 /* Add each 4 pixel group together and put 4 results into sad */
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
477 sad = vec_sum4s(t1, sad);
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
478
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
479 pix += line_size;
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
480 }
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
481
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
482 /* Sum up the four partial sums, and put the result into s */
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
483 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
484 sumdiffs = vec_splat(sumdiffs, 3);
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
485 vec_ste(sumdiffs, 0, &s);
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
486
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
487 return s;
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
488 }
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
489
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
490 void get_pixels_altivec(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
491 {
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
492 int i;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
493 vector unsigned char perm, bytes, *pixv;
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
494 const vector unsigned char zero = (const vector unsigned char) (0);
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
495 vector signed short shorts;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
496
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
497 for(i=0;i<8;i++)
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
498 {
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
499 // Read potentially unaligned pixels.
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
500 // We're reading 16 pixels, and actually only want 8,
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
501 // but we simply ignore the extras.
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
502 perm = vec_lvsl(0, pixels);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
503 pixv = (vector unsigned char *) pixels;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
504 bytes = vec_perm(pixv[0], pixv[1], perm);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
505
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
506 // convert the bytes into shorts
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
507 shorts = (vector signed short)vec_mergeh(zero, bytes);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
508
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
509 // save the data to the block, we assume the block is 16-byte aligned
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
510 vec_st(shorts, i*16, (vector signed short*)block);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
511
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
512 pixels += line_size;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
513 }
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
514 }
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
515
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
516 void diff_pixels_altivec(DCTELEM *restrict block, const UINT8 *s1,
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
517 const UINT8 *s2, int stride)
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
518 {
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
519 int i;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
520 vector unsigned char perm, bytes, *pixv;
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 978
diff changeset
521 const vector unsigned char zero = (const vector unsigned char) (0);
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
522 vector signed short shorts1, shorts2;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
523
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
524 for(i=0;i<4;i++)
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
525 {
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
526 // Read potentially unaligned pixels
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
527 // We're reading 16 pixels, and actually only want 8,
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
528 // but we simply ignore the extras.
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
529 perm = vec_lvsl(0, s1);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
530 pixv = (vector unsigned char *) s1;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
531 bytes = vec_perm(pixv[0], pixv[1], perm);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
532
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
533 // convert the bytes into shorts
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
534 shorts1 = (vector signed short)vec_mergeh(zero, bytes);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
535
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
536 // Do the same for the second block of pixels
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
537 perm = vec_lvsl(0, s2);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
538 pixv = (vector unsigned char *) s2;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
539 bytes = vec_perm(pixv[0], pixv[1], perm);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
540
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
541 // convert the bytes into shorts
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
542 shorts2 = (vector signed short)vec_mergeh(zero, bytes);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
543
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
544 // Do the subtraction
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
545 shorts1 = vec_sub(shorts1, shorts2);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
546
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
547 // save the data to the block, we assume the block is 16-byte aligned
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
548 vec_st(shorts1, 0, (vector signed short*)block);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
549
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
550 s1 += stride;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
551 s2 += stride;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
552 block += 8;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
553
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
554
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
555 // The code below is a copy of the code above... This is a manual
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
556 // unroll.
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
557
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
558 // Read potentially unaligned pixels
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
559 // We're reading 16 pixels, and actually only want 8,
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
560 // but we simply ignore the extras.
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
561 perm = vec_lvsl(0, s1);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
562 pixv = (vector unsigned char *) s1;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
563 bytes = vec_perm(pixv[0], pixv[1], perm);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
564
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
565 // convert the bytes into shorts
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
566 shorts1 = (vector signed short)vec_mergeh(zero, bytes);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
567
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
568 // Do the same for the second block of pixels
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
569 perm = vec_lvsl(0, s2);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
570 pixv = (vector unsigned char *) s2;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
571 bytes = vec_perm(pixv[0], pixv[1], perm);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
572
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
573 // convert the bytes into shorts
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
574 shorts2 = (vector signed short)vec_mergeh(zero, bytes);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
575
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
576 // Do the subtraction
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
577 shorts1 = vec_sub(shorts1, shorts2);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
578
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
579 // save the data to the block, we assume the block is 16-byte aligned
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
580 vec_st(shorts1, 0, (vector signed short*)block);
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
581
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
582 s1 += stride;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
583 s2 += stride;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
584 block += 8;
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
585 }
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
586 }
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
587
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
/**
 * Adapter that forwards to pix_abs16x16_altivec().
 *
 * @param s      not referenced by this function
 * @param a      passed through as the first argument
 * @param b      passed through as the second argument
 * @param stride passed through as the third argument
 * @return the value returned by pix_abs16x16_altivec(a, b, stride)
 */
int sad16x16_altivec(void *s, uint8_t *a, uint8_t *b, int stride)
{
    const int result = pix_abs16x16_altivec(a, b, stride);

    return result;
}
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
591
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
/**
 * Adapter that forwards to pix_abs8x8_altivec().
 *
 * @param s      not referenced by this function
 * @param a      passed through as the first argument
 * @param b      passed through as the second argument
 * @param stride passed through as the third argument
 * @return the value returned by pix_abs8x8_altivec(a, b, stride)
 */
int sad8x8_altivec(void *s, uint8_t *a, uint8_t *b, int stride)
{
    const int result = pix_abs8x8_altivec(a, b, stride);

    return result;
}
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
595
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
596 void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
597 #if 0
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
598 int i;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
599 for(i=0; i+7<w; i++){
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
600 dst[i+0] += src[i+0];
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
601 dst[i+1] += src[i+1];
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
602 dst[i+2] += src[i+2];
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
603 dst[i+3] += src[i+3];
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
604 dst[i+4] += src[i+4];
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
605 dst[i+5] += src[i+5];
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
606 dst[i+6] += src[i+6];
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
607 dst[i+7] += src[i+7];
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
608 }
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
609 for(; i<w; i++)
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
610 dst[i+0] += src[i+0];
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
611 #else
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
612 register int i;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
613 register uint8_t *temp_src = src, *temp_dst = dst;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
614 register vector unsigned char vdst, vsrc, temp1, temp2;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
615 register vector unsigned char perm;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
616 register int count = 0;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
617
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
618 for (i = 0; (i < w) && ((unsigned long)temp_dst & 0x0000000F) ; i++)
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
619 {
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
620 dst[i] = src[i];
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
621 temp_src ++;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
622 temp_dst ++;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
623 }
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
624 /* temp_dst is a properly aligned pointer */
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
625 /* we still need to deal with ill-aligned src */
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
626 perm = vec_lvsl(0, temp_src);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
627 temp1 = vec_ld(0, temp_src);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
628 while ((i + 15) < w)
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
629 {
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
630 temp2 = vec_ld(count + 16, temp_src);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
631 vdst = vec_ld(count, temp_dst);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
632 vsrc = vec_perm(temp1, temp2, perm);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
633 temp1 = temp2;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
634 vdst = vec_add(vsrc, vdst);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
635 vec_st(vdst, count, temp_dst);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
636 count += 16;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
637 }
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
638 for (; (i < w) ; i++)
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
639 {
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
640 dst[i] = src[i];
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
641 }
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
642 #endif
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
643 }
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 638
diff changeset
644
623
92e99e506920 first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
/*
 * Runtime probe for a vector unit.
 *
 * When built with CONFIG_DARWIN, asks the kernel through sysctl() for the
 * CTL_HW / HW_VECTORUNIT value and returns 1 if the call succeeds and the
 * reported value is non-zero. Returns 0 in every other case: the sysctl()
 * call failed, the reported value was zero, or the Darwin probe was not
 * compiled in.
 */
int has_altivec(void)
{
#if CONFIG_DARWIN
    int mib[2] = {CTL_HW, HW_VECTORUNIT};
    int vector_unit = 0;
    size_t vector_unit_size = sizeof(vector_unit);

    /* sysctl() returns 0 on success; only then is vector_unit meaningful. */
    if (sysctl(mib, 2, &vector_unit, &vector_unit_size, NULL, 0) == 0) {
        return vector_unit ? 1 : 0;
    }
#endif
    /* Probe failed or is unavailable on this build. */
    return 0;
}