Mercurial > libavcodec.hg
comparison ppc/dsputil_altivec.c @ 623:92e99e506920 libavcodec
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
author | michaelni |
---|---|
date | Wed, 28 Aug 2002 13:14:36 +0000 |
parents | |
children | 0012f75c92bb |
comparison
equal
deleted
inserted
replaced
622:a1e54c24f221 | 623:92e99e506920 |
---|---|
1 #include "../dsputil.h" | |
2 | |
3 #if CONFIG_DARWIN | |
4 #include <sys/sysctl.h> | |
5 #endif | |
6 | |
7 int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); | |
8 int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); | |
9 int pix_sum_altivec(UINT8 * pix, int line_size); | |
10 | |
11 int has_altivec(void); | |
12 | |
13 int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) | |
14 { | |
15 int i, s; | |
16 vector unsigned char perm1, perm2, *pix1v, *pix2v; | |
17 vector unsigned char t1, t2, t3,t4, t5; | |
18 vector unsigned int sad, zero; | |
19 vector signed int sumdiffs; | |
20 | |
21 zero = (vector unsigned int) (0); | |
22 sad = (vector unsigned int) (0); | |
23 | |
24 | |
25 for(i=0;i<16;i++) { | |
26 /* Read potentially unaligned pixels into t1 and t2 */ | |
27 perm1 = vec_lvsl(0, pix1); | |
28 pix1v = (vector unsigned char *) pix1; | |
29 perm2 = vec_lvsl(0, pix2); | |
30 pix2v = (vector unsigned char *) pix2; | |
31 t1 = vec_perm(pix1v[0], pix1v[1], perm1); | |
32 t2 = vec_perm(pix2v[0], pix2v[1], perm2); | |
33 | |
34 /* Calculate a sum of abs differences vector */ | |
35 t3 = vec_max(t1, t2); | |
36 t4 = vec_min(t1, t2); | |
37 t5 = vec_sub(t3, t4); | |
38 | |
39 /* Add each 4 pixel group together and put 4 results into sad */ | |
40 sad = vec_sum4s(t5, sad); | |
41 | |
42 pix1 += line_size; | |
43 pix2 += line_size; | |
44 } | |
45 | |
46 /* Sum up the four partial sums, and put the result into s */ | |
47 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); | |
48 sumdiffs = vec_splat(sumdiffs, 3); | |
49 vec_ste(sumdiffs, 0, &s); | |
50 | |
51 return s; | |
52 } | |
53 | |
54 int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) | |
55 { | |
56 int i, s; | |
57 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; | |
58 vector unsigned char t1, t2, t3,t4, t5; | |
59 vector unsigned int sad, zero; | |
60 vector signed int sumdiffs; | |
61 | |
62 zero = (vector unsigned int) (0); | |
63 sad = (vector unsigned int) (0); | |
64 permclear = (vector unsigned char) (255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); | |
65 | |
66 for(i=0;i<8;i++) { | |
67 /* Read potentially unaligned pixels into t1 and t2 | |
68 Since we're reading 16 pixels, and actually only want 8, | |
69 mask out the last 8 pixels. The 0s don't change the sum. */ | |
70 perm1 = vec_lvsl(0, pix1); | |
71 pix1v = (vector unsigned char *) pix1; | |
72 perm2 = vec_lvsl(0, pix2); | |
73 pix2v = (vector unsigned char *) pix2; | |
74 t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear); | |
75 t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear); | |
76 | |
77 /* Calculate a sum of abs differences vector */ | |
78 t3 = vec_max(t1, t2); | |
79 t4 = vec_min(t1, t2); | |
80 t5 = vec_sub(t3, t4); | |
81 | |
82 /* Add each 4 pixel group together and put 4 results into sad */ | |
83 sad = vec_sum4s(t5, sad); | |
84 | |
85 pix1 += line_size; | |
86 pix2 += line_size; | |
87 } | |
88 | |
89 /* Sum up the four partial sums, and put the result into s */ | |
90 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); | |
91 sumdiffs = vec_splat(sumdiffs, 3); | |
92 vec_ste(sumdiffs, 0, &s); | |
93 | |
94 return s; | |
95 } | |
96 | |
97 int pix_sum_altivec(UINT8 * pix, int line_size) | |
98 { | |
99 | |
100 vector unsigned char perm, *pixv; | |
101 vector unsigned char t1; | |
102 vector unsigned int sad, zero; | |
103 vector signed int sumdiffs; | |
104 | |
105 int s, i; | |
106 | |
107 zero = (vector unsigned int) (0); | |
108 sad = (vector unsigned int) (0); | |
109 | |
110 for (i = 0; i < 16; i++) { | |
111 /* Read the potentially unaligned 16 pixels into t1 */ | |
112 perm = vec_lvsl(0, pix); | |
113 pixv = (vector unsigned char *) pix; | |
114 t1 = vec_perm(pixv[0], pixv[1], perm); | |
115 | |
116 /* Add each 4 pixel group together and put 4 results into sad */ | |
117 sad = vec_sum4s(t1, sad); | |
118 | |
119 pix += line_size; | |
120 } | |
121 | |
122 /* Sum up the four partial sums, and put the result into s */ | |
123 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); | |
124 sumdiffs = vec_splat(sumdiffs, 3); | |
125 vec_ste(sumdiffs, 0, &s); | |
126 | |
127 return s; | |
128 } | |
129 | |
130 void dsputil_init_altivec(void) | |
131 { | |
132 if (has_altivec()) { | |
133 pix_abs16x16 = pix_abs16x16_altivec; | |
134 pix_abs8x8 = pix_abs8x8_altivec; | |
135 pix_sum = pix_sum_altivec; | |
136 } | |
137 } | |
138 | |
/**
 * Report whether an AltiVec unit is available at run time.
 *
 * When CONFIG_DARWIN is set, the hw.vectorunit sysctl is queried and its
 * value decides the answer. If the query fails, or on any other
 * configuration, the function reports no AltiVec.
 *
 * @return 1 if a vector unit was reported, 0 otherwise
 */
int has_altivec(void)
{
#if CONFIG_DARWIN
    int mib[2] = {CTL_HW, HW_VECTORUNIT};
    int vector_unit = 0;
    size_t vector_unit_len = sizeof(vector_unit);

    if (sysctl(mib, 2, &vector_unit, &vector_unit_len, NULL, 0) == 0) {
        return vector_unit != 0;
    }
#endif
    return 0;
}