Mercurial > libavcodec.hg
annotate ppc/imgresample_altivec.c @ 8546:5800aecff5f8 libavcodec
Use <> instead of "" for system headers.
author | diego |
---|---|
date | Wed, 07 Jan 2009 18:24:16 +0000 |
parents | cfa400ec2a75 |
children | 04423b2f6e0b |
rev | line source |
---|---|
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
1 /* |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
2 * High quality image resampling with polyphase filters |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
3 * Copyright (c) 2001 Fabrice Bellard. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
4 * |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
5 * This file is part of FFmpeg. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
6 * |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
11 * |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
15 * Lesser General Public License for more details. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
16 * |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
20 */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
21 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
22 /** |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
23 * @file imgresample_altivec.c |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
24 * High quality image resampling with polyphase filters - AltiVec bits |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
25 */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
26 |
8310 | 27 #include "util_altivec.h" |
28 #define FILTER_BITS 8 | |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
29 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
30 typedef union { |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
31 vector signed short v; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
32 signed short s[8]; |
8311 | 33 } vec_ss; |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
34 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
35 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src, |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
36 int wrap, int16_t *filter) |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
37 { |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
38 int sum, i; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
39 const uint8_t *s; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
40 vector unsigned char *tv, tmp, dstv, zero; |
8311 | 41 vec_ss srchv[4], srclv[4], fv[4]; |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
42 vector signed short zeros, sumhv, sumlv; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
43 s = src; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
44 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
45 for(i=0;i<4;i++) { |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
46 /* |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
47 The vec_madds later on does an implicit >>15 on the result. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
48 Since FILTER_BITS is 8, and we have 15 bits of magnitude in |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
49 a signed short, we have just enough bits to pre-shift our |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
50 filter constants <<7 to compensate for vec_madds. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
51 */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
52 fv[i].s[0] = filter[i] << (15-FILTER_BITS); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
53 fv[i].v = vec_splat(fv[i].v, 0); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
54 } |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
55 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
56 zero = vec_splat_u8(0); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
57 zeros = vec_splat_s16(0); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
58 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
59 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
60 /* |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
61 When we're resampling, we'd ideally like both our input buffers, |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
62 and output buffers to be 16-byte aligned, so we can do both aligned |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
63 reads and writes. Sadly we can't always have this at the moment, so |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
64 we opt for aligned writes, as unaligned writes have a huge overhead. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
65 To do this, do enough scalar resamples to get dst 16-byte aligned. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
66 */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
67 i = (-(int)dst) & 0xf; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
68 while(i>0) { |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
69 sum = s[0 * wrap] * filter[0] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
70 s[1 * wrap] * filter[1] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
71 s[2 * wrap] * filter[2] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
72 s[3 * wrap] * filter[3]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
73 sum = sum >> FILTER_BITS; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
74 if (sum<0) sum = 0; else if (sum>255) sum=255; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
75 dst[0] = sum; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
76 dst++; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
77 s++; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
78 dst_width--; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
79 i--; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
80 } |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
81 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
82 /* Do our altivec resampling on 16 pixels at once. */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
83 while(dst_width>=16) { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
84 /* Read 16 (potentially unaligned) bytes from each of |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
85 4 lines into 4 vectors, and split them into shorts. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
86 Interleave the multipy/accumulate for the resample |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
87 filter with the loads to hide the 3 cycle latency |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
88 the vec_madds have. */ |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
89 tv = (vector unsigned char *) &s[0 * wrap]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
90 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap])); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
91 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
92 srclv[0].v = (vector signed short) vec_mergel(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
93 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
94 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
95 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
96 tv = (vector unsigned char *) &s[1 * wrap]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
97 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap])); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
98 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
99 srclv[1].v = (vector signed short) vec_mergel(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
100 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
101 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
102 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
103 tv = (vector unsigned char *) &s[2 * wrap]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
104 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap])); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
105 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
106 srclv[2].v = (vector signed short) vec_mergel(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
107 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
108 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
109 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
110 tv = (vector unsigned char *) &s[3 * wrap]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
111 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap])); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
112 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
113 srclv[3].v = (vector signed short) vec_mergel(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
114 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
115 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
116 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
117 /* Pack the results into our destination vector, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
118 and do an aligned write of that back to memory. */ |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
119 dstv = vec_packsu(sumhv, sumlv) ; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
120 vec_st(dstv, 0, (vector unsigned char *) dst); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
121 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
122 dst+=16; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
123 s+=16; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
124 dst_width-=16; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
125 } |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
126 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
127 /* If there are any leftover pixels, resample them |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
128 with the slow scalar method. */ |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
129 while(dst_width>0) { |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
130 sum = s[0 * wrap] * filter[0] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
131 s[1 * wrap] * filter[1] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
132 s[2 * wrap] * filter[2] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
133 s[3 * wrap] * filter[3]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
134 sum = sum >> FILTER_BITS; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
135 if (sum<0) sum = 0; else if (sum>255) sum=255; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
136 dst[0] = sum; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
137 dst++; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
138 s++; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
139 dst_width--; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
140 } |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
141 } |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
142 |