Mercurial > libavcodec.hg
annotate ppc/imgresample_altivec.c @ 7546:97383e012cb9 libavcodec
remove mdct tmp buffer
author | lorenm |
---|---|
date | Tue, 12 Aug 2008 00:36:36 +0000 |
parents | a8a79f5385f6 |
children | 4e58133ef122 |
rev | line source |
---|---|
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
1 /* |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
2 * High quality image resampling with polyphase filters |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
3 * Copyright (c) 2001 Fabrice Bellard. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
4 * |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
5 * This file is part of FFmpeg. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
6 * |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
11 * |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
15 * Lesser General Public License for more details. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
16 * |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
20 */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
21 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
22 /** |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
23 * @file imgresample_altivec.c |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
24 * High quality image resampling with polyphase filters - AltiVec bits |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
25 */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
26 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
27 #include "gcc_fixes.h" |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
28 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
29 typedef union { |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
30 vector unsigned char v; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
31 unsigned char c[16]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
32 } vec_uc_t; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
33 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
34 typedef union { |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
35 vector signed short v; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
36 signed short s[8]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
37 } vec_ss_t; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
38 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
39 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src, |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
40 int wrap, int16_t *filter) |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
41 { |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
42 int sum, i; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
43 const uint8_t *s; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
44 vector unsigned char *tv, tmp, dstv, zero; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
45 vec_ss_t srchv[4], srclv[4], fv[4]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
46 vector signed short zeros, sumhv, sumlv; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
47 s = src; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
48 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
49 for(i=0;i<4;i++) { |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
50 /* |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
51 The vec_madds later on does an implicit >>15 on the result. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
52 Since FILTER_BITS is 8, and we have 15 bits of magnitude in |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
53 a signed short, we have just enough bits to pre-shift our |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
54 filter constants <<7 to compensate for vec_madds. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
55 */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
56 fv[i].s[0] = filter[i] << (15-FILTER_BITS); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
57 fv[i].v = vec_splat(fv[i].v, 0); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
58 } |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
59 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
60 zero = vec_splat_u8(0); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
61 zeros = vec_splat_s16(0); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
62 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
63 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
64 /* |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
65 When we're resampling, we'd ideally like both our input buffers, |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
66 and output buffers to be 16-byte aligned, so we can do both aligned |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
67 reads and writes. Sadly we can't always have this at the moment, so |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
68 we opt for aligned writes, as unaligned writes have a huge overhead. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
69 To do this, do enough scalar resamples to get dst 16-byte aligned. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
70 */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
71 i = (-(int)dst) & 0xf; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
72 while(i>0) { |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
73 sum = s[0 * wrap] * filter[0] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
74 s[1 * wrap] * filter[1] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
75 s[2 * wrap] * filter[2] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
76 s[3 * wrap] * filter[3]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
77 sum = sum >> FILTER_BITS; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
78 if (sum<0) sum = 0; else if (sum>255) sum=255; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
79 dst[0] = sum; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
80 dst++; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
81 s++; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
82 dst_width--; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
83 i--; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
84 } |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
85 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
86 /* Do our altivec resampling on 16 pixels at once. */ |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
87 while(dst_width>=16) { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
88 /* Read 16 (potentially unaligned) bytes from each of |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
89 4 lines into 4 vectors, and split them into shorts. |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
90 Interleave the multipy/accumulate for the resample |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
91 filter with the loads to hide the 3 cycle latency |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
92 the vec_madds have. */ |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
93 tv = (vector unsigned char *) &s[0 * wrap]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
94 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap])); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
95 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
96 srclv[0].v = (vector signed short) vec_mergel(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
97 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
98 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
99 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
100 tv = (vector unsigned char *) &s[1 * wrap]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
101 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap])); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
102 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
103 srclv[1].v = (vector signed short) vec_mergel(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
104 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
105 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
106 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
107 tv = (vector unsigned char *) &s[2 * wrap]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
108 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap])); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
109 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
110 srclv[2].v = (vector signed short) vec_mergel(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
111 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
112 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
113 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
114 tv = (vector unsigned char *) &s[3 * wrap]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
115 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap])); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
116 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
117 srclv[3].v = (vector signed short) vec_mergel(zero, tmp); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
118 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
119 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
120 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
121 /* Pack the results into our destination vector, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
122 and do an aligned write of that back to memory. */ |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
123 dstv = vec_packsu(sumhv, sumlv) ; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
124 vec_st(dstv, 0, (vector unsigned char *) dst); |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
125 |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
126 dst+=16; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
127 s+=16; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
128 dst_width-=16; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
129 } |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
130 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
131 /* If there are any leftover pixels, resample them |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
5750
diff
changeset
|
132 with the slow scalar method. */ |
5750
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
133 while(dst_width>0) { |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
134 sum = s[0 * wrap] * filter[0] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
135 s[1 * wrap] * filter[1] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
136 s[2 * wrap] * filter[2] + |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
137 s[3 * wrap] * filter[3]; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
138 sum = sum >> FILTER_BITS; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
139 if (sum<0) sum = 0; else if (sum>255) sum=255; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
140 dst[0] = sum; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
141 dst++; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
142 s++; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
143 dst_width--; |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
144 } |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
145 } |
09f99af1db40
Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff
changeset
|
146 |