annotate ppc/imgresample_altivec.c @ 6920:d02af7474bff libavcodec

Prevent 128*1<<trellis from becoming 0 and creating 0 sized arrays. fixes CID84 RUN2 CID85 RUN2 CID86 RUN2 CID87 RUN2 CID88 RUN2 CID89 RUN2 CID90 RUN2 CID91 RUN2 CID92 RUN2 CID93 RUN2 CID94 RUN2 CID95 RUN2 CID96 RUN2 CID97 RUN2 CID98 RUN2 CID99 RUN2 CID100 RUN2 CID101 RUN2 CID102 RUN2 CID103 RUN2 CID104 RUN2 CID105 RUN2 CID106 RUN2
author michael
date Wed, 28 May 2008 11:59:41 +0000
parents 09f99af1db40
children a8a79f5385f6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
1 /*
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
2 * High quality image resampling with polyphase filters
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
3 * Copyright (c) 2001 Fabrice Bellard.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
4 *
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
5 * This file is part of FFmpeg.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
6 *
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
11 *
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
15 * Lesser General Public License for more details.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
16 *
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
20 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
21
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
22 /**
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
23 * @file imgresample_altivec.c
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
24 * High quality image resampling with polyphase filters - AltiVec bits
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
25 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
26
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
27 #include "gcc_fixes.h"
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
28
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
29 typedef union {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
30 vector unsigned char v;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
31 unsigned char c[16];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
32 } vec_uc_t;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
33
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
34 typedef union {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
35 vector signed short v;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
36 signed short s[8];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
37 } vec_ss_t;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
38
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
39 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
40 int wrap, int16_t *filter)
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
41 {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
42 int sum, i;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
43 const uint8_t *s;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
44 vector unsigned char *tv, tmp, dstv, zero;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
45 vec_ss_t srchv[4], srclv[4], fv[4];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
46 vector signed short zeros, sumhv, sumlv;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
47 s = src;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
48
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
49 for(i=0;i<4;i++)
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
50 {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
51 /*
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
52 The vec_madds later on does an implicit >>15 on the result.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
53 Since FILTER_BITS is 8, and we have 15 bits of magnitude in
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
54 a signed short, we have just enough bits to pre-shift our
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
55 filter constants <<7 to compensate for vec_madds.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
56 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
57 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
58 fv[i].v = vec_splat(fv[i].v, 0);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
59 }
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
60
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
61 zero = vec_splat_u8(0);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
62 zeros = vec_splat_s16(0);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
63
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
64
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
65 /*
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
66 When we're resampling, we'd ideally like both our input buffers,
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
67 and output buffers to be 16-byte aligned, so we can do both aligned
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
68 reads and writes. Sadly we can't always have this at the moment, so
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
69 we opt for aligned writes, as unaligned writes have a huge overhead.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
70 To do this, do enough scalar resamples to get dst 16-byte aligned.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
71 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
72 i = (-(int)dst) & 0xf;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
73 while(i>0) {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
74 sum = s[0 * wrap] * filter[0] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
75 s[1 * wrap] * filter[1] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
76 s[2 * wrap] * filter[2] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
77 s[3 * wrap] * filter[3];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
78 sum = sum >> FILTER_BITS;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
79 if (sum<0) sum = 0; else if (sum>255) sum=255;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
80 dst[0] = sum;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
81 dst++;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
82 s++;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
83 dst_width--;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
84 i--;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
85 }
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
86
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
87 /* Do our altivec resampling on 16 pixels at once. */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
88 while(dst_width>=16) {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
89 /*
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
90 Read 16 (potentially unaligned) bytes from each of
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
91 4 lines into 4 vectors, and split them into shorts.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
92 Interleave the multipy/accumulate for the resample
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
93 filter with the loads to hide the 3 cycle latency
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
94 the vec_madds have.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
95 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
96 tv = (vector unsigned char *) &s[0 * wrap];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
97 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
98 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
99 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
100 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
101 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
102
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
103 tv = (vector unsigned char *) &s[1 * wrap];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
104 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
105 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
106 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
107 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
108 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
109
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
110 tv = (vector unsigned char *) &s[2 * wrap];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
111 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
112 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
113 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
114 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
115 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
116
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
117 tv = (vector unsigned char *) &s[3 * wrap];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
118 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
119 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
120 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
121 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
122 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
123
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
124 /*
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
125 Pack the results into our destination vector,
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
126 and do an aligned write of that back to memory.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
127 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
128 dstv = vec_packsu(sumhv, sumlv) ;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
129 vec_st(dstv, 0, (vector unsigned char *) dst);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
130
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
131 dst+=16;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
132 s+=16;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
133 dst_width-=16;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
134 }
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
135
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
136 /*
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
137 If there are any leftover pixels, resample them
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
138 with the slow scalar method.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
139 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
140 while(dst_width>0) {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
141 sum = s[0 * wrap] * filter[0] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
142 s[1 * wrap] * filter[1] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
143 s[2 * wrap] * filter[2] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
144 s[3 * wrap] * filter[3];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
145 sum = sum >> FILTER_BITS;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
146 if (sum<0) sum = 0; else if (sum>255) sum=255;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
147 dst[0] = sum;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
148 dst++;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
149 s++;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
150 dst_width--;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
151 }
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
152 }
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
153