annotate ppc/imgresample_altivec.c @ 7855:9a135b6a1dc7 libavcodec

Correct order of parsing for pulse scalefactor band and offset to match the specification. Patch by Alex Converse (alex converse gmail com)
author superdump
date Sat, 13 Sep 2008 18:47:43 +0000
parents a8a79f5385f6
children 4e58133ef122
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
1 /*
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
2 * High quality image resampling with polyphase filters
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
3 * Copyright (c) 2001 Fabrice Bellard.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
4 *
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
5 * This file is part of FFmpeg.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
6 *
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
11 *
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
15 * Lesser General Public License for more details.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
16 *
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
20 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
21
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
22 /**
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
23 * @file imgresample_altivec.c
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
24 * High quality image resampling with polyphase filters - AltiVec bits
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
25 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
26
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
27 #include "gcc_fixes.h"
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
28
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
29 typedef union {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
30 vector unsigned char v;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
31 unsigned char c[16];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
32 } vec_uc_t;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
33
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
34 typedef union {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
35 vector signed short v;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
36 signed short s[8];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
37 } vec_ss_t;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
38
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
39 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
40 int wrap, int16_t *filter)
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
41 {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
42 int sum, i;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
43 const uint8_t *s;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
44 vector unsigned char *tv, tmp, dstv, zero;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
45 vec_ss_t srchv[4], srclv[4], fv[4];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
46 vector signed short zeros, sumhv, sumlv;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
47 s = src;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
48
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 5750
diff changeset
49 for(i=0;i<4;i++) {
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
50 /*
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
51 The vec_madds later on does an implicit >>15 on the result.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
52 Since FILTER_BITS is 8, and we have 15 bits of magnitude in
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
53 a signed short, we have just enough bits to pre-shift our
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
54 filter constants <<7 to compensate for vec_madds.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
55 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
56 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
57 fv[i].v = vec_splat(fv[i].v, 0);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
58 }
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
59
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
60 zero = vec_splat_u8(0);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
61 zeros = vec_splat_s16(0);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
62
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
63
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
64 /*
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
65 When we're resampling, we'd ideally like both our input buffers,
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
66 and output buffers to be 16-byte aligned, so we can do both aligned
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
67 reads and writes. Sadly we can't always have this at the moment, so
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
68 we opt for aligned writes, as unaligned writes have a huge overhead.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
69 To do this, do enough scalar resamples to get dst 16-byte aligned.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
70 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
71 i = (-(int)dst) & 0xf;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
72 while(i>0) {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
73 sum = s[0 * wrap] * filter[0] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
74 s[1 * wrap] * filter[1] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
75 s[2 * wrap] * filter[2] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
76 s[3 * wrap] * filter[3];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
77 sum = sum >> FILTER_BITS;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
78 if (sum<0) sum = 0; else if (sum>255) sum=255;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
79 dst[0] = sum;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
80 dst++;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
81 s++;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
82 dst_width--;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
83 i--;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
84 }
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
85
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
86 /* Do our altivec resampling on 16 pixels at once. */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
87 while(dst_width>=16) {
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 5750
diff changeset
88 /* Read 16 (potentially unaligned) bytes from each of
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
89 4 lines into 4 vectors, and split them into shorts.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
90 Interleave the multipy/accumulate for the resample
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
91 filter with the loads to hide the 3 cycle latency
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 5750
diff changeset
92 the vec_madds have. */
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
93 tv = (vector unsigned char *) &s[0 * wrap];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
94 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
95 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
96 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
97 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
98 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
99
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
100 tv = (vector unsigned char *) &s[1 * wrap];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
101 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
102 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
103 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
104 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
105 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
106
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
107 tv = (vector unsigned char *) &s[2 * wrap];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
108 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
109 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
110 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
111 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
112 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
113
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
114 tv = (vector unsigned char *) &s[3 * wrap];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
115 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
116 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
117 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
118 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
119 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
120
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 5750
diff changeset
121 /* Pack the results into our destination vector,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 5750
diff changeset
122 and do an aligned write of that back to memory. */
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
123 dstv = vec_packsu(sumhv, sumlv) ;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
124 vec_st(dstv, 0, (vector unsigned char *) dst);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
125
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
126 dst+=16;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
127 s+=16;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
128 dst_width-=16;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
129 }
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
130
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 5750
diff changeset
131 /* If there are any leftover pixels, resample them
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 5750
diff changeset
132 with the slow scalar method. */
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
133 while(dst_width>0) {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
134 sum = s[0 * wrap] * filter[0] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
135 s[1 * wrap] * filter[1] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
136 s[2 * wrap] * filter[2] +
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
137 s[3 * wrap] * filter[3];
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
138 sum = sum >> FILTER_BITS;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
139 if (sum<0) sum = 0; else if (sum>255) sum=255;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
140 dst[0] = sum;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
141 dst++;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
142 s++;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
143 dst_width--;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
144 }
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
145 }
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
146