libavcodec.hg: comparison of ppc/gmc_altivec.c @ 2967:ef2149182f1c
COSMETICS: Remove all trailing whitespace.
author | diego |
---|---|
date | Sat, 17 Dec 2005 18:14:38 +0000 |
parents | b370288f004d |
children | 0b546eab515d |
Comparison of 2966:564788471dd4 and 2967:ef2149182f1c. Since the change only strips trailing whitespace, the two revisions are textually identical here; the affected region is shown once below, with the file's own line numbers.
 38     const int C=(16-x16)*( y16);
 39     const int D=( x16)*( y16);
 40     int i;
 41
 42     POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
 43
 44     for(i=0; i<h; i++)
 45     {
 46         dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
 47         dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
 48         dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
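
The hunk above is the start of the scalar reference path: every output pixel is a bilinear blend of four neighbouring source pixels, weighted by A, B, C and D. For orientation, here is a minimal self-contained sketch of that kernel; the A and B definitions and the 8-pixel block width are assumptions (they fall outside the hunk shown), while C, D and the per-pixel expression match the visible lines. Since the four weights sum to 256, adding rounder and shifting right by 8 gives a rounded weighted average.

    #include <stdint.h>

    /* Scalar sketch of the 8-wide GMC kernel.  A, B and the block width
     * are assumptions; C, D and the per-pixel expression match the hunk. */
    static void gmc1_scalar_sketch(uint8_t *dst, const uint8_t *src,
                                   int stride, int h,
                                   int x16, int y16, int rounder)
    {
        const int A = (16 - x16) * (16 - y16);  /* assumed */
        const int B = (     x16) * (16 - y16);  /* assumed */
        const int C = (16 - x16) * (     y16);
        const int D = (     x16) * (     y16);
        int i, j;

        for (i = 0; i < h; i++) {
            for (j = 0; j < 8; j++)             /* 8-wide block assumed */
                dst[j] = (A * src[j]          + B * src[j + 1] +
                          C * src[stride + j] + D * src[stride + j + 1] +
                          rounder) >> 8;
            dst += stride;
            src += stride;
        }
    }

The comparison resumes with the AltiVec path at file line 85: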
 85     Bv = vec_splat(tempA, 1);
 86     Cv = vec_splat(tempA, 2);
 87     Dv = vec_splat(tempA, 3);
 88
 89     rounderV = vec_ld(0, (unsigned short*)rounder_a);
 90
 91     // we'll be able to pick up our 9 char elements
 92     // at src from those 32 bytes
 93     // we load the first batch here, as inside the loop
 94     // we can reuse 'src+stride' from one iteration
 95     // as the 'src' of the next.
 96     src_0 = vec_ld(0, src);
 97     src_1 = vec_ld(16, src);
 98     srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));
 99
100     if (src_really_odd != 0x0000000F)
101     { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
102         srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
103     }
104     else
105     {
106         srcvB = src_1;
107     }
108     srcvA = vec_mergeh(vczero, srcvA);
109     srcvB = vec_mergeh(vczero, srcvB);
110
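The prologue above splats the weights into 16-bit vectors and builds the first two widened source rows, srcvA and srcvB. Two AltiVec idioms do the work: vec_ld only reads from 16-byte-aligned addresses, so an unaligned 16-byte load is synthesised from two aligned loads combined by vec_perm with a shuffle mask from vec_lvsl (the src & 0xF == 0xF case is handled separately because src+1 is then already aligned to the second vector); and vec_mergeh with a zero vector interleaves zero bytes with data bytes, widening the eight leading unsigned chars to eight unsigned shorts. A standalone sketch of the idiom, assuming <altivec.h> on a big-endian PowerPC; the helper name is hypothetical:

    #include <altivec.h>

    /* Load 8 bytes from an arbitrarily aligned pointer and widen them to
     * eight unsigned 16-bit lanes, the way srcvA/srcvB are built above. */
    static vector unsigned short load8_widen_u16(const unsigned char *p)
    {
        vector unsigned char zero  = vec_splat_u8(0);
        vector unsigned char lo    = vec_ld(0,  p);   /* aligned 16 bytes  */
        vector unsigned char hi    = vec_ld(16, p);   /* next aligned 16   */
        /* vec_lvsl(0, p) encodes p's misalignment; vec_perm shifts the two
         * aligned loads so the result starts exactly at p */
        vector unsigned char bytes = vec_perm(lo, hi, vec_lvsl(0, p));
        /* interleave zeroes with the low 8 data bytes: u8 -> u16 (big endian) */
        return (vector unsigned short)vec_mergeh(zero, bytes);
    }

The main per-row loop follows: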
111     for(i=0; i<h; i++)
112     {
113         dst_odd = (unsigned long)dst & 0x0000000F;
114         src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;
115
116         dstv = vec_ld(0, dst);
117
118         // we'll be able to pick up our 9 char elements
119         // at src + stride from those 32 bytes
120         // then reuse the resulting 2 vectors srcvC and srcvD
121         // as the next srcvA and srcvB
122         src_0 = vec_ld(stride + 0, src);
123         src_1 = vec_ld(stride + 16, src);
124         srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
125
126         if (src_really_odd != 0x0000000F)
127         { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
128             srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
129         }
130         else
131         {
132             srcvD = src_1;
133         }
134
135         srcvC = vec_mergeh(vczero, srcvC);
136         srcvD = vec_mergeh(vczero, srcvD);
137
138
139         // OK, now we (finally) do the math :-)
140         // these four instructions replace 32 int muls & 32 int adds.
141         // isn't AltiVec nice?
142         tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
143         tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
144         tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
145         tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);
146
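             // Per 16-bit lane, the chained vec_mladd calls above accumulate
             // A*srcvA + B*srcvB + C*srcvC + D*srcvD + rounder, the same sum
             // as one scalar dst expression; the vec_sr by vcsr8 below then
             // applies the final >>8.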
147         srcvA = srcvC;
148         srcvB = srcvD;
149
150         tempD = vec_sr(tempD, vcsr8);
151
152         dstv2 = vec_pack(tempD, (vector unsigned short)vczero);
153
154         if (dst_odd)
155         {
156             dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
157         }
158         else
159         {
160             dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
161         }
162
163         vec_st(dstv2, 0, dst);
164
165         dst += stride;
166         src += stride;
167     }
168
169     POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
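
One detail worth spelling out is the store at the end of the loop: only eight output bytes are produced per row, but vec_st always writes a full 16-byte vector to a 16-byte-aligned address (the low four address bits are ignored). The dstv load and the vcprm permutes therefore implement a read-modify-write: the aligned block containing dst is loaded, the eight new bytes are merged into whichever half dst points at (selected by dst_odd), and the whole block is written back. A scalar sketch of the net effect, assuming, as the vcprm selections do, that dst is always either 16-byte aligned or offset by exactly 8; the helper name is hypothetical:

    #include <string.h>

    /* Read-modify-write of 8 result bytes through an aligned 16-byte block,
     * mirroring the dstv / vcprm / vec_st sequence in the loop above. */
    static void store8_rmw_sketch(unsigned char *dst, const unsigned char new8[8])
    {
        unsigned long  dst_odd = (unsigned long)dst & 0x0F;  /* 0 or 8 assumed */
        unsigned char *block   = dst - dst_odd;              /* aligned base   */
        unsigned char  tmp[16];

        memcpy(tmp, block, 16);          /* dstv  = vec_ld(0, dst)            */
        memcpy(tmp + dst_odd, new8, 8);  /* dstv2 = vec_perm(dstv, dstv2, ..) */
        memcpy(block, tmp, 16);          /* vec_st(dstv2, 0, dst)             */
    }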