Mercurial > libavcodec.hg
comparison dsputil.c @ 516:0cb7ab35ccb2 libavcodec
* Detect "restrict" keyword
* Use "restrict" in some dsputil routines (yields large speedup)
author | mellum |
---|---|
date | Thu, 04 Jul 2002 01:47:32 +0000 |
parents | 873b9075d853 |
children | 389e30fe7269 |
comparison
equal
deleted
inserted
replaced
515:86f73263a61c | 516:0cb7ab35ccb2 |
---|---|
19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
20 */ | 20 */ |
21 #include "avcodec.h" | 21 #include "avcodec.h" |
22 #include "dsputil.h" | 22 #include "dsputil.h" |
23 #include "simple_idct.h" | 23 #include "simple_idct.h" |
24 #include "config.h" | |
25 | |
26 /* Suppress restrict if it was not defined in config.h */ | |
27 #ifndef restrict | |
28 #define restrict | |
29 #endif | |
24 | 30 |
25 void (*ff_idct)(DCTELEM *block); | 31 void (*ff_idct)(DCTELEM *block); |
26 void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block); | 32 void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block); |
27 void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block); | 33 void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block); |
28 void (*av_fdct)(DCTELEM *block); | 34 void (*av_fdct)(DCTELEM *block); |
157 lastIndexAfterPerm= zigzag_direct[lastIndex]; | 163 lastIndexAfterPerm= zigzag_direct[lastIndex]; |
158 zigzag_end[lastIndex]= lastIndexAfterPerm + 1; | 164 zigzag_end[lastIndex]= lastIndexAfterPerm + 1; |
159 } | 165 } |
160 } | 166 } |
161 | 167 |
162 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size) | 168 void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) |
163 { | 169 { |
164 DCTELEM *p; | |
165 const UINT8 *pix; | |
166 int i; | 170 int i; |
167 | 171 |
168 /* read the pixels */ | 172 /* read the pixels */ |
169 p = block; | |
170 pix = pixels; | |
171 for(i=0;i<8;i++) { | 173 for(i=0;i<8;i++) { |
172 p[0] = pix[0]; | 174 block[0] = pixels[0]; |
173 p[1] = pix[1]; | 175 block[1] = pixels[1]; |
174 p[2] = pix[2]; | 176 block[2] = pixels[2]; |
175 p[3] = pix[3]; | 177 block[3] = pixels[3]; |
176 p[4] = pix[4]; | 178 block[4] = pixels[4]; |
177 p[5] = pix[5]; | 179 block[5] = pixels[5]; |
178 p[6] = pix[6]; | 180 block[6] = pixels[6]; |
179 p[7] = pix[7]; | 181 block[7] = pixels[7]; |
180 pix += line_size; | 182 pixels += line_size; |
181 p += 8; | 183 block += 8; |
182 } | 184 } |
183 } | 185 } |
184 | 186 |
185 void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride){ | 187 void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2, |
186 DCTELEM *p; | 188 int stride){ |
187 int i; | 189 int i; |
188 | 190 |
189 /* read the pixels */ | 191 /* read the pixels */ |
190 p = block; | |
191 for(i=0;i<8;i++) { | 192 for(i=0;i<8;i++) { |
192 p[0] = s1[0] - s2[0]; | 193 block[0] = s1[0] - s2[0]; |
193 p[1] = s1[1] - s2[1]; | 194 block[1] = s1[1] - s2[1]; |
194 p[2] = s1[2] - s2[2]; | 195 block[2] = s1[2] - s2[2]; |
195 p[3] = s1[3] - s2[3]; | 196 block[3] = s1[3] - s2[3]; |
196 p[4] = s1[4] - s2[4]; | 197 block[4] = s1[4] - s2[4]; |
197 p[5] = s1[5] - s2[5]; | 198 block[5] = s1[5] - s2[5]; |
198 p[6] = s1[6] - s2[6]; | 199 block[6] = s1[6] - s2[6]; |
199 p[7] = s1[7] - s2[7]; | 200 block[7] = s1[7] - s2[7]; |
200 s1 += stride; | 201 s1 += stride; |
201 s2 += stride; | 202 s2 += stride; |
202 p += 8; | 203 block += 8; |
203 } | 204 } |
204 } | 205 } |
205 | 206 |
206 | 207 |
207 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size) | 208 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, |
208 { | 209 int line_size) |
209 const DCTELEM *p; | 210 { |
210 UINT8 *pix; | |
211 int i; | 211 int i; |
212 UINT8 *cm = cropTbl + MAX_NEG_CROP; | 212 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
213 | 213 |
214 /* read the pixels */ | 214 /* read the pixels */ |
215 p = block; | |
216 pix = pixels; | |
217 for(i=0;i<8;i++) { | 215 for(i=0;i<8;i++) { |
218 pix[0] = cm[p[0]]; | 216 pixels[0] = cm[block[0]]; |
219 pix[1] = cm[p[1]]; | 217 pixels[1] = cm[block[1]]; |
220 pix[2] = cm[p[2]]; | 218 pixels[2] = cm[block[2]]; |
221 pix[3] = cm[p[3]]; | 219 pixels[3] = cm[block[3]]; |
222 pix[4] = cm[p[4]]; | 220 pixels[4] = cm[block[4]]; |
223 pix[5] = cm[p[5]]; | 221 pixels[5] = cm[block[5]]; |
224 pix[6] = cm[p[6]]; | 222 pixels[6] = cm[block[6]]; |
225 pix[7] = cm[p[7]]; | 223 pixels[7] = cm[block[7]]; |
226 pix += line_size; | 224 |
227 p += 8; | 225 pixels += line_size; |
228 } | 226 block += 8; |
229 } | 227 } |
230 | 228 } |
231 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size) | 229 |
232 { | 230 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, |
233 const DCTELEM *p; | 231 int line_size) |
234 UINT8 *pix; | 232 { |
235 int i; | 233 int i; |
236 UINT8 *cm = cropTbl + MAX_NEG_CROP; | 234 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
237 | 235 |
238 /* read the pixels */ | 236 /* read the pixels */ |
239 p = block; | |
240 pix = pixels; | |
241 for(i=0;i<8;i++) { | 237 for(i=0;i<8;i++) { |
242 pix[0] = cm[pix[0] + p[0]]; | 238 pixels[0] = cm[pixels[0] + block[0]]; |
243 pix[1] = cm[pix[1] + p[1]]; | 239 pixels[1] = cm[pixels[1] + block[1]]; |
244 pix[2] = cm[pix[2] + p[2]]; | 240 pixels[2] = cm[pixels[2] + block[2]]; |
245 pix[3] = cm[pix[3] + p[3]]; | 241 pixels[3] = cm[pixels[3] + block[3]]; |
246 pix[4] = cm[pix[4] + p[4]]; | 242 pixels[4] = cm[pixels[4] + block[4]]; |
247 pix[5] = cm[pix[5] + p[5]]; | 243 pixels[5] = cm[pixels[5] + block[5]]; |
248 pix[6] = cm[pix[6] + p[6]]; | 244 pixels[6] = cm[pixels[6] + block[6]]; |
249 pix[7] = cm[pix[7] + p[7]]; | 245 pixels[7] = cm[pixels[7] + block[7]]; |
250 pix += line_size; | 246 pixels += line_size; |
251 p += 8; | 247 block += 8; |
252 } | 248 } |
253 } | 249 } |
254 | 250 |
255 #if 0 | 251 #if 0 |
256 | 252 |