comparison i386/vp3dsp_mmx.c @ 7742:bff9b5fea03f libavcodec

Use ff_pw_8 in MMX/SSE VP3 IDCT
author conrad
date Sat, 30 Aug 2008 19:41:42 +0000
parents f7cbb7733146
children 892ca48b7d76
comparison
Legend: equal | deleted | inserted | replaced
7741:dbb5ab337349 7742:bff9b5fea03f
22 * @file vp3dsp_mmx.c 22 * @file vp3dsp_mmx.c
23 * MMX-optimized functions cribbed from the original VP3 source code. 23 * MMX-optimized functions cribbed from the original VP3 source code.
24 */ 24 */
25 25
26 #include "libavcodec/dsputil.h" 26 #include "libavcodec/dsputil.h"
27 #include "dsputil_mmx.h"
27 #include "mmx.h" 28 #include "mmx.h"
28
29 #define IdctAdjustBeforeShift 8
30 29
31 /* (12 * 4) 2-byte memory locations ( = 96 bytes total) 30 /* (11 * 4) 2-byte memory locations ( = 88 bytes total)
32 * idct_constants[0..15] = Mask table (M(I)) 31 * idct_constants[0..15] = Mask table (M(I))
33 * idct_constants[16..43] = Cosine table (C(I)) 32 * idct_constants[16..43] = Cosine table (C(I))
34 * idct_constants[44..47] = 8 33 * (the rounding constant 8 is no longer stored here; it is read from ff_pw_8)
35 */ 34 */
36 static uint16_t idct_constants[(4 + 7 + 1) * 4]; 35 static uint16_t idct_constants[(4 + 7) * 4];
37 static const uint16_t idct_cosine_table[7] = { 36 static const uint16_t idct_cosine_table[7] = {
38 64277, 60547, 54491, 46341, 36410, 25080, 12785 37 64277, 60547, 54491, 46341, 36410, 25080, 12785
39 }; 38 };
40 39
41 #define r0 mm0 40 #define r0 mm0
267 j = 1; 266 j = 1;
268 do { 267 do {
269 p = idct_constants + ((j + 3) << 2); 268 p = idct_constants + ((j + 3) << 2);
270 p[0] = p[1] = p[2] = p[3] = idct_cosine_table[j - 1]; 269 p[0] = p[1] = p[2] = p[3] = idct_cosine_table[j - 1];
271 } while (++j <= 7); 270 } while (++j <= 7);
272
273 idct_constants[44] = idct_constants[45] =
274 idct_constants[46] = idct_constants[47] = IdctAdjustBeforeShift;
275 } 271 }
276 272
277 void ff_vp3_idct_mmx(int16_t *output_data) 273 void ff_vp3_idct_mmx(int16_t *output_data)
278 { 274 {
279 /* eax = quantized input 275 /* eax = quantized input
284 * edx = output 280 * edx = output
285 * r0..r7 = mm0..mm7 281 * r0..r7 = mm0..mm7
286 */ 282 */
287 283
288 #define C(x) (idct_constants + 16 + (x - 1) * 4) 284 #define C(x) (idct_constants + 16 + (x - 1) * 4)
289 #define Eight (idct_constants + 44) 285 #define Eight (&ff_pw_8)
290 286
291 /* at this point, function has completed dequantization + dezigzag + 287 /* at this point, function has completed dequantization + dezigzag +
292 * partial transposition; now do the idct itself */ 288 * partial transposition; now do the idct itself */
293 #define I(K) (output_data + K * 8) 289 #define I(K) (output_data + K * 8)
294 #define J(K) (output_data + ((K - 4) * 8) + 4) 290 #define J(K) (output_data + ((K - 4) * 8) + 4)