Mercurial > libavcodec.hg
comparison i386/fdct_mmx.c @ 2979:bfabfdf9ce55 libavcodec
COSMETICS: tabs --> spaces, some prettyprinting
author | diego |
---|---|
date | Thu, 22 Dec 2005 01:10:11 +0000 |
parents | ef2149182f1c |
children | 96f9bd6a9ea9 |
comparison
equal
deleted
inserted
replaced
2978:403183bbb505 | 2979:bfabfdf9ce55 |
---|---|
28 // (8-byte) memory boundaries! Otherwise the unaligned memory access will | 28 // (8-byte) memory boundaries! Otherwise the unaligned memory access will |
29 // severely stall MMX execution. | 29 // severely stall MMX execution. |
30 // | 30 // |
31 ////////////////////////////////////////////////////////////////////// | 31 ////////////////////////////////////////////////////////////////////// |
32 | 32 |
33 #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy | 33 #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy |
34 #define SHIFT_FRW_COL BITS_FRW_ACC | 34 #define SHIFT_FRW_COL BITS_FRW_ACC |
35 #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3) | 35 #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3) |
36 #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) | 36 #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) |
37 //#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1)) | 37 //#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1)) |
38 | 38 |
39 //concatenated table, for forward DCT transformation | 39 //concatenated table, for forward DCT transformation |
40 static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = { | 40 static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = { |
41 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 | 41 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 |
42 27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5 | 42 27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5 |
43 -21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5 | 43 -21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5 |
44 }; | 44 }; |
45 | 45 |
46 static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = { | 46 static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = { |
47 23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5 | 47 23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5 |
48 }; | 48 }; |
49 | 49 |
50 static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL; | 50 static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL; |
51 | 51 |
52 static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW }; | 52 static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW }; |
349 | 349 |
350 | 350 |
351 static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) | 351 static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) |
352 { | 352 { |
353 asm volatile( | 353 asm volatile( |
354 ".macro FDCT_ROW_SSE2_H1 i t \n\t" | 354 ".macro FDCT_ROW_SSE2_H1 i t \n\t" |
355 "movq \\i(%0), %%xmm2 \n\t" | 355 "movq \\i(%0), %%xmm2 \n\t" |
356 "movq \\i+8(%0), %%xmm0 \n\t" | 356 "movq \\i+8(%0), %%xmm0 \n\t" |
357 "movdqa \\t+32(%1), %%xmm3 \n\t" | 357 "movdqa \\t+32(%1), %%xmm3 \n\t" |
358 "movdqa \\t+48(%1), %%xmm7 \n\t" | 358 "movdqa \\t+48(%1), %%xmm7 \n\t" |
359 "movdqa \\t(%1), %%xmm4 \n\t" | 359 "movdqa \\t(%1), %%xmm4 \n\t" |
360 "movdqa \\t+16(%1), %%xmm5 \n\t" | 360 "movdqa \\t+16(%1), %%xmm5 \n\t" |
361 ".endm \n\t" | 361 ".endm \n\t" |
362 ".macro FDCT_ROW_SSE2_H2 i t \n\t" | 362 ".macro FDCT_ROW_SSE2_H2 i t \n\t" |
363 "movq \\i(%0), %%xmm2 \n\t" | 363 "movq \\i(%0), %%xmm2 \n\t" |
364 "movq \\i+8(%0), %%xmm0 \n\t" | 364 "movq \\i+8(%0), %%xmm0 \n\t" |
365 "movdqa \\t+32(%1), %%xmm3 \n\t" | 365 "movdqa \\t+32(%1), %%xmm3 \n\t" |
366 "movdqa \\t+48(%1), %%xmm7 \n\t" | 366 "movdqa \\t+48(%1), %%xmm7 \n\t" |
367 ".endm \n\t" | 367 ".endm \n\t" |
368 ".macro FDCT_ROW_SSE2 i \n\t" | 368 ".macro FDCT_ROW_SSE2 i \n\t" |
369 "movq %%xmm2, %%xmm1 \n\t" | 369 "movq %%xmm2, %%xmm1 \n\t" |
370 "pshuflw $27, %%xmm0, %%xmm0 \n\t" | 370 "pshuflw $27, %%xmm0, %%xmm0 \n\t" |
371 "paddsw %%xmm0, %%xmm1 \n\t" | 371 "paddsw %%xmm0, %%xmm1 \n\t" |
372 "psubsw %%xmm0, %%xmm2 \n\t" | 372 "psubsw %%xmm0, %%xmm2 \n\t" |
373 "punpckldq %%xmm2, %%xmm1 \n\t" | 373 "punpckldq %%xmm2, %%xmm1 \n\t" |
374 "pshufd $78, %%xmm1, %%xmm2 \n\t" | 374 "pshufd $78, %%xmm1, %%xmm2 \n\t" |
375 "pmaddwd %%xmm2, %%xmm3 \n\t" | 375 "pmaddwd %%xmm2, %%xmm3 \n\t" |
376 "pmaddwd %%xmm1, %%xmm7 \n\t" | 376 "pmaddwd %%xmm1, %%xmm7 \n\t" |
377 "pmaddwd %%xmm5, %%xmm2 \n\t" | 377 "pmaddwd %%xmm5, %%xmm2 \n\t" |
378 "pmaddwd %%xmm4, %%xmm1 \n\t" | 378 "pmaddwd %%xmm4, %%xmm1 \n\t" |
379 "paddd %%xmm7, %%xmm3 \n\t" | 379 "paddd %%xmm7, %%xmm3 \n\t" |
380 "paddd %%xmm2, %%xmm1 \n\t" | 380 "paddd %%xmm2, %%xmm1 \n\t" |
381 "paddd %%xmm6, %%xmm3 \n\t" | 381 "paddd %%xmm6, %%xmm3 \n\t" |
382 "paddd %%xmm6, %%xmm1 \n\t" | 382 "paddd %%xmm6, %%xmm1 \n\t" |
383 "psrad %3, %%xmm3 \n\t" | 383 "psrad %3, %%xmm3 \n\t" |
384 "psrad %3, %%xmm1 \n\t" | 384 "psrad %3, %%xmm1 \n\t" |
385 "packssdw %%xmm3, %%xmm1 \n\t" | 385 "packssdw %%xmm3, %%xmm1 \n\t" |
386 "movdqa %%xmm1, \\i(%4) \n\t" | 386 "movdqa %%xmm1, \\i(%4) \n\t" |
387 ".endm \n\t" | 387 ".endm \n\t" |
388 "movdqa (%2), %%xmm6 \n\t" | 388 "movdqa (%2), %%xmm6 \n\t" |
389 "FDCT_ROW_SSE2_H1 0 0 \n\t" | 389 "FDCT_ROW_SSE2_H1 0 0 \n\t" |
390 "FDCT_ROW_SSE2 0 \n\t" | 390 "FDCT_ROW_SSE2 0 \n\t" |
391 "FDCT_ROW_SSE2_H2 64 0 \n\t" | 391 "FDCT_ROW_SSE2_H2 64 0 \n\t" |
392 "FDCT_ROW_SSE2 64 \n\t" | 392 "FDCT_ROW_SSE2 64 \n\t" |
393 | 393 |
394 "FDCT_ROW_SSE2_H1 16 64 \n\t" | 394 "FDCT_ROW_SSE2_H1 16 64 \n\t" |
395 "FDCT_ROW_SSE2 16 \n\t" | 395 "FDCT_ROW_SSE2 16 \n\t" |
396 "FDCT_ROW_SSE2_H2 112 64 \n\t" | 396 "FDCT_ROW_SSE2_H2 112 64 \n\t" |
397 "FDCT_ROW_SSE2 112 \n\t" | 397 "FDCT_ROW_SSE2 112 \n\t" |
398 | 398 |
399 "FDCT_ROW_SSE2_H1 32 128 \n\t" | 399 "FDCT_ROW_SSE2_H1 32 128 \n\t" |
400 "FDCT_ROW_SSE2 32 \n\t" | 400 "FDCT_ROW_SSE2 32 \n\t" |
401 "FDCT_ROW_SSE2_H2 96 128 \n\t" | 401 "FDCT_ROW_SSE2_H2 96 128 \n\t" |
402 "FDCT_ROW_SSE2 96 \n\t" | 402 "FDCT_ROW_SSE2 96 \n\t" |
403 | 403 |
404 "FDCT_ROW_SSE2_H1 48 192 \n\t" | 404 "FDCT_ROW_SSE2_H1 48 192 \n\t" |
405 "FDCT_ROW_SSE2 48 \n\t" | 405 "FDCT_ROW_SSE2 48 \n\t" |
406 "FDCT_ROW_SSE2_H2 80 192 \n\t" | 406 "FDCT_ROW_SSE2_H2 80 192 \n\t" |
407 "FDCT_ROW_SSE2 80 \n\t" | 407 "FDCT_ROW_SSE2 80 \n\t" |
408 : | 408 : |
409 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) | 409 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) |
410 ); | 410 ); |
411 } | 411 } |
412 | 412 |
413 static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) | 413 static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) |
414 { | 414 { |