comparison i386/fdct_mmx.c @ 2979:bfabfdf9ce55 libavcodec

COSMETICS: tabs --> spaces, some prettyprinting
author diego
date Thu, 22 Dec 2005 01:10:11 +0000
parents ef2149182f1c
children 96f9bd6a9ea9
comparison
equal deleted inserted replaced
2978:403183bbb505 2979:bfabfdf9ce55
28 // (8-byte) memory boundaries! Otherwise the unaligned memory access will 28 // (8-byte) memory boundaries! Otherwise the unaligned memory access will
29 // severely stall MMX execution. 29 // severely stall MMX execution.
30 // 30 //
31 ////////////////////////////////////////////////////////////////////// 31 //////////////////////////////////////////////////////////////////////
32 32
33 #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy 33 #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
34 #define SHIFT_FRW_COL BITS_FRW_ACC 34 #define SHIFT_FRW_COL BITS_FRW_ACC
35 #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3) 35 #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3)
36 #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) 36 #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1))
37 //#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1)) 37 //#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1))
38 38
39 //concatenated table, for forward DCT transformation 39 //concatenated table, for forward DCT transformation
40 static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = { 40 static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = {
41 13036, 13036, 13036, 13036, // tg * (1<<16) + 0.5 41 13036, 13036, 13036, 13036, // tg * (1<<16) + 0.5
42 27146, 27146, 27146, 27146, // tg * (1<<16) + 0.5 42 27146, 27146, 27146, 27146, // tg * (1<<16) + 0.5
43 -21746, -21746, -21746, -21746, // tg * (1<<16) + 0.5, wrapped to int16 43 -21746, -21746, -21746, -21746, // tg * (1<<16) + 0.5, wrapped to int16
44 }; 44 };
45 45
46 static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = { 46 static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = {
47 23170, 23170, 23170, 23170, //cos * (1<<15) + 0.5 47 23170, 23170, 23170, 23170, //cos * (1<<15) + 0.5
48 }; 48 };
49 49
50 static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL; 50 static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL;
51 51
52 static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW }; 52 static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW };
349 349
350 350
351 static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) 351 static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
352 { 352 {
353 asm volatile( 353 asm volatile(
354 ".macro FDCT_ROW_SSE2_H1 i t \n\t" 354 ".macro FDCT_ROW_SSE2_H1 i t \n\t"
355 "movq \\i(%0), %%xmm2 \n\t" 355 "movq \\i(%0), %%xmm2 \n\t"
356 "movq \\i+8(%0), %%xmm0 \n\t" 356 "movq \\i+8(%0), %%xmm0 \n\t"
357 "movdqa \\t+32(%1), %%xmm3 \n\t" 357 "movdqa \\t+32(%1), %%xmm3 \n\t"
358 "movdqa \\t+48(%1), %%xmm7 \n\t" 358 "movdqa \\t+48(%1), %%xmm7 \n\t"
359 "movdqa \\t(%1), %%xmm4 \n\t" 359 "movdqa \\t(%1), %%xmm4 \n\t"
360 "movdqa \\t+16(%1), %%xmm5 \n\t" 360 "movdqa \\t+16(%1), %%xmm5 \n\t"
361 ".endm \n\t" 361 ".endm \n\t"
362 ".macro FDCT_ROW_SSE2_H2 i t \n\t" 362 ".macro FDCT_ROW_SSE2_H2 i t \n\t"
363 "movq \\i(%0), %%xmm2 \n\t" 363 "movq \\i(%0), %%xmm2 \n\t"
364 "movq \\i+8(%0), %%xmm0 \n\t" 364 "movq \\i+8(%0), %%xmm0 \n\t"
365 "movdqa \\t+32(%1), %%xmm3 \n\t" 365 "movdqa \\t+32(%1), %%xmm3 \n\t"
366 "movdqa \\t+48(%1), %%xmm7 \n\t" 366 "movdqa \\t+48(%1), %%xmm7 \n\t"
367 ".endm \n\t" 367 ".endm \n\t"
368 ".macro FDCT_ROW_SSE2 i \n\t" 368 ".macro FDCT_ROW_SSE2 i \n\t"
369 "movq %%xmm2, %%xmm1 \n\t" 369 "movq %%xmm2, %%xmm1 \n\t"
370 "pshuflw $27, %%xmm0, %%xmm0 \n\t" 370 "pshuflw $27, %%xmm0, %%xmm0 \n\t"
371 "paddsw %%xmm0, %%xmm1 \n\t" 371 "paddsw %%xmm0, %%xmm1 \n\t"
372 "psubsw %%xmm0, %%xmm2 \n\t" 372 "psubsw %%xmm0, %%xmm2 \n\t"
373 "punpckldq %%xmm2, %%xmm1 \n\t" 373 "punpckldq %%xmm2, %%xmm1 \n\t"
374 "pshufd $78, %%xmm1, %%xmm2 \n\t" 374 "pshufd $78, %%xmm1, %%xmm2 \n\t"
375 "pmaddwd %%xmm2, %%xmm3 \n\t" 375 "pmaddwd %%xmm2, %%xmm3 \n\t"
376 "pmaddwd %%xmm1, %%xmm7 \n\t" 376 "pmaddwd %%xmm1, %%xmm7 \n\t"
377 "pmaddwd %%xmm5, %%xmm2 \n\t" 377 "pmaddwd %%xmm5, %%xmm2 \n\t"
378 "pmaddwd %%xmm4, %%xmm1 \n\t" 378 "pmaddwd %%xmm4, %%xmm1 \n\t"
379 "paddd %%xmm7, %%xmm3 \n\t" 379 "paddd %%xmm7, %%xmm3 \n\t"
380 "paddd %%xmm2, %%xmm1 \n\t" 380 "paddd %%xmm2, %%xmm1 \n\t"
381 "paddd %%xmm6, %%xmm3 \n\t" 381 "paddd %%xmm6, %%xmm3 \n\t"
382 "paddd %%xmm6, %%xmm1 \n\t" 382 "paddd %%xmm6, %%xmm1 \n\t"
383 "psrad %3, %%xmm3 \n\t" 383 "psrad %3, %%xmm3 \n\t"
384 "psrad %3, %%xmm1 \n\t" 384 "psrad %3, %%xmm1 \n\t"
385 "packssdw %%xmm3, %%xmm1 \n\t" 385 "packssdw %%xmm3, %%xmm1 \n\t"
386 "movdqa %%xmm1, \\i(%4) \n\t" 386 "movdqa %%xmm1, \\i(%4) \n\t"
387 ".endm \n\t" 387 ".endm \n\t"
388 "movdqa (%2), %%xmm6 \n\t" 388 "movdqa (%2), %%xmm6 \n\t"
389 "FDCT_ROW_SSE2_H1 0 0 \n\t" 389 "FDCT_ROW_SSE2_H1 0 0 \n\t"
390 "FDCT_ROW_SSE2 0 \n\t" 390 "FDCT_ROW_SSE2 0 \n\t"
391 "FDCT_ROW_SSE2_H2 64 0 \n\t" 391 "FDCT_ROW_SSE2_H2 64 0 \n\t"
392 "FDCT_ROW_SSE2 64 \n\t" 392 "FDCT_ROW_SSE2 64 \n\t"
393 393
394 "FDCT_ROW_SSE2_H1 16 64 \n\t" 394 "FDCT_ROW_SSE2_H1 16 64 \n\t"
395 "FDCT_ROW_SSE2 16 \n\t" 395 "FDCT_ROW_SSE2 16 \n\t"
396 "FDCT_ROW_SSE2_H2 112 64 \n\t" 396 "FDCT_ROW_SSE2_H2 112 64 \n\t"
397 "FDCT_ROW_SSE2 112 \n\t" 397 "FDCT_ROW_SSE2 112 \n\t"
398 398
399 "FDCT_ROW_SSE2_H1 32 128 \n\t" 399 "FDCT_ROW_SSE2_H1 32 128 \n\t"
400 "FDCT_ROW_SSE2 32 \n\t" 400 "FDCT_ROW_SSE2 32 \n\t"
401 "FDCT_ROW_SSE2_H2 96 128 \n\t" 401 "FDCT_ROW_SSE2_H2 96 128 \n\t"
402 "FDCT_ROW_SSE2 96 \n\t" 402 "FDCT_ROW_SSE2 96 \n\t"
403 403
404 "FDCT_ROW_SSE2_H1 48 192 \n\t" 404 "FDCT_ROW_SSE2_H1 48 192 \n\t"
405 "FDCT_ROW_SSE2 48 \n\t" 405 "FDCT_ROW_SSE2 48 \n\t"
406 "FDCT_ROW_SSE2_H2 80 192 \n\t" 406 "FDCT_ROW_SSE2_H2 80 192 \n\t"
407 "FDCT_ROW_SSE2 80 \n\t" 407 "FDCT_ROW_SSE2 80 \n\t"
408 : 408 :
409 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) 409 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
410 ); 410 );
411 } 411 }
412 412
413 static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) 413 static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
414 { 414 {