Mercurial > libavcodec.hg
comparison i386/fdct_mmx.c @ 3564:96f9bd6a9ea9 libavcodec
Add support for Mac OS X Intel part 2: Assembler macros in fdct_mmx.c
convert gas macros to cpp macros
Patch by John Dalgliesh % johnd AH defyne P org %
Original thread:
Date: Aug 10, 2006 5:39 AM
Subject: Re: [Ffmpeg-devel] Mac OS X Intel part 2: Assembler macros in fdct_mmx.c
author | gpoirier |
---|---|
date | Thu, 10 Aug 2006 11:29:57 +0000 |
parents | bfabfdf9ce55 |
children | 8a62cb7e8d0f |
comparison
equal
deleted
inserted
replaced
3563:a3db61f32f5a | 3564:96f9bd6a9ea9 |
---|---|
349 | 349 |
350 | 350 |
351 static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) | 351 static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) |
352 { | 352 { |
353 asm volatile( | 353 asm volatile( |
354 ".macro FDCT_ROW_SSE2_H1 i t \n\t" | 354 #define FDCT_ROW_SSE2_H1(i,t) \ |
355 "movq \\i(%0), %%xmm2 \n\t" | 355 "movq " #i "(%0), %%xmm2 \n\t" \ |
356 "movq \\i+8(%0), %%xmm0 \n\t" | 356 "movq " #i "+8(%0), %%xmm0 \n\t" \ |
357 "movdqa \\t+32(%1), %%xmm3 \n\t" | 357 "movdqa " #t "+32(%1), %%xmm3 \n\t" \ |
358 "movdqa \\t+48(%1), %%xmm7 \n\t" | 358 "movdqa " #t "+48(%1), %%xmm7 \n\t" \ |
359 "movdqa \\t(%1), %%xmm4 \n\t" | 359 "movdqa " #t "(%1), %%xmm4 \n\t" \ |
360 "movdqa \\t+16(%1), %%xmm5 \n\t" | 360 "movdqa " #t "+16(%1), %%xmm5 \n\t" |
361 ".endm \n\t" | 361 |
362 ".macro FDCT_ROW_SSE2_H2 i t \n\t" | 362 #define FDCT_ROW_SSE2_H2(i,t) \ |
363 "movq \\i(%0), %%xmm2 \n\t" | 363 "movq " #i "(%0), %%xmm2 \n\t" \ |
364 "movq \\i+8(%0), %%xmm0 \n\t" | 364 "movq " #i "+8(%0), %%xmm0 \n\t" \ |
365 "movdqa \\t+32(%1), %%xmm3 \n\t" | 365 "movdqa " #t "+32(%1), %%xmm3 \n\t" \ |
366 "movdqa \\t+48(%1), %%xmm7 \n\t" | 366 "movdqa " #t "+48(%1), %%xmm7 \n\t" |
367 ".endm \n\t" | 367 |
368 ".macro FDCT_ROW_SSE2 i \n\t" | 368 #define FDCT_ROW_SSE2(i) \ |
369 "movq %%xmm2, %%xmm1 \n\t" | 369 "movq %%xmm2, %%xmm1 \n\t" \ |
370 "pshuflw $27, %%xmm0, %%xmm0 \n\t" | 370 "pshuflw $27, %%xmm0, %%xmm0 \n\t" \ |
371 "paddsw %%xmm0, %%xmm1 \n\t" | 371 "paddsw %%xmm0, %%xmm1 \n\t" \ |
372 "psubsw %%xmm0, %%xmm2 \n\t" | 372 "psubsw %%xmm0, %%xmm2 \n\t" \ |
373 "punpckldq %%xmm2, %%xmm1 \n\t" | 373 "punpckldq %%xmm2, %%xmm1 \n\t" \ |
374 "pshufd $78, %%xmm1, %%xmm2 \n\t" | 374 "pshufd $78, %%xmm1, %%xmm2 \n\t" \ |
375 "pmaddwd %%xmm2, %%xmm3 \n\t" | 375 "pmaddwd %%xmm2, %%xmm3 \n\t" \ |
376 "pmaddwd %%xmm1, %%xmm7 \n\t" | 376 "pmaddwd %%xmm1, %%xmm7 \n\t" \ |
377 "pmaddwd %%xmm5, %%xmm2 \n\t" | 377 "pmaddwd %%xmm5, %%xmm2 \n\t" \ |
378 "pmaddwd %%xmm4, %%xmm1 \n\t" | 378 "pmaddwd %%xmm4, %%xmm1 \n\t" \ |
379 "paddd %%xmm7, %%xmm3 \n\t" | 379 "paddd %%xmm7, %%xmm3 \n\t" \ |
380 "paddd %%xmm2, %%xmm1 \n\t" | 380 "paddd %%xmm2, %%xmm1 \n\t" \ |
381 "paddd %%xmm6, %%xmm3 \n\t" | 381 "paddd %%xmm6, %%xmm3 \n\t" \ |
382 "paddd %%xmm6, %%xmm1 \n\t" | 382 "paddd %%xmm6, %%xmm1 \n\t" \ |
383 "psrad %3, %%xmm3 \n\t" | 383 "psrad %3, %%xmm3 \n\t" \ |
384 "psrad %3, %%xmm1 \n\t" | 384 "psrad %3, %%xmm1 \n\t" \ |
385 "packssdw %%xmm3, %%xmm1 \n\t" | 385 "packssdw %%xmm3, %%xmm1 \n\t" \ |
386 "movdqa %%xmm1, \\i(%4) \n\t" | 386 "movdqa %%xmm1, " #i "(%4) \n\t" |
387 ".endm \n\t" | 387 |
388 "movdqa (%2), %%xmm6 \n\t" | 388 "movdqa (%2), %%xmm6 \n\t" |
389 "FDCT_ROW_SSE2_H1 0 0 \n\t" | 389 FDCT_ROW_SSE2_H1(0,0) |
390 "FDCT_ROW_SSE2 0 \n\t" | 390 FDCT_ROW_SSE2(0) |
391 "FDCT_ROW_SSE2_H2 64 0 \n\t" | 391 FDCT_ROW_SSE2_H2(64,0) |
392 "FDCT_ROW_SSE2 64 \n\t" | 392 FDCT_ROW_SSE2(64) |
393 | 393 |
394 "FDCT_ROW_SSE2_H1 16 64 \n\t" | 394 FDCT_ROW_SSE2_H1(16,64) |
395 "FDCT_ROW_SSE2 16 \n\t" | 395 FDCT_ROW_SSE2(16) |
396 "FDCT_ROW_SSE2_H2 112 64 \n\t" | 396 FDCT_ROW_SSE2_H2(112,64) |
397 "FDCT_ROW_SSE2 112 \n\t" | 397 FDCT_ROW_SSE2(112) |
398 | 398 |
399 "FDCT_ROW_SSE2_H1 32 128 \n\t" | 399 FDCT_ROW_SSE2_H1(32,128) |
400 "FDCT_ROW_SSE2 32 \n\t" | 400 FDCT_ROW_SSE2(32) |
401 "FDCT_ROW_SSE2_H2 96 128 \n\t" | 401 FDCT_ROW_SSE2_H2(96,128) |
402 "FDCT_ROW_SSE2 96 \n\t" | 402 FDCT_ROW_SSE2(96) |
403 | 403 |
404 "FDCT_ROW_SSE2_H1 48 192 \n\t" | 404 FDCT_ROW_SSE2_H1(48,192) |
405 "FDCT_ROW_SSE2 48 \n\t" | 405 FDCT_ROW_SSE2(48) |
406 "FDCT_ROW_SSE2_H2 80 192 \n\t" | 406 FDCT_ROW_SSE2_H2(80,192) |
407 "FDCT_ROW_SSE2 80 \n\t" | 407 FDCT_ROW_SSE2(80) |
408 : | 408 : |
409 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) | 409 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) |
410 ); | 410 ); |
411 } | 411 } |
412 | 412 |