comparison i386/fdct_mmx.c @ 3564:96f9bd6a9ea9 libavcodec

Add support for Mac OS X Intel, part 2: assembler macros in fdct_mmx.c. Convert gas macros to cpp (C preprocessor) macros. Patch by John Dalgliesh % johnd AH defyne P org %. Original thread: Date: Aug 10, 2006 5:39 AM; Subject: Re: [Ffmpeg-devel] Mac OS X Intel part 2: Assembler macros in fdct_mmx.c
author gpoirier
date Thu, 10 Aug 2006 11:29:57 +0000
parents bfabfdf9ce55
children 8a62cb7e8d0f
comparison
equal deleted inserted replaced
3563:a3db61f32f5a 3564:96f9bd6a9ea9
349 349
350 350
351 static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) 351 static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
352 { 352 {
353 asm volatile( 353 asm volatile(
354 ".macro FDCT_ROW_SSE2_H1 i t \n\t" 354 #define FDCT_ROW_SSE2_H1(i,t) \
355 "movq \\i(%0), %%xmm2 \n\t" 355 "movq " #i "(%0), %%xmm2 \n\t" \
356 "movq \\i+8(%0), %%xmm0 \n\t" 356 "movq " #i "+8(%0), %%xmm0 \n\t" \
357 "movdqa \\t+32(%1), %%xmm3 \n\t" 357 "movdqa " #t "+32(%1), %%xmm3 \n\t" \
358 "movdqa \\t+48(%1), %%xmm7 \n\t" 358 "movdqa " #t "+48(%1), %%xmm7 \n\t" \
359 "movdqa \\t(%1), %%xmm4 \n\t" 359 "movdqa " #t "(%1), %%xmm4 \n\t" \
360 "movdqa \\t+16(%1), %%xmm5 \n\t" 360 "movdqa " #t "+16(%1), %%xmm5 \n\t"
361 ".endm \n\t" 361
362 ".macro FDCT_ROW_SSE2_H2 i t \n\t" 362 #define FDCT_ROW_SSE2_H2(i,t) \
363 "movq \\i(%0), %%xmm2 \n\t" 363 "movq " #i "(%0), %%xmm2 \n\t" \
364 "movq \\i+8(%0), %%xmm0 \n\t" 364 "movq " #i "+8(%0), %%xmm0 \n\t" \
365 "movdqa \\t+32(%1), %%xmm3 \n\t" 365 "movdqa " #t "+32(%1), %%xmm3 \n\t" \
366 "movdqa \\t+48(%1), %%xmm7 \n\t" 366 "movdqa " #t "+48(%1), %%xmm7 \n\t"
367 ".endm \n\t" 367
368 ".macro FDCT_ROW_SSE2 i \n\t" 368 #define FDCT_ROW_SSE2(i) \
369 "movq %%xmm2, %%xmm1 \n\t" 369 "movq %%xmm2, %%xmm1 \n\t" \
370 "pshuflw $27, %%xmm0, %%xmm0 \n\t" 370 "pshuflw $27, %%xmm0, %%xmm0 \n\t" \
371 "paddsw %%xmm0, %%xmm1 \n\t" 371 "paddsw %%xmm0, %%xmm1 \n\t" \
372 "psubsw %%xmm0, %%xmm2 \n\t" 372 "psubsw %%xmm0, %%xmm2 \n\t" \
373 "punpckldq %%xmm2, %%xmm1 \n\t" 373 "punpckldq %%xmm2, %%xmm1 \n\t" \
374 "pshufd $78, %%xmm1, %%xmm2 \n\t" 374 "pshufd $78, %%xmm1, %%xmm2 \n\t" \
375 "pmaddwd %%xmm2, %%xmm3 \n\t" 375 "pmaddwd %%xmm2, %%xmm3 \n\t" \
376 "pmaddwd %%xmm1, %%xmm7 \n\t" 376 "pmaddwd %%xmm1, %%xmm7 \n\t" \
377 "pmaddwd %%xmm5, %%xmm2 \n\t" 377 "pmaddwd %%xmm5, %%xmm2 \n\t" \
378 "pmaddwd %%xmm4, %%xmm1 \n\t" 378 "pmaddwd %%xmm4, %%xmm1 \n\t" \
379 "paddd %%xmm7, %%xmm3 \n\t" 379 "paddd %%xmm7, %%xmm3 \n\t" \
380 "paddd %%xmm2, %%xmm1 \n\t" 380 "paddd %%xmm2, %%xmm1 \n\t" \
381 "paddd %%xmm6, %%xmm3 \n\t" 381 "paddd %%xmm6, %%xmm3 \n\t" \
382 "paddd %%xmm6, %%xmm1 \n\t" 382 "paddd %%xmm6, %%xmm1 \n\t" \
383 "psrad %3, %%xmm3 \n\t" 383 "psrad %3, %%xmm3 \n\t" \
384 "psrad %3, %%xmm1 \n\t" 384 "psrad %3, %%xmm1 \n\t" \
385 "packssdw %%xmm3, %%xmm1 \n\t" 385 "packssdw %%xmm3, %%xmm1 \n\t" \
386 "movdqa %%xmm1, \\i(%4) \n\t" 386 "movdqa %%xmm1, " #i "(%4) \n\t"
387 ".endm \n\t" 387
388 "movdqa (%2), %%xmm6 \n\t" 388 "movdqa (%2), %%xmm6 \n\t"
389 "FDCT_ROW_SSE2_H1 0 0 \n\t" 389 FDCT_ROW_SSE2_H1(0,0)
390 "FDCT_ROW_SSE2 0 \n\t" 390 FDCT_ROW_SSE2(0)
391 "FDCT_ROW_SSE2_H2 64 0 \n\t" 391 FDCT_ROW_SSE2_H2(64,0)
392 "FDCT_ROW_SSE2 64 \n\t" 392 FDCT_ROW_SSE2(64)
393 393
394 "FDCT_ROW_SSE2_H1 16 64 \n\t" 394 FDCT_ROW_SSE2_H1(16,64)
395 "FDCT_ROW_SSE2 16 \n\t" 395 FDCT_ROW_SSE2(16)
396 "FDCT_ROW_SSE2_H2 112 64 \n\t" 396 FDCT_ROW_SSE2_H2(112,64)
397 "FDCT_ROW_SSE2 112 \n\t" 397 FDCT_ROW_SSE2(112)
398 398
399 "FDCT_ROW_SSE2_H1 32 128 \n\t" 399 FDCT_ROW_SSE2_H1(32,128)
400 "FDCT_ROW_SSE2 32 \n\t" 400 FDCT_ROW_SSE2(32)
401 "FDCT_ROW_SSE2_H2 96 128 \n\t" 401 FDCT_ROW_SSE2_H2(96,128)
402 "FDCT_ROW_SSE2 96 \n\t" 402 FDCT_ROW_SSE2(96)
403 403
404 "FDCT_ROW_SSE2_H1 48 192 \n\t" 404 FDCT_ROW_SSE2_H1(48,192)
405 "FDCT_ROW_SSE2 48 \n\t" 405 FDCT_ROW_SSE2(48)
406 "FDCT_ROW_SSE2_H2 80 192 \n\t" 406 FDCT_ROW_SSE2_H2(80,192)
407 "FDCT_ROW_SSE2 80 \n\t" 407 FDCT_ROW_SSE2(80)
408 : 408 :
409 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) 409 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
410 ); 410 );
411 } 411 }
412 412