Mercurial > libavcodec.hg
comparison x86/x86inc.asm @ 11931:980030a3e315 libavcodec
Update x264asm header files to latest versions.
Modify the asm accordingly.
GLOBAL is now no longer necessary for PIC-compliant loads.
author | darkshikari |
---|---|
date | Wed, 23 Jun 2010 19:20:46 +0000 |
parents | 669965580b72 |
children | bc13f76ecfbf |
comparison
equal
deleted
inserted
replaced
11930:1e8556438209 | 11931:980030a3e315 |
---|---|
1 ;***************************************************************************** | 1 ;***************************************************************************** |
2 ;* x86inc.asm | 2 ;* x86inc.asm |
3 ;***************************************************************************** | 3 ;***************************************************************************** |
4 ;* Copyright (C) 2005-2008 Loren Merritt <lorenm@u.washington.edu> | 4 ;* Copyright (C) 2005-2008 x264 project |
5 ;* | 5 ;* |
6 ;* This file is part of FFmpeg. | 6 ;* Authors: Loren Merritt <lorenm@u.washington.edu> |
7 ;* Anton Mitrofanov <BugMaster@narod.ru> | |
7 ;* | 8 ;* |
8 ;* FFmpeg is free software; you can redistribute it and/or | 9 ;* Permission to use, copy, modify, and/or distribute this software for any |
9 ;* modify it under the terms of the GNU Lesser General Public | 10 ;* purpose with or without fee is hereby granted, provided that the above |
10 ;* License as published by the Free Software Foundation; either | 11 ;* copyright notice and this permission notice appear in all copies. |
11 ;* version 2.1 of the License, or (at your option) any later version. | |
12 ;* | 12 ;* |
13 ;* FFmpeg is distributed in the hope that it will be useful, | 13 ;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | 14 ;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 15 ;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
16 ;* Lesser General Public License for more details. | 16 ;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
17 ;* | 17 ;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
18 ;* You should have received a copy of the GNU Lesser General Public | 18 ;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
19 ;* License along with FFmpeg; if not, write to the Free Software | 19 ;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 ;***************************************************************************** | 20 ;***************************************************************************** |
21 | |
22 ; This is a header file for the x264ASM assembly language, which uses | |
23 ; NASM/YASM syntax combined with a large number of macros to provide easy | |
24 ; abstraction between different calling conventions (x86_32, win64, linux64). | |
25 ; It also has various other useful features to simplify writing the kind of | |
26 ; DSP functions that are most often used in x264. | |
27 | |
28 ; Unlike the rest of x264, this file is available under an ISC license, as it | |
29 ; has significant usefulness outside of x264 and we want it to be available | |
30 ; to the largest audience possible. Of course, if you modify it for your own | |
31 ; purposes to add a new feature, we strongly encourage contributing a patch | |
32 ; as this feature might be useful for others as well. Send patches or ideas | |
33 ; to x264-devel@videolan.org . | |
34 | |
35 %define program_name ff | |
22 | 36 |
23 %ifdef ARCH_X86_64 | 37 %ifdef ARCH_X86_64 |
24 %ifidn __OUTPUT_FORMAT__,win32 | 38 %ifidn __OUTPUT_FORMAT__,win32 |
25 %define WIN64 | 39 %define WIN64 |
26 %else | 40 %else |
27 %define UNIX64 | 41 %define UNIX64 |
28 %endif | 42 %endif |
43 %endif | |
44 | |
45 %ifdef PREFIX | |
46 %define mangle(x) _ %+ x | |
47 %else | |
48 %define mangle(x) x | |
29 %endif | 49 %endif |
30 | 50 |
31 ; FIXME: All of the 64bit asm functions that take a stride as an argument | 51 ; FIXME: All of the 64bit asm functions that take a stride as an argument |
32 ; via register, assume that the high dword of that register is filled with 0. | 52 ; via register, assume that the high dword of that register is filled with 0. |
33 ; This is true in practice (since we never do any 64bit arithmetic on strides, | 53 ; This is true in practice (since we never do any 64bit arithmetic on strides, |
45 %else | 65 %else |
46 SECTION .rodata align=%1 | 66 SECTION .rodata align=%1 |
47 %endif | 67 %endif |
48 %endmacro | 68 %endmacro |
49 | 69 |
50 ; PIC support macros. | 70 %ifdef WIN64 |
51 ; x86_64 can't fit 64bit address literals in most instruction types, | 71 %define PIC |
52 ; so shared objects (under the assumption that they might be anywhere | 72 %elifndef ARCH_X86_64 |
53 ; in memory) must use an address mode that does fit. | |
54 ; So all accesses to global variables must use this macro, e.g. | |
55 ; mov eax, [foo GLOBAL] | |
56 ; instead of | |
57 ; mov eax, [foo] | |
58 ; | |
59 ; x86_32 doesn't require PIC. | 73 ; x86_32 doesn't require PIC. |
60 ; Some distros prefer shared objects to be PIC, but nothing breaks if | 74 ; Some distros prefer shared objects to be PIC, but nothing breaks if |
61 ; the code contains a few textrels, so we'll skip that complexity. | 75 ; the code contains a few textrels, so we'll skip that complexity. |
62 | |
63 %ifdef WIN64 | |
64 %define PIC | |
65 %elifndef ARCH_X86_64 | |
66 %undef PIC | 76 %undef PIC |
67 %endif | 77 %endif |
68 %ifdef PIC | 78 %ifdef PIC |
69 %define GLOBAL wrt rip | 79 default rel |
70 %else | |
71 %define GLOBAL | |
72 %endif | 80 %endif |
73 | 81 |
74 ; Macros to eliminate most code duplication between x86_32 and x86_64: | 82 ; Macros to eliminate most code duplication between x86_32 and x86_64: |
75 ; Currently this works only for leaf functions which load all their arguments | 83 ; Currently this works only for leaf functions which load all their arguments |
76 ; into registers at the start, and make no other use of the stack. Luckily that | 84 ; into registers at the start, and make no other use of the stack. Luckily that |
161 %define t%1b t%1 %+ b | 169 %define t%1b t%1 %+ b |
162 %rotate 1 | 170 %rotate 1 |
163 %endrep | 171 %endrep |
164 %endmacro | 172 %endmacro |
165 | 173 |
166 DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7 | 174 DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9 |
167 | 175 |
168 %ifdef ARCH_X86_64 | 176 %ifdef ARCH_X86_64 |
169 %define gprsize 8 | 177 %define gprsize 8 |
170 %else | 178 %else |
171 %define gprsize 4 | 179 %define gprsize 4 |
257 %if %1 < %2 | 265 %if %1 < %2 |
258 mov r%1, [rsp + stack_offset + 8 + %1*8] | 266 mov r%1, [rsp + stack_offset + 8 + %1*8] |
259 %endif | 267 %endif |
260 %endmacro | 268 %endmacro |
261 | 269 |
262 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names... | 270 %macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names... |
263 ASSERT %2 >= %1 | 271 ASSERT %2 >= %1 |
264 %assign regs_used %2 | 272 %assign regs_used %2 |
265 ASSERT regs_used <= 7 | 273 ASSERT regs_used <= 7 |
266 %if %0 > 2 | 274 %assign xmm_regs_used %3 |
267 %assign xmm_regs_used %3 | |
268 %else | |
269 %assign xmm_regs_used 0 | |
270 %endif | |
271 ASSERT xmm_regs_used <= 16 | 275 ASSERT xmm_regs_used <= 16 |
272 %if regs_used > 4 | 276 %if regs_used > 4 |
273 push r4 | 277 push r4 |
274 push r5 | 278 push r5 |
275 %assign stack_offset stack_offset+16 | 279 %assign stack_offset stack_offset+16 |
386 %if %1 < %2 | 390 %if %1 < %2 |
387 mov r%1, [esp + stack_offset + 4 + %1*4] | 391 mov r%1, [esp + stack_offset + 4 + %1*4] |
388 %endif | 392 %endif |
389 %endmacro | 393 %endmacro |
390 | 394 |
391 %macro PROLOGUE 2-4+ ; #args, #regs, arg_names... | 395 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names... |
392 ASSERT %2 >= %1 | 396 ASSERT %2 >= %1 |
393 %assign regs_used %2 | 397 %assign regs_used %2 |
394 ASSERT regs_used <= 7 | 398 ASSERT regs_used <= 7 |
395 PUSH_IF_USED 3 | 399 PUSH_IF_USED 3 |
396 PUSH_IF_USED 4 | 400 PUSH_IF_USED 4 |
432 | 436 |
433 %assign function_align 16 | 437 %assign function_align 16 |
434 | 438 |
435 ; Symbol prefix for C linkage | 439 ; Symbol prefix for C linkage |
436 %macro cglobal 1-2+ | 440 %macro cglobal 1-2+ |
437 %xdefine %1 ff_%1 | 441 %xdefine %1 mangle(program_name %+ _ %+ %1) |
438 %ifdef PREFIX | |
439 %xdefine %1 _ %+ %1 | |
440 %endif | |
441 %xdefine %1.skip_prologue %1 %+ .skip_prologue | 442 %xdefine %1.skip_prologue %1 %+ .skip_prologue |
442 %ifidn __OUTPUT_FORMAT__,elf | 443 %ifidn __OUTPUT_FORMAT__,elf |
443 global %1:function hidden | 444 global %1:function hidden |
444 %else | 445 %else |
445 global %1 | 446 global %1 |
452 PROLOGUE %2 | 453 PROLOGUE %2 |
453 %endif | 454 %endif |
454 %endmacro | 455 %endmacro |
455 | 456 |
456 %macro cextern 1 | 457 %macro cextern 1 |
457 %ifdef PREFIX | 458 %xdefine %1 mangle(program_name %+ _ %+ %1) |
458 %xdefine %1 _%1 | |
459 %endif | |
460 extern %1 | 459 extern %1 |
460 %endmacro | |
461 | |
462 ;like cextern, but without the prefix | |
463 %macro cextern_naked 1 | |
464 %xdefine %1 mangle(%1) | |
465 extern %1 | |
466 %endmacro | |
467 | |
468 %macro const 2+ | |
469 %xdefine %1 mangle(program_name %+ _ %+ %1) | |
470 global %1 | |
471 %1: %2 | |
461 %endmacro | 472 %endmacro |
462 | 473 |
463 ; This is needed for ELF, otherwise the GNU linker assumes the stack is | 474 ; This is needed for ELF, otherwise the GNU linker assumes the stack is |
464 ; executable by default. | 475 ; executable by default. |
465 %ifidn __OUTPUT_FORMAT__,elf | 476 %ifidn __OUTPUT_FORMAT__,elf |
466 SECTION .note.GNU-stack noalloc noexec nowrite progbits | 477 SECTION .note.GNU-stack noalloc noexec nowrite progbits |
467 %endif | 478 %endif |
468 | |
469 %assign FENC_STRIDE 16 | |
470 %assign FDEC_STRIDE 32 | |
471 | 479 |
472 ; merge mmx and sse* | 480 ; merge mmx and sse* |
473 | 481 |
474 %macro CAT_XDEFINE 3 | 482 %macro CAT_XDEFINE 3 |
475 %xdefine %1%2 %3 | 483 %xdefine %1%2 %3 |
573 %undef tmp | 581 %undef tmp |
574 %rotate 1 | 582 %rotate 1 |
575 %endrep | 583 %endrep |
576 %endmacro | 584 %endmacro |
577 | 585 |
578 %macro SAVE_MM_PERMUTATION 1 | 586 ; If SAVE_MM_PERMUTATION is placed at the end of a function and given the |
587 ; function name, then any later calls to that function will automatically | |
588 ; load the permutation, so values can be returned in mmregs. | |
589 %macro SAVE_MM_PERMUTATION 1 ; name to save as | |
579 %assign %%i 0 | 590 %assign %%i 0 |
580 %rep num_mmregs | 591 %rep num_mmregs |
581 CAT_XDEFINE %1_m, %%i, m %+ %%i | 592 CAT_XDEFINE %1_m, %%i, m %+ %%i |
582 %assign %%i %%i+1 | 593 %assign %%i %%i+1 |
583 %endrep | 594 %endrep |
584 %endmacro | 595 %endmacro |
585 | 596 |
586 %macro LOAD_MM_PERMUTATION 1 | 597 %macro LOAD_MM_PERMUTATION 1 ; name to load from |
587 %assign %%i 0 | 598 %assign %%i 0 |
588 %rep num_mmregs | 599 %rep num_mmregs |
589 CAT_XDEFINE m, %%i, %1_m %+ %%i | 600 CAT_XDEFINE m, %%i, %1_m %+ %%i |
590 CAT_XDEFINE n, m %+ %%i, %%i | 601 CAT_XDEFINE n, m %+ %%i, %%i |
591 %assign %%i %%i+1 | 602 %assign %%i %%i+1 |
597 %ifdef %1_m0 | 608 %ifdef %1_m0 |
598 LOAD_MM_PERMUTATION %1 | 609 LOAD_MM_PERMUTATION %1 |
599 %endif | 610 %endif |
600 %endmacro | 611 %endmacro |
601 | 612 |
602 ;Substitutions that reduce instruction size but are functionally equivalent | 613 ; Substitutions that reduce instruction size but are functionally equivalent |
603 %macro add 2 | 614 %macro add 2 |
604 %ifnum %2 | 615 %ifnum %2 |
605 %if %2==128 | 616 %if %2==128 |
606 sub %1, -128 | 617 sub %1, -128 |
607 %else | 618 %else |