comparison x86/x86inc.asm @ 11931:980030a3e315 libavcodec

Update x264asm header files to latest versions. Modify the asm accordingly. GLOBAL is now no longer necessary for PIC-compliant loads.
author darkshikari
date Wed, 23 Jun 2010 19:20:46 +0000
parents 669965580b72
children bc13f76ecfbf
comparison
equal deleted inserted replaced
11930:1e8556438209 11931:980030a3e315
1 ;***************************************************************************** 1 ;*****************************************************************************
2 ;* x86inc.asm 2 ;* x86inc.asm
3 ;***************************************************************************** 3 ;*****************************************************************************
4 ;* Copyright (C) 2005-2008 Loren Merritt <lorenm@u.washington.edu> 4 ;* Copyright (C) 2005-2008 x264 project
5 ;* 5 ;*
6 ;* This file is part of FFmpeg. 6 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
7 ;* Anton Mitrofanov <BugMaster@narod.ru>
7 ;* 8 ;*
8 ;* FFmpeg is free software; you can redistribute it and/or 9 ;* Permission to use, copy, modify, and/or distribute this software for any
9 ;* modify it under the terms of the GNU Lesser General Public 10 ;* purpose with or without fee is hereby granted, provided that the above
10 ;* License as published by the Free Software Foundation; either 11 ;* copyright notice and this permission notice appear in all copies.
11 ;* version 2.1 of the License, or (at your option) any later version.
12 ;* 12 ;*
13 ;* FFmpeg is distributed in the hope that it will be useful, 13 ;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of 14 ;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 ;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
16 ;* Lesser General Public License for more details. 16 ;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17 ;* 17 ;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18 ;* You should have received a copy of the GNU Lesser General Public 18 ;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19 ;* License along with FFmpeg; if not, write to the Free Software 19 ;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;***************************************************************************** 20 ;*****************************************************************************
21
22 ; This is a header file for the x264ASM assembly language, which uses
23 ; NASM/YASM syntax combined with a large number of macros to provide easy
24 ; abstraction between different calling conventions (x86_32, win64, linux64).
25 ; It also has various other useful features to simplify writing the kind of
26 ; DSP functions that are most often used in x264.
27
28 ; Unlike the rest of x264, this file is available under an ISC license, as it
29 ; has significant usefulness outside of x264 and we want it to be available
30 ; to the largest audience possible. Of course, if you modify it for your own
31 ; purposes to add a new feature, we strongly encourage contributing a patch
32 ; as this feature might be useful for others as well. Send patches or ideas
33 ; to x264-devel@videolan.org .
34
35 %define program_name ff
22 36
23 %ifdef ARCH_X86_64 37 %ifdef ARCH_X86_64
24 %ifidn __OUTPUT_FORMAT__,win32 38 %ifidn __OUTPUT_FORMAT__,win32
25 %define WIN64 39 %define WIN64
26 %else 40 %else
27 %define UNIX64 41 %define UNIX64
28 %endif 42 %endif
43 %endif
44
45 %ifdef PREFIX
46 %define mangle(x) _ %+ x
47 %else
48 %define mangle(x) x
29 %endif 49 %endif
30 50
31 ; FIXME: All of the 64bit asm functions that take a stride as an argument 51 ; FIXME: All of the 64bit asm functions that take a stride as an argument
32 ; via register, assume that the high dword of that register is filled with 0. 52 ; via register, assume that the high dword of that register is filled with 0.
33 ; This is true in practice (since we never do any 64bit arithmetic on strides, 53 ; This is true in practice (since we never do any 64bit arithmetic on strides,
45 %else 65 %else
46 SECTION .rodata align=%1 66 SECTION .rodata align=%1
47 %endif 67 %endif
48 %endmacro 68 %endmacro
49 69
50 ; PIC support macros. 70 %ifdef WIN64
51 ; x86_64 can't fit 64bit address literals in most instruction types, 71 %define PIC
52 ; so shared objects (under the assumption that they might be anywhere 72 %elifndef ARCH_X86_64
53 ; in memory) must use an address mode that does fit.
54 ; So all accesses to global variables must use this macro, e.g.
55 ; mov eax, [foo GLOBAL]
56 ; instead of
57 ; mov eax, [foo]
58 ;
59 ; x86_32 doesn't require PIC. 73 ; x86_32 doesn't require PIC.
60 ; Some distros prefer shared objects to be PIC, but nothing breaks if 74 ; Some distros prefer shared objects to be PIC, but nothing breaks if
61 ; the code contains a few textrels, so we'll skip that complexity. 75 ; the code contains a few textrels, so we'll skip that complexity.
62
63 %ifdef WIN64
64 %define PIC
65 %elifndef ARCH_X86_64
66 %undef PIC 76 %undef PIC
67 %endif 77 %endif
68 %ifdef PIC 78 %ifdef PIC
69 %define GLOBAL wrt rip 79 default rel
70 %else
71 %define GLOBAL
72 %endif 80 %endif
73 81
74 ; Macros to eliminate most code duplication between x86_32 and x86_64: 82 ; Macros to eliminate most code duplication between x86_32 and x86_64:
75 ; Currently this works only for leaf functions which load all their arguments 83 ; Currently this works only for leaf functions which load all their arguments
76 ; into registers at the start, and make no other use of the stack. Luckily that 84 ; into registers at the start, and make no other use of the stack. Luckily that
161 %define t%1b t%1 %+ b 169 %define t%1b t%1 %+ b
162 %rotate 1 170 %rotate 1
163 %endrep 171 %endrep
164 %endmacro 172 %endmacro
165 173
166 DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7 174 DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9
167 175
168 %ifdef ARCH_X86_64 176 %ifdef ARCH_X86_64
169 %define gprsize 8 177 %define gprsize 8
170 %else 178 %else
171 %define gprsize 4 179 %define gprsize 4
257 %if %1 < %2 265 %if %1 < %2
258 mov r%1, [rsp + stack_offset + 8 + %1*8] 266 mov r%1, [rsp + stack_offset + 8 + %1*8]
259 %endif 267 %endif
260 %endmacro 268 %endmacro
261 269
262 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names... 270 %macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
263 ASSERT %2 >= %1 271 ASSERT %2 >= %1
264 %assign regs_used %2 272 %assign regs_used %2
265 ASSERT regs_used <= 7 273 ASSERT regs_used <= 7
266 %if %0 > 2 274 %assign xmm_regs_used %3
267 %assign xmm_regs_used %3
268 %else
269 %assign xmm_regs_used 0
270 %endif
271 ASSERT xmm_regs_used <= 16 275 ASSERT xmm_regs_used <= 16
272 %if regs_used > 4 276 %if regs_used > 4
273 push r4 277 push r4
274 push r5 278 push r5
275 %assign stack_offset stack_offset+16 279 %assign stack_offset stack_offset+16
386 %if %1 < %2 390 %if %1 < %2
387 mov r%1, [esp + stack_offset + 4 + %1*4] 391 mov r%1, [esp + stack_offset + 4 + %1*4]
388 %endif 392 %endif
389 %endmacro 393 %endmacro
390 394
391 %macro PROLOGUE 2-4+ ; #args, #regs, arg_names... 395 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
392 ASSERT %2 >= %1 396 ASSERT %2 >= %1
393 %assign regs_used %2 397 %assign regs_used %2
394 ASSERT regs_used <= 7 398 ASSERT regs_used <= 7
395 PUSH_IF_USED 3 399 PUSH_IF_USED 3
396 PUSH_IF_USED 4 400 PUSH_IF_USED 4
432 436
433 %assign function_align 16 437 %assign function_align 16
434 438
435 ; Symbol prefix for C linkage 439 ; Symbol prefix for C linkage
436 %macro cglobal 1-2+ 440 %macro cglobal 1-2+
437 %xdefine %1 ff_%1 441 %xdefine %1 mangle(program_name %+ _ %+ %1)
438 %ifdef PREFIX
439 %xdefine %1 _ %+ %1
440 %endif
441 %xdefine %1.skip_prologue %1 %+ .skip_prologue 442 %xdefine %1.skip_prologue %1 %+ .skip_prologue
442 %ifidn __OUTPUT_FORMAT__,elf 443 %ifidn __OUTPUT_FORMAT__,elf
443 global %1:function hidden 444 global %1:function hidden
444 %else 445 %else
445 global %1 446 global %1
452 PROLOGUE %2 453 PROLOGUE %2
453 %endif 454 %endif
454 %endmacro 455 %endmacro
455 456
456 %macro cextern 1 457 %macro cextern 1
457 %ifdef PREFIX 458 %xdefine %1 mangle(program_name %+ _ %+ %1)
458 %xdefine %1 _%1
459 %endif
460 extern %1 459 extern %1
460 %endmacro
461
462 ;like cextern, but without the prefix
463 %macro cextern_naked 1
464 %xdefine %1 mangle(%1)
465 extern %1
466 %endmacro
467
468 %macro const 2+
469 %xdefine %1 mangle(program_name %+ _ %+ %1)
470 global %1
471 %1: %2
461 %endmacro 472 %endmacro
462 473
463 ; This is needed for ELF, otherwise the GNU linker assumes the stack is 474 ; This is needed for ELF, otherwise the GNU linker assumes the stack is
464 ; executable by default. 475 ; executable by default.
465 %ifidn __OUTPUT_FORMAT__,elf 476 %ifidn __OUTPUT_FORMAT__,elf
466 SECTION .note.GNU-stack noalloc noexec nowrite progbits 477 SECTION .note.GNU-stack noalloc noexec nowrite progbits
467 %endif 478 %endif
468
469 %assign FENC_STRIDE 16
470 %assign FDEC_STRIDE 32
471 479
472 ; merge mmx and sse* 480 ; merge mmx and sse*
473 481
474 %macro CAT_XDEFINE 3 482 %macro CAT_XDEFINE 3
475 %xdefine %1%2 %3 483 %xdefine %1%2 %3
573 %undef tmp 581 %undef tmp
574 %rotate 1 582 %rotate 1
575 %endrep 583 %endrep
576 %endmacro 584 %endmacro
577 585
578 %macro SAVE_MM_PERMUTATION 1 586 ; If SAVE_MM_PERMUTATION is placed at the end of a function and given the
587 ; function name, then any later calls to that function will automatically
588 ; load the permutation, so values can be returned in mmregs.
589 %macro SAVE_MM_PERMUTATION 1 ; name to save as
579 %assign %%i 0 590 %assign %%i 0
580 %rep num_mmregs 591 %rep num_mmregs
581 CAT_XDEFINE %1_m, %%i, m %+ %%i 592 CAT_XDEFINE %1_m, %%i, m %+ %%i
582 %assign %%i %%i+1 593 %assign %%i %%i+1
583 %endrep 594 %endrep
584 %endmacro 595 %endmacro
585 596
586 %macro LOAD_MM_PERMUTATION 1 597 %macro LOAD_MM_PERMUTATION 1 ; name to load from
587 %assign %%i 0 598 %assign %%i 0
588 %rep num_mmregs 599 %rep num_mmregs
589 CAT_XDEFINE m, %%i, %1_m %+ %%i 600 CAT_XDEFINE m, %%i, %1_m %+ %%i
590 CAT_XDEFINE n, m %+ %%i, %%i 601 CAT_XDEFINE n, m %+ %%i, %%i
591 %assign %%i %%i+1 602 %assign %%i %%i+1
597 %ifdef %1_m0 608 %ifdef %1_m0
598 LOAD_MM_PERMUTATION %1 609 LOAD_MM_PERMUTATION %1
599 %endif 610 %endif
600 %endmacro 611 %endmacro
601 612
602 ;Substitutions that reduce instruction size but are functionally equivalent 613 ; Substitutions that reduce instruction size but are functionally equivalent
603 %macro add 2 614 %macro add 2
604 %ifnum %2 615 %ifnum %2
605 %if %2==128 616 %if %2==128
606 sub %1, -128 617 sub %1, -128
607 %else 618 %else