Mercurial > libavcodec.hg
annotate x86/h264_i386.h @ 10903:8c8321b94c35 libavcodec
Mark a few functions as noinline; this makes ff_h264_filter_mb() a bit smaller
and 5% faster.
ff_h264_filter_mb_fast() stays the same size, as gcc decided not to inline these
functions there in the first place.
author | michael |
---|---|
date | Sat, 16 Jan 2010 17:27:17 +0000 |
parents | e9d9d946f213 |
children | 7dd2a45249a9 |
rev | line source |
---|---|
8430 | 1 /* |
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder | |
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 /** | |
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
8590
diff
changeset
|
23 * @file libavcodec/x86/h264_i386.h |
8430 | 24 * H.264 / AVC / MPEG4 part10 codec. |
25 * non-MMX i386-specific optimizations for H.264 | |
26 * @author Michael Niedermayer <michaelni@gmx.at> | |
27 */ | |
28 | |
29 #ifndef AVCODEC_X86_H264_I386_H | |
30 #define AVCODEC_X86_H264_I386_H | |
31 | |
32 #include "libavcodec/cabac.h" | |
33 | |
34 //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet | |
35 //as that would make optimization work hard) | |
8590 | 36 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) |
/**
 * Decode a run of significance decisions with hand-written x86 assembly and
 * record the positions that were flagged into index[].
 *
 * The loop walks a byte pointer that starts at significant_coeff_ctx_base.
 * For each position one BRANCHLESS_GET_CABAC decision is made on the byte at
 * that pointer; when bit 0 of the result is set, the position is stored in
 * index[] and a second decision is made on the byte 61 past the pointer.
 * If bit 0 of that second result is set the loop ends immediately. When the
 * pointer reaches base + max_coeff - 1 without that happening, that final
 * position is stored without decoding anything for it.
 * The loop body runs once before the bound is first checked.
 *
 * NOTE(review): BRANCHLESS_GET_CABAC, RANGE, LOW, REG_a and REG_c are macros
 * defined outside this view; the remarks about them below are inferred from
 * how they are used here and should be confirmed against their definitions.
 *
 * @param c                          context whose RANGE and LOW fields are
 *                                   loaded before and written back after the loop
 * @param max_coeff                  number of positions covered
 * @param significant_coeff_ctx_base first byte of the per-position state array
 * @param index                      output array receiving the stored positions
 * @return number of entries written to index[]
 */
8430 | 37 static int decode_significance_x86(CABACContext *c, int max_coeff, |
38 uint8_t *significant_coeff_ctx_base, | |
39 int *index){ | |
/* Address of the last position; the loop continues while the walking pointer is below it. */
40 void *end= significant_coeff_ctx_base + max_coeff - 1; | |
/* Negated start address: adding the walking pointer to it yields the current position. */
41 int minusstart= -(int)significant_coeff_ctx_base; | |
/* 4 minus the output start address: added to the address of the last written slot, it gives 4 * entry count. */
42 int minusindex= 4-(int)index; | |
43 int coeff_count; | |
/* Operands: %0 coeff_count (eax), %1 walking state pointer, %2 index (in memory), */
/* %3 c, %4 minusstart, %5 end, %6 minusindex. */
44 __asm__ volatile( | |
/* Keep the RANGE and LOW fields of c in esi and ebx for the whole loop. */
45 "movl "RANGE "(%3), %%esi \n\t" | |
46 "movl "LOW "(%3), %%ebx \n\t" | |
47 | |
48 "2: \n\t" | |
49 | |
/* First decision for this position, using the byte at (%1); result is tested in edx below. */
50 BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", | |
51 "%%bx", "%%esi", "%%eax", "%%al") | |
52 | |
/* Bit 0 clear: nothing to record for this position, go advance the pointer. */
53 "test $1, %%edx \n\t" | |
54 " jz 3f \n\t" | |
55 | |
/* Second decision, using the byte 61 past the current one. */
/* NOTE(review): presumably the matching last-coefficient state; confirm the 61 offset with the caller. */
56 BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", | |
57 "%%bx", "%%esi", "%%eax", "%%al") | |
58 | |
/* Store the current position (walking pointer + minusstart) at the current output slot. */
59 "mov %2, %%"REG_a" \n\t" | |
60 "movl %4, %%ecx \n\t" | |
61 "add %1, %%"REG_c" \n\t" | |
62 "movl %%ecx, (%%"REG_a") \n\t" | |
63 | |
/* Bit 0 of the second decision set: stop, with eax still addressing the slot just written. */
64 "test $1, %%edx \n\t" | |
65 " jnz 4f \n\t" | |
66 | |
/* Otherwise advance the output pointer by one int and save it back to memory. */
67 "add $4, %%"REG_a" \n\t" | |
68 "mov %%"REG_a", %2 \n\t" | |
69 | |
70 "3: \n\t" | |
/* Step to the next position and loop while it is still below end (unsigned compare). */
71 "add $1, %1 \n\t" | |
72 "cmp %5, %1 \n\t" | |
73 " jb 2b \n\t" | |
/* Loop ran out: record the final position without decoding a decision for it. */
74 "mov %2, %%"REG_a" \n\t" | |
75 "movl %4, %%ecx \n\t" | |
76 "add %1, %%"REG_c" \n\t" | |
77 "movl %%ecx, (%%"REG_a") \n\t" | |
78 "4: \n\t" | |
/* eax addresses the last written slot; (eax + 4 - index) / 4 is the number of entries written. */
79 "add %6, %%eax \n\t" | |
80 "shr $2, %%eax \n\t" | |
81 | |
/* Write the updated esi and ebx back to the RANGE and LOW fields of c. */
82 "movl %%esi, "RANGE "(%3) \n\t" | |
83 "movl %%ebx, "LOW "(%3) \n\t" | |
84 :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index) | |
85 :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex) | |
86 : "%"REG_c, "%ebx", "%edx", "%esi", "memory" | |
87 ); | |
88 return coeff_count; | |
89 } | |
90 | |
/**
 * Variant of the significance decoding loop that walks a fixed range of
 * positions (0 up to 63) and picks the state byte for each decision through
 * lookup tables instead of a linearly advancing pointer.
 *
 * For each position held in the local variable last, one
 * BRANCHLESS_GET_CABAC decision is made on the byte at
 * significant_coeff_ctx_base + sig_off[last]. When bit 0 of the result is
 * set, a second decision is made on the byte at
 * significant_coeff_ctx_base + last_coeff_flag_offset_8x8[last] + 15, and
 * the position is then stored in index[]. If bit 0 of the second result is
 * set the loop ends immediately. When the position counter reaches 63
 * without that happening, 63 is stored without decoding anything for it.
 *
 * NOTE(review): BRANCHLESS_GET_CABAC, RANGE, LOW, MANGLE, REG_a, REG_D and
 * the table last_coeff_flag_offset_8x8 are defined outside this view; the
 * remarks about them below are inferred from how they are used here and
 * should be confirmed against their definitions.
 *
 * @param c                          context whose RANGE and LOW fields are
 *                                   loaded before and written back after the loop
 * @param significant_coeff_ctx_base base address the table offsets are added to
 * @param index                      output array receiving the stored positions
 * @param sig_off                    byte table indexed by position, giving the
 *                                   offset of the state byte for the first decision
 * @return number of entries written to index[]
 */
91 static int decode_significance_8x8_x86(CABACContext *c, | |
92 uint8_t *significant_coeff_ctx_base, | |
93 int *index, const uint8_t *sig_off){ | |
/* 4 minus the output start address: added to the address of the last written slot, it gives 4 * entry count. */
94 int minusindex= 4-(int)index; | |
95 int coeff_count; | |
/* Current position; kept in memory and reloaded around each decision. */
96 x86_reg last=0; | |
/* Operands: %0 coeff_count (eax), %1 last (in memory), %2 index (in memory), */
/* %3 c, %4 minusindex, %5 significant_coeff_ctx_base, %6 sig_off. */
97 __asm__ volatile( | |
/* Keep the RANGE and LOW fields of c in esi and ebx for the whole loop. */
98 "movl "RANGE "(%3), %%esi \n\t" | |
99 "movl "LOW "(%3), %%ebx \n\t" | |
100 | |
/* Start with the position counter in the D register. */
101 "mov %1, %%"REG_D" \n\t" | |
102 "2: \n\t" | |
103 | |
/* D register = significant_coeff_ctx_base + sig_off[last]: address of the state byte for the first decision. */
104 "mov %6, %%"REG_a" \n\t" | |
105 "movzbl (%%"REG_a", %%"REG_D"), %%edi \n\t" | |
106 "add %5, %%"REG_D" \n\t" | |
107 | |
/* First decision for this position; result is tested in edx below. */
108 BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", | |
109 "%%bx", "%%esi", "%%eax", "%%al") | |
110 | |
/* Reload the position into edi (mov leaves the flags alone), then test bit 0 of the decision. */
111 "mov %1, %%edi \n\t" | |
112 "test $1, %%edx \n\t" | |
113 " jz 3f \n\t" | |
114 | |
/* D register = significant_coeff_ctx_base + last_coeff_flag_offset_8x8[last]. */
115 "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t" | |
116 "add %5, %%"REG_D" \n\t" | |
117 | |
/* Second decision, using the byte 15 past that address. */
/* NOTE(review): the meaning of the 15 offset is not visible here; confirm with the caller. */
118 BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx", | |
119 "%%bx", "%%esi", "%%eax", "%%al") | |
120 | |
/* Store the current position at the current output slot. */
121 "mov %2, %%"REG_a" \n\t" | |
122 "mov %1, %%edi \n\t" | |
123 "movl %%edi, (%%"REG_a") \n\t" | |
124 | |
/* Bit 0 of the second decision set: stop, with eax still addressing the slot just written. */
125 "test $1, %%edx \n\t" | |
126 " jnz 4f \n\t" | |
127 | |
/* Otherwise advance the output pointer by one int and save it back to memory. */
128 "add $4, %%"REG_a" \n\t" | |
129 "mov %%"REG_a", %2 \n\t" | |
130 | |
131 "3: \n\t" | |
/* Increment the position, save it to last, and loop while it is below 63. */
132 "addl $1, %%edi \n\t" | |
133 "mov %%edi, %1 \n\t" | |
134 "cmpl $63, %%edi \n\t" | |
135 " jb 2b \n\t" | |
/* Loop ran out: record position 63 without decoding a decision for it. */
136 "mov %2, %%"REG_a" \n\t" | |
137 "movl %%edi, (%%"REG_a") \n\t" | |
138 "4: \n\t" | |
/* eax addresses the last written slot; (eax + 4 - index) / 4 is the number of entries written. */
139 "addl %4, %%eax \n\t" | |
140 "shr $2, %%eax \n\t" | |
141 | |
/* Write the updated esi and ebx back to the RANGE and LOW fields of c. */
142 "movl %%esi, "RANGE "(%3) \n\t" | |
143 "movl %%ebx, "LOW "(%3) \n\t" | |
144 :"=&a"(coeff_count),"+m"(last), "+m"(index) | |
145 :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off) | |
146 : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory" | |
147 ); | |
148 return coeff_count; | |
149 } | |
8590 | 150 #endif /* ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE */ |
151 /* !defined(BROKEN_RELOCATIONS) */ | |
8430 | 152 |
153 #endif /* AVCODEC_X86_H264_I386_H */ |