Mercurial > libavcodec.hg
comparison x86/h264_intrapred.asm @ 12035:09705b027344 libavcodec
Fix h264/vp8 intra pred on Athlon XP
Whose idea was it to have a CPU that didn't SIGILL on an invalid instruction?
author | darkshikari |
---|---|
date | Thu, 01 Jul 2010 10:29:47 +0000 |
parents | 2002ea7c06f6 |
children |
comparison
equal
deleted
inserted
replaced
12034:f6ae68a7b1fd | 12035:09705b027344 |
---|---|
113 | 113 |
114 ;----------------------------------------------------------------------------- | 114 ;----------------------------------------------------------------------------- |
115 ; void pred16x16_dc(uint8_t *src, int stride) | 115 ; void pred16x16_dc(uint8_t *src, int stride) |
116 ;----------------------------------------------------------------------------- | 116 ;----------------------------------------------------------------------------- |
117 | 117 |
118 %macro PRED16x16_DC 2 | 118 %macro PRED16x16_DC 1 |
119 cglobal pred16x16_dc_%1, 2,7 | 119 cglobal pred16x16_dc_%1, 2,7 |
120 mov r4, r0 | 120 mov r4, r0 |
121 sub r0, r1 | 121 sub r0, r1 |
122 pxor mm0, mm0 | 122 pxor mm0, mm0 |
123 pxor mm1, mm1 | 123 pxor mm1, mm1 |
141 shr r2d, 5 | 141 shr r2d, 5 |
142 %ifidn %1, mmxext | 142 %ifidn %1, mmxext |
143 movd m0, r2d | 143 movd m0, r2d |
144 punpcklbw m0, m0 | 144 punpcklbw m0, m0 |
145 pshufw m0, m0, 0 | 145 pshufw m0, m0, 0 |
146 %elifidn %1, sse | |
147 imul r2d, 0x01010101 | |
148 movd m0, r2d | |
149 shufps m0, m0, 0 | |
150 %elifidn %1, sse2 | 146 %elifidn %1, sse2 |
151 movd m0, r2d | 147 movd m0, r2d |
152 punpcklbw m0, m0 | 148 punpcklbw m0, m0 |
153 pshuflw m0, m0, 0 | 149 pshuflw m0, m0, 0 |
154 punpcklqdq m0, m0 | 150 punpcklqdq m0, m0 |
159 %endif | 155 %endif |
160 | 156 |
161 %if mmsize==8 | 157 %if mmsize==8 |
162 mov r3d, 8 | 158 mov r3d, 8 |
163 .loop: | 159 .loop: |
164 %2 [r4+r1*0+0], m0 | 160 mova [r4+r1*0+0], m0 |
165 %2 [r4+r1*0+8], m0 | 161 mova [r4+r1*0+8], m0 |
166 %2 [r4+r1*1+0], m0 | 162 mova [r4+r1*1+0], m0 |
167 %2 [r4+r1*1+8], m0 | 163 mova [r4+r1*1+8], m0 |
168 %else | 164 %else |
169 mov r3d, 4 | 165 mov r3d, 4 |
170 .loop: | 166 .loop: |
171 %2 [r4+r1*0], m0 | 167 mova [r4+r1*0], m0 |
172 %2 [r4+r1*1], m0 | 168 mova [r4+r1*1], m0 |
173 lea r4, [r4+r1*2] | 169 lea r4, [r4+r1*2] |
174 %2 [r4+r1*0], m0 | 170 mova [r4+r1*0], m0 |
175 %2 [r4+r1*1], m0 | 171 mova [r4+r1*1], m0 |
176 %endif | 172 %endif |
177 lea r4, [r4+r1*2] | 173 lea r4, [r4+r1*2] |
178 dec r3d | 174 dec r3d |
179 jg .loop | 175 jg .loop |
180 REP_RET | 176 REP_RET |
181 %endmacro | 177 %endmacro |
182 | 178 |
183 INIT_MMX | 179 INIT_MMX |
184 PRED16x16_DC mmxext, movq | 180 PRED16x16_DC mmxext |
185 INIT_XMM | 181 INIT_XMM |
186 PRED16x16_DC sse, movaps | 182 PRED16x16_DC sse2 |
187 PRED16x16_DC sse2, movdqa | 183 PRED16x16_DC ssse3 |
188 PRED16x16_DC ssse3, movdqa | |
189 | 184 |
190 ;----------------------------------------------------------------------------- | 185 ;----------------------------------------------------------------------------- |
191 ; void pred16x16_tm_vp8(uint8_t *src, int stride) | 186 ; void pred16x16_tm_vp8(uint8_t *src, int stride) |
192 ;----------------------------------------------------------------------------- | 187 ;----------------------------------------------------------------------------- |
193 | 188 |