comparison x86/h264_intrapred.asm @ 12035:09705b027344 libavcodec

Fix h264/vp8 intra pred on Athlon XP. Whose idea was it to have a CPU that didn't SIGILL on an invalid instruction?
author darkshikari
date Thu, 01 Jul 2010 10:29:47 +0000
parents 2002ea7c06f6
children
comparison
equal deleted inserted replaced
12034:f6ae68a7b1fd 12035:09705b027344
113 113
114 ;----------------------------------------------------------------------------- 114 ;-----------------------------------------------------------------------------
115 ; void pred16x16_dc(uint8_t *src, int stride) 115 ; void pred16x16_dc(uint8_t *src, int stride)
116 ;----------------------------------------------------------------------------- 116 ;-----------------------------------------------------------------------------
117 117
118 %macro PRED16x16_DC 2 118 %macro PRED16x16_DC 1
119 cglobal pred16x16_dc_%1, 2,7 119 cglobal pred16x16_dc_%1, 2,7
120 mov r4, r0 120 mov r4, r0
121 sub r0, r1 121 sub r0, r1
122 pxor mm0, mm0 122 pxor mm0, mm0
123 pxor mm1, mm1 123 pxor mm1, mm1
141 shr r2d, 5 141 shr r2d, 5
142 %ifidn %1, mmxext 142 %ifidn %1, mmxext
143 movd m0, r2d 143 movd m0, r2d
144 punpcklbw m0, m0 144 punpcklbw m0, m0
145 pshufw m0, m0, 0 145 pshufw m0, m0, 0
146 %elifidn %1, sse
147 imul r2d, 0x01010101
148 movd m0, r2d
149 shufps m0, m0, 0
150 %elifidn %1, sse2 146 %elifidn %1, sse2
151 movd m0, r2d 147 movd m0, r2d
152 punpcklbw m0, m0 148 punpcklbw m0, m0
153 pshuflw m0, m0, 0 149 pshuflw m0, m0, 0
154 punpcklqdq m0, m0 150 punpcklqdq m0, m0
159 %endif 155 %endif
160 156
161 %if mmsize==8 157 %if mmsize==8
162 mov r3d, 8 158 mov r3d, 8
163 .loop: 159 .loop:
164 %2 [r4+r1*0+0], m0 160 mova [r4+r1*0+0], m0
165 %2 [r4+r1*0+8], m0 161 mova [r4+r1*0+8], m0
166 %2 [r4+r1*1+0], m0 162 mova [r4+r1*1+0], m0
167 %2 [r4+r1*1+8], m0 163 mova [r4+r1*1+8], m0
168 %else 164 %else
169 mov r3d, 4 165 mov r3d, 4
170 .loop: 166 .loop:
171 %2 [r4+r1*0], m0 167 mova [r4+r1*0], m0
172 %2 [r4+r1*1], m0 168 mova [r4+r1*1], m0
173 lea r4, [r4+r1*2] 169 lea r4, [r4+r1*2]
174 %2 [r4+r1*0], m0 170 mova [r4+r1*0], m0
175 %2 [r4+r1*1], m0 171 mova [r4+r1*1], m0
176 %endif 172 %endif
177 lea r4, [r4+r1*2] 173 lea r4, [r4+r1*2]
178 dec r3d 174 dec r3d
179 jg .loop 175 jg .loop
180 REP_RET 176 REP_RET
181 %endmacro 177 %endmacro
182 178
183 INIT_MMX 179 INIT_MMX
184 PRED16x16_DC mmxext, movq 180 PRED16x16_DC mmxext
185 INIT_XMM 181 INIT_XMM
186 PRED16x16_DC sse, movaps 182 PRED16x16_DC sse2
187 PRED16x16_DC sse2, movdqa 183 PRED16x16_DC ssse3
188 PRED16x16_DC ssse3, movdqa
189 184
190 ;----------------------------------------------------------------------------- 185 ;-----------------------------------------------------------------------------
191 ; void pred16x16_tm_vp8(uint8_t *src, int stride) 186 ; void pred16x16_tm_vp8(uint8_t *src, int stride)
192 ;----------------------------------------------------------------------------- 187 ;-----------------------------------------------------------------------------
193 188