comparison x86/dsputil_yasm.asm @ 10633:66242b8fbd32 libavcodec

port ape dsp functions from sse2 to mmx now requires yasm
author lorenm
date Thu, 03 Dec 2009 18:53:12 +0000
parents 276b3a342389
children 5da7180afadf
comparison
equal deleted inserted replaced
10632:54982e4c4478 10633:66242b8fbd32
97 FLOAT_TO_INT16_INTERLEAVE6 3dn2 97 FLOAT_TO_INT16_INTERLEAVE6 3dn2
98 %undef cvtps2pi 98 %undef cvtps2pi
99 99
100 100
101 101
102 %macro SCALARPRODUCT 1 ; one parameter: the suffix pasted onto each function name below (instantiated with mmx2 and sse2)
103 ; void add_int16(int16_t * v1, int16_t * v2, int order) -- v1[i] += v2[i] on 16-bit words (paddw wraps), result written back to v1
104 cglobal add_int16_%1, 3,3,2, v1, v2, order ; cglobal is defined outside this view; presumably declares the function and binds v1q/v2q/orderq to its arguments
105 shl orderq, 1 ; order *= 2: int16_t element count -> byte count
106 add v1q, orderq ; v1q -> one past the end of v1
107 add v2q, orderq ; v2q -> one past the end of v2
108 neg orderq ; orderq = negative byte offset from the array ends, counted up toward 0
109 .loop: ; each iteration handles 2*mmsize bytes of both arrays
110 movu m0, [v2q + orderq] ; load v2 (movu is defined outside this view; presumably the unaligned move)
111 movu m1, [v2q + orderq + mmsize]
112 paddw m0, [v1q + orderq] ; m0 = v2 + v1, per 16-bit word
113 paddw m1, [v1q + orderq + mmsize]
114 mova [v1q + orderq], m0 ; store sums into v1 -- NOTE(review): mova presumably requires v1 to be mmsize-aligned; confirm
115 mova [v1q + orderq + mmsize], m1
116 add orderq, mmsize*2 ; advance by the bytes just processed
117 jl .loop ; repeat while the offset is still negative; the body always runs at least once
118 REP_RET ; return through a macro defined outside this view
119
120 ; void sub_int16(int16_t * v1, int16_t * v2, int order) -- v1[i] -= v2[i] on 16-bit words (psubw wraps), result written back to v1
121 cglobal sub_int16_%1, 3,3,4, v1, v2, order ; same argument binding as add_int16; uses m0-m3
122 shl orderq, 1 ; order *= 2: int16_t element count -> byte count
123 add v1q, orderq ; v1q -> one past the end of v1
124 add v2q, orderq ; v2q -> one past the end of v2
125 neg orderq ; orderq = negative byte offset, counted up toward 0
126 .loop: ; each iteration handles 2*mmsize bytes of both arrays
127 movu m2, [v2q + orderq] ; subtrahend from v2 (presumably unaligned load)
128 movu m3, [v2q + orderq + mmsize]
129 mova m0, [v1q + orderq] ; minuend from v1 -- NOTE(review): mova presumably requires v1 to be mmsize-aligned; confirm
130 mova m1, [v1q + orderq + mmsize]
131 psubw m0, m2 ; m0 = v1 - v2, per 16-bit word
132 psubw m1, m3
133 mova [v1q + orderq], m0 ; store differences into v1
134 mova [v1q + orderq + mmsize], m1
135 add orderq, mmsize*2 ; advance by the bytes just processed
136 jl .loop ; repeat while the offset is still negative; the body always runs at least once
137 REP_RET ; return through a macro defined outside this view
138
139 ; int scalarproduct_int16(int16_t * v1, int16_t * v2, int order, int shift) -- dot product of v1 and v2; shift is applied to partial sums in the reduction below
140 cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift ; four names, but the leading 3 matches the other functions; shift is only read through shiftm
141 shl orderq, 1 ; order *= 2: int16_t element count -> byte count
142 add v1q, orderq ; v1q -> one past the end of v1
143 add v2q, orderq ; v2q -> one past the end of v2
144 neg orderq ; orderq = negative byte offset, counted up toward 0
145 movd m3, shiftm ; m3 = shift count used by psrad after the loop
146 pxor m2, m2 ; m2 = 0: accumulator of 32-bit partial sums
147 .loop: ; each iteration handles 2*mmsize bytes of both arrays
148 movu m0, [v1q + orderq] ; load v1 (presumably unaligned load)
149 movu m1, [v1q + orderq + mmsize]
150 pmaddwd m0, [v2q + orderq] ; signed 16x16 multiplies, adjacent product pairs summed into 32-bit lanes
151 pmaddwd m1, [v2q + orderq + mmsize]
152 paddd m2, m0 ; accumulate into the 32-bit lanes of m2
153 paddd m2, m1
154 add orderq, mmsize*2 ; advance by the bytes just processed
155 jl .loop ; repeat while the offset is still negative; the body always runs at least once
156 %if mmsize == 16
157 movhlps m0, m2 ; 16-byte registers: four partial sums; copy the high 64 bits of m2 into the low half of m0
158 paddd m2, m0 ; low two dwords of m2 = lane0+lane2 and lane1+lane3
159 psrad m2, m3 ; arithmetic right shift of each partial sum by shift (done before the final add)
160 pshuflw m0, m2, 0x4e ; 0x4e swaps the two dwords within the low 64 bits
161 %else ; 8-byte registers: m2 already holds just two partial sums
162 psrad m2, m3 ; arithmetic right shift of each partial sum by shift (done before the final add)
163 pshufw m0, m2, 0x4e ; 0x4e swaps the two dwords of the register
164 %endif
165 paddd m2, m0 ; low dword = sum of the two shifted partial sums
166 movd eax, m2 ; return value
167 RET ; return through a macro defined outside this view
168 %endmacro
169
170 INIT_MMX ; macro defined outside this view; presumably selects 8-byte MMX registers for m0.. and mmsize -- confirm
171 SCALARPRODUCT mmx2 ; expand add_int16, sub_int16 and scalarproduct_int16 with the mmx2 suffix
172 INIT_XMM ; macro defined outside this view; presumably selects 16-byte XMM registers (takes the mmsize == 16 branch) -- confirm
173 SCALARPRODUCT sse2 ; expand the same three functions with the sse2 suffix
174
175
176
102 ; void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top) 177 ; void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
103 cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_top 178 cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_top
104 movq mm0, [topq] 179 movq mm0, [topq]
105 movq mm2, mm0 180 movq mm2, mm0
106 movd mm4, [left_topq] 181 movd mm4, [left_topq]