Mercurial > libavcodec.hg
comparison x86/dsputil_yasm.asm @ 10633:66242b8fbd32 libavcodec
port ape dsp functions from sse2 to mmx
now requires yasm
author | lorenm |
---|---|
date | Thu, 03 Dec 2009 18:53:12 +0000 |
parents | 276b3a342389 |
children | 5da7180afadf |
comparison
equal
deleted
inserted
replaced
10632:54982e4c4478 | 10633:66242b8fbd32 |
---|---|
97 FLOAT_TO_INT16_INTERLEAVE6 3dn2 | 97 FLOAT_TO_INT16_INTERLEAVE6 3dn2 |
98 %undef cvtps2pi | 98 %undef cvtps2pi |
99 | 99 |
100 | 100 |
101 | 101 |
102 %macro SCALARPRODUCT 1 | |
; Instantiates three int16 kernels for one ISA; %1 is the name suffix | |
; (mmx2/sse2).  mmsize, mova/movu and m0..m3 are set by INIT_MMX/INIT_XMM | |
; before each instantiation.  All three kernels walk the arrays backwards | |
; with a negative byte index in orderq, two vectors per iteration, so the | |
; byte length (order*2) is assumed to be a multiple of 2*mmsize -- TODO | |
; confirm against callers.  v1 is accessed with mova / reg-mem operands | |
; and must be mmsize-aligned; v2 is loaded with movu and may be unaligned. | |
103 ; void add_int16(int16_t * v1, int16_t * v2, int order)   (v1[i] += v2[i]) | |
104 cglobal add_int16_%1, 3,3,2, v1, v2, order | |
105 shl orderq, 1 ; element count -> byte count | |
106 add v1q, orderq ; point both pointers one-past-the-end | |
107 add v2q, orderq | |
108 neg orderq ; index runs from -bytes up to 0 | |
109 .loop: | |
110 movu m0, [v2q + orderq] ; v2 chunk (unaligned load) | |
111 movu m1, [v2q + orderq + mmsize] | |
112 paddw m0, [v1q + orderq] ; + v1 chunk (aligned mem operand) | |
113 paddw m1, [v1q + orderq + mmsize] | |
114 mova [v1q + orderq], m0 ; store back into v1 | |
115 mova [v1q + orderq + mmsize], m1 | |
116 add orderq, mmsize*2 | |
117 jl .loop ; loop while index < 0 | |
118 REP_RET ; ret usable as branch target (rep ret on AMD) | |
119 | |
120 ; void sub_int16(int16_t * v1, int16_t * v2, int order)   (v1[i] -= v2[i]) | |
121 cglobal sub_int16_%1, 3,3,4, v1, v2, order | |
122 shl orderq, 1 ; element count -> byte count | |
123 add v1q, orderq | |
124 add v2q, orderq | |
125 neg orderq | |
126 .loop: | |
127 movu m2, [v2q + orderq] ; subtrahend loaded first: psubw has no | |
128 movu m3, [v2q + orderq + mmsize] ; mem form for the reversed operand order | |
129 mova m0, [v1q + orderq] | |
130 mova m1, [v1q + orderq + mmsize] | |
131 psubw m0, m2 ; v1 - v2, elementwise | |
132 psubw m1, m3 | |
133 mova [v1q + orderq], m0 | |
134 mova [v1q + orderq + mmsize], m1 | |
135 add orderq, mmsize*2 | |
136 jl .loop | |
137 REP_RET | |
138 | |
139 ; int scalarproduct_int16(int16_t * v1, int16_t * v2, int order, int shift) | |
140 cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift | |
141 shl orderq, 1 | |
142 add v1q, orderq | |
143 add v2q, orderq | |
144 neg orderq | |
145 movd m3, shiftm ; 4th arg read from its home location (not loaded to a GPR) | |
146 pxor m2, m2 ; dword accumulator = 0 | |
147 .loop: | |
148 movu m0, [v1q + orderq] | |
149 movu m1, [v1q + orderq + mmsize] | |
150 pmaddwd m0, [v2q + orderq] ; int16 pairs multiplied, pairwise-summed to dwords | |
151 pmaddwd m1, [v2q + orderq + mmsize] | |
152 paddd m2, m0 ; accumulate partial dword sums | |
153 paddd m2, m1 | |
154 add orderq, mmsize*2 | |
155 jl .loop | |
; Horizontal reduction of the dword accumulator, with the >> shift folded in. | |
156 %if mmsize == 16 | |
157 movhlps m0, m2 ; fold high qword onto low qword | |
158 paddd m2, m0 | |
159 psrad m2, m3 ; NOTE(review): shift applied before the final add, so | |
; the two partial sums are truncated individually; differs from | |
; (total >> shift) in the low bits -- confirm against the C reference | |
160 pshuflw m0, m2, 0x4e ; swap the two low dwords | |
161 %else | |
162 psrad m2, m3 | |
163 pshufw m0, m2, 0x4e ; swap the two dwords of the mm reg | |
164 %endif | |
165 paddd m2, m0 | |
166 movd eax, m2 ; return low dword | |
167 RET | |
168 %endmacro | |
169 | |
170 INIT_MMX | |
171 SCALARPRODUCT mmx2 ; mmsize=8, mm registers | |
172 INIT_XMM | |
173 SCALARPRODUCT sse2 ; mmsize=16, xmm registers | |
174 | |
175 | |
176 | |
102 ; void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top) | 177 ; void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top) |
103 cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_top | 178 cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_top |
104 movq mm0, [topq] | 179 movq mm0, [topq] |
105 movq mm2, mm0 | 180 movq mm2, mm0 |
106 movd mm4, [left_topq] | 181 movd mm4, [left_topq] |