annotate i386/sad_mmx.s @ 0:986e461dc072 libavcodec

Initial revision
author glantau
date Sun, 22 Jul 2001 14:18:56 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
1 ; MMX/SSE optimized routines for SAD of 16*16 macroblocks
986e461dc072 Initial revision
glantau
parents:
diff changeset
2 ; Copyright (C) Juan J. Sierralta P. <juanjo@atmlab.utfsm.cl>
986e461dc072 Initial revision
glantau
parents:
diff changeset
3 ;
986e461dc072 Initial revision
glantau
parents:
diff changeset
4 ; dist1_* Original Copyright (C) 2000 Chris Atenasio <chris@crud.net>
986e461dc072 Initial revision
glantau
parents:
diff changeset
5 ; Enhancements and rest Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk>
986e461dc072 Initial revision
glantau
parents:
diff changeset
6
986e461dc072 Initial revision
glantau
parents:
diff changeset
7 ;
986e461dc072 Initial revision
glantau
parents:
diff changeset
8 ; This program is free software; you can redistribute it and/or
986e461dc072 Initial revision
glantau
parents:
diff changeset
9 ; modify it under the terms of the GNU General Public License
986e461dc072 Initial revision
glantau
parents:
diff changeset
10 ; as published by the Free Software Foundation; either version 2
986e461dc072 Initial revision
glantau
parents:
diff changeset
11 ; of the License, or (at your option) any later version.
986e461dc072 Initial revision
glantau
parents:
diff changeset
12 ;
986e461dc072 Initial revision
glantau
parents:
diff changeset
13 ; This program is distributed in the hope that it will be useful,
986e461dc072 Initial revision
glantau
parents:
diff changeset
14 ; but WITHOUT ANY WARRANTY; without even the implied warranty of
986e461dc072 Initial revision
glantau
parents:
diff changeset
15 ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
986e461dc072 Initial revision
glantau
parents:
diff changeset
16 ; GNU General Public License for more details.
986e461dc072 Initial revision
glantau
parents:
diff changeset
17 ;
986e461dc072 Initial revision
glantau
parents:
diff changeset
18 ; You should have received a copy of the GNU General Public License
986e461dc072 Initial revision
glantau
parents:
diff changeset
19 ; along with this program; if not, write to the Free Software
986e461dc072 Initial revision
glantau
parents:
diff changeset
20 ; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
986e461dc072 Initial revision
glantau
parents:
diff changeset
21 ;
986e461dc072 Initial revision
glantau
parents:
diff changeset
22
986e461dc072 Initial revision
glantau
parents:
diff changeset
23 global pix_abs16x16_mmx
986e461dc072 Initial revision
glantau
parents:
diff changeset
24
986e461dc072 Initial revision
glantau
parents:
diff changeset
25 ; int pix_abs16x16_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
986e461dc072 Initial revision
glantau
parents:
diff changeset
26 ; esi = p1 (init: blk1)
986e461dc072 Initial revision
glantau
parents:
diff changeset
27 ; edi = p2 (init: blk2)
986e461dc072 Initial revision
glantau
parents:
diff changeset
28 ; ecx = rowsleft (init: h)
986e461dc072 Initial revision
glantau
parents:
diff changeset
29 ; edx = lx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
30
986e461dc072 Initial revision
glantau
parents:
diff changeset
31 ; mm0 = distance accumulators (4 words)
986e461dc072 Initial revision
glantau
parents:
diff changeset
32 ; mm1 = distance accumulators (4 words)
986e461dc072 Initial revision
glantau
parents:
diff changeset
33 ; mm2 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
34 ; mm3 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
35 ; mm4 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
36 ; mm5 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
37 ; mm6 = 0
986e461dc072 Initial revision
glantau
parents:
diff changeset
38 ; mm7 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
39
986e461dc072 Initial revision
glantau
parents:
diff changeset
40
986e461dc072 Initial revision
glantau
parents:
diff changeset
41 align 32
986e461dc072 Initial revision
glantau
parents:
diff changeset
;-----------------------------------------------------------------------
; int pix_abs16x16_mmx(unsigned char *pix1, unsigned char *pix2,
;                      int lx, int h);
;
; Sum of absolute differences between two blocks that are 16 bytes wide
; and h rows high; lx is the byte distance between consecutive rows and
; is applied to both pix1 and pix2.
;
; Args:   read from the stack at [ebp+8], [ebp+12], [ebp+16], [ebp+20].
; Return: eax = SAD, masked to 16 bits.  The sums are kept in saturating
;         16-bit lanes; a 16x16 block cannot exceed 16*16*255 = 65280,
;         so the result is exact for h <= 16.
; Rows:   two rows are consumed per loop iteration.  h <= 0 returns 0
;         without touching memory.  An odd positive h is rounded up to
;         the next even value, so one extra row is read.
; Regs:   ecx, edx, esi, edi are saved and restored.  ebx is never
;         modified, so it needs no save.  mm0-mm7 are clobbered.
; Note:   no emms is executed here; the caller has to issue it before
;         running x87 floating-point code.
;-----------------------------------------------------------------------
pix_abs16x16_mmx:
        push    ebp                     ; save caller's frame pointer
        mov     ebp, esp

        push    ecx                     ; keep these intact for the caller
        push    edx
        push    esi
        push    edi

        pxor    mm0, mm0                ; mm0 = accumulator A (4 words)
        pxor    mm1, mm1                ; mm1 = accumulator B (4 words)
        mov     esi, [ebp+8]            ; esi = pix1
        mov     edi, [ebp+12]           ; edi = pix2
        mov     edx, [ebp+16]           ; edx = lx (row stride)
        mov     ecx, [ebp+20]           ; ecx = rows left
        test    ecx, ecx
        jle     near .done              ; nothing to do: result is 0
        jmp     short .nextrow          ; hop over the alignment padding
        align   32

.nextrow:
        ; ---- row 0: |pix2 - pix1| for bytes 0..7 (mm4) and 8..15 (mm7) ----
        ; abs(a-b) on unsigned bytes = (a -sat b) | (b -sat a)
        movq    mm4, [edi]              ; pix2[0..7]
        movq    mm5, [esi]              ; pix1[0..7]
        movq    mm3, mm4
        movq    mm2, [esi+8]            ; pix1[8..15]
        psubusb mm4, mm5
        movq    mm7, [edi+8]            ; pix2[8..15]
        psubusb mm5, mm3
        por     mm4, mm5                ; mm4 = absdiff, row 0, bytes 0..7

        movq    mm3, mm7
        psubusb mm7, mm2
        psubusb mm2, mm3
        por     mm7, mm2                ; mm7 = absdiff, row 0, bytes 8..15

        ; ---- row 1: bytes 0..7 (mm2) and 8..15 (mm5) ----
        add     edi, edx
        movq    mm2, [edi]              ; pix2[0..7]
        add     esi, edx
        movq    mm5, [esi]              ; pix1[0..7]
        movq    mm3, mm2
        psubusb mm2, mm5
        movq    mm6, [esi+8]            ; pix1[8..15]
        psubusb mm5, mm3
        por     mm2, mm5                ; mm2 = absdiff, row 1, bytes 0..7

        movq    mm5, [edi+8]            ; pix2[8..15]
        movq    mm3, mm5
        psubusb mm5, mm6
        psubusb mm6, mm3
        por     mm5, mm6                ; mm5 = absdiff, row 1, bytes 8..15

        ; ---- widen the 32 byte differences to words and accumulate ----
        pxor    mm6, mm6                ; mm6 = 0, used as the unpack partner

        movq    mm3, mm7
        punpcklbw mm3, mm6
        punpckhbw mm7, mm6
        paddusw mm7, mm3                ; row 0, bytes 8..15 -> 4 words

        movq    mm3, mm5
        punpcklbw mm3, mm6
        punpckhbw mm5, mm6
        paddusw mm5, mm3                ; row 1, bytes 8..15 -> 4 words

        paddusw mm0, mm7
        paddusw mm1, mm5

        movq    mm3, mm4
        punpcklbw mm3, mm6
        punpckhbw mm4, mm6
        movq    mm5, mm2
        paddusw mm4, mm3                ; row 0, bytes 0..7 -> 4 words

        punpcklbw mm5, mm6
        punpckhbw mm2, mm6
        paddusw mm2, mm5                ; row 1, bytes 0..7 -> 4 words

        ; ---- advance to the next row pair ----
        add     esi, edx
        paddusw mm0, mm4
        add     edi, edx
        paddusw mm1, mm2
        sub     ecx, 2                  ; sets the flags tested by jg
        jg      near .nextrow           ; signed test: also stops on odd h

.done:
        ; ---- horizontal sum of the 8 accumulator words ----
        paddusw mm0, mm1
        movq    mm2, mm0
        psrlq   mm2, 32
        paddusw mm0, mm2                ; w0 += w2, w1 += w3
        movq    mm3, mm0
        psrlq   mm3, 16
        paddusw mm0, mm3                ; w0 = total
        movd    eax, mm0
        and     eax, 0xffff             ; drop the partial sums above word 0

        pop     edi
        pop     esi
        pop     edx
        pop     ecx

        pop     ebp                     ; restore caller's frame pointer
        ret
986e461dc072 Initial revision
glantau
parents:
diff changeset
171
986e461dc072 Initial revision
glantau
parents:
diff changeset
172 global pix_abs16x16_sse
986e461dc072 Initial revision
glantau
parents:
diff changeset
173
986e461dc072 Initial revision
glantau
parents:
diff changeset
174 ; int pix_abs16x16_sse(unsigned char *pix1,unsigned char *pix2, int lx, int h);
986e461dc072 Initial revision
glantau
parents:
diff changeset
175 ; esi = p1 (init: blk1)
986e461dc072 Initial revision
glantau
parents:
diff changeset
176 ; edi = p2 (init: blk2)
986e461dc072 Initial revision
glantau
parents:
diff changeset
177 ; ecx = rowsleft (init: h)
986e461dc072 Initial revision
glantau
parents:
diff changeset
178 ; edx = lx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
179
986e461dc072 Initial revision
glantau
parents:
diff changeset
180 ; mm0 = distance accumulators (4 words)
986e461dc072 Initial revision
glantau
parents:
diff changeset
181 ; mm1 = distance accumulators (4 words)
986e461dc072 Initial revision
glantau
parents:
diff changeset
182 ; mm2 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
183 ; mm3 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
184 ; mm4 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
185 ; mm5 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
186 ; mm6 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
187 ; mm7 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
188
986e461dc072 Initial revision
glantau
parents:
diff changeset
189
986e461dc072 Initial revision
glantau
parents:
diff changeset
190 align 32
986e461dc072 Initial revision
glantau
parents:
diff changeset
;-----------------------------------------------------------------------
; int pix_abs16x16_sse(unsigned char *pix1, unsigned char *pix2,
;                      int lx, int h);
;
; Sum of absolute differences between two blocks that are 16 bytes wide
; and h rows high, using psadbw on the MMX registers.  lx is the byte
; distance between consecutive rows and is applied to both pointers.
;
; Args:   read from the stack at [ebp+8], [ebp+12], [ebp+16], [ebp+20].
; Return: eax = SAD (low 32 bits of the summed accumulators).
; Rows:   four rows are consumed per loop iteration.  h <= 0 returns 0
;         without touching memory.  A positive h that is not a multiple
;         of 4 is rounded up to the next multiple, so extra rows are read.
; Regs:   ecx, edx, esi, edi are saved and restored.  ebx is never
;         modified, so it needs no save.  mm0, mm1, mm4-mm7 are clobbered.
; Note:   no emms is executed here; the caller has to issue it before
;         running x87 floating-point code.
;-----------------------------------------------------------------------
pix_abs16x16_sse:
        push    ebp                     ; save caller's frame pointer
        mov     ebp, esp

        push    ecx                     ; keep these intact for the caller
        push    edx
        push    esi
        push    edi

        pxor    mm0, mm0                ; mm0 = running SAD of bytes 0..7
        pxor    mm1, mm1                ; mm1 = running SAD of bytes 8..15
        mov     esi, [ebp+8]            ; esi = pix1
        mov     edi, [ebp+12]           ; edi = pix2
        mov     edx, [ebp+16]           ; edx = lx (row stride)
        mov     ecx, [ebp+20]           ; ecx = rows left
        test    ecx, ecx
        jle     near .done              ; nothing to do: result is 0
        jmp     short .next4row         ; hop over the alignment padding
        align   32

.next4row:
        ; Two expansions of a row pair = four rows per loop iteration.
        ; The temporaries alternate (mm4/mm5, then mm6/mm7) so that
        ; adjacent rows do not reuse the same registers.
        ; psadbw leaves its sum in bits 15:0 and zeroes bits 63:16, so
        ; paddd accumulates it without wrapping at 16 bits.
%rep 2
        movq    mm4, [edi]              ; pix2[0..7]
        movq    mm5, [edi+8]            ; pix2[8..15]
        psadbw  mm4, [esi]              ; SAD against pix1[0..7]
        psadbw  mm5, [esi+8]            ; SAD against pix1[8..15]
        paddd   mm0, mm4
        paddd   mm1, mm5
        add     edi, edx                ; step both pointers to the next row
        add     esi, edx

        movq    mm6, [edi]              ; pix2[0..7]
        movq    mm7, [edi+8]            ; pix2[8..15]
        psadbw  mm6, [esi]              ; SAD against pix1[0..7]
        psadbw  mm7, [esi+8]            ; SAD against pix1[8..15]
        paddd   mm0, mm6
        paddd   mm1, mm7
        add     edi, edx
        add     esi, edx
%endrep

        sub     ecx, 4                  ; sets the flags tested by jg
        jg      near .next4row          ; signed test: stops on any h

.done:
        paddd   mm0, mm1                ; combine the two half-row sums
        movd    eax, mm0                ; return value

        pop     edi
        pop     esi
        pop     edx
        pop     ecx

        pop     ebp                     ; restore caller's frame pointer
        ret
986e461dc072 Initial revision
glantau
parents:
diff changeset
277
986e461dc072 Initial revision
glantau
parents:
diff changeset
278 global pix_abs16x16_x2_mmx
986e461dc072 Initial revision
glantau
parents:
diff changeset
279
986e461dc072 Initial revision
glantau
parents:
diff changeset
280 ; int pix_abs16x16_x2_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
986e461dc072 Initial revision
glantau
parents:
diff changeset
281 ; esi = p1 (init: blk1)
986e461dc072 Initial revision
glantau
parents:
diff changeset
282 ; edi = p2 (init: blk2)
986e461dc072 Initial revision
glantau
parents:
diff changeset
283 ; ecx = rowsleft (init: h)
986e461dc072 Initial revision
glantau
parents:
diff changeset
284 ; edx = lx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
285
986e461dc072 Initial revision
glantau
parents:
diff changeset
286 ; mm0 = distance accumulators (4 words)
986e461dc072 Initial revision
glantau
parents:
diff changeset
287 ; mm1 = distance accumulators (4 words)
986e461dc072 Initial revision
glantau
parents:
diff changeset
288 ; mm2 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
289 ; mm3 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
290 ; mm4 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
291 ; mm5 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
292 ; mm6 = 0
986e461dc072 Initial revision
glantau
parents:
diff changeset
293 ; mm7 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
294
986e461dc072 Initial revision
glantau
parents:
diff changeset
295
986e461dc072 Initial revision
glantau
parents:
diff changeset
296 align 32
986e461dc072 Initial revision
glantau
parents:
diff changeset
297 pix_abs16x16_x2_mmx:
986e461dc072 Initial revision
glantau
parents:
diff changeset
298 push ebp ; save frame pointer
986e461dc072 Initial revision
glantau
parents:
diff changeset
299 mov ebp, esp
986e461dc072 Initial revision
glantau
parents:
diff changeset
300
986e461dc072 Initial revision
glantau
parents:
diff changeset
301 push ebx ; Saves registers (called saves convention in
986e461dc072 Initial revision
glantau
parents:
diff changeset
302 push ecx ; x86 GCC it seems)
986e461dc072 Initial revision
glantau
parents:
diff changeset
303 push edx ;
986e461dc072 Initial revision
glantau
parents:
diff changeset
304 push esi
986e461dc072 Initial revision
glantau
parents:
diff changeset
305 push edi
986e461dc072 Initial revision
glantau
parents:
diff changeset
306
986e461dc072 Initial revision
glantau
parents:
diff changeset
307 pxor mm0, mm0 ; zero acculumators
986e461dc072 Initial revision
glantau
parents:
diff changeset
308 pxor mm1, mm1
986e461dc072 Initial revision
glantau
parents:
diff changeset
309 pxor mm6, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
310 mov esi, [ebp+8] ; get pix1
986e461dc072 Initial revision
glantau
parents:
diff changeset
311 mov edi, [ebp+12] ; get pix2
986e461dc072 Initial revision
glantau
parents:
diff changeset
312 mov edx, [ebp+16] ; get lx
986e461dc072 Initial revision
glantau
parents:
diff changeset
313 mov ecx, [ebp+20] ; get rowsleft
986e461dc072 Initial revision
glantau
parents:
diff changeset
314 jmp .nextrow_x2
986e461dc072 Initial revision
glantau
parents:
diff changeset
315 align 32
986e461dc072 Initial revision
glantau
parents:
diff changeset
316
986e461dc072 Initial revision
glantau
parents:
diff changeset
317 .nextrow_x2:
986e461dc072 Initial revision
glantau
parents:
diff changeset
318 ; First 8 bytes of the row
986e461dc072 Initial revision
glantau
parents:
diff changeset
319
986e461dc072 Initial revision
glantau
parents:
diff changeset
320 movq mm4, [edi] ; load first 8 bytes of pix2 row
986e461dc072 Initial revision
glantau
parents:
diff changeset
321 movq mm5, [edi+1] ; load bytes 1-8 of pix2 row
986e461dc072 Initial revision
glantau
parents:
diff changeset
322
986e461dc072 Initial revision
glantau
parents:
diff changeset
323 movq mm2, mm4 ; copy mm4 on mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
324 movq mm3, mm5 ; copy mm5 on mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
325 punpcklbw mm4, mm6 ; first 4 bytes of [edi] on mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
326 punpcklbw mm5, mm6 ; first 4 bytes of [edi+1] on mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
327 paddusw mm4, mm5 ; mm4 := first 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
328 psrlw mm4, 1
986e461dc072 Initial revision
glantau
parents:
diff changeset
329
986e461dc072 Initial revision
glantau
parents:
diff changeset
330 punpckhbw mm2, mm6 ; last 4 bytes of [edi] on mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
331 punpckhbw mm3, mm6 ; last 4 bytes of [edi+1] on mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
332 paddusw mm2, mm3 ; mm2 := last 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
333 psrlw mm2, 1
986e461dc072 Initial revision
glantau
parents:
diff changeset
334
986e461dc072 Initial revision
glantau
parents:
diff changeset
335 packuswb mm4, mm2 ; pack 8 bytes interpolated on mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
336 movq mm5,[esi] ; load first 8 bytes of pix1 row
986e461dc072 Initial revision
glantau
parents:
diff changeset
337
986e461dc072 Initial revision
glantau
parents:
diff changeset
338 movq mm3, mm4 ; mm4 := abs(mm4-mm5)
986e461dc072 Initial revision
glantau
parents:
diff changeset
339 psubusb mm4, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
340 psubusb mm5, mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
341 por mm4, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
342
986e461dc072 Initial revision
glantau
parents:
diff changeset
343 ; Last 8 bytes of the row
986e461dc072 Initial revision
glantau
parents:
diff changeset
344
986e461dc072 Initial revision
glantau
parents:
diff changeset
345 movq mm7, [edi+8] ; load last 8 bytes of pix2 row
986e461dc072 Initial revision
glantau
parents:
diff changeset
346 movq mm5, [edi+9] ; load bytes 10-17 of pix2 row
986e461dc072 Initial revision
glantau
parents:
diff changeset
347
986e461dc072 Initial revision
glantau
parents:
diff changeset
348 movq mm2, mm7 ; copy mm7 on mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
349 movq mm3, mm5 ; copy mm5 on mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
350 punpcklbw mm7, mm6 ; first 4 bytes of [edi+8] on mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
351 punpcklbw mm5, mm6 ; first 4 bytes of [edi+9] on mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
352 paddusw mm7, mm5 ; mm1 := first 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
353 psrlw mm7, 1
986e461dc072 Initial revision
glantau
parents:
diff changeset
354
986e461dc072 Initial revision
glantau
parents:
diff changeset
355 punpckhbw mm2, mm6 ; last 4 bytes of [edi] on mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
356 punpckhbw mm3, mm6 ; last 4 bytes of [edi+1] on mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
357 paddusw mm2, mm3 ; mm2 := last 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
358 psrlw mm2, 1
986e461dc072 Initial revision
glantau
parents:
diff changeset
359
986e461dc072 Initial revision
glantau
parents:
diff changeset
360 packuswb mm7, mm2 ; pack 8 bytes interpolated on mm1
986e461dc072 Initial revision
glantau
parents:
diff changeset
361 movq mm5,[esi+8] ; load last 8 bytes of pix1 row
986e461dc072 Initial revision
glantau
parents:
diff changeset
362
986e461dc072 Initial revision
glantau
parents:
diff changeset
363 movq mm3, mm7 ; mm7 := abs(mm7-mm5)
986e461dc072 Initial revision
glantau
parents:
diff changeset
364 psubusb mm7, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
365 psubusb mm5, mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
366 por mm7, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
367
986e461dc072 Initial revision
glantau
parents:
diff changeset
368 ; Now mm4 and mm7 have 16 absdiffs to add
986e461dc072 Initial revision
glantau
parents:
diff changeset
369
986e461dc072 Initial revision
glantau
parents:
diff changeset
370 movq mm3, mm4 ; Make copies of these bytes
986e461dc072 Initial revision
glantau
parents:
diff changeset
371 movq mm2, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
372
986e461dc072 Initial revision
glantau
parents:
diff changeset
373 punpcklbw mm4, mm6 ; Unpack to words and add
986e461dc072 Initial revision
glantau
parents:
diff changeset
374 punpcklbw mm7, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
375 paddusw mm4, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
376 paddusw mm0, mm4 ; Add to the accumulator (mm0)
986e461dc072 Initial revision
glantau
parents:
diff changeset
377
986e461dc072 Initial revision
glantau
parents:
diff changeset
378 punpckhbw mm3, mm6 ; Unpack to words and add
986e461dc072 Initial revision
glantau
parents:
diff changeset
379 punpckhbw mm2, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
380 paddusw mm3, mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
381 paddusw mm1, mm3 ; Add to the accumulator (mm1)
986e461dc072 Initial revision
glantau
parents:
diff changeset
382
986e461dc072 Initial revision
glantau
parents:
diff changeset
383 ; Loop termination
986e461dc072 Initial revision
glantau
parents:
diff changeset
384
986e461dc072 Initial revision
glantau
parents:
diff changeset
385 add esi, edx ; update pointers to next row
986e461dc072 Initial revision
glantau
parents:
diff changeset
386 add edi, edx
986e461dc072 Initial revision
glantau
parents:
diff changeset
387
986e461dc072 Initial revision
glantau
parents:
diff changeset
388 sub ecx,1
986e461dc072 Initial revision
glantau
parents:
diff changeset
389 test ecx, ecx ; check rowsleft
986e461dc072 Initial revision
glantau
parents:
diff changeset
390 jnz near .nextrow_x2
986e461dc072 Initial revision
glantau
parents:
diff changeset
391
986e461dc072 Initial revision
glantau
parents:
diff changeset
392 paddusw mm0, mm1
986e461dc072 Initial revision
glantau
parents:
diff changeset
393
986e461dc072 Initial revision
glantau
parents:
diff changeset
394 movq mm1, mm0 ; Copy mm0 to mm1
986e461dc072 Initial revision
glantau
parents:
diff changeset
395 psrlq mm1, 32
986e461dc072 Initial revision
glantau
parents:
diff changeset
396 paddusw mm0, mm1 ; Add
986e461dc072 Initial revision
glantau
parents:
diff changeset
397 movq mm2, mm0
986e461dc072 Initial revision
glantau
parents:
diff changeset
398 psrlq mm2, 16
986e461dc072 Initial revision
glantau
parents:
diff changeset
399 paddusw mm0, mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
400 movd eax, mm0 ; Store return value
986e461dc072 Initial revision
glantau
parents:
diff changeset
401 and eax, 0xffff
986e461dc072 Initial revision
glantau
parents:
diff changeset
402
986e461dc072 Initial revision
glantau
parents:
diff changeset
403 pop edi
986e461dc072 Initial revision
glantau
parents:
diff changeset
404 pop esi
986e461dc072 Initial revision
glantau
parents:
diff changeset
405 pop edx
986e461dc072 Initial revision
glantau
parents:
diff changeset
406 pop ecx
986e461dc072 Initial revision
glantau
parents:
diff changeset
407 pop ebx
986e461dc072 Initial revision
glantau
parents:
diff changeset
408
986e461dc072 Initial revision
glantau
parents:
diff changeset
409 pop ebp ; restore caller's frame pointer
986e461dc072 Initial revision
glantau
parents:
diff changeset
410
986e461dc072 Initial revision
glantau
parents:
diff changeset
411 emms ; clear mmx registers
986e461dc072 Initial revision
glantau
parents:
diff changeset
412 ret ; return
986e461dc072 Initial revision
glantau
parents:
diff changeset
413
986e461dc072 Initial revision
glantau
parents:
diff changeset
414 global pix_abs16x16_y2_mmx
986e461dc072 Initial revision
glantau
parents:
diff changeset
415
986e461dc072 Initial revision
glantau
parents:
diff changeset
416 ; int pix_abs16x16_y2_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
986e461dc072 Initial revision
glantau
parents:
diff changeset
417 ; esi = p1 (init: blk1)
986e461dc072 Initial revision
glantau
parents:
diff changeset
418 ; edi = p2 (init: blk2)
986e461dc072 Initial revision
glantau
parents:
diff changeset
419 ; ebx = p2 + lx
986e461dc072 Initial revision
glantau
parents:
diff changeset
420 ; ecx = rowsleft (init: h)
986e461dc072 Initial revision
glantau
parents:
diff changeset
421 ; edx = lx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
422
986e461dc072 Initial revision
glantau
parents:
diff changeset
423 ; mm0 = distance accumulators (4 words)
986e461dc072 Initial revision
glantau
parents:
diff changeset
424 ; mm1 = distance accumulators (4 words)
986e461dc072 Initial revision
glantau
parents:
diff changeset
425 ; mm2 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
426 ; mm3 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
427 ; mm4 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
428 ; mm5 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
429 ; mm6 = 0
986e461dc072 Initial revision
glantau
parents:
diff changeset
430 ; mm7 = temp
986e461dc072 Initial revision
glantau
parents:
diff changeset
431
986e461dc072 Initial revision
glantau
parents:
diff changeset
432
986e461dc072 Initial revision
glantau
parents:
diff changeset
433 align 32
986e461dc072 Initial revision
glantau
parents:
diff changeset
434 pix_abs16x16_y2_mmx:
986e461dc072 Initial revision
glantau
parents:
diff changeset
435 push ebp ; save frame pointer
986e461dc072 Initial revision
glantau
parents:
diff changeset
436 mov ebp, esp
986e461dc072 Initial revision
glantau
parents:
diff changeset
437
986e461dc072 Initial revision
glantau
parents:
diff changeset
438 push ebx ; Save registers (callee-saves convention in
986e461dc072 Initial revision
glantau
parents:
diff changeset
439 push ecx ; x86 GCC it seems)
986e461dc072 Initial revision
glantau
parents:
diff changeset
440 push edx ;
986e461dc072 Initial revision
glantau
parents:
diff changeset
441 push esi
986e461dc072 Initial revision
glantau
parents:
diff changeset
442 push edi
986e461dc072 Initial revision
glantau
parents:
diff changeset
443
986e461dc072 Initial revision
glantau
parents:
diff changeset
444 pxor mm0, mm0 ; zero accumulators
986e461dc072 Initial revision
glantau
parents:
diff changeset
445 pxor mm1, mm1
986e461dc072 Initial revision
glantau
parents:
diff changeset
446 pxor mm6, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
447 mov esi, [ebp+8] ; get pix1
986e461dc072 Initial revision
glantau
parents:
diff changeset
448 mov edi, [ebp+12] ; get pix2
986e461dc072 Initial revision
glantau
parents:
diff changeset
449 mov edx, [ebp+16] ; get lx
986e461dc072 Initial revision
glantau
parents:
diff changeset
450 mov ecx, [ebp+20] ; get rowsleft
986e461dc072 Initial revision
glantau
parents:
diff changeset
451 mov ebx, edi
986e461dc072 Initial revision
glantau
parents:
diff changeset
452 add ebx, edx
986e461dc072 Initial revision
glantau
parents:
diff changeset
453 jmp .nextrow_y2
986e461dc072 Initial revision
glantau
parents:
diff changeset
454 align 32
986e461dc072 Initial revision
glantau
parents:
diff changeset
455
986e461dc072 Initial revision
glantau
parents:
diff changeset
456 .nextrow_y2:
986e461dc072 Initial revision
glantau
parents:
diff changeset
457 ; First 8 bytes of the row
986e461dc072 Initial revision
glantau
parents:
diff changeset
458
986e461dc072 Initial revision
glantau
parents:
diff changeset
459 movq mm4, [edi] ; load first 8 bytes of pix2 row
986e461dc072 Initial revision
glantau
parents:
diff changeset
460 movq mm5, [ebx] ; load first 8 bytes of next pix2 row (pix2 + lx)
986e461dc072 Initial revision
glantau
parents:
diff changeset
461
986e461dc072 Initial revision
glantau
parents:
diff changeset
462 movq mm2, mm4 ; copy mm4 on mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
463 movq mm3, mm5 ; copy mm5 on mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
464 punpcklbw mm4, mm6 ; first 4 bytes of [edi] on mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
465 punpcklbw mm5, mm6 ; first 4 bytes of [ebx] on mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
466 paddusw mm4, mm5 ; mm4 := first 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
467 psrlw mm4, 1
986e461dc072 Initial revision
glantau
parents:
diff changeset
468
986e461dc072 Initial revision
glantau
parents:
diff changeset
469 punpckhbw mm2, mm6 ; last 4 bytes of [edi] on mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
470 punpckhbw mm3, mm6 ; last 4 bytes of [ebx] on mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
471 paddusw mm2, mm3 ; mm2 := last 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
472 psrlw mm2, 1
986e461dc072 Initial revision
glantau
parents:
diff changeset
473
986e461dc072 Initial revision
glantau
parents:
diff changeset
474 packuswb mm4, mm2 ; pack 8 bytes interpolated on mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
475 movq mm5,[esi] ; load first 8 bytes of pix1 row
986e461dc072 Initial revision
glantau
parents:
diff changeset
476
986e461dc072 Initial revision
glantau
parents:
diff changeset
477 movq mm3, mm4 ; mm4 := abs(mm4-mm5)
986e461dc072 Initial revision
glantau
parents:
diff changeset
478 psubusb mm4, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
479 psubusb mm5, mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
480 por mm4, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
481
986e461dc072 Initial revision
glantau
parents:
diff changeset
482 ; Last 8 bytes of the row
986e461dc072 Initial revision
glantau
parents:
diff changeset
483
986e461dc072 Initial revision
glantau
parents:
diff changeset
484 movq mm7, [edi+8] ; load last 8 bytes of pix2 row
986e461dc072 Initial revision
glantau
parents:
diff changeset
485 movq mm5, [ebx+8] ; load last 8 bytes of next pix2 row (pix2 + lx)
986e461dc072 Initial revision
glantau
parents:
diff changeset
486
986e461dc072 Initial revision
glantau
parents:
diff changeset
487 movq mm2, mm7 ; copy mm7 on mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
488 movq mm3, mm5 ; copy mm5 on mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
489 punpcklbw mm7, mm6 ; first 4 bytes of [edi+8] on mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
490 punpcklbw mm5, mm6 ; first 4 bytes of [ebx+8] on mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
491 paddusw mm7, mm5 ; mm7 := first 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
492 psrlw mm7, 1
986e461dc072 Initial revision
glantau
parents:
diff changeset
493
986e461dc072 Initial revision
glantau
parents:
diff changeset
494 punpckhbw mm2, mm6 ; last 4 bytes of [edi+8] on mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
495 punpckhbw mm3, mm6 ; last 4 bytes of [ebx+8] on mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
496 paddusw mm2, mm3 ; mm2 := last 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
497 psrlw mm2, 1
986e461dc072 Initial revision
glantau
parents:
diff changeset
498
986e461dc072 Initial revision
glantau
parents:
diff changeset
499 packuswb mm7, mm2 ; pack 8 bytes interpolated on mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
500 movq mm5,[esi+8] ; load last 8 bytes of pix1 row
986e461dc072 Initial revision
glantau
parents:
diff changeset
501
986e461dc072 Initial revision
glantau
parents:
diff changeset
502 movq mm3, mm7 ; mm7 := abs(mm7-mm5)
986e461dc072 Initial revision
glantau
parents:
diff changeset
503 psubusb mm7, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
504 psubusb mm5, mm3
986e461dc072 Initial revision
glantau
parents:
diff changeset
505 por mm7, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
506
986e461dc072 Initial revision
glantau
parents:
diff changeset
507 ; Now mm4 and mm7 have 16 absdiffs to add
986e461dc072 Initial revision
glantau
parents:
diff changeset
508
986e461dc072 Initial revision
glantau
parents:
diff changeset
509 movq mm3, mm4 ; Make copies of these bytes
986e461dc072 Initial revision
glantau
parents:
diff changeset
510 movq mm2, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
511
986e461dc072 Initial revision
glantau
parents:
diff changeset
512 punpcklbw mm4, mm6 ; Unpack to words and add
986e461dc072 Initial revision
glantau
parents:
diff changeset
513 punpcklbw mm7, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
514 paddusw mm4, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
515 paddusw mm0, mm4 ; Add to the accumulator (mm0)
986e461dc072 Initial revision
glantau
parents:
diff changeset
516
986e461dc072 Initial revision
glantau
parents:
diff changeset
517 punpckhbw mm3, mm6 ; Unpack to words and add
986e461dc072 Initial revision
glantau
parents:
diff changeset
518 punpckhbw mm2, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
519 paddusw mm3, mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
520 paddusw mm1, mm3 ; Add to the accumulator (mm1)
986e461dc072 Initial revision
glantau
parents:
diff changeset
521
986e461dc072 Initial revision
glantau
parents:
diff changeset
522 ; Loop termination
986e461dc072 Initial revision
glantau
parents:
diff changeset
523
986e461dc072 Initial revision
glantau
parents:
diff changeset
524 add esi, edx ; update pointers to next row
986e461dc072 Initial revision
glantau
parents:
diff changeset
525 add edi, edx
986e461dc072 Initial revision
glantau
parents:
diff changeset
526 add ebx, edx
986e461dc072 Initial revision
glantau
parents:
diff changeset
527 sub ecx,1
986e461dc072 Initial revision
glantau
parents:
diff changeset
528 test ecx, ecx ; check rowsleft
986e461dc072 Initial revision
glantau
parents:
diff changeset
529 jnz near .nextrow_y2
986e461dc072 Initial revision
glantau
parents:
diff changeset
530
986e461dc072 Initial revision
glantau
parents:
diff changeset
531 paddusw mm0, mm1
986e461dc072 Initial revision
glantau
parents:
diff changeset
532
986e461dc072 Initial revision
glantau
parents:
diff changeset
533 movq mm1, mm0 ; Copy mm0 to mm1
986e461dc072 Initial revision
glantau
parents:
diff changeset
534 psrlq mm1, 32
986e461dc072 Initial revision
glantau
parents:
diff changeset
535 paddusw mm0, mm1 ; Add
986e461dc072 Initial revision
glantau
parents:
diff changeset
536 movq mm2, mm0
986e461dc072 Initial revision
glantau
parents:
diff changeset
537 psrlq mm2, 16
986e461dc072 Initial revision
glantau
parents:
diff changeset
538 paddusw mm0, mm2
986e461dc072 Initial revision
glantau
parents:
diff changeset
539 movd eax, mm0 ; Store return value
986e461dc072 Initial revision
glantau
parents:
diff changeset
540 and eax, 0xffff
986e461dc072 Initial revision
glantau
parents:
diff changeset
541
986e461dc072 Initial revision
glantau
parents:
diff changeset
542 pop edi
986e461dc072 Initial revision
glantau
parents:
diff changeset
543 pop esi
986e461dc072 Initial revision
glantau
parents:
diff changeset
544 pop edx
986e461dc072 Initial revision
glantau
parents:
diff changeset
545 pop ecx
986e461dc072 Initial revision
glantau
parents:
diff changeset
546 pop ebx
986e461dc072 Initial revision
glantau
parents:
diff changeset
547
986e461dc072 Initial revision
glantau
parents:
diff changeset
548 pop ebp ; restore caller's frame pointer
986e461dc072 Initial revision
glantau
parents:
diff changeset
549
986e461dc072 Initial revision
glantau
parents:
diff changeset
550 emms ; clear mmx registers
986e461dc072 Initial revision
glantau
parents:
diff changeset
551 ret ; return
986e461dc072 Initial revision
glantau
parents:
diff changeset
552
986e461dc072 Initial revision
glantau
parents:
diff changeset
553 global pix_abs16x16_xy2_mmx
986e461dc072 Initial revision
glantau
parents:
diff changeset
554
986e461dc072 Initial revision
glantau
parents:
diff changeset
555 ; int pix_abs16x16_xy2_mmx(unsigned char *p1,unsigned char *p2,int lx,int h);
986e461dc072 Initial revision
glantau
parents:
diff changeset
556
986e461dc072 Initial revision
glantau
parents:
diff changeset
557 ; esi = p1 (init: blk1)
986e461dc072 Initial revision
glantau
parents:
diff changeset
558 ; edi = p2 (init: blk2)
986e461dc072 Initial revision
glantau
parents:
diff changeset
559 ; ebx = p1+lx
986e461dc072 Initial revision
glantau
parents:
diff changeset
560 ; ecx = rowsleft (init: h)
986e461dc072 Initial revision
glantau
parents:
diff changeset
561 ; edx = lx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
562
986e461dc072 Initial revision
glantau
parents:
diff changeset
563 ; mm0 = distance accumulators (4 words)
986e461dc072 Initial revision
glantau
parents:
diff changeset
564 ; mm1 = bytes p2
986e461dc072 Initial revision
glantau
parents:
diff changeset
565 ; mm2 = bytes p1
986e461dc072 Initial revision
glantau
parents:
diff changeset
566 ; mm3 = bytes p1+lx
986e461dc072 Initial revision
glantau
parents:
diff changeset
567 ; I'd love to find someplace to stash p1+1 and p1+lx+1's bytes
986e461dc072 Initial revision
glantau
parents:
diff changeset
568 ; but I don't think that's going to happen in iA32-land...
986e461dc072 Initial revision
glantau
parents:
diff changeset
569 ; mm4 = temp 4 bytes in words interpolating p1, p1+1
986e461dc072 Initial revision
glantau
parents:
diff changeset
570 ; mm5 = temp 4 bytes in words from p2
986e461dc072 Initial revision
glantau
parents:
diff changeset
571 ; mm6 = temp comparison bit mask p1,p2
986e461dc072 Initial revision
glantau
parents:
diff changeset
572 ; mm7 = temp comparison bit mask p2,p1
986e461dc072 Initial revision
glantau
parents:
diff changeset
573
986e461dc072 Initial revision
glantau
parents:
diff changeset
574
986e461dc072 Initial revision
glantau
parents:
diff changeset
575 align 32
986e461dc072 Initial revision
glantau
parents:
diff changeset
576 pix_abs16x16_xy2_mmx:
986e461dc072 Initial revision
glantau
parents:
diff changeset
577 push ebp ; save frame pointer
986e461dc072 Initial revision
glantau
parents:
diff changeset
578 mov ebp, esp ; so that we can do this
986e461dc072 Initial revision
glantau
parents:
diff changeset
579
986e461dc072 Initial revision
glantau
parents:
diff changeset
580 push ebx ; Save registers (callee-saves convention in
986e461dc072 Initial revision
glantau
parents:
diff changeset
581 push ecx ; x86 GCC it seems)
986e461dc072 Initial revision
glantau
parents:
diff changeset
582 push edx ;
986e461dc072 Initial revision
glantau
parents:
diff changeset
583 push esi
986e461dc072 Initial revision
glantau
parents:
diff changeset
584 push edi
986e461dc072 Initial revision
glantau
parents:
diff changeset
585
986e461dc072 Initial revision
glantau
parents:
diff changeset
586 pxor mm0, mm0 ; zero accumulator
986e461dc072 Initial revision
glantau
parents:
diff changeset
587
986e461dc072 Initial revision
glantau
parents:
diff changeset
588 mov esi, [ebp+12] ; get p1 (2nd C argument: the block that is interpolated)
986e461dc072 Initial revision
glantau
parents:
diff changeset
589 mov edi, [ebp+8] ; get p2 (1st C argument: the block compared against)
986e461dc072 Initial revision
glantau
parents:
diff changeset
590 mov edx, [ebp+16] ; get lx
986e461dc072 Initial revision
glantau
parents:
diff changeset
591 mov ecx, [ebp+20] ; rowsleft := h
986e461dc072 Initial revision
glantau
parents:
diff changeset
592 mov ebx, esi
986e461dc072 Initial revision
glantau
parents:
diff changeset
593 add ebx, edx
986e461dc072 Initial revision
glantau
parents:
diff changeset
594 jmp .nextrowmm11 ; snap to it
986e461dc072 Initial revision
glantau
parents:
diff changeset
595 align 32
986e461dc072 Initial revision
glantau
parents:
diff changeset
596 .nextrowmm11:
986e461dc072 Initial revision
glantau
parents:
diff changeset
597
986e461dc072 Initial revision
glantau
parents:
diff changeset
598 ;;
986e461dc072 Initial revision
glantau
parents:
diff changeset
599 ;; First 8 bytes of row
986e461dc072 Initial revision
glantau
parents:
diff changeset
600 ;;
986e461dc072 Initial revision
glantau
parents:
diff changeset
601
986e461dc072 Initial revision
glantau
parents:
diff changeset
602 ;; First 4 bytes of 8
986e461dc072 Initial revision
glantau
parents:
diff changeset
603
986e461dc072 Initial revision
glantau
parents:
diff changeset
604 movq mm4, [esi] ; mm4 := first 4 bytes p1
986e461dc072 Initial revision
glantau
parents:
diff changeset
605 pxor mm7, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
606 movq mm2, mm4 ; mm2 records all 8 bytes
986e461dc072 Initial revision
glantau
parents:
diff changeset
607 punpcklbw mm4, mm7 ; First 4 bytes p1 in Words...
986e461dc072 Initial revision
glantau
parents:
diff changeset
608
986e461dc072 Initial revision
glantau
parents:
diff changeset
609 movq mm6, [ebx] ; mm6 := first 4 bytes p1+lx
986e461dc072 Initial revision
glantau
parents:
diff changeset
610 movq mm3, mm6 ; mm3 records all 8 bytes
986e461dc072 Initial revision
glantau
parents:
diff changeset
611 punpcklbw mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
612 paddw mm4, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
613
986e461dc072 Initial revision
glantau
parents:
diff changeset
614
986e461dc072 Initial revision
glantau
parents:
diff changeset
615 movq mm5, [esi+1] ; mm5 := first 4 bytes p1+1
986e461dc072 Initial revision
glantau
parents:
diff changeset
616 punpcklbw mm5, mm7 ; First 4 bytes p1+1 in Words...
986e461dc072 Initial revision
glantau
parents:
diff changeset
617 paddw mm4, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
618 movq mm6, [ebx+1] ; mm6 := first 4 bytes p1+lx+1
986e461dc072 Initial revision
glantau
parents:
diff changeset
619 punpcklbw mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
620 paddw mm4, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
621
986e461dc072 Initial revision
glantau
parents:
diff changeset
622 psrlw mm4, 2 ; mm4 := First 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
623
986e461dc072 Initial revision
glantau
parents:
diff changeset
624 movq mm5, [edi] ; mm5:=first 4 bytes of p2 in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
625 movq mm1, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
626 punpcklbw mm5, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
627
986e461dc072 Initial revision
glantau
parents:
diff changeset
628 movq mm7,mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
629 pcmpgtw mm7,mm5 ; mm7 := [i : W0..3,mm4>mm5]
986e461dc072 Initial revision
glantau
parents:
diff changeset
630
986e461dc072 Initial revision
glantau
parents:
diff changeset
631 movq mm6,mm4 ; mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)]
986e461dc072 Initial revision
glantau
parents:
diff changeset
632 psubw mm6,mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
633 pand mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
634
986e461dc072 Initial revision
glantau
parents:
diff changeset
635 paddw mm0, mm6 ; Add to accumulator
986e461dc072 Initial revision
glantau
parents:
diff changeset
636
986e461dc072 Initial revision
glantau
parents:
diff changeset
637 movq mm6,mm5 ; mm6 := [i : W0..3,mm5>mm4]
986e461dc072 Initial revision
glantau
parents:
diff changeset
638 pcmpgtw mm6,mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
639 psubw mm5,mm4 ; mm5 := [i : W0..3, (mm5-mm4)*(mm5-mm4 > 0)]
986e461dc072 Initial revision
glantau
parents:
diff changeset
640 pand mm5, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
641
986e461dc072 Initial revision
glantau
parents:
diff changeset
642 paddw mm0, mm5 ; Add to accumulator
986e461dc072 Initial revision
glantau
parents:
diff changeset
643
986e461dc072 Initial revision
glantau
parents:
diff changeset
644 ;; Second 4 bytes of 8
986e461dc072 Initial revision
glantau
parents:
diff changeset
645
986e461dc072 Initial revision
glantau
parents:
diff changeset
646 movq mm4, mm2 ; mm4 := Second 4 bytes p1 in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
647 pxor mm7, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
648 punpckhbw mm4, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
649 movq mm6, mm3 ; mm6 := Second 4 bytes p1+lx in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
650 punpckhbw mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
651 paddw mm4, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
652
986e461dc072 Initial revision
glantau
parents:
diff changeset
653 movq mm5, [esi+1] ; mm5 := 8 bytes at p1+1 (upper 4 used below)
986e461dc072 Initial revision
glantau
parents:
diff changeset
654 punpckhbw mm5, mm7 ; Second 4 bytes p1+1 in Words...
986e461dc072 Initial revision
glantau
parents:
diff changeset
655 paddw mm4, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
656 movq mm6, [ebx+1] ; mm6 := 8 bytes at p1+lx+1 (upper 4 used below)
986e461dc072 Initial revision
glantau
parents:
diff changeset
657 punpckhbw mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
658 paddw mm4, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
659
986e461dc072 Initial revision
glantau
parents:
diff changeset
660 psrlw mm4, 2 ; mm4 := Second 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
661
986e461dc072 Initial revision
glantau
parents:
diff changeset
662 movq mm5, mm1 ; mm5:= second 4 bytes of p2 in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
663 punpckhbw mm5, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
664
986e461dc072 Initial revision
glantau
parents:
diff changeset
665 movq mm7,mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
666 pcmpgtw mm7,mm5 ; mm7 := [i : W0..3,mm4>mm5]
986e461dc072 Initial revision
glantau
parents:
diff changeset
667
986e461dc072 Initial revision
glantau
parents:
diff changeset
668 movq mm6,mm4 ; mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)]
986e461dc072 Initial revision
glantau
parents:
diff changeset
669 psubw mm6,mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
670 pand mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
671
986e461dc072 Initial revision
glantau
parents:
diff changeset
672 paddw mm0, mm6 ; Add to accumulator
986e461dc072 Initial revision
glantau
parents:
diff changeset
673
986e461dc072 Initial revision
glantau
parents:
diff changeset
674 movq mm6,mm5 ; mm6 := [i : W0..3,mm5>mm4]
986e461dc072 Initial revision
glantau
parents:
diff changeset
675 pcmpgtw mm6,mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
676 psubw mm5,mm4 ; mm5 := [i : W0..3, (mm5-mm4)*(mm5-mm4 > 0)]
986e461dc072 Initial revision
glantau
parents:
diff changeset
677 pand mm5, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
678
986e461dc072 Initial revision
glantau
parents:
diff changeset
679 paddw mm0, mm5 ; Add to accumulator
986e461dc072 Initial revision
glantau
parents:
diff changeset
680
986e461dc072 Initial revision
glantau
parents:
diff changeset
681
986e461dc072 Initial revision
glantau
parents:
diff changeset
682 ;;
986e461dc072 Initial revision
glantau
parents:
diff changeset
683 ;; Second 8 bytes of row
986e461dc072 Initial revision
glantau
parents:
diff changeset
684 ;;
986e461dc072 Initial revision
glantau
parents:
diff changeset
685 ;; First 4 bytes of 8
986e461dc072 Initial revision
glantau
parents:
diff changeset
686
986e461dc072 Initial revision
glantau
parents:
diff changeset
687 movq mm4, [esi+8] ; mm4 := first 4 bytes p1+8
986e461dc072 Initial revision
glantau
parents:
diff changeset
688 pxor mm7, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
689 movq mm2, mm4 ; mm2 records all 8 bytes
986e461dc072 Initial revision
glantau
parents:
diff changeset
690 punpcklbw mm4, mm7 ; First 4 bytes p1 in Words...
986e461dc072 Initial revision
glantau
parents:
diff changeset
691
986e461dc072 Initial revision
glantau
parents:
diff changeset
692 movq mm6, [ebx+8] ; mm6 := first 4 bytes p1+lx+8
986e461dc072 Initial revision
glantau
parents:
diff changeset
693 movq mm3, mm6 ; mm3 records all 8 bytes
986e461dc072 Initial revision
glantau
parents:
diff changeset
694 punpcklbw mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
695 paddw mm4, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
696
986e461dc072 Initial revision
glantau
parents:
diff changeset
697
986e461dc072 Initial revision
glantau
parents:
diff changeset
698 movq mm5, [esi+9] ; mm5 := first 4 bytes p1+9
986e461dc072 Initial revision
glantau
parents:
diff changeset
699 punpcklbw mm5, mm7 ; First 4 bytes p1+9 in Words...
986e461dc072 Initial revision
glantau
parents:
diff changeset
700 paddw mm4, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
701 movq mm6, [ebx+9] ; mm6 := first 4 bytes p1+lx+9
986e461dc072 Initial revision
glantau
parents:
diff changeset
702 punpcklbw mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
703 paddw mm4, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
704
986e461dc072 Initial revision
glantau
parents:
diff changeset
705 psrlw mm4, 2 ; mm4 := First 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
706
986e461dc072 Initial revision
glantau
parents:
diff changeset
707 movq mm5, [edi+8] ; mm5:=first 4 bytes of p2+8 in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
708 movq mm1, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
709 punpcklbw mm5, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
710
986e461dc072 Initial revision
glantau
parents:
diff changeset
711 movq mm7,mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
712 pcmpgtw mm7,mm5 ; mm7 := [i : W0..3,mm4>mm5]
986e461dc072 Initial revision
glantau
parents:
diff changeset
713
986e461dc072 Initial revision
glantau
parents:
diff changeset
714 movq mm6,mm4 ; mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)]
986e461dc072 Initial revision
glantau
parents:
diff changeset
715 psubw mm6,mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
716 pand mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
717
986e461dc072 Initial revision
glantau
parents:
diff changeset
718 paddw mm0, mm6 ; Add to accumulator
986e461dc072 Initial revision
glantau
parents:
diff changeset
719
986e461dc072 Initial revision
glantau
parents:
diff changeset
720 movq mm6,mm5 ; mm6 := [i : W0..3,mm5>mm4]
986e461dc072 Initial revision
glantau
parents:
diff changeset
721 pcmpgtw mm6,mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
722 psubw mm5,mm4 ; mm5 := [i : W0..3, (mm5-mm4)*(mm5-mm4 > 0)]
986e461dc072 Initial revision
glantau
parents:
diff changeset
723 pand mm5, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
724
986e461dc072 Initial revision
glantau
parents:
diff changeset
725 paddw mm0, mm5 ; Add to accumulator
986e461dc072 Initial revision
glantau
parents:
diff changeset
726
986e461dc072 Initial revision
glantau
parents:
diff changeset
727 ;; Second 4 bytes of 8
986e461dc072 Initial revision
glantau
parents:
diff changeset
728
986e461dc072 Initial revision
glantau
parents:
diff changeset
729 movq mm4, mm2 ; mm4 := Second 4 bytes p1 in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
730 pxor mm7, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
731 punpckhbw mm4, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
732 movq mm6, mm3 ; mm6 := Second 4 bytes p1+1 in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
733 punpckhbw mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
734 paddw mm4, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
735
986e461dc072 Initial revision
glantau
parents:
diff changeset
736 movq mm5, [esi+9] ; mm5 := 8 bytes starting at p1+9
986e461dc072 Initial revision
glantau
parents:
diff changeset
737 punpckhbw mm5, mm7 ; Second 4 bytes of p1+9 in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
738 paddw mm4, mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
739 movq mm6, [ebx+9] ; mm6 := 8 bytes starting at p1+lx+9
986e461dc072 Initial revision
glantau
parents:
diff changeset
740 punpckhbw mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
741 paddw mm4, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
742
986e461dc072 Initial revision
glantau
parents:
diff changeset
743 psrlw mm4, 2 ; mm4 := Second 4 bytes interpolated in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
744
986e461dc072 Initial revision
glantau
parents:
diff changeset
745 movq mm5, mm1 ; mm5:= second 4 bytes of p2 in words
986e461dc072 Initial revision
glantau
parents:
diff changeset
746 punpckhbw mm5, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
747
986e461dc072 Initial revision
glantau
parents:
diff changeset
748 movq mm7,mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
749 pcmpgtw mm7,mm5 ; mm7 := [i : W0..3,mm4>mm5]
986e461dc072 Initial revision
glantau
parents:
diff changeset
750
986e461dc072 Initial revision
glantau
parents:
diff changeset
751 movq mm6,mm4 ; mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)]
986e461dc072 Initial revision
glantau
parents:
diff changeset
752 psubw mm6,mm5
986e461dc072 Initial revision
glantau
parents:
diff changeset
753 pand mm6, mm7
986e461dc072 Initial revision
glantau
parents:
diff changeset
754
986e461dc072 Initial revision
glantau
parents:
diff changeset
755 paddw mm0, mm6 ; Add to accumulator
986e461dc072 Initial revision
glantau
parents:
diff changeset
756
986e461dc072 Initial revision
glantau
parents:
diff changeset
757 movq mm6,mm5 ; mm6 := [i : W0..3,mm5>mm4]
986e461dc072 Initial revision
glantau
parents:
diff changeset
758 pcmpgtw mm6,mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
759 psubw mm5,mm4 ; mm5 := [i : W0..3, (mm5-mm4)*(mm5-mm4 > 0)]
986e461dc072 Initial revision
glantau
parents:
diff changeset
760 pand mm5, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
761
986e461dc072 Initial revision
glantau
parents:
diff changeset
762 paddw mm0, mm5 ; Add to accumulator
986e461dc072 Initial revision
glantau
parents:
diff changeset
763
986e461dc072 Initial revision
glantau
parents:
diff changeset
764
986e461dc072 Initial revision
glantau
parents:
diff changeset
765 ;;
986e461dc072 Initial revision
glantau
parents:
diff changeset
766 ;; Loop termination condition... and stepping
986e461dc072 Initial revision
glantau
parents:
diff changeset
767 ;;
986e461dc072 Initial revision
glantau
parents:
diff changeset
768
986e461dc072 Initial revision
glantau
parents:
diff changeset
769 add esi, edx ; update pointer to next row
986e461dc072 Initial revision
glantau
parents:
diff changeset
770 add edi, edx ; ditto
986e461dc072 Initial revision
glantau
parents:
diff changeset
771 add ebx, edx
986e461dc072 Initial revision
glantau
parents:
diff changeset
772
986e461dc072 Initial revision
glantau
parents:
diff changeset
773 sub ecx,1
986e461dc072 Initial revision
glantau
parents:
diff changeset
774 test ecx, ecx ; check rowsleft
986e461dc072 Initial revision
glantau
parents:
diff changeset
775 jnz near .nextrowmm11
986e461dc072 Initial revision
glantau
parents:
diff changeset
776
986e461dc072 Initial revision
glantau
parents:
diff changeset
777 ;; Sum the Accumulators
986e461dc072 Initial revision
glantau
parents:
diff changeset
778 movq mm4, mm0
986e461dc072 Initial revision
glantau
parents:
diff changeset
779 psrlq mm4, 32
986e461dc072 Initial revision
glantau
parents:
diff changeset
780 paddw mm0, mm4
986e461dc072 Initial revision
glantau
parents:
diff changeset
781 movq mm6, mm0
986e461dc072 Initial revision
glantau
parents:
diff changeset
782 psrlq mm6, 16
986e461dc072 Initial revision
glantau
parents:
diff changeset
783 paddw mm0, mm6
986e461dc072 Initial revision
glantau
parents:
diff changeset
784 movd eax, mm0 ; store return value
986e461dc072 Initial revision
glantau
parents:
diff changeset
785 and eax, 0xffff
986e461dc072 Initial revision
glantau
parents:
diff changeset
786
986e461dc072 Initial revision
glantau
parents:
diff changeset
787 pop edi
986e461dc072 Initial revision
glantau
parents:
diff changeset
788 pop esi
986e461dc072 Initial revision
glantau
parents:
diff changeset
789 pop edx
986e461dc072 Initial revision
glantau
parents:
diff changeset
790 pop ecx
986e461dc072 Initial revision
glantau
parents:
diff changeset
791 pop ebx
986e461dc072 Initial revision
glantau
parents:
diff changeset
792
986e461dc072 Initial revision
glantau
parents:
diff changeset
793 pop ebp ; restore caller's base (frame) pointer
986e461dc072 Initial revision
glantau
parents:
diff changeset
794
986e461dc072 Initial revision
glantau
parents:
diff changeset
795 emms ; clear mmx registers
986e461dc072 Initial revision
glantau
parents:
diff changeset
796 ret ; we now return you to your regular programming
986e461dc072 Initial revision
glantau
parents:
diff changeset
797
986e461dc072 Initial revision
glantau
parents:
diff changeset
798