annotate i386/motion_est_mmx.c @ 8400:4877d4c6d8ae libavcodec

Add automatic prefix handling to yasm functions. Does nothing now, but will be useful for porting x264 asm in the future.
author darkshikari
date Fri, 19 Dec 2008 03:01:08 +0000
parents 0d108ec85620
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
1 /*
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
2 * MMX optimized motion estimation
429
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
3 * Copyright (c) 2001 Fabrice Bellard.
1739
07a484280a82 copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents: 1708
diff changeset
4 * Copyright (c) 2002-2004 Michael Niedermayer
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
5 *
5214
470601203f44 Group all copyright and author notices together.
diego
parents: 5010
diff changeset
6 * mostly by Michael Niedermayer <michaelni@gmx.at>
470601203f44 Group all copyright and author notices together.
diego
parents: 5010
diff changeset
7 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
8 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
9 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
10 * FFmpeg is free software; you can redistribute it and/or
429
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
11 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
12 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
13 * version 2.1 of the License, or (at your option) any later version.
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
14 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
15 * FFmpeg is distributed in the hope that it will be useful,
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
18 * Lesser General Public License for more details.
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
19 *
429
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
20 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
21 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
23 */
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 6755
diff changeset
24
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 6755
diff changeset
25 #include "libavutil/x86_cpu.h"
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 6755
diff changeset
26 #include "libavcodec/dsputil.h"
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
27
6190
2aa536e36c89 Add and use DECLARE_ASM_CONST for constants used in assembler code.
reimar
parents: 5214
diff changeset
28 DECLARE_ASM_CONST(8, uint64_t, round_tab[3])={
1569
1f8d1e1173d8 Fixes GCC 3.3.2 warnings patch by (Panagiotis Issaris <takis at lumumba dot luc dot ac dot be>)
michael
parents: 1455
diff changeset
29 0x0000000000000000ULL,
1f8d1e1173d8 Fixes GCC 3.3.2 warnings patch by (Panagiotis Issaris <takis at lumumba dot luc dot ac dot be>)
michael
parents: 1455
diff changeset
30 0x0001000100010001ULL,
1f8d1e1173d8 Fixes GCC 3.3.2 warnings patch by (Panagiotis Issaris <takis at lumumba dot luc dot ac dot be>)
michael
parents: 1455
diff changeset
31 0x0002000200020002ULL,
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
32 };
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
33
6190
2aa536e36c89 Add and use DECLARE_ASM_CONST for constants used in assembler code.
reimar
parents: 5214
diff changeset
34 DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL;
330
54d86f074a4b rounding bugfix
michaelni
parents: 294
diff changeset
35
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
36 static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
37 {
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6190
diff changeset
38 x86_reg len= -(stride*h);
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
39 __asm__ volatile(
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
40 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
41 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
42 "movq (%1, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
43 "movq (%2, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
44 "movq (%2, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
45 "add %3, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
46 "psubusb %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
47 "psubusb %%mm4, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
48 "movq (%1, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
49 "movq (%2, %%"REG_a"), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
50 "movq (%2, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
51 "psubusb %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
52 "psubusb %%mm5, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
53 "por %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
54 "por %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
55 "movq %%mm0, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
56 "movq %%mm3, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
57 "punpcklbw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
58 "punpckhbw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
59 "punpcklbw %%mm7, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
60 "punpckhbw %%mm7, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
61 "paddw %%mm1, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
62 "paddw %%mm3, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
63 "paddw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
64 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
65 "add %3, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
66 " js 1b \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
67 : "+a" (len)
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6190
diff changeset
68 : "r" (blk1 - len), "r" (blk2 - len), "r" ((x86_reg)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
69 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
70 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
71
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
72 static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
73 {
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
74 __asm__ volatile(
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
75 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
76 "1: \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
77 "movq (%1), %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
78 "movq (%1, %3), %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
79 "psadbw (%2), %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
80 "psadbw (%2, %3), %%mm1 \n\t"
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
81 "paddw %%mm0, %%mm6 \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
82 "paddw %%mm1, %%mm6 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
83 "lea (%1,%3,2), %1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
84 "lea (%2,%3,2), %2 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
85 "sub $2, %0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
86 " jg 1b \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
87 : "+r" (h), "+r" (blk1), "+r" (blk2)
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6190
diff changeset
88 : "r" ((x86_reg)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
89 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
90 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
91
4981
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
92 static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
93 {
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
94 int ret;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
95 __asm__ volatile(
4981
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
96 "pxor %%xmm6, %%xmm6 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
97 ASMALIGN(4)
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
98 "1: \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
99 "movdqu (%1), %%xmm0 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
100 "movdqu (%1, %3), %%xmm1 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
101 "psadbw (%2), %%xmm0 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
102 "psadbw (%2, %3), %%xmm1 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
103 "paddw %%xmm0, %%xmm6 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
104 "paddw %%xmm1, %%xmm6 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
105 "lea (%1,%3,2), %1 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
106 "lea (%2,%3,2), %2 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
107 "sub $2, %0 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
108 " jg 1b \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
109 : "+r" (h), "+r" (blk1), "+r" (blk2)
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6190
diff changeset
110 : "r" ((x86_reg)stride)
4981
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
111 );
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
112 __asm__ volatile(
4981
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
113 "movhlps %%xmm6, %%xmm0 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
114 "paddw %%xmm0, %%xmm6 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
115 "movd %%xmm6, %0 \n\t"
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
116 : "=r"(ret)
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
117 );
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
118 return ret;
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
119 }
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
120
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
121 static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
122 {
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
123 __asm__ volatile(
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
124 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
125 "1: \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
126 "movq (%1), %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
127 "movq (%1, %3), %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
128 "pavgb 1(%1), %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
129 "pavgb 1(%1, %3), %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
130 "psadbw (%2), %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
131 "psadbw (%2, %3), %%mm1 \n\t"
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
132 "paddw %%mm0, %%mm6 \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
133 "paddw %%mm1, %%mm6 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
134 "lea (%1,%3,2), %1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
135 "lea (%2,%3,2), %2 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
136 "sub $2, %0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
137 " jg 1b \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
138 : "+r" (h), "+r" (blk1), "+r" (blk2)
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6190
diff changeset
139 : "r" ((x86_reg)stride)
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
140 );
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
141 }
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
142
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
143 static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
144 {
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
145 __asm__ volatile(
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
146 "movq (%1), %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
147 "add %3, %1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
148 ASMALIGN(4)
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
149 "1: \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
150 "movq (%1), %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
151 "movq (%1, %3), %%mm2 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
152 "pavgb %%mm1, %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
153 "pavgb %%mm2, %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
154 "psadbw (%2), %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
155 "psadbw (%2, %3), %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
156 "paddw %%mm0, %%mm6 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
157 "paddw %%mm1, %%mm6 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
158 "movq %%mm2, %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
159 "lea (%1,%3,2), %1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
160 "lea (%2,%3,2), %2 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
161 "sub $2, %0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
162 " jg 1b \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
163 : "+r" (h), "+r" (blk1), "+r" (blk2)
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6190
diff changeset
164 : "r" ((x86_reg)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
165 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
166 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
167
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 1057
diff changeset
168 static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
169 {
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
170 __asm__ volatile(
4974
a2e489e40ea3 tweak mmx2 sad.
lorenm
parents: 4197
diff changeset
171 "movq "MANGLE(bone)", %%mm5 \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
172 "movq (%1), %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
173 "pavgb 1(%1), %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
174 "add %3, %1 \n\t"
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
175 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
176 "1: \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
177 "movq (%1), %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
178 "movq (%1,%3), %%mm2 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
179 "pavgb 1(%1), %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
180 "pavgb 1(%1,%3), %%mm2 \n\t"
4974
a2e489e40ea3 tweak mmx2 sad.
lorenm
parents: 4197
diff changeset
181 "psubusb %%mm5, %%mm1 \n\t"
a2e489e40ea3 tweak mmx2 sad.
lorenm
parents: 4197
diff changeset
182 "pavgb %%mm1, %%mm0 \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
183 "pavgb %%mm2, %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
184 "psadbw (%2), %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
185 "psadbw (%2,%3), %%mm1 \n\t"
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
186 "paddw %%mm0, %%mm6 \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
187 "paddw %%mm1, %%mm6 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
188 "movq %%mm2, %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
189 "lea (%1,%3,2), %1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
190 "lea (%2,%3,2), %2 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
191 "sub $2, %0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
192 " jg 1b \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
193 : "+r" (h), "+r" (blk1), "+r" (blk2)
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6190
diff changeset
194 : "r" ((x86_reg)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
195 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
196 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
197
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 1057
diff changeset
198 static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
199 {
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6190
diff changeset
200 x86_reg len= -(stride*h);
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
201 __asm__ volatile(
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
202 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
203 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
204 "movq (%1, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
205 "movq (%2, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
206 "movq (%1, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
207 "movq (%2, %%"REG_a"), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
208 "punpcklbw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
209 "punpcklbw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
210 "punpckhbw %%mm7, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
211 "punpckhbw %%mm7, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
212 "paddw %%mm0, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
213 "paddw %%mm2, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
214 "movq (%3, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
215 "movq (%3, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
216 "paddw %%mm5, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
217 "paddw %%mm5, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
218 "psrlw $1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
219 "psrlw $1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
220 "packuswb %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
221 "psubusb %%mm1, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
222 "psubusb %%mm2, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
223 "por %%mm4, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
224 "movq %%mm1, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
225 "punpcklbw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
226 "punpckhbw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
227 "paddw %%mm1, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
228 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
229 "add %4, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
230 " js 1b \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
231 : "+a" (len)
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6190
diff changeset
232 : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((x86_reg)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
233 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
234 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
235
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 1057
diff changeset
236 static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
237 {
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6190
diff changeset
238 x86_reg len= -(stride*h);
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
239 __asm__ volatile(
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
240 "movq (%1, %%"REG_a"), %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
241 "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
242 "movq %%mm0, %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
243 "movq %%mm2, %%mm3 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
244 "punpcklbw %%mm7, %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
245 "punpckhbw %%mm7, %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
246 "punpcklbw %%mm7, %%mm2 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
247 "punpckhbw %%mm7, %%mm3 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
248 "paddw %%mm2, %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
249 "paddw %%mm3, %%mm1 \n\t"
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
250 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
251 "1: \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
252 "movq (%2, %%"REG_a"), %%mm2 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
253 "movq 1(%2, %%"REG_a"), %%mm4 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
254 "movq %%mm2, %%mm3 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
255 "movq %%mm4, %%mm5 \n\t"
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
256 "punpcklbw %%mm7, %%mm2 \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
257 "punpckhbw %%mm7, %%mm3 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
258 "punpcklbw %%mm7, %%mm4 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
259 "punpckhbw %%mm7, %%mm5 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
260 "paddw %%mm4, %%mm2 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
261 "paddw %%mm5, %%mm3 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
262 "movq 16+"MANGLE(round_tab)", %%mm5 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
263 "paddw %%mm2, %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
264 "paddw %%mm3, %%mm1 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
265 "paddw %%mm5, %%mm0 \n\t"
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
266 "paddw %%mm5, %%mm1 \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
267 "movq (%3, %%"REG_a"), %%mm4 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
268 "movq (%3, %%"REG_a"), %%mm5 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
269 "psrlw $2, %%mm0 \n\t"
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
270 "psrlw $2, %%mm1 \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
271 "packuswb %%mm1, %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
272 "psubusb %%mm0, %%mm4 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
273 "psubusb %%mm5, %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
274 "por %%mm4, %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
275 "movq %%mm0, %%mm4 \n\t"
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
276 "punpcklbw %%mm7, %%mm0 \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
277 "punpckhbw %%mm7, %%mm4 \n\t"
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
278 "paddw %%mm0, %%mm6 \n\t"
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
279 "paddw %%mm4, %%mm6 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
280 "movq %%mm2, %%mm0 \n\t"
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
281 "movq %%mm3, %%mm1 \n\t"
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
282 "add %4, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
283 " js 1b \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
284 : "+a" (len)
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6190
diff changeset
285 : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((x86_reg)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
286 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
287 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
288
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
/*
 * Reduce the MMX accumulator register mm6 to a scalar.
 *
 * mm6 is treated as four 16-bit words. The words are added together with
 * two shift-and-add steps and the low word of the result is returned.
 * The additions are paddw, so the total is only valid modulo 2^16, which
 * is why the return value is masked with 0xFFFF.
 *
 * The asm statement has no input operands and no clobber list: it reads
 * whatever a previous asm statement left in mm6 and overwrites mm0 and mm6.
 * It must therefore run directly after the code that filled the accumulator
 * (the PIX_SAD functions in this file call it as their last step).
 *
 * Returns the 16-bit sum as an int in the range 0..65535.
 */
289 static inline int sum_mmx(void)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
290 {
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
291 int ret;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
292 __asm__ volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
/* Step 1: fold the upper 32 bits onto the lower 32 bits (words 2,3 onto words 0,1). */
293 "movq %%mm6, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
294 "psrlq $32, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
295 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
/* Step 2: fold word 1 onto word 0, leaving the full total in the low word. */
296 "movq %%mm6, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
297 "psrlq $16, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
298 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
/* Copy the low 32 bits of mm6 to the output; bits 16..31 still hold a partial sum. */
299 "movd %%mm6, %0 \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
300 : "=r" (ret)
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
301 );
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
/* Keep only the low word, which is the only part holding the complete sum. */
302 return ret&0xFFFF;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
303 }
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
304
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
/*
 * Read the low 32 bits of the MMX accumulator mm6 and return them as an int.
 *
 * Unlike sum_mmx there is no horizontal add and no masking here.
 * NOTE(review): presumably the mmx2 helpers already leave one complete total
 * in the low part of mm6 (for example by using psadbw) -- those helpers are
 * outside this view, confirm there.
 *
 * As in sum_mmx, the asm statement declares no inputs or clobbers and simply
 * relies on mm6 having been filled by the preceding asm statements.
 */
305 static inline int sum_mmx2(void)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
306 {
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
307 int ret;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
308 __asm__ volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
309 "movd %%mm6, %0 \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
310 : "=r" (ret)
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
311 );
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
312 return ret;
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
313 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
314
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
/*
 * Thin wrapper around sad8_2_mmx for the x2 case: the two source pointers
 * handed to it are blk1 and blk1+1, i.e. the same row shifted one byte to
 * the right; blk2, stride and h are passed through unchanged.
 *
 * NOTE(review): sad8_2_mmx is not visible here; presumably it averages the
 * two sources before taking the SAD against blk2 (horizontal half-pel) --
 * confirm in its definition.
 *
 * Returns nothing. The PIX_SAD callers set up mm5/mm6/mm7 beforehand and
 * fetch the result afterwards with sum_mmx().
 */
315 static inline void sad8_x2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
316 {
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
317 sad8_2_mmx(blk1, blk1+1, blk2, stride, h);
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
318 }
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
/*
 * Thin wrapper around sad8_2_mmx for the y2 case: the two source pointers
 * handed to it are blk1 and blk1+stride, i.e. the same column position one
 * row further down; blk2, stride and h are passed through unchanged.
 *
 * NOTE(review): sad8_2_mmx is not visible here; presumably it averages the
 * two sources before taking the SAD against blk2 (vertical half-pel) --
 * confirm in its definition.
 *
 * Returns nothing. The PIX_SAD callers set up mm5/mm6/mm7 beforehand and
 * fetch the result afterwards with sum_mmx().
 */
319 static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
320 {
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
321 sad8_2_mmx(blk1, blk1+stride, blk2, stride, h);
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
322 }
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
323
900
52c26a143399 100l (sad8x8_x2 had params in wrong order)
michaelni
parents: 429
diff changeset
324
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
/*
 * PIX_SAD(suf) expands to eight static functions for the given suffix:
 *
 *   sad8_<suf>    sad8_x2_<suf>    sad8_y2_<suf>    sad8_xy2_<suf>
 *   sad16_<suf>   sad16_x2_<suf>   sad16_y2_<suf>   sad16_xy2_<suf>
 *
 * All of them share the signature
 *   (void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
 * and return int. v is never used. Note that blk2 comes BEFORE blk1 in the
 * parameter list, while the helpers are called with (blk1, blk2, ...).
 *
 * Every generated function follows the same pattern:
 *   1. clear mm7 and mm6 with pxor (the sad8_4_mmx loop earlier in this file
 *      unpacks bytes against mm7 and accumulates into mm6);
 *   2. x2 and y2 variants only: load round_tab[1] into mm5
 *      (NOTE(review): presumably the rounding constant consumed by the
 *      two-source helper -- that helper is not visible here, confirm there);
 *   3. call the matching helper: sad8_1_<suf>, sad8_x2a_<suf>,
 *      sad8_y2a_<suf> or sad8_4_<suf>;
 *   4. return sum_<suf>(), which reads the accumulator back out of mm6.
 *
 * The steps communicate only through MMX registers. None of the asm
 * statements in this macro declare those registers as outputs or clobbers,
 * so the statement order inside each function must not be changed.
 *
 * The 8-wide variants assert h == 8 and always pass the literal 8 to the
 * helper. The 16-wide variants do not assert; they pass h through and run
 * the 8-wide helper twice, once at offset 0 and once at byte offset +8 of
 * both blocks, accumulating into the same mm6 before the final sum.
 *
 * The backslash after the last closing brace splices in the following
 * (empty) line, which is what terminates the macro definition.
 */
325 #define PIX_SAD(suf)\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
/* ---- 8 pixel wide variants: h is asserted to be 8 ---- */\
326 static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
327 {\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
328 assert(h==8);\
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
329 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
330 "pxor %%mm6, %%mm6 \n\t":);\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
331 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
332 sad8_1_ ## suf(blk1, blk2, stride, 8);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
333 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
334 return sum_ ## suf();\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
335 }\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
336 static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
337 {\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
338 assert(h==8);\
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
339 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
340 "pxor %%mm6, %%mm6 \n\t"\
4982
6135d67c7f86 10l, r8991 broke mmx1 sad
lorenm
parents: 4981
diff changeset
341 "movq %0, %%mm5 \n\t"\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
342 :: "m"(round_tab[1]) \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
343 );\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
344 \
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
345 sad8_x2a_ ## suf(blk1, blk2, stride, 8);\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
346 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
347 return sum_ ## suf();\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
348 }\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
349 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
350 static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
351 {\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
352 assert(h==8);\
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
353 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
354 "pxor %%mm6, %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
355 "movq %0, %%mm5 \n\t"\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
356 :: "m"(round_tab[1]) \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
357 );\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
358 \
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
359 sad8_y2a_ ## suf(blk1, blk2, stride, 8);\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
360 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
361 return sum_ ## suf();\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
362 }\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
363 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
/* xy2: no constant is loaded into mm5 here; the visible sad8_4_mmx loop reads round_tab itself. */\
364 static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
365 {\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
366 assert(h==8);\
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
367 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
368 "pxor %%mm6, %%mm6 \n\t"\
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
369 ::);\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
370 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
371 sad8_4_ ## suf(blk1, blk2, stride, 8);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
372 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
373 return sum_ ## suf();\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
374 }\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
375 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
/* ---- 16 pixel wide variants: two 8-wide passes (offset 0 and +8), h rows each ---- */\
376 static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
377 {\
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
378 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
379 "pxor %%mm6, %%mm6 \n\t":);\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
380 \
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
381 sad8_1_ ## suf(blk1 , blk2 , stride, h);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
382 sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
383 \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
384 return sum_ ## suf();\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
385 }\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
386 static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
387 {\
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
388 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
389 "pxor %%mm6, %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
390 "movq %0, %%mm5 \n\t"\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
391 :: "m"(round_tab[1]) \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
392 );\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
393 \
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
394 sad8_x2a_ ## suf(blk1 , blk2 , stride, h);\
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
395 sad8_x2a_ ## suf(blk1+8, blk2+8, stride, h);\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
396 \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
397 return sum_ ## suf();\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
398 }\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
399 static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
400 {\
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
401 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
402 "pxor %%mm6, %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
403 "movq %0, %%mm5 \n\t"\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
404 :: "m"(round_tab[1]) \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
405 );\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
406 \
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
407 sad8_y2a_ ## suf(blk1 , blk2 , stride, h);\
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
408 sad8_y2a_ ## suf(blk1+8, blk2+8, stride, h);\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
409 \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
410 return sum_ ## suf();\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
411 }\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
412 static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
413 {\
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6763
diff changeset
414 __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
415 "pxor %%mm6, %%mm6 \n\t"\
4980
800a543a2513 tweak mmx2 sad.
lorenm
parents: 4974
diff changeset
416 ::);\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
417 \
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
418 sad8_4_ ## suf(blk1 , blk2 , stride, h);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
419 sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
420 \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
421 return sum_ ## suf();\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
422 }\
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
423
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
/*
 * Instantiate the eight SAD entry points twice: once with the mmx suffix and
 * once with the mmx2 suffix. Each expansion refers to the sad8_1_, sad8_x2a_,
 * sad8_y2a_, sad8_4_ and sum_ helpers carrying the same suffix.
 */
424 PIX_SAD(mmx)
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
425 PIX_SAD(mmx2)
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
426
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1065
diff changeset
/*
 * Install the SAD functions defined in this file into a DSPContext,
 * depending on the CPU capability bits found in mm_flags.
 *
 * c     - context whose pix_abs[][] and sad[] function pointers are filled in.
 * avctx - only avctx->flags is read, to test CODEC_FLAG_BITEXACT.
 *
 * Table layout as written by this function:
 *   pix_abs[0][k] holds the 16-wide functions, pix_abs[1][k] the 8-wide ones;
 *   k = 0 plain, 1 = x2, 2 = y2, 3 = xy2.
 *   sad[0] is the 16-wide plain SAD, sad[1] the 8-wide plain SAD.
 *
 * The three capability checks run in order and later ones overwrite entries
 * set by earlier ones. If none of the flags is set, c is left untouched.
 */
427 void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
428 {
8104
0d108ec85620 Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents: 8031
diff changeset
/* Baseline: plain MMX versions for every slot. */
429 if (mm_flags & FF_MM_MMX) {
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
430 c->pix_abs[0][0] = sad16_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
431 c->pix_abs[0][1] = sad16_x2_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
432 c->pix_abs[0][2] = sad16_y2_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
433 c->pix_abs[0][3] = sad16_xy2_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
434 c->pix_abs[1][0] = sad8_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
435 c->pix_abs[1][1] = sad8_x2_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
436 c->pix_abs[1][2] = sad8_y2_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
437 c->pix_abs[1][3] = sad8_xy2_mmx;
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
438
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
439 c->sad[0]= sad16_mmx;
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
440 c->sad[1]= sad8_mmx;
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
441 }
8104
0d108ec85620 Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents: 8031
diff changeset
/* MMXEXT: the plain (k = 0) slots and sad[] always switch to the mmx2 versions. */
442 if (mm_flags & FF_MM_MMXEXT) {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
443 c->pix_abs[0][0] = sad16_mmx2;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
444 c->pix_abs[1][0] = sad8_mmx2;
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
445
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
446 c->sad[0]= sad16_mmx2;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
447 c->sad[1]= sad8_mmx2;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
448
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1065
diff changeset
/*
 * The mmx2 x2 / y2 / xy2 variants are installed only when bit-exact output
 * is NOT requested; with CODEC_FLAG_BITEXACT the plain MMX ones stay.
 * NOTE(review): presumably the mmx2 variants round differently from the
 * MMX ones -- their helpers are outside this view, confirm there.
 */
449 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
450 c->pix_abs[0][1] = sad16_x2_mmx2;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
451 c->pix_abs[0][2] = sad16_y2_mmx2;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
452 c->pix_abs[0][3] = sad16_xy2_mmx2;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
453 c->pix_abs[1][1] = sad8_x2_mmx2;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
454 c->pix_abs[1][2] = sad8_y2_mmx2;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
455 c->pix_abs[1][3] = sad8_xy2_mmx2;
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1065
diff changeset
456 }
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
457 }
8104
0d108ec85620 Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents: 8031
diff changeset
/*
 * SSE2: replaces sad[0] only; pix_abs[0][0] keeps whatever was set above.
 * Skipped when the CPU also reports 3DNow.
 * NOTE(review): the reason for the 3DNow exclusion is not visible here
 * (possibly a speed heuristic for those CPUs) -- confirm with the commit
 * that introduced sad16_sse2.
 */
458 if ((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)) {
4981
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
459 c->sad[0]= sad16_sse2;
0b392661ce83 sse2 version of fullpel sad.
lorenm
parents: 4980
diff changeset
460 }
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
461 }