annotate i386/motion_est_mmx.c @ 4889:beeb03aad909 libavcodec

patch so that the deprecated items show up correctly when building doxygen docs patch by mark cox melbournemark plus ffmpeg minus devel chez gmail dot com
author benoit
date Wed, 02 May 2007 09:13:47 +0000
parents bbe0bc387a19
children a2e489e40ea3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
1 /*
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
2 * MMX optimized motion estimation
429
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
3 * Copyright (c) 2001 Fabrice Bellard.
1739
07a484280a82 copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents: 1708
diff changeset
4 * Copyright (c) 2002-2004 Michael Niedermayer
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
5 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
6 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
7 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
8 * FFmpeg is free software; you can redistribute it and/or
429
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
9 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
10 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
11 * version 2.1 of the License, or (at your option) any later version.
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
12 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
13 * FFmpeg is distributed in the hope that it will be useful,
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
16 * Lesser General Public License for more details.
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
17 *
429
718a22dc121f license/copyright change
glantau
parents: 330
diff changeset
18 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
19 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
21 *
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
22 * mostly by Michael Niedermayer <michaelni@gmx.at>
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
23 */
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
24 #include "../dsputil.h"
3398
e0927bc44a10 Move REG_* macros from libavcodec/i386/mmx.h to libavutil/x86_cpu.h
lucabe
parents: 3036
diff changeset
25 #include "x86_cpu.h"
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
26
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 1057
diff changeset
27 static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={
1569
1f8d1e1173d8 Fixes GCC 3.3.2 warnings patch by (Panagiotis Issaris <takis at lumumba dot luc dot ac dot be>)
michael
parents: 1455
diff changeset
28 0x0000000000000000ULL,
1f8d1e1173d8 Fixes GCC 3.3.2 warnings patch by (Panagiotis Issaris <takis at lumumba dot luc dot ac dot be>)
michael
parents: 1455
diff changeset
29 0x0001000100010001ULL,
1f8d1e1173d8 Fixes GCC 3.3.2 warnings patch by (Panagiotis Issaris <takis at lumumba dot luc dot ac dot be>)
michael
parents: 1455
diff changeset
30 0x0002000200020002ULL,
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
31 };
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
32
1845
3054613980a8 attribute used patch by (mitya at school dot ioffe dot ru (Dmitry Baryshkov))
michael
parents: 1739
diff changeset
33 static attribute_used __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL;
330
54d86f074a4b rounding bugfix
michaelni
parents: 294
diff changeset
34
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
35 static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
36 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
37 long len= -(stride*h);
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
38 asm volatile(
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
39 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
40 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
41 "movq (%1, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
42 "movq (%2, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
43 "movq (%2, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
44 "add %3, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
45 "psubusb %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
46 "psubusb %%mm4, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
47 "movq (%1, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
48 "movq (%2, %%"REG_a"), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
49 "movq (%2, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
50 "psubusb %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
51 "psubusb %%mm5, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
52 "por %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
53 "por %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
54 "movq %%mm0, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
55 "movq %%mm3, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
56 "punpcklbw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
57 "punpckhbw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
58 "punpcklbw %%mm7, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
59 "punpckhbw %%mm7, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
60 "paddw %%mm1, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
61 "paddw %%mm3, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
62 "paddw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
63 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
64 "add %3, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
65 " js 1b \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
66 : "+a" (len)
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
67 : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
68 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
69 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
70
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
71 static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
72 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
73 long len= -(stride*h);
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
74 asm volatile(
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
75 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
76 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
77 "movq (%1, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
78 "movq (%2, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
79 "psadbw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
80 "add %3, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
81 "movq (%1, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
82 "movq (%2, %%"REG_a"), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
83 "psadbw %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
84 "paddw %%mm3, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
85 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
86 "add %3, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
87 " js 1b \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
88 : "+a" (len)
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
89 : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
90 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
91 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
92
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 1057
diff changeset
93 static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
94 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
95 long len= -(stride*h);
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
96 asm volatile(
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
97 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
98 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
99 "movq (%1, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
100 "movq (%2, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
101 "pavgb %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
102 "movq (%3, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
103 "psadbw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
104 "add %4, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
105 "movq (%1, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
106 "movq (%2, %%"REG_a"), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
107 "pavgb %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
108 "movq (%3, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
109 "psadbw %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
110 "paddw %%mm3, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
111 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
112 "add %4, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
113 " js 1b \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
114 : "+a" (len)
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
115 : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
116 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
117 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
118
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 1057
diff changeset
119 static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
120 { //FIXME reuse src
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
121 long len= -(stride*h);
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
122 asm volatile(
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
123 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
124 "movq "MANGLE(bone)", %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
125 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
126 "movq (%1, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
127 "movq (%2, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
128 "movq 1(%1, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
129 "movq 1(%2, %%"REG_a"), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
130 "pavgb %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
131 "pavgb %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
132 "psubusb %%mm5, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
133 "pavgb %%mm3, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
134 "movq (%3, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
135 "psadbw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
136 "add %4, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
137 "movq (%1, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
138 "movq (%2, %%"REG_a"), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
139 "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
140 "movq 1(%2, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
141 "pavgb %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
142 "pavgb %%mm4, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
143 "psubusb %%mm5, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
144 "pavgb %%mm1, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
145 "movq (%3, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
146 "psadbw %%mm1, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
147 "paddw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
148 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
149 "add %4, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
150 " js 1b \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
151 : "+a" (len)
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
152 : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
153 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
154 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
155
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 1057
diff changeset
156 static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
157 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
158 long len= -(stride*h);
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
159 asm volatile(
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
160 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
161 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
162 "movq (%1, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
163 "movq (%2, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
164 "movq (%1, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
165 "movq (%2, %%"REG_a"), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
166 "punpcklbw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
167 "punpcklbw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
168 "punpckhbw %%mm7, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
169 "punpckhbw %%mm7, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
170 "paddw %%mm0, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
171 "paddw %%mm2, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
172 "movq (%3, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
173 "movq (%3, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
174 "paddw %%mm5, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
175 "paddw %%mm5, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
176 "psrlw $1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
177 "psrlw $1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
178 "packuswb %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
179 "psubusb %%mm1, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
180 "psubusb %%mm2, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
181 "por %%mm4, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
182 "movq %%mm1, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
183 "punpcklbw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
184 "punpckhbw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
185 "paddw %%mm1, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
186 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
187 "add %4, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
188 " js 1b \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
189 : "+a" (len)
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
190 : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
191 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
192 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
193
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 1057
diff changeset
194 static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
195 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
196 long len= -(stride*h);
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
197 asm volatile(
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
198 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
199 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
200 "movq (%1, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
201 "movq (%2, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
202 "movq %%mm0, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
203 "movq %%mm1, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
204 "punpcklbw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
205 "punpcklbw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
206 "punpckhbw %%mm7, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
207 "punpckhbw %%mm7, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
208 "paddw %%mm1, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
209 "paddw %%mm2, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
210 "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
211 "movq 1(%2, %%"REG_a"), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
212 "movq %%mm2, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
213 "punpcklbw %%mm7, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
214 "punpckhbw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
215 "paddw %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
216 "paddw %%mm4, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
217 "movq %%mm3, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
218 "punpcklbw %%mm7, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
219 "punpckhbw %%mm7, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
220 "paddw %%mm3, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
221 "paddw %%mm4, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
222 "movq (%3, %%"REG_a"), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
223 "movq (%3, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
224 "paddw %%mm5, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
225 "paddw %%mm5, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
226 "psrlw $2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
227 "psrlw $2, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
228 "packuswb %%mm1, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
229 "psubusb %%mm2, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
230 "psubusb %%mm4, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
231 "por %%mm3, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
232 "movq %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
233 "punpcklbw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
234 "punpckhbw %%mm7, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
235 "paddw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
236 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
237 "add %4, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
238 " js 1b \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
239 : "+a" (len)
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 1845
diff changeset
240 : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
241 );
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
242 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
243
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
/**
 * Reduce the SAD accumulator held in MMX register mm6 to a scalar.
 *
 * mm6 is treated as four 16-bit partial sums. Two shift-and-add steps
 * (by 32 bits, then by 16 bits, each followed by paddw) fold them so
 * that the low word holds the sum of all four words, modulo 2^16.
 * The low 32 bits are then moved to a general register and masked to
 * 16 bits, because the upper half of that dword holds a partial sum
 * rather than zero.
 *
 * The asm statement declares only the output operand: mm6 is an implicit
 * input left behind by earlier asm statements (the PIX_SAD functions
 * clear it and then call the sad8 helpers, which are expected to
 * accumulate into it), and mm0 and mm6 are modified without being listed
 * as clobbers.
 *
 * @return the 16-bit horizontal sum of the four words in mm6
 */
244 static inline int sum_mmx(void)
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
245 {
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
246 int ret;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
247 asm volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
/* with mm6 = [w3 w2 w1 w0]: add the upper dword onto the lower dword */
248 "movq %%mm6, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
249 "psrlq $32, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
250 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
/* add word 1 onto word 0: the low word becomes w0+w1+w2+w3 */
251 "movq %%mm6, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
252 "psrlq $16, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
253 "paddw %%mm0, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
254 "movd %%mm6, %0 \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
255 : "=r" (ret)
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
256 );
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
/* only the low 16 bits hold the complete sum */
257 return ret&0xFFFF;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
258 }
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
259
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
/**
 * Fetch the SAD total from MMX register mm6 for the MMX2 code path.
 *
 * Unlike sum_mmx(), no folding or masking is done here: the low 32 bits
 * of mm6 are returned as they are. This presumably relies on the mmx2
 * helpers leaving the complete sum in the low dword of mm6; those
 * helpers are outside this section, so confirm there.
 *
 * mm6 is an implicit input: the asm statement declares only the output
 * operand.
 *
 * @return the low 32 bits of mm6
 */
260 static inline int sum_mmx2(void)
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
261 {
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
262 int ret;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
263 asm volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
264 "movd %%mm6, %0 \n\t"
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
265 : "=r" (ret)
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
266 );
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
267 return ret;
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
268 }
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
269
900
52c26a143399 100l (sad8x8_x2 had params in wrong order)
michaelni
parents: 429
diff changeset
270
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
/**
 * Generate the family of SAD entry points for one instruction-set suffix.
 *
 * PIX_SAD(suf) defines eight static functions:
 *   sad8_suf,  sad8_x2_suf,  sad8_y2_suf,  sad8_xy2_suf,
 *   sad16_suf, sad16_x2_suf, sad16_y2_suf, sad16_xy2_suf
 * all with the signature (void *v, uint8_t *blk2, uint8_t *blk1,
 * int stride, int h). The parameter v is not used by any of them.
 *
 * Every generated function follows the same three steps:
 *   1. an asm statement zeroes mm7 and mm6 and, for the x2, y2 and xy2
 *      variants, loads mm5 from round_tab[1] (x2, y2) or round_tab[2]
 *      (xy2);
 *   2. one of the helpers sad8_1_suf, sad8_2_suf or sad8_4_suf is called;
 *   3. the result of sum_suf() is returned.
 *
 * Helper arguments per variant:
 *   plain: sad8_1_suf(blk1, blk2, stride, height)
 *   x2:    sad8_2_suf(blk1, blk1+1, blk2, stride, height)
 *   y2:    sad8_2_suf(blk1, blk1+stride, blk2, stride, height)
 *   xy2:   sad8_4_suf(blk1, blk2, stride, height)
 *
 * The 8-wide functions assert h==8 and pass the constant 8 as the height.
 * The 16-wide functions pass h through and call the helper twice, the
 * second time with every block pointer advanced by 8 bytes, before a
 * single sum_suf() call reduces the combined accumulator.
 *
 * The register state set up in step 1 is carried into the helpers and
 * into sum_suf() without being expressed as asm operands, so the order
 * of the statements inside each generated function must not change.
 */
271 #define PIX_SAD(suf)\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
272 static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
273 {\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
274 assert(h==8);\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
275 asm volatile("pxor %%mm7, %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
276 "pxor %%mm6, %%mm6 \n\t":);\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
277 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
278 sad8_1_ ## suf(blk1, blk2, stride, 8);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
279 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
280 return sum_ ## suf();\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
281 }\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
282 static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
283 {\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
284 assert(h==8);\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
285 asm volatile("pxor %%mm7, %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
286 "pxor %%mm6, %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
287 "movq %0, %%mm5 \n\t"\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
288 :: "m"(round_tab[1]) \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
289 );\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
290 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
291 sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 8);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
292 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
293 return sum_ ## suf();\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
294 }\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
295 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
296 static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
297 {\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
298 assert(h==8);\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
299 asm volatile("pxor %%mm7, %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
300 "pxor %%mm6, %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
301 "movq %0, %%mm5 \n\t"\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
302 :: "m"(round_tab[1]) \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
303 );\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
304 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
305 sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 8);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
306 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
307 return sum_ ## suf();\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
308 }\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
309 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
310 static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
311 {\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
312 assert(h==8);\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
313 asm volatile("pxor %%mm7, %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
314 "pxor %%mm6, %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
315 "movq %0, %%mm5 \n\t"\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
316 :: "m"(round_tab[2]) \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
317 );\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
318 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
319 sad8_4_ ## suf(blk1, blk2, stride, 8);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
320 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
321 return sum_ ## suf();\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
322 }\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
323 \
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
324 static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
325 {\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
326 asm volatile("pxor %%mm7, %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
327 "pxor %%mm6, %%mm6 \n\t":);\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
328 \
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
329 sad8_1_ ## suf(blk1 , blk2 , stride, h);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
330 sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
331 \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
332 return sum_ ## suf();\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
333 }\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
334 static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
335 {\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
336 asm volatile("pxor %%mm7, %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
337 "pxor %%mm6, %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
338 "movq %0, %%mm5 \n\t"\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
339 :: "m"(round_tab[1]) \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
340 );\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
341 \
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
342 sad8_2_ ## suf(blk1 , blk1+1, blk2 , stride, h);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
343 sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, h);\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
344 \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
345 return sum_ ## suf();\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
346 }\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
347 static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
348 {\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
349 asm volatile("pxor %%mm7, %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
350 "pxor %%mm6, %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
351 "movq %0, %%mm5 \n\t"\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
352 :: "m"(round_tab[1]) \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
353 );\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
354 \
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
355 sad8_2_ ## suf(blk1 , blk1+stride, blk2 , stride, h);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
356 sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, h);\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
357 \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
358 return sum_ ## suf();\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
359 }\
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
360 static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
361 {\
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
362 asm volatile("pxor %%mm7, %%mm7 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
363 "pxor %%mm6, %%mm6 \n\t"\
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
364 "movq %0, %%mm5 \n\t"\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
365 :: "m"(round_tab[2]) \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
366 );\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
367 \
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
368 sad8_4_ ## suf(blk1 , blk2 , stride, h);\
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
369 sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
370 \
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
371 return sum_ ## suf();\
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
372 }\
72
3049d6d452a3 suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff changeset
373
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
/* Expand PIX_SAD once per suffix. This defines the sad8 and sad16
 * functions ending in _mmx and in _mmx2 that dsputil_init_pix_mmx()
 * stores into the DSPContext. */
374 PIX_SAD(mmx)
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 72
diff changeset
375 PIX_SAD(mmx2)
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
376
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1065
diff changeset
/**
 * Install the MMX and MMX2 SAD functions into a DSPContext.
 *
 * When mm_flags has MM_MMX set, all eight pix_abs slots and sad[0],
 * sad[1] receive the _mmx versions. pix_abs[0][0..3] take the sad16
 * functions and pix_abs[1][0..3] the sad8 functions; the second index
 * selects 0 = plain, 1 = x2, 2 = y2, 3 = xy2.
 *
 * When mm_flags has MM_MMXEXT set, the plain slots and sad[0], sad[1]
 * are overwritten with the _mmx2 versions. The x2, y2 and xy2 slots are
 * overwritten only if CODEC_FLAG_BITEXACT is not set in avctx->flags.
 *
 * The MM_MMXEXT block runs after the MM_MMX block, so its assignments
 * take precedence when both flags are set.
 *
 * @param c     context whose pix_abs and sad tables are filled in
 * @param avctx codec context; only its flags field is read here
 */
377 void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
378 {
4197
bbe0bc387a19 revert bad checkin
mru
parents: 4196
diff changeset
/* baseline: plain MMX versions for every slot */
379 if (mm_flags & MM_MMX) {
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
380 c->pix_abs[0][0] = sad16_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
381 c->pix_abs[0][1] = sad16_x2_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
382 c->pix_abs[0][2] = sad16_y2_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
383 c->pix_abs[0][3] = sad16_xy2_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
384 c->pix_abs[1][0] = sad8_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
385 c->pix_abs[1][1] = sad8_x2_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
386 c->pix_abs[1][2] = sad8_y2_mmx;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
387 c->pix_abs[1][3] = sad8_xy2_mmx;
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
388
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
389 c->sad[0]= sad16_mmx;
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
390 c->sad[1]= sad8_mmx;
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
391 }
4197
bbe0bc387a19 revert bad checkin
mru
parents: 4196
diff changeset
/* MMX2 overrides: the plain SAD slots are always replaced */
392 if (mm_flags & MM_MMXEXT) {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
393 c->pix_abs[0][0] = sad16_mmx2;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
394 c->pix_abs[1][0] = sad8_mmx2;
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
395
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
396 c->sad[0]= sad16_mmx2;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
397 c->sad[1]= sad8_mmx2;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
398
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1065
diff changeset
/* NOTE(review): the interpolating _mmx2 versions are skipped in
 * bit-exact mode, presumably because their rounding differs from the
 * _mmx ones; confirm against the sad8_2_mmx2 and sad8_4_mmx2 helpers. */
399 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
400 c->pix_abs[0][1] = sad16_x2_mmx2;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
401 c->pix_abs[0][2] = sad16_y2_mmx2;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
402 c->pix_abs[0][3] = sad16_xy2_mmx2;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
403 c->pix_abs[1][1] = sad8_x2_mmx2;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
404 c->pix_abs[1][2] = sad8_y2_mmx2;
dea5b2946999 interlaced motion estimation
michael
parents: 1569
diff changeset
405 c->pix_abs[1][3] = sad8_xy2_mmx2;
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1065
diff changeset
406 }
1057
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
407 }
bb5de8a59da8 * static,const,compiler warning cleanup
kabi
parents: 936
diff changeset
408 }