Mercurial > libavcodec.hg
annotate i386/motion_est_mmx.c @ 112:a2c063b6ecf9 libavcodec
fixed a bug in the tmp buffer
fixed the color range for yuv
fixed the width %8!=0 bug (another 1% speed loss)
author | michael |
---|---|
date | Fri, 19 Oct 2001 13:41:38 +0000 |
parents | 3049d6d452a3 |
children | 944632089814 |
rev | line source |
---|---|
72
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
1 /* |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
2 * MMX optimized motion estimation |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
3 * Copyright (c) 2001 Gerard Lantau. |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
4 * |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
5 * This program is free software; you can redistribute it and/or modify |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
6 * it under the terms of the GNU General Public License as published by |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
7 * the Free Software Foundation; either version 2 of the License, or |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
8 * (at your option) any later version. |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
9 * |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
10 * This program is distributed in the hope that it will be useful, |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
13 * GNU General Public License for more details. |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
14 * |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
15 * You should have received a copy of the GNU General Public License |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
16 * along with this program; if not, write to the Free Software |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
18 * |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
19 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
20 #include "../dsputil.h" |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
21 #include "mmx.h" |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
22 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
/* Four packed 16-bit words, each equal to 1. Loaded into mm5 by
 * pix_abs16x16_x2_mmx and pix_abs16x16_y2_mmx as the "+ 1" rounding term
 * of the two-sample average. */
23 static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
/* Four packed 16-bit words, each equal to 2. Not referenced in this part of
 * the file; presumably the "+ 2" rounding term for sad_add_xy2 -- TODO confirm. */
24 static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
25 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
26 /* Adds the absolute differences of the 8 bytes at p1 and the 8 bytes at p2
 * to the four 16-bit partial sums kept in mm7.
 * On entry mm7 is the accumulator and mm6 is zero; mm0-mm2 are overwritten. */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
27 static inline void sad_add(const UINT8 *p1, const UINT8 *p2) |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
28 { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
29 movq_m2r(*p1, mm0); /* mm0 = 8 bytes from p1 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
30 movq_m2r(*p2, mm1); /* mm1 = 8 bytes from p2 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
31 movq_r2r(mm0, mm2); /* keep a copy of the p1 bytes */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
32 psubusb_r2r(mm1, mm0); /* mm0 = p1 - p2, saturated to 0 where p2 > p1 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
33 psubusb_r2r(mm2, mm1); /* mm1 = p2 - p1, saturated to 0 where p1 > p2 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
34 por_r2r(mm1, mm0); /* one of the two is 0 per byte, so mm0 is the absolute value */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
35 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
36 movq_r2r(mm0, mm1); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
37 punpcklbw_r2r(mm6, mm0); /* low 4 bytes widened to words (mm6 supplies the zero high bytes) */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
38 punpckhbw_r2r(mm6, mm1); /* high 4 bytes widened to words */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
39 paddusw_r2r(mm0, mm7); /* saturating add into the accumulator */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
40 paddusw_r2r(mm1, mm7); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
41 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
42 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
43 /* Reduces the four 16-bit partial sums in mm7 to a single value and returns it.
 * The additions saturate (paddusw) and only the low 16 bits are returned.
 * Overwrites mm0 and mm7. */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
44 static inline int sad_end(void) |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
45 { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
46 int res; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
47 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
48 movq_r2r(mm7, mm0); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
49 psrlq_i2r(32, mm7); /* bring words 2 and 3 down to positions 0 and 1 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
50 paddusw_r2r(mm0, mm7); /* word 0 = w0 + w2, word 1 = w1 + w3 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
51 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
52 movq_r2r(mm7, mm0); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
53 psrlq_i2r(16, mm7); /* bring word 1 down to position 0 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
54 paddusw_r2r(mm0, mm7); /* word 0 now holds the total of all four words */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
55 __asm __volatile ("movd %%mm7, %0" : "=a" (res)); /* copy the low 32 bits of mm7 into res */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
56 return res & 0xffff; /* keep word 0 only; word 1 holds a leftover partial sum */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
57 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
58 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
/* Sum of absolute differences between two blocks 16 bytes wide and h rows
 * tall; lx is added to both block pointers to step to the next row.
 * Two rows are handled per loop pass, so h is halved first. The do/while
 * runs its body before testing --h: with h of 0 or 1 the counter starts at 0
 * and the loop does not stop after one pass, and an odd h leaves the last
 * row out. NOTE(review): callers presumably always pass an even h >= 2 -- confirm.
 * The result comes from sad_end(), i.e. the low 16 bits of a saturating sum. */
59 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h) |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
60 { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
61 const UINT8 *p1, *p2; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
62 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
63 h >>= 1; /* loop count: two rows per pass */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
64 p1 = blk1; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
65 p2 = blk2; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
66 pxor_r2r(mm7, mm7); /* mm7 is accumulator */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
67 pxor_r2r(mm6, mm6); /* mm6 is zero constant */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
68 do { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
69 sad_add(p1, p2); /* first row of the pair, bytes 0..7 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
70 sad_add(p1 + 8, p2 + 8); /* first row of the pair, bytes 8..15 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
71 p1 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
72 p2 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
73 sad_add(p1, p2); /* second row of the pair, bytes 0..7 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
74 sad_add(p1 + 8, p2 + 8); /* second row of the pair, bytes 8..15 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
75 p1 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
76 p2 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
77 } while (--h); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
78 return sad_end(); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
79 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
80 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
81 /* please test it !
 * Adds the sum of absolute differences of the 16 bytes at p1 and the 16 bytes
 * at p2 to mm7, using psadbw on each 8-byte half.
 * On entry mm7 is the accumulator; mm0 and mm1 are overwritten. */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
82 static inline void sad_add_sse(const UINT8 *p1, const UINT8 *p2) |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
83 { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
84 movq_m2r(*(p1 + 0), mm0); /* mm0 = p1 bytes 0..7 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
85 movq_m2r(*(p1 + 8), mm1); /* mm1 = p1 bytes 8..15 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
86 psadbw_m2r(*(p2 + 0), mm0); /* mm0 = SAD against p2 bytes 0..7 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
87 psadbw_m2r(*(p2 + 8), mm1); /* mm1 = SAD against p2 bytes 8..15 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
88 paddusw_r2r(mm0, mm7); /* saturating add into the accumulator */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
89 paddusw_r2r(mm1, mm7); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
90 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
91 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
/* psadbw-based variant of pix_abs16x16_mmx: sum of absolute differences
 * between two blocks 16 bytes wide and h rows tall, with lx added to both
 * pointers per row. Two rows are handled per loop pass (h is halved first),
 * and the do/while body runs before --h is tested, so h of 0 or 1 does not
 * stop after one pass and an odd h leaves the last row out.
 * Only mm7 is cleared here; sad_add_sse does not read mm6. */
92 int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h) |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
93 { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
94 const UINT8 *p1, *p2; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
95 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
96 h >>= 1; /* loop count: two rows per pass */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
97 p1 = blk1; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
98 p2 = blk2; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
99 pxor_r2r(mm7, mm7); /* mm7 is accumulator */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
100 do { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
101 sad_add_sse(p1, p2); /* first row of the pair, all 16 bytes */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
102 p1 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
103 p2 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
104 sad_add_sse(p1, p2); /* second row of the pair */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
105 p1 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
106 p2 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
107 } while (--h); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
108 return sad_end(); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
109 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
110 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
/* Debug helper: stores MMX register `reg` into a temporary mmx_t and prints it
 * as "<reg>=<16 hex digits>". Expands to a bare { } block, not do/while(0).
 * NOTE(review): "%Lx" is not a standard printf length/conversion pair; "%llx"
 * would be the portable spelling if tmp.uq is unsigned long long -- confirm
 * against mmx.h. */
111 #define DUMP(reg) { mmx_t tmp; movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq); } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
112 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
113 /* Adds to mm7 the absolute differences between the 8 bytes at p1 and the
 * average of the 8 bytes at p2 and the 8 bytes at p3, where each average is
 * (p2[i] + p3[i] + mm5 word) >> 1.
 * On entry mm7 is the accumulator, mm6 is zero and mm5 holds the rounding
 * term (the callers in this file load mm_wone). mm0-mm3 are overwritten. */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
114 static inline void sad_add_x2(const UINT8 *p1, const UINT8 *p2, const UINT8 *p3) |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
115 { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
116 movq_m2r(*(p2 + 0), mm0); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
117 movq_m2r(*(p3 + 0), mm1); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
118 movq_r2r(mm0, mm2); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
119 movq_r2r(mm1, mm3); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
120 punpcklbw_r2r(mm6, mm0); /* extract 4 bytes low */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
121 punpcklbw_r2r(mm6, mm1); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
122 punpckhbw_r2r(mm6, mm2); /* high */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
123 punpckhbw_r2r(mm6, mm3); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
124 paddusw_r2r(mm1, mm0); /* low words: p2 + p3 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
125 paddusw_r2r(mm3, mm2); /* high words: p2 + p3 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
126 movq_m2r(*(p1 + 0), mm1); /* mm1 : other value */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
127 paddusw_r2r(mm5, mm0); /* + 1 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
128 paddusw_r2r(mm5, mm2); /* + 1 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
129 psrlw_i2r(1, mm0); /* divide by 2 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
130 psrlw_i2r(1, mm2); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
131 packuswb_r2r(mm2, mm0); /* average is in mm0 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
132 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
133 movq_r2r(mm1, mm2); /* keep a copy of the p1 bytes */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
134 psubusb_r2r(mm0, mm1); /* mm1 = p1 - average, saturated to 0 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
135 psubusb_r2r(mm2, mm0); /* mm0 = average - p1, saturated to 0 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
136 por_r2r(mm1, mm0); /* mm0 is absolute value */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
137 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
138 movq_r2r(mm0, mm1); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
139 punpcklbw_r2r(mm6, mm0); /* widen the absolute differences to words */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
140 punpckhbw_r2r(mm6, mm1); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
141 paddusw_r2r(mm0, mm7); /* saturating add into the accumulator */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
142 paddusw_r2r(mm1, mm7); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
143 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
144 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
/* Sum of absolute differences between blk1 and blk2 averaged with itself
 * shifted one byte to the right (each blk2 byte is averaged with its right
 * neighbour, rounding up). Blocks are 16 bytes wide and h rows tall; lx is
 * added to both pointers per row. Because of the p2 + 9 load, 17 bytes are
 * read from each blk2 row.
 * One row per loop pass; the do/while body runs before --h is tested, so an
 * h of 0 does not stop after one pass. */
145 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h) |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
146 { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
147 const UINT8 *p1, *p2; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
148 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
149 p1 = blk1; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
150 p2 = blk2; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
151 pxor_r2r(mm7, mm7); /* mm7 is accumulator */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
152 pxor_r2r(mm6, mm6); /* mm6 is zero constant */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
153 movq_m2r(mm_wone, mm5); /* one constant */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
154 do { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
155 sad_add_x2(p1, p2, p2 + 1); /* bytes 0..7 against avg(p2[i], p2[i + 1]) */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
156 sad_add_x2(p1 + 8, p2 + 8, p2 + 9); /* bytes 8..15, same averaging */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
157 p1 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
158 p2 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
159 } while (--h); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
160 return sad_end(); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
161 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
162 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
/* Sum of absolute differences between blk1 and blk2 averaged with the row
 * below it (each blk2 byte is averaged with the byte lx further on, rounding
 * up). Blocks are 16 bytes wide and h rows tall; lx is added to both pointers
 * per row. Because of the p2 + lx loads, h + 1 rows of blk2 are read.
 * One row per loop pass; the do/while body runs before --h is tested, so an
 * h of 0 does not stop after one pass. */
163 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h) |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
164 { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
165 const UINT8 *p1, *p2; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
166 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
167 p1 = blk1; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
168 p2 = blk2; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
169 pxor_r2r(mm7, mm7); /* mm7 is accumulator */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
170 pxor_r2r(mm6, mm6); /* mm6 is zero constant */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
171 movq_m2r(mm_wone, mm5); /* one constant */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
172 do { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
173 sad_add_x2(p1, p2, p2 + lx); /* bytes 0..7 against avg(p2[i], p2[i + lx]) */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
174 sad_add_x2(p1 + 8, p2 + 8, p2 + 8 + lx); /* bytes 8..15, same averaging */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
175 p1 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
176 p2 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
177 } while (--h); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
178 return sad_end(); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
179 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
180 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
181 /* mm7 is accumulator, mm6 is zero, mm5 is the rounding term added before the >> 2 (marked "+ 2" below) */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
182 static inline void sad_add_xy2(const UINT8 *p1, const UINT8 *p2, const UINT8 *p3) |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
183 { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
184 movq_m2r(*(p2 + 0), mm0); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
185 movq_m2r(*(p3 + 0), mm1); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
186 movq_r2r(mm0, mm2); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
187 movq_r2r(mm1, mm3); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
188 punpcklbw_r2r(mm6, mm0); /* extract 4 bytes low */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
189 punpcklbw_r2r(mm6, mm1); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
190 punpckhbw_r2r(mm6, mm2); /* high */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
191 punpckhbw_r2r(mm6, mm3); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
192 paddusw_r2r(mm1, mm0); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
193 paddusw_r2r(mm3, mm2); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
194 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
195 movq_m2r(*(p2 + 1), mm1); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
196 movq_m2r(*(p3 + 1), mm3); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
197 movq_r2r(mm1, mm4); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
198 punpcklbw_r2r(mm6, mm1); /* low */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
199 punpckhbw_r2r(mm6, mm4); /* high */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
200 paddusw_r2r(mm1, mm0); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
201 paddusw_r2r(mm4, mm2); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
202 movq_r2r(mm3, mm4); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
203 punpcklbw_r2r(mm6, mm3); /* low */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
204 punpckhbw_r2r(mm6, mm4); /* high */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
205 paddusw_r2r(mm3, mm0); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
206 paddusw_r2r(mm4, mm2); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
207 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
208 movq_m2r(*(p1 + 0), mm1); /* mm1 : other value */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
209 paddusw_r2r(mm5, mm0); /* + 2 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
210 paddusw_r2r(mm5, mm2); /* + 2 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
211 psrlw_i2r(2, mm0); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
212 psrlw_i2r(2, mm2); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
213 packuswb_r2r(mm2, mm0); /* average is in mm0 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
214 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
215 movq_r2r(mm1, mm2); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
216 psubusb_r2r(mm0, mm1); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
217 psubusb_r2r(mm2, mm0); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
218 por_r2r(mm1, mm0); /* mm0 is absolute value */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
219 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
220 movq_r2r(mm0, mm1); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
221 punpcklbw_r2r(mm6, mm0); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
222 punpckhbw_r2r(mm6, mm1); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
223 paddusw_r2r(mm0, mm7); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
224 paddusw_r2r(mm1, mm7); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
225 } |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
226 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
227 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h) /* runs sad_add_xy2() on two 8-byte halves of each of h rows, stepping blk1/blk2 by lx per row, and returns sad_end() */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
228 { |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
229 const UINT8 *p1, *p2, *p3; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
230 |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
231 p1 = blk1; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
232 p2 = blk2; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
233 p3 = blk2 + lx; /* NOTE(review): p3 is never read below; the loop passes p2 + lx instead */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
234 pxor_r2r(mm7, mm7); /* mm7 is accumulator */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
235 pxor_r2r(mm6, mm6); /* mm6 is zero constant */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
236 movq_m2r(mm_wtwo, mm5); /* mm5 = mm_wtwo (defined elsewhere; name suggests words of 2, not one) */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
237 do { /* do-while: body runs before h is tested, so h must be >= 1 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
238 sad_add_xy2(p1, p2, p2 + lx); /* first 8 bytes of the row; third argument is the next row of blk2 */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
239 sad_add_xy2(p1 + 8, p2 + 8, p2 + 8 + lx); /* second 8 bytes of the same row */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
240 p1 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
241 p2 += lx; |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
242 } while (--h); |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
243 return sad_end(); /* sad_end() is defined elsewhere; presumably reduces the mm7 accumulator to an int - confirm */ |
3049d6d452a3
suppressed nasm dependancy - rewrote forward DCT and motion estimation code
glantau
parents:
diff
changeset
|
244 } |