annotate alpha/motion_est_alpha.c @ 1708:dea5b2946999 libavcodec

interlaced motion estimation; interlaced mpeg2 encoding; P & B frames; rate-distorted interlaced mb decision; alternate scantable support; 4mv encoding fixes (that's also why the regression tests change); passing height to most dsp functions; interlaced mpeg4 encoding (no direct mode MBs yet); various related cleanups; disabled old motion estimation algorithms (log, full, ...), they will either be fixed or removed
author michael
date Tue, 30 Dec 2003 16:07:57 +0000
parents 107a56aa74f5
children ef2149182f1c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
1 /*
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
2 * Alpha optimized DSP utils
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
4 *
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
5 * This library is free software; you can redistribute it and/or
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
6 * modify it under the terms of the GNU Lesser General Public
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
7 * License as published by the Free Software Foundation; either
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
8 * version 2 of the License, or (at your option) any later version.
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
9 *
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
10 * This library is distributed in the hope that it will be useful,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
13 * Lesser General Public License for more details.
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
14 *
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
15 * You should have received a copy of the GNU Lesser General Public
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
16 * License along with this library; if not, write to the Free Software
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
18 */
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
19
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
20 #include "asm.h"
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
21 #include "../dsputil.h"
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
22
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
23 void get_pixels_mvi(DCTELEM *restrict block,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
24 const uint8_t *restrict pixels, int line_size)
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
25 {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
26 int h = 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
27
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
28 do {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
29 uint64_t p;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
30
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
31 p = ldq(pixels);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
32 stq(unpkbw(p), block);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
33 stq(unpkbw(p >> 32), block + 4);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
34
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
35 pixels += line_size;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
36 block += 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
37 } while (--h);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
38 }
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
39
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
40 void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
41 int stride) {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
42 int h = 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
43 uint64_t mask = 0x4040;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
44
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
45 mask |= mask << 16;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
46 mask |= mask << 32;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
47 do {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
48 uint64_t x, y, c, d, a;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
49 uint64_t signs;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
50
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
51 x = ldq(s1);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
52 y = ldq(s2);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
53 c = cmpbge(x, y);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
54 d = x - y;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
55 a = zap(mask, c); /* We use 0x4040404040404040 here... */
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
56 d += 4 * a; /* ...so we can use s4addq here. */
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
57 signs = zap(-1, c);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
58
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
59 stq(unpkbw(d) | (unpkbw(signs) << 8), block);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
60 stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
61
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
62 s1 += stride;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
63 s2 += stride;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
64 block += 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
65 } while (--h);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
66 }
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
67
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
/*
 * Average two quadwords lane by lane without letting carries cross lanes.
 *
 * Computes (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1).  Assuming
 * BYTE_VEC(x) replicates x into all eight bytes (it is defined in asm.h, not
 * visible here), masking with 0xfe before the shift keeps each byte's low
 * bit from leaking into its neighbour, and the result is the per-byte
 * average rounded up, (a + b + 1) >> 1.
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    const uint64_t either_set = a | b;
    const uint64_t half_delta = ((a ^ b) & BYTE_VEC(0xfe)) >> 1;

    return either_set - half_delta;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
72
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
/*
 * Average four quadwords lane by lane.
 *
 * Each input is split into its upper six bits and lower two bits per byte
 * (assuming BYTE_VEC(x) replicates x into every byte; it comes from asm.h,
 * not visible here).  The upper parts are pre-shifted by two and summed, so
 * that sum cannot overflow a lane.  The lower parts are summed together with
 * a rounding term of 2 per lane, shifted by two and masked back to two bits.
 * The result is the sum of both partial results.
 */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    const uint64_t low_bits  = BYTE_VEC(0x03);
    const uint64_t high_bits = ~low_bits;
    uint64_t quarters;
    uint64_t remainder;

    /* Sum of the four inputs' high parts, each already divided by 4. */
    quarters  = (l1 & high_bits) >> 2;
    quarters += (l2 & high_bits) >> 2;
    quarters += (l3 & high_bits) >> 2;
    quarters += (l4 & high_bits) >> 2;

    /* Sum of the low two bits plus rounding, then divided by 4. */
    remainder  = l1 & low_bits;
    remainder += l2 & low_bits;
    remainder += l3 & low_bits;
    remainder += l4 & low_bits;
    remainder += BYTE_VEC(0x02);
    remainder  = (remainder >> 2) & low_bits;

    return quarters + remainder;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
86
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/*
 * Accumulate perr() over h rows of 8 pixels between pix1 and pix2.
 *
 * v          unused by this function
 * pix1       first block; always read with ldq()
 * pix2       second block; read with ldq() when its address is a multiple
 *            of 8, otherwise with uldq()
 * line_size  byte distance between consecutive rows of both blocks
 * h          number of rows; the loop body runs before h is tested, so at
 *            least one row is always processed
 *
 * Returns the sum of the per-row perr() values.
 *
 * NOTE(review): perr/ldq/uldq are defined in asm.h (not visible here);
 * perr is presumably the MVI "pixel error" (sum of absolute byte
 * differences) -- confirm there.
 */
int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;

    if (((size_t) pix2 & 0x7) == 0) {
        /* Both rows can be fetched with plain quadword loads. */
        do {
            sad += perr(ldq(pix1), ldq(pix2));

            pix1 += line_size;
            pix2 += line_size;
        } while (--h != 0);

        return sad;
    }

    /* The uldq() path works only when pix2 is actually unaligned. */
    do {                        /* 8 pixels per iteration */
        sad += perr(ldq(pix1), uldq(pix2));

        pix1 += line_size;
        pix2 += line_size;
    } while (--h != 0);

    return sad;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
118
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents: 586
diff changeset
#if 0                           /* now done in assembly */
/*
 * Disabled C version of the 16x16 perr() accumulation, kept for reference.
 * It is excluded from compilation by the surrounding #if 0.
 *
 * Note that this signature has neither the leading `void *v` nor the
 * trailing `int h` that the live functions in this file take; the height is
 * fixed at 16 rows here.
 *
 * pix1 is read with aligned loads.  pix2 is read with aligned loads when its
 * address is a multiple of 8; otherwise each 8-byte half is assembled from
 * two ldq_u() loads combined with extql()/extqh().
 */
int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int sad = 0;
    int row;

    if (((size_t) pix2 & 0x7) == 0) {
        for (row = 0; row < 16; row++) {
            sad += perr(ldq(pix1),     ldq(pix2))
                 + perr(ldq(pix1 + 8), ldq(pix2 + 8));

            pix1 += line_size;
            pix2 += line_size;
        }
        return sad;
    }

    /* This path works only when pix2 is actually unaligned. */
    for (row = 0; row < 16; row++) {    /* 16 pixels per iteration */
        /* The middle quadword is shared by the left and right halves. */
        const uint64_t mid   = ldq_u(pix2 + 8);
        const uint64_t ref_l = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
        const uint64_t ref_r = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);

        sad += perr(ldq(pix1),     ref_l)
             + perr(ldq(pix1 + 8), ref_r);

        pix1 += line_size;
        pix2 += line_size;
    }

    return sad;
}
#endif
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
161
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/*
 * Accumulate perr() over h rows of 16 pixels between pix1 and the
 * horizontal avg2() of pix2, i.e. each reference pixel is avg2() of
 * pix2[x] and pix2[x + 1].
 *
 * v          unused by this function
 * pix1       current block; read with ldq() at pix1 and pix1 + 8
 * pix2       reference block; 17 bytes per row are needed, any alignment
 * line_size  byte distance between consecutive rows
 * h          number of rows; the loop body runs before h is tested, so at
 *            least one row is always processed
 *
 * Three strategies are selected by the low three address bits of pix2,
 * which are sampled once on entry (the same value is reused for every row).
 *
 * NOTE(review): ldq/ldq_u/extql/extqh/perr are defined in asm.h, which is
 * not visible here; the comments below assume they behave like the Alpha
 * instructions of the same names.
 */
int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const uint64_t disalign = (size_t) pix2 & 0x7;
    int sad = 0;

    if (disalign == 0) {
        /* Aligned: build the one-pixel-shifted row with plain shifts. */
        do {
            const uint64_t ref_l  = ldq(pix2);
            const uint64_t ref_r  = ldq(pix2 + 8);
            const uint64_t next_l = (ref_l >> 8) | (ref_r << 56);
            const uint64_t next_r = (ref_r >> 8) | ((uint64_t) pix2[16] << 56);

            sad += perr(ldq(pix1),     avg2(ref_l, next_l))
                 + perr(ldq(pix1 + 8), avg2(ref_r, next_r));

            pix1 += line_size;
            pix2 += line_size;
        } while (--h != 0);
    } else if (disalign == 7) {
        /* |.......l|lllllllr|rrrrrrr*|
           Special case: disalign + 1 would be 8, which extqh treats as 0.
           The one-pixel-shifted data is then simply the next aligned
           quadword, so no second extract is needed -- which also makes
           this path a bit faster. */
        do {
            const uint64_t lo  = ldq_u(pix2);
            const uint64_t mid = ldq_u(pix2 + 8);
            const uint64_t hi  = ldq_u(pix2 + 16);
            const uint64_t ref_l = extql(lo,  disalign) | extqh(mid, disalign);
            const uint64_t ref_r = extql(mid, disalign) | extqh(hi,  disalign);

            sad += perr(ldq(pix1),     avg2(ref_l, mid))
                 + perr(ldq(pix1 + 8), avg2(ref_r, hi));

            pix1 += line_size;
            pix2 += line_size;
        } while (--h != 0);
    } else {
        /* General case: extract once at disalign and once at disalign + 1. */
        const uint64_t disalign1 = disalign + 1;

        do {
            const uint64_t lo  = ldq_u(pix2);
            const uint64_t mid = ldq_u(pix2 + 8);
            const uint64_t hi  = ldq_u(pix2 + 16);
            const uint64_t ref_l  = extql(lo,  disalign)  | extqh(mid, disalign);
            const uint64_t next_l = extql(lo,  disalign1) | extqh(mid, disalign1);
            const uint64_t ref_r  = extql(mid, disalign)  | extqh(hi,  disalign);
            const uint64_t next_r = extql(mid, disalign1) | extqh(hi,  disalign1);

            sad += perr(ldq(pix1),     avg2(ref_l, next_l))
                 + perr(ldq(pix1 + 8), avg2(ref_r, next_r));

            pix1 += line_size;
            pix2 += line_size;
        } while (--h != 0);
    }

    return sad;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
234
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/*
 * Accumulate perr() over h rows of 16 pixels between pix1 and the vertical
 * avg2() of pix2, i.e. each reference row is avg2() of pix2 row y and
 * pix2 row y + 1.
 *
 * v          unused by this function
 * pix1       current block; read with ldq() at pix1 and pix1 + 8
 * pix2       reference block; h + 1 rows are read (one before the loop,
 *            one more per iteration)
 * line_size  byte distance between consecutive rows
 * h          number of rows; the loop body runs before h is tested, so at
 *            least one row is always processed
 *
 * The previously loaded pix2 row is carried across iterations, so every
 * row of pix2 is fetched only once.
 *
 * NOTE(review): ldq/ldq_u/extql/extqh/perr are defined in asm.h, which is
 * not visible here.
 */
int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint64_t above_l, above_r;  /* pix2 row already in registers */
    uint64_t mid;               /* quadword shared by both halves */
    int sad = 0;

    if (((size_t) pix2 & 0x7) == 0) {
        /* Aligned reference: plain quadword loads. */
        above_l = ldq(pix2);
        above_r = ldq(pix2 + 8);

        do {
            uint64_t below_l, below_r;

            pix2   += line_size;
            below_l = ldq(pix2);
            below_r = ldq(pix2 + 8);

            sad += perr(ldq(pix1),     avg2(above_l, below_l))
                 + perr(ldq(pix1 + 8), avg2(above_r, below_r));

            pix1   += line_size;
            above_l = below_l;
            above_r = below_r;
        } while (--h != 0);

        return sad;
    }

    /* Unaligned reference: assemble each half from two ldq_u() loads.  The
       extract amount is taken from the current pix2 on every row. */
    mid     = ldq_u(pix2 + 8);
    above_l = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
    above_r = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);

    do {
        uint64_t below_l, below_r;

        pix2   += line_size;
        mid     = ldq_u(pix2 + 8);
        below_l = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
        below_r = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);

        sad += perr(ldq(pix1),     avg2(above_l, below_l))
             + perr(ldq(pix1 + 8), avg2(above_r, below_r));

        pix1   += line_size;
        above_l = below_l;
        above_r = below_r;
    } while (--h != 0);

    return sad;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
287
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/*
 * Accumulate perr() between a 16-pixel-wide, h-row block at pix1 and the
 * 2x2 avg4() interpolation of the block at pix2 (each reference pixel is
 * combined with its right, lower and lower-right neighbours).
 *
 * v          unused here.
 * pix1       block to compare; read with ldq(), so it is expected to be
 *            8-byte aligned (NOTE(review): confirm against asm.h).
 * pix2       reference block; may be unaligned.  17 bytes per row and
 *            h + 1 rows are read from it.
 * line_size  byte distance between consecutive rows of both blocks.
 * h          number of rows; must be at least 1, because the loop body
 *            runs once before h is tested.
 *
 * Returns the sum over all rows of perr() for the left and right 8-byte
 * halves (presumably the sum of absolute byte differences -- perr() is
 * defined outside this function).
 */
int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;
    uint64_t p2_l, p2_r, p2_x;

    /* Row 0 of the reference: two quadwords plus the 17th byte, which is
       kept pre-shifted into the top byte so it can be OR-ed in directly. */
    if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
        p2_l = uldq(pix2);
        p2_r = uldq(pix2 + 8);
        p2_x = (uint64_t) pix2[16] << 56;
    } else {
        p2_l = ldq(pix2);
        p2_r = ldq(pix2 + 8);
        p2_x = ldq(pix2 + 16) << 56;
    }

    do {
        uint64_t p1_l, p1_r;
        uint64_t np2_l, np2_r, np2_x;
        uint64_t p2_l1, p2_r1, np2_l1, np2_r1;

        /* Load the current pix1 row here rather than prefetching the next
           one at the end of the previous iteration: the prefetch touched
           row h of pix1 on the final pass, which lies outside the block
           and was never used. */
        p1_l = ldq(pix1);
        p1_r = ldq(pix1 + 8);

        /* The alignment test is repeated per row because line_size is not
           known to be a multiple of 8. */
        pix2 += line_size;
        if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
            np2_l = uldq(pix2);
            np2_r = uldq(pix2 + 8);
            np2_x = (uint64_t) pix2[16] << 56;
        } else {
            np2_l = ldq(pix2);
            np2_r = ldq(pix2 + 8);
            np2_x = ldq(pix2 + 16) << 56;
        }

        /* Build the same rows displaced by one byte: drop the lowest byte
           and bring the following byte in at the top (this is the pixel to
           the right, assuming little-endian quadwords as on Alpha). */
        p2_l1  = ( p2_l >> 8) | ( p2_r << 56);
        np2_l1 = (np2_l >> 8) | (np2_r << 56);
        p2_r1  = ( p2_r >> 8) |  p2_x;
        np2_r1 = (np2_r >> 8) | np2_x;

        result += perr(p1_l, avg4(p2_l, p2_l1, np2_l, np2_l1))
                + perr(p1_r, avg4(p2_r, p2_r1, np2_r, np2_r1));

        /* The lower reference row becomes the upper row of the next pass. */
        pix1 += line_size;
        p2_l = np2_l;
        p2_r = np2_r;
        p2_x = np2_x;
    } while (--h);

    return result;
}