annotate alpha/motion_est_alpha.c @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 9e7d38743146
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
1 /*
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
2 * Alpha optimized DSP utils
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
15 * Lesser General Public License for more details.
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
16 *
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
20 */
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
21
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 5010
diff changeset
22 #include "libavcodec/dsputil.h"
11396
9e7d38743146 Alpha: move dsputil prototypes to a header file
mru
parents: 6763
diff changeset
23 #include "dsputil_alpha.h"
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
24 #include "asm.h"
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
25
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
26 void get_pixels_mvi(DCTELEM *restrict block,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
27 const uint8_t *restrict pixels, int line_size)
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
28 {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
29 int h = 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
30
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
31 do {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
32 uint64_t p;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
33
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
34 p = ldq(pixels);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
35 stq(unpkbw(p), block);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
36 stq(unpkbw(p >> 32), block + 4);
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
37
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
38 pixels += line_size;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
39 block += 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
40 } while (--h);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
41 }
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
42
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
43 void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
44 int stride) {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
45 int h = 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
46 uint64_t mask = 0x4040;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
47
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
48 mask |= mask << 16;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
49 mask |= mask << 32;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
50 do {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
51 uint64_t x, y, c, d, a;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
52 uint64_t signs;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
53
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
54 x = ldq(s1);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
55 y = ldq(s2);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
56 c = cmpbge(x, y);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
57 d = x - y;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
58 a = zap(mask, c); /* We use 0x4040404040404040 here... */
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
59 d += 4 * a; /* ...so we can use s4addq here. */
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
60 signs = zap(-1, c);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
61
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
62 stq(unpkbw(d) | (unpkbw(signs) << 8), block);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
63 stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
64
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
65 s1 += stride;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
66 s2 += stride;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
67 block += 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
68 } while (--h);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
69 }
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
70
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
/**
 * Byte-wise average of two packed vectors, rounding halves up.
 *
 * Uses the identity (a + b + 1) / 2 == (a | b) - ((a ^ b) >> 1) on every
 * byte lane. The xor is masked with 0xfe before shifting so that no bit
 * crosses into the neighbouring lane.
 * NOTE(review): assumes BYTE_VEC(x) replicates x into all eight bytes.
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    uint64_t upper     = a | b;
    uint64_t half_diff = ((a ^ b) & BYTE_VEC(0xfe)) >> 1;

    return upper - half_diff;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
75
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
/**
 * Byte-wise average of four packed vectors with rounding.
 *
 * Each input byte is split into its upper six bits and its lower two bits.
 * The upper parts are divided by four before being added, so the sum cannot
 * overflow a byte lane. The lower parts are added together with a rounding
 * term of 2 and then divided by four; the result is masked back to two bits
 * per lane before being combined with the upper sum.
 * NOTE(review): assumes BYTE_VEC(x) replicates x into all eight bytes.
 */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    uint64_t low_mask  = BYTE_VEC(0x03);
    uint64_t high_mask = ~BYTE_VEC(0x03);
    uint64_t rounding  = BYTE_VEC(0x02);

    uint64_t high_sum = ((l1 & high_mask) >> 2)
                      + ((l2 & high_mask) >> 2)
                      + ((l3 & high_mask) >> 2)
                      + ((l4 & high_mask) >> 2);

    uint64_t low_sum = (l1 & low_mask)
                     + (l2 & low_mask)
                     + (l3 & low_mask)
                     + (l4 & low_mask)
                     + rounding;

    return high_sum + ((low_sum >> 2) & low_mask);
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
89
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/**
 * Accumulate perr() over h rows of 8 pixels from pix1 and pix2.
 * NOTE(review): perr() is defined outside this file; presumably the Alpha
 * MVI "pixel error" operation, i.e. a sum of absolute byte differences.
 *
 * The alignment of pix2 is tested once, before the loop. pix1 is always read
 * with ldq(); pix2 is read with ldq() when 8-byte aligned and with uldq()
 * otherwise.
 *
 * @param v         unused
 * @param pix1      first block, read with ldq()
 * @param pix2      second block, any alignment
 * @param line_size distance in bytes between two rows
 * @param h         number of rows; the loops are do/while, so at least one
 *                  row is always processed
 * @return accumulated perr() value over all rows
 */
int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;

    if (!((size_t) pix2 & 0x7)) {
        /* Both pointers can be read with plain quadword loads. */
        do {
            uint64_t cur = ldq(pix1);
            uint64_t ref = ldq(pix2);

            sad  += perr(cur, ref);
            pix1 += line_size;
            pix2 += line_size;
        } while (--h);

        return sad;
    }

    /* uldq() works only when pix2 is actually unaligned. */
    do { /* 8 pixels per iteration */
        uint64_t cur = ldq(pix1);
        uint64_t ref = uldq(pix2);

        sad  += perr(cur, ref);
        pix1 += line_size;
        pix2 += line_size;
    } while (--h);

    return sad;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
121
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
#if 0 /* now done in assembly */
/**
 * Accumulate perr() over a 16x16 block (disabled C reference version).
 *
 * pix1 is read with ldq(); pix2 is read with ldq() when 8-byte aligned, and
 * otherwise rebuilt from three ldq_u() loads combined with extql()/extqh().
 */
int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int sad = 0;
    int rows_left = 16;

    if (!((size_t) pix2 & 0x7)) {
        do {
            uint64_t cur_lo = ldq(pix1);
            uint64_t cur_hi = ldq(pix1 + 8);
            uint64_t ref_lo = ldq(pix2);
            uint64_t ref_hi = ldq(pix2 + 8);

            sad += perr(cur_lo, ref_lo)
                 + perr(cur_hi, ref_hi);

            pix1 += line_size;
            pix2 += line_size;
        } while (--rows_left);
    } else {
        /* works only when pix2 is actually unaligned */
        do { /* 16 pixels per iteration */
            uint64_t cur_lo = ldq(pix1);
            uint64_t cur_hi = ldq(pix1 + 8);
            uint64_t mid    = ldq_u(pix2 + 8);
            uint64_t ref_lo = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
            uint64_t ref_hi = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);

            sad += perr(cur_lo, ref_lo)
                 + perr(cur_hi, ref_hi);

            pix1 += line_size;
            pix2 += line_size;
        } while (--rows_left);
    }

    return sad;
}
#endif
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
164
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/**
 * Accumulate perr() between pix1 and the horizontal half-pixel interpolation
 * of pix2, for h rows of 16 pixels.
 *
 * For every row the reference is avg2(row at pix2, row at pix2 + 1), so 17
 * bytes of pix2 are needed per row. How the two shifted copies are built
 * depends on the alignment of pix2, which is examined once before the loop.
 * NOTE(review): ldq_u/extql/extqh/perr are defined outside this file; the
 * comments below assume the usual Alpha unaligned-load idiom.
 *
 * @param v         unused
 * @param pix1      current block, read with ldq()
 * @param pix2      reference block, any alignment
 * @param line_size distance in bytes between two rows
 * @param h         number of rows; at least one row is always processed
 * @return accumulated perr() value over all rows
 */
int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint64_t disalign = (size_t) pix2 & 0x7;
    int sad = 0;

    if (disalign == 0) {
        /* Aligned: the "+1" copy is the row shifted down by one byte, with
           the first byte of the following quadword moved into the top. */
        do {
            uint64_t cur_lo = ldq(pix1);
            uint64_t cur_hi = ldq(pix1 + 8);
            uint64_t ref_lo = ldq(pix2);
            uint64_t ref_hi = ldq(pix2 + 8);
            uint64_t nxt_lo = (ref_lo >> 8) | (ref_hi << 56);
            uint64_t nxt_hi = (ref_hi >> 8) | ((uint64_t) pix2[16] << 56);

            sad += perr(cur_lo, avg2(ref_lo, nxt_lo))
                 + perr(cur_hi, avg2(ref_hi, nxt_hi));

            pix1 += line_size;
            pix2 += line_size;
        } while (--h);
    } else if (disalign == 7) {
        /* |.......l|lllllllr|rrrrrrr*|
           Special case: disalign + 1 would be 8, which extqh treats as 0.
           The "+1" copies are simply the second and third quadwords, which
           is also a bit faster. */
        do {
            uint64_t cur_lo = ldq(pix1);
            uint64_t cur_hi = ldq(pix1 + 8);
            uint64_t q0     = ldq_u(pix2);
            uint64_t q1     = ldq_u(pix2 + 8);
            uint64_t q2     = ldq_u(pix2 + 16);
            uint64_t ref_lo = extql(q0, disalign) | extqh(q1, disalign);
            uint64_t ref_hi = extql(q1, disalign) | extqh(q2, disalign);

            sad += perr(cur_lo, avg2(ref_lo, q1))
                 + perr(cur_hi, avg2(ref_hi, q2));

            pix1 += line_size;
            pix2 += line_size;
        } while (--h);
    } else {
        /* General case: extract both copies, at offsets disalign and
           disalign + 1, from the same three quadwords. */
        uint64_t disalign1 = disalign + 1;

        do {
            uint64_t cur_lo = ldq(pix1);
            uint64_t cur_hi = ldq(pix1 + 8);
            uint64_t q0     = ldq_u(pix2);
            uint64_t q1     = ldq_u(pix2 + 8);
            uint64_t q2     = ldq_u(pix2 + 16);
            uint64_t ref_lo = extql(q0, disalign)  | extqh(q1, disalign);
            uint64_t nxt_lo = extql(q0, disalign1) | extqh(q1, disalign1);
            uint64_t ref_hi = extql(q1, disalign)  | extqh(q2, disalign);
            uint64_t nxt_hi = extql(q1, disalign1) | extqh(q2, disalign1);

            sad += perr(cur_lo, avg2(ref_lo, nxt_lo))
                 + perr(cur_hi, avg2(ref_hi, nxt_hi));

            pix1 += line_size;
            pix2 += line_size;
        } while (--h);
    }

    return sad;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
237
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/**
 * Accumulate perr() between pix1 and the vertical half-pixel interpolation
 * of pix2, for h rows of 16 pixels.
 *
 * The reference for each row is avg2(row at pix2, row at pix2 + line_size).
 * The lower row of one iteration is kept and reused as the upper row of the
 * next, so h + 1 rows of pix2 are read in total.
 * NOTE(review): ldq_u/extql/extqh/perr are defined outside this file; the
 * unaligned path assumes the usual Alpha unaligned-load idiom.
 *
 * @param v         unused
 * @param pix1      current block, read with ldq()
 * @param pix2      reference block; alignment is tested once, up front
 * @param line_size distance in bytes between two rows
 * @param h         number of rows; at least one row is always processed
 * @return accumulated perr() value over all rows
 */
int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint64_t above_lo, above_hi;
    int sad = 0;

    if (!((size_t) pix2 & 0x7)) {
        above_lo = ldq(pix2);
        above_hi = ldq(pix2 + 8);

        do {
            uint64_t cur_lo = ldq(pix1);
            uint64_t cur_hi = ldq(pix1 + 8);
            uint64_t below_lo, below_hi;

            pix2    += line_size;
            below_lo = ldq(pix2);
            below_hi = ldq(pix2 + 8);

            sad += perr(cur_lo, avg2(above_lo, below_lo))
                 + perr(cur_hi, avg2(above_hi, below_hi));

            pix1    += line_size;
            above_lo = below_lo;
            above_hi = below_hi;
        } while (--h);
    } else {
        uint64_t mid = ldq_u(pix2 + 8);

        above_lo = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
        above_hi = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);

        do {
            uint64_t cur_lo = ldq(pix1);
            uint64_t cur_hi = ldq(pix1 + 8);
            uint64_t below_lo, below_hi;

            pix2    += line_size;
            mid      = ldq_u(pix2 + 8);
            below_lo = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
            below_hi = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);

            sad += perr(cur_lo, avg2(above_lo, below_lo))
                 + perr(cur_hi, avg2(above_hi, below_hi));

            pix1    += line_size;
            above_lo = below_lo;
            above_hi = below_hi;
        } while (--h);
    }

    return sad;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
290
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/**
 * Accumulate perr() between a 16-byte-wide block and the 2x2 average
 * (x and y half-sample interpolation) of a reference block.
 *
 * For every row, each reference byte is combined through avg4() with its
 * right-hand neighbour and with the two bytes directly below them, and the
 * result is compared against the matching row of pix1 with perr().
 * NOTE(review): perr()/avg4() are defined outside this function; presumably
 * perr() is the MVI packed sum-of-absolute-differences and avg4() a packed
 * rounding average of four vectors -- confirm against their definitions.
 *
 * @param v         unused
 * @param pix1      block to compare; always read with ldq(), so presumably
 *                  it must be 8-byte aligned -- confirm against callers
 * @param pix2      reference block; any alignment (checked per row).
 *                  Rows 0..h and bytes 0..16 of each row are read.
 * @param line_size distance in bytes between consecutive rows of both blocks
 * @param h         number of rows; must be >= 1 because the loop is a
 *                  do/while that decrements before testing
 * @return the accumulated perr() value over all rows
 */
int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;

    /* Current reference row: left 8 bytes, right 8 bytes, and byte 16
     * already positioned in the top byte lane so it can be OR-ed into the
     * right half after that half has been shifted down by one byte. */
    uint64_t p2_l, p2_r, p2_x;

    if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
        p2_l = uldq(pix2);
        p2_r = uldq(pix2 + 8);
        p2_x = (uint64_t) pix2[16] << 56;
    } else {
        p2_l = ldq(pix2);
        p2_r = ldq(pix2 + 8);
        p2_x = ldq(pix2 + 16) << 56;
    }

    do {
        uint64_t p1_l, p1_r;
        uint64_t np2_l, np2_r, np2_x;

        /* Load the current pix1 row here rather than pre-loading the next
         * one at the end of the previous iteration: the pre-load touched
         * row h of pix1, which is never used and lies past the block. */
        p1_l = ldq(pix1);
        p1_r = ldq(pix1 + 8);

        pix1 += line_size;
        pix2 += line_size;

        /* Reference row below the current one (needed for the y half). */
        if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
            np2_l = uldq(pix2);
            np2_r = uldq(pix2 + 8);
            np2_x = (uint64_t) pix2[16] << 56;
        } else {
            np2_l = ldq(pix2);
            np2_r = ldq(pix2 + 8);
            np2_x = ldq(pix2 + 16) << 56;
        }

        /* (x >> 8) | (next << 56) yields the same row shifted by one byte,
         * i.e. the right-hand neighbours of the bytes in x. */
        result += perr(p1_l,
                       avg4( p2_l, ( p2_l >> 8) | ( p2_r << 56),
                            np2_l, (np2_l >> 8) | (np2_r << 56)))
                + perr(p1_r,
                       avg4( p2_r, ( p2_r >> 8) |  p2_x,
                            np2_r, (np2_r >> 8) | np2_x));

        /* The lower row becomes the upper row of the next iteration. */
        p2_l = np2_l;
        p2_r = np2_r;
        p2_x = np2_x;
    } while (--h);

    return result;
}