annotate alpha/motion_est_alpha.c @ 7855:9a135b6a1dc7 libavcodec

Correct order of parsing for pulse scalefactor band and offset to match the specification. Patch by Alex Converse (alex converse gmail com)
author superdump
date Sat, 13 Sep 2008 18:47:43 +0000
parents f7cbb7733146
children 9e7d38743146
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
1 /*
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
2 * Alpha optimized DSP utils
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
15 * Lesser General Public License for more details.
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
16 *
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
20 */
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
21
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 5010
diff changeset
22 #include "libavcodec/dsputil.h"
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
23 #include "asm.h"
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
24
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
25 void get_pixels_mvi(DCTELEM *restrict block,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
26 const uint8_t *restrict pixels, int line_size)
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
27 {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
28 int h = 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
29
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
30 do {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
31 uint64_t p;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
32
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
33 p = ldq(pixels);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
34 stq(unpkbw(p), block);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
35 stq(unpkbw(p >> 32), block + 4);
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
36
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
37 pixels += line_size;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
38 block += 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
39 } while (--h);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
40 }
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
41
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
42 void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
43 int stride) {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
44 int h = 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
45 uint64_t mask = 0x4040;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
46
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
47 mask |= mask << 16;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
48 mask |= mask << 32;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
49 do {
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
50 uint64_t x, y, c, d, a;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
51 uint64_t signs;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
52
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
53 x = ldq(s1);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
54 y = ldq(s2);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
55 c = cmpbge(x, y);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
56 d = x - y;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
57 a = zap(mask, c); /* We use 0x4040404040404040 here... */
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
58 d += 4 * a; /* ...so we can use s4addq here. */
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
59 signs = zap(-1, c);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
60
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
61 stq(unpkbw(d) | (unpkbw(signs) << 8), block);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
62 stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
63
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
64 s1 += stride;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
65 s2 += stride;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
66 block += 8;
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
67 } while (--h);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
68 }
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
69
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
/*
 * Average two 64-bit vectors: (a | b) - (((a ^ b) & mask) >> 1).
 *
 * NOTE(review): assuming BYTE_VEC(0xfe) replicates 0xfe into every byte (it
 * is defined outside this file), the mask keeps the shifted difference from
 * leaking into the neighbouring byte, and each byte of the result is the
 * average of the corresponding bytes of a and b, rounded up.
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    const uint64_t differing = a ^ b;
    const uint64_t half_diff = (differing & BYTE_VEC(0xfe)) >> 1;

    return (a | b) - half_diff;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
74
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
/*
 * Average four 64-bit vectors with a rounding term of 2.
 *
 * The operands are split so that no partial sum can overflow its lane:
 * everything except the low two bits is pre-shifted right by 2 and summed,
 * while the low two bits are summed separately together with the rounding
 * constant and shifted afterwards.
 *
 * NOTE(review): "lane" means a byte only if BYTE_VEC(x) replicates x into
 * every byte of a 64-bit word; BYTE_VEC is defined outside this file.
 */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    const uint64_t low_bits  = BYTE_VEC(0x03);
    const uint64_t high_bits = ~BYTE_VEC(0x03);
    const uint64_t rounding  = BYTE_VEC(0x02);

    const uint64_t high_sum = ((l1 & high_bits) >> 2)
                            + ((l2 & high_bits) >> 2)
                            + ((l3 & high_bits) >> 2)
                            + ((l4 & high_bits) >> 2);
    const uint64_t low_sum  = (l1 & low_bits)
                            + (l2 & low_bits)
                            + (l3 & low_bits)
                            + (l4 & low_bits)
                            + rounding;

    /* Mask off whatever the shift pulled in from the next lane. */
    return high_sum + ((low_sum >> 2) & low_bits);
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
88
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/*
 * Accumulate perr() over h rows of two 8-pixel-wide blocks.
 *
 * NOTE(review): perr() is presumably the MVI "pixel error" operation (sum of
 * absolute byte differences), which would make the result a SAD -- confirm
 * against asm.h.
 *
 * v          not referenced by this function
 * pix1       first block, always read with ldq()
 * pix2       second block; read with uldq() when its address is not a
 *            multiple of 8, with ldq() otherwise
 * line_size  byte distance between consecutive rows of both blocks
 * h          number of rows; the loops are do/while, so at least one row is
 *            always processed
 */
int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;

    if (((size_t) pix2 & 0x7) == 0) {
        do {
            const uint64_t row1 = ldq(pix1);
            const uint64_t row2 = ldq(pix2);

            sum += perr(row1, row2);

            pix1 += line_size;
            pix2 += line_size;
        } while (--h);

        return sum;
    }

    /* works only when pix2 is actually unaligned */
    do {                        /* 8 pixels per iteration */
        const uint64_t row1 = ldq(pix1);
        const uint64_t row2 = uldq(pix2);

        sum += perr(row1, row2);

        pix1 += line_size;
        pix2 += line_size;
    } while (--h);

    return sum;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
120
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
#if 0                           /* now done in assembly */
/*
 * Disabled C version: accumulate perr() over 16 rows of two 16-pixel-wide
 * blocks, handled as a left and a right 64-bit half per row.
 *
 * When pix2 is not 8-byte aligned, each half is assembled from two ldq_u()
 * loads combined with extql()/extqh().
 */
int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int sum = 0;
    int rows_left = 16;

    if (((size_t) pix2 & 0x7) == 0) {
        do {
            const uint64_t ref_l = ldq(pix1);
            const uint64_t ref_r = ldq(pix1 + 8);
            const uint64_t cmp_l = ldq(pix2);
            const uint64_t cmp_r = ldq(pix2 + 8);

            pix1 += line_size;
            pix2 += line_size;

            sum += perr(ref_l, cmp_l)
                 + perr(ref_r, cmp_r);
        } while (--rows_left);

        return sum;
    }

    /* works only when pix2 is actually unaligned */
    do {                        /* 16 pixels per iteration */
        const uint64_t ref_l = ldq(pix1);
        const uint64_t ref_r = ldq(pix1 + 8);
        const uint64_t mid   = ldq_u(pix2 + 8);
        const uint64_t cmp_l = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
        const uint64_t cmp_r = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);

        pix1 += line_size;
        pix2 += line_size;

        sum += perr(ref_l, cmp_l)
             + perr(ref_r, cmp_r);
    } while (--rows_left);

    return sum;
}
#endif
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
163
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/*
 * Accumulate perr() over h rows between pix1 and a horizontally averaged
 * version of pix2 (16 pixels per row, handled as two 64-bit halves).
 *
 * Each half of pix2 is combined through avg2() with the same data taken one
 * byte position further on, so 17 bytes of pix2 are needed per row.
 * NOTE(review): "one byte further on" assumes the little-endian byte order
 * implied by the `>> 8` / `<< 56` pair in the aligned case.
 *
 * v is not referenced. pix1 is always read with ldq(). line_size is the byte
 * distance between rows of both blocks. The loops are do/while, so at least
 * one row is always processed.
 */
int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const uint64_t disalign = (size_t) pix2 & 0x7;
    int sum = 0;

    if (disalign == 0) {
        /* pix2 is aligned: shift by hand and pull in the 17th byte. */
        do {
            const uint64_t ref_l = ldq(pix1);
            const uint64_t ref_r = ldq(pix1 + 8);
            const uint64_t lo    = ldq(pix2);
            const uint64_t hi    = ldq(pix2 + 8);
            const uint64_t avg_l = avg2(lo, (lo >> 8) | ((uint64_t) hi << 56));
            const uint64_t avg_r = avg2(hi, (hi >> 8) | ((uint64_t) pix2[16] << 56));

            pix1 += line_size;
            pix2 += line_size;

            sum += perr(ref_l, avg_l)
                 + perr(ref_r, avg_r);
        } while (--h);
    } else if (disalign == 7) {
        /* |.......l|lllllllr|rrrrrrr*|
           This case is special because disalign1 would be 8, which
           gets treated as 0 by extqh.  At least it is a bit faster
           that way :)  */
        do {
            const uint64_t ref_l = ldq(pix1);
            const uint64_t ref_r = ldq(pix1 + 8);
            const uint64_t q0    = ldq_u(pix2);
            const uint64_t q1    = ldq_u(pix2 + 8);
            const uint64_t q2    = ldq_u(pix2 + 16);
            const uint64_t avg_l = avg2(extql(q0, disalign) | extqh(q1, disalign), q1);
            const uint64_t avg_r = avg2(extql(q1, disalign) | extqh(q2, disalign), q2);

            pix1 += line_size;
            pix2 += line_size;

            sum += perr(ref_l, avg_l)
                 + perr(ref_r, avg_r);
        } while (--h);
    } else {
        /* Offsets 1..6: extract at disalign and at disalign + 1. */
        const uint64_t disalign1 = disalign + 1;

        do {
            const uint64_t ref_l = ldq(pix1);
            const uint64_t ref_r = ldq(pix1 + 8);
            const uint64_t q0    = ldq_u(pix2);
            const uint64_t q1    = ldq_u(pix2 + 8);
            const uint64_t q2    = ldq_u(pix2 + 16);
            const uint64_t avg_l = avg2(extql(q0, disalign)  | extqh(q1, disalign),
                                        extql(q0, disalign1) | extqh(q1, disalign1));
            const uint64_t avg_r = avg2(extql(q1, disalign)  | extqh(q2, disalign),
                                        extql(q1, disalign1) | extqh(q2, disalign1));

            pix1 += line_size;
            pix2 += line_size;

            sum += perr(ref_l, avg_l)
                 + perr(ref_r, avg_r);
        } while (--h);
    }

    return sum;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
236
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/*
 * Accumulate perr() over h rows between pix1 and a vertically averaged
 * version of pix2 (16 pixels per row, handled as two 64-bit halves).
 *
 * Every row of pix2 is combined through avg2() with the row line_size bytes
 * below it, so h + 1 rows of pix2 are read. The lower row of one iteration
 * is carried over as the upper row of the next.
 *
 * v is not referenced. pix1 is always read with ldq(). When pix2 is not
 * 8-byte aligned its rows are assembled from ldq_u() loads with
 * extql()/extqh(). The loops are do/while, so at least one row is always
 * processed.
 */
int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    uint64_t cur_l, cur_r;

    if (((size_t) pix2 & 0x7) == 0) {
        cur_l = ldq(pix2);
        cur_r = ldq(pix2 + 8);

        do {
            const uint64_t ref_l = ldq(pix1);
            const uint64_t ref_r = ldq(pix1 + 8);
            uint64_t next_l, next_r;

            pix2  += line_size;
            next_l = ldq(pix2);
            next_r = ldq(pix2 + 8);

            sum += perr(ref_l, avg2(cur_l, next_l))
                 + perr(ref_r, avg2(cur_r, next_r));

            pix1 += line_size;
            cur_l = next_l;
            cur_r = next_r;
        } while (--h);
    } else {
        uint64_t mid = ldq_u(pix2 + 8);

        cur_l = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
        cur_r = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);

        do {
            const uint64_t ref_l = ldq(pix1);
            const uint64_t ref_r = ldq(pix1 + 8);
            uint64_t next_l, next_r;

            pix2  += line_size;
            mid    = ldq_u(pix2 + 8);
            next_l = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
            next_r = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);

            sum += perr(ref_l, avg2(cur_l, next_l))
                 + perr(ref_r, avg2(cur_r, next_r));

            pix1 += line_size;
            cur_l = next_l;
            cur_r = next_r;
        } while (--h);
    }

    return sum;
}
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents:
diff changeset
289
1708
dea5b2946999 interlaced motion estimation
michael
parents: 705
diff changeset
/**
 * Accumulate perr() between each 16-byte row of pix1 and the avg4()
 * combination of four neighbouring 16-byte spans of pix2: the row itself,
 * the row shifted by one byte, the next row, and the next row shifted by
 * one byte.
 *
 * NOTE(review): perr() and avg4() are defined outside this block;
 * presumably perr() is the MVI byte-wise sum of absolute differences and
 * avg4() a per-byte rounded average of its four operands -- confirm.
 *
 * @param v         unused
 * @param pix1      first row of the block being compared; read with ldq(),
 *                  so presumably must be 8-byte aligned -- confirm
 * @param pix2      first row of the reference block; may have any alignment
 *                  (unaligned pointers take the uldq() path)
 * @param line_size distance in bytes between consecutive rows of both blocks
 * @param h         number of rows to compare; must be >= 1 because the loop
 *                  body runs before the counter is tested
 * @return the sum of the perr() results over all h rows
 *
 * Rows 0 .. h-1 of pix1 and rows 0 .. h of pix2 are read.
 */
int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;
    uint64_t p2_l, p2_r, p2_x;

    /* Fetch reference row 0.  p2_x carries byte 16 of the row in its top
       byte so that it can be merged into the one-byte-shifted right half. */
    if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
        p2_l = uldq(pix2);
        p2_r = uldq(pix2 + 8);
        p2_x = (uint64_t) pix2[16] << 56;
    } else {
        p2_l = ldq(pix2);
        p2_r = ldq(pix2 + 8);
        /* Loads a whole quadword but keeps only its low byte. */
        p2_x = ldq(pix2 + 16) << 56;
    }

    do {
        uint64_t p1_l, p1_r;
        uint64_t np2_l, np2_r, np2_x;

        /* Load the current pix1 row here instead of one iteration ahead:
           prefetching the next row made the final iteration read a row of
           pix1 that was never used, i.e. 16 bytes past the compared block. */
        p1_l = ldq(pix1);
        p1_r = ldq(pix1 + 8);

        pix1 += line_size;
        pix2 += line_size;

        /* Fetch the next reference row; it is paired with the previous one
           below and then becomes the "previous" row for the next pass. */
        if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
            np2_l = uldq(pix2);
            np2_r = uldq(pix2 + 8);
            np2_x = (uint64_t) pix2[16] << 56;
        } else {
            np2_l = ldq(pix2);
            np2_r = ldq(pix2 + 8);
            np2_x = ldq(pix2 + 16) << 56;
        }

        /* "x >> 8 | next << 56" drops the low byte of x and moves the low
           byte of the following quadword into the top byte, giving the same
           span shifted by one byte. */
        result += perr(p1_l,
                       avg4( p2_l, ( p2_l >> 8) | ( p2_r << 56),
                            np2_l, (np2_l >> 8) | (np2_r << 56)))
                + perr(p1_r,
                       avg4( p2_r, ( p2_r >> 8) |  p2_x,
                            np2_r, (np2_r >> 8) | np2_x));

        p2_l = np2_l;
        p2_r = np2_r;
        p2_x = np2_x;
    } while (--h);

    return result;
}