9857
|
1 /*
|
|
2 * motion_comp_alpha.c
|
12932
|
3 * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org>
|
9857
|
4 *
|
|
5 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
|
|
6 * See http://libmpeg2.sourceforge.net/ for updates.
|
|
7 *
|
|
8 * mpeg2dec is free software; you can redistribute it and/or modify
|
|
9 * it under the terms of the GNU General Public License as published by
|
|
10 * the Free Software Foundation; either version 2 of the License, or
|
|
11 * (at your option) any later version.
|
|
12 *
|
|
13 * mpeg2dec is distributed in the hope that it will be useful,
|
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16 * GNU General Public License for more details.
|
|
17 *
|
|
18 * You should have received a copy of the GNU General Public License
|
|
19 * along with this program; if not, write to the Free Software
|
|
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
21 */
|
|
22
|
|
23 #include "config.h"
|
|
24
|
28290
|
25 #if ARCH_ALPHA
|
9857
|
26
|
|
27 #include <inttypes.h>
|
|
28
|
|
29 #include "mpeg2.h"
|
12932
|
30 #include "attributes.h"
|
9857
|
31 #include "mpeg2_internal.h"
|
|
32 #include "alpha_asm.h"
|
|
33
|
12932
|
/*
 * Average two packed vectors of eight bytes, lane by lane, rounding up.
 *
 * Uses the identity  ceil((a + b) / 2) == (a | b) - ((a ^ b) >> 1).
 * Bit 0 of every byte of (a ^ b) is cleared before the shift so that
 * nothing is shifted from one byte lane into its neighbour.
 * (Assumes BYTE_VEC (x) replicates x into every byte of a quadword;
 * its definition lives in alpha_asm.h -- confirm there.)
 */
static inline uint64_t avg2 (uint64_t a, uint64_t b)
{
    uint64_t half_diff = ((a ^ b) & BYTE_VEC (0xfe)) >> 1;

    return (a | b) - half_diff;
}
|
|
38
|
|
// Fetch 16 bytes starting at an unaligned address into ret_l (first
// eight bytes) and ret_r (next eight bytes).  Three enclosing quadwords
// are read with ldq_u and the wanted bytes are stitched together with
// extql/extqh.  As the original author notes, this is only correct
// when addr really is unaligned; use ALOAD16 otherwise.
// addr is evaluated several times, so it must be free of side effects.
#define ULOAD16(ret_l,ret_r,addr) \
    do { \
        uint64_t uq_lo  = ldq_u ((addr) + 0); \
        uint64_t uq_mid = ldq_u ((addr) + 8); \
        uint64_t uq_hi  = ldq_u ((addr) + 16); \
        ret_l = extql (uq_lo, addr) | extqh (uq_mid, addr); \
        ret_r = extql (uq_mid, addr) | extqh (uq_hi, addr); \
    } while (0)
|
|
49
|
|
// Fetch 16 bytes from an aligned address: ret_l receives the quadword
// at addr, ret_r the quadword at addr + 8.
// addr is evaluated twice, so it must be free of side effects.
#define ALOAD16(ret_l,ret_r,addr) \
    do { \
        ret_l = ldq ((addr) + 0); \
        ret_r = ldq ((addr) + 8); \
    } while (0)
|
|
56
|
12932
|
// Full-pel, 8 pixels wide: for each of h rows, LOAD one quadword from
// pixels and hand it to STORE for block, then step both pointers by
// line_size.  LOAD16 is unused here; it is accepted so that all OP*
// macros share one parameter list.
// Expands inside a function providing pixels, block, line_size and h;
// the body runs once before h is tested, so h must be at least 1.
#define OP8(LOAD,LOAD16,STORE) \
    do { \
        uint64_t row = LOAD (pixels); \
        STORE (row, block); \
        block += line_size; \
        pixels += line_size; \
    } while (--h)
|
|
63
|
12932
|
// Full-pel, 16 pixels wide: for each of h rows, LOAD16 two quadwords
// from pixels and STORE them to block and block + 8, then step both
// pointers by line_size.  LOAD is unused here.
// Expands inside a function providing pixels, block, line_size and h;
// the body runs once before h is tested, so h must be at least 1.
#define OP16(LOAD,LOAD16,STORE) \
    do { \
        uint64_t row_lo, row_hi; \
        \
        LOAD16 (row_lo, row_hi, pixels); \
        STORE (row_lo, block); \
        STORE (row_hi, block + 8); \
        block += line_size; \
        pixels += line_size; \
    } while (--h)
|
|
73
|
12932
|
// Horizontal half-pel, 8 pixels wide.  For each of h rows the loaded
// quadword is averaged (avg2) with a copy of itself shifted down by one
// byte, whose vacated top byte is filled from pixels[8]; the result
// goes through STORE to block.  LOAD16 is unused here.
// Reads nine source bytes per row (pixels[0..8]).
#define OP8_X2(LOAD,LOAD16,STORE) \
    do { \
        uint64_t row = LOAD (pixels); \
        uint64_t row_x1 = (row >> 8) | ((uint64_t) pixels[8] << 56); \
        \
        STORE (avg2 (row, row_x1), block); \
        pixels += line_size; \
        block += line_size; \
    } while (--h)
|
|
84
|
12932
|
// Horizontal half-pel, 16 pixels wide.  Each row is loaded as two
// quadwords; each half is averaged (avg2) with the same data shifted
// down by one byte.  The top byte of the shifted first half comes from
// the second half, the top byte of the shifted second half comes from
// pixels[16].  LOAD is unused here.
// Reads seventeen source bytes per row (pixels[0..16]).
#define OP16_X2(LOAD,LOAD16,STORE) \
    do { \
        uint64_t lo, hi, lo_x1, hi_x1; \
        \
        LOAD16 (lo, hi, pixels); \
        lo_x1 = (lo >> 8) | (hi << 56); \
        STORE (avg2 (lo, lo_x1), block); \
        hi_x1 = (hi >> 8) | ((uint64_t) pixels[16] << 56); \
        STORE (avg2 (hi, hi_x1), block + 8); \
        pixels += line_size; \
        block += line_size; \
    } while (--h)
|
|
96
|
12932
|
// Vertical half-pel, 8 pixels wide.  Each output row is avg2 of two
// consecutive source rows; the lower row is carried over to become the
// upper row of the next iteration, so every source row is loaded once.
// The next row is always loaded before the current result is stored.
// When h reaches 0, line_size is zeroed first, so that final look-ahead
// load re-reads the current row instead of touching the row beyond the
// h + 1 that are needed.  LOAD16 is unused here.
// Note: line_size and h are modified by the expansion.
#define OP8_Y2(LOAD,LOAD16,STORE) \
    do { \
        uint64_t upper, lower; \
        \
        upper = LOAD (pixels); \
        pixels += line_size; \
        lower = LOAD (pixels); \
        do { \
            uint64_t mean = avg2 (upper, lower); \
            \
            h--; \
            if (h == 0) { \
                line_size = 0; \
            } \
            pixels += line_size; \
            upper = lower; \
            lower = LOAD (pixels); \
            STORE (mean, block); \
            block += line_size; \
        } while (h != 0); \
    } while (0)
|
|
113
|
12932
|
// Vertical half-pel, 16 pixels wide.  Same scheme as the 8-wide
// variant, applied to a left and a right quadword per row: each output
// half is avg2 of the corresponding halves of two consecutive source
// rows, and the lower row is reused as the next upper row.
// When h reaches 0, line_size is zeroed first, so the final look-ahead
// LOAD16 re-reads the current row rather than one row further down.
// LOAD is unused here.
// Note: line_size and h are modified by the expansion.
#define OP16_Y2(LOAD,LOAD16,STORE) \
    do { \
        uint64_t up_l, up_r, dn_l, dn_r; \
        \
        LOAD16 (up_l, up_r, pixels); \
        pixels += line_size; \
        LOAD16 (dn_l, dn_r, pixels); \
        do { \
            uint64_t mean_l, mean_r; \
            \
            h--; \
            if (h == 0) { \
                line_size = 0; \
            } \
            mean_l = avg2 (up_l, dn_l); \
            mean_r = avg2 (up_r, dn_r); \
            up_l = dn_l; \
            up_r = dn_r; \
            pixels += line_size; \
            LOAD16 (dn_l, dn_r, pixels); \
            STORE (mean_l, block); \
            STORE (mean_r, block + 8); \
            block += line_size; \
        } while (h != 0); \
    } while (0)
|
|
134
|
12932
|
// Diagonal (horizontal + vertical) half-pel, 8 pixels wide.
// Every output byte combines four source bytes: a pixel, its right
// neighbour, and the same two positions one row down.  To add four
// bytes without overflowing a lane, each byte is split in two parts:
//   top = bits 7..2, already shifted right by 2
//   low = bits 1..0
// Per row, top and low hold the sums over the pixel and its right
// neighbour.  The result for two rows is
//   top + next_top + (((low + next_low + 2) >> 2) & 3)
// evaluated on all lanes at once.  The per-row sums of the lower row
// are carried into the next iteration.
// (Assumes BYTE_VEC (x) replicates x into every byte lane -- defined
// in alpha_asm.h, confirm there.)
// Loads h + 1 source rows, nine bytes each.  LOAD16 is unused here.
#define OP8_XY2(LOAD,LOAD16,STORE) \
    do { \
        uint64_t low, top; \
        uint64_t row = LOAD (pixels); \
        uint64_t row_x1 = (row >> 8) | ((uint64_t) pixels[8] << 56); \
        \
        top = ((row & ~BYTE_VEC (0x03)) >> 2) \
            + ((row_x1 & ~BYTE_VEC (0x03)) >> 2); \
        low = (row & BYTE_VEC (0x03)) \
            + (row_x1 & BYTE_VEC (0x03)); \
        \
        do { \
            uint64_t next_low, next_top, frac; \
            \
            pixels += line_size; \
            row = LOAD (pixels); \
            row_x1 = (row >> 8) | ((uint64_t) pixels[8] << 56); \
            next_top = ((row & ~BYTE_VEC (0x03)) >> 2) \
                     + ((row_x1 & ~BYTE_VEC (0x03)) >> 2); \
            next_low = (row & BYTE_VEC (0x03)) \
                     + (row_x1 & BYTE_VEC (0x03)); \
            \
            /* rounded quarter of the four 2-bit remainders */ \
            frac = ((low + next_low + BYTE_VEC (0x02)) >> 2) \
                 & BYTE_VEC (0x03); \
            STORE (top + next_top + frac, block); \
            \
            block += line_size; \
            low = next_low; \
            top = next_top; \
        } while (--h); \
    } while (0)
|
|
166
|
12932
|
// Diagonal (horizontal + vertical) half-pel, 16 pixels wide.
// Works like the 8-wide variant on a left and a right quadword per
// row.  For each half, every byte is split into
//   top = bits 7..2, already shifted right by 2
//   low = bits 1..0
// and summed with its right-hand neighbour.  The neighbour vector of
// the left half takes its top byte from the right half; that of the
// right half takes it from pixels[16].  Two rows are combined as
//   top + next_top + (((low + next_low + 2) >> 2) & 3)
// on all lanes at once, and the lower row's sums are carried into the
// next iteration.
// (Assumes BYTE_VEC (x) replicates x into every byte lane -- defined
// in alpha_asm.h, confirm there.)
// Loads h + 1 source rows, seventeen bytes each.  LOAD is unused here.
#define OP16_XY2(LOAD,LOAD16,STORE) \
    do { \
        uint64_t lo, lo_x1, hi, hi_x1; \
        uint64_t low_l, top_l, low_r, top_r; \
        \
        LOAD16 (lo, hi, pixels); \
        lo_x1 = (lo >> 8) | (hi << 56); \
        hi_x1 = (hi >> 8) | ((uint64_t) pixels[16] << 56); \
        \
        top_l = ((lo & ~BYTE_VEC (0x03)) >> 2) \
              + ((lo_x1 & ~BYTE_VEC (0x03)) >> 2); \
        low_l = (lo & BYTE_VEC (0x03)) \
              + (lo_x1 & BYTE_VEC (0x03)); \
        top_r = ((hi & ~BYTE_VEC (0x03)) >> 2) \
              + ((hi_x1 & ~BYTE_VEC (0x03)) >> 2); \
        low_r = (hi & BYTE_VEC (0x03)) \
              + (hi_x1 & BYTE_VEC (0x03)); \
        \
        do { \
            uint64_t next_low_l, next_top_l, next_low_r, next_top_r; \
            uint64_t frac_l, frac_r; \
            \
            pixels += line_size; \
            LOAD16 (lo, hi, pixels); \
            lo_x1 = (lo >> 8) | (hi << 56); \
            hi_x1 = (hi >> 8) | ((uint64_t) pixels[16] << 56); \
            next_top_l = ((lo & ~BYTE_VEC (0x03)) >> 2) \
                       + ((lo_x1 & ~BYTE_VEC (0x03)) >> 2); \
            next_low_l = (lo & BYTE_VEC (0x03)) \
                       + (lo_x1 & BYTE_VEC (0x03)); \
            next_top_r = ((hi & ~BYTE_VEC (0x03)) >> 2) \
                       + ((hi_x1 & ~BYTE_VEC (0x03)) >> 2); \
            next_low_r = (hi & BYTE_VEC (0x03)) \
                       + (hi_x1 & BYTE_VEC (0x03)); \
            \
            /* rounded quarter of the four 2-bit remainders */ \
            frac_l = ((low_l + next_low_l + BYTE_VEC (0x02)) >> 2) \
                   & BYTE_VEC (0x03); \
            STORE (top_l + next_top_l + frac_l, block); \
            frac_r = ((low_r + next_low_r + BYTE_VEC (0x02)) >> 2) \
                   & BYTE_VEC (0x03); \
            STORE (top_r + next_top_r + frac_r, block + 8); \
            \
            block += line_size; \
            low_l = next_low_l; \
            top_l = next_top_l; \
            low_r = next_low_r; \
            top_r = next_top_r; \
        } while (--h); \
    } while (0)
|
|
213
|
12932
|
// Define the static function MC_<OPNAME>_<SUFF>_<SIZE>_alpha.
// The body is OPKIND expanded twice: with the aligned loaders
// (ldq / ALOAD16) when the low three bits of pixels are zero, and with
// the unaligned loaders (uldq / ULOAD16) otherwise.  STORE is passed
// through to OPKIND unchanged.
#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE) \
static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha \
    (uint8_t *restrict block, const uint8_t *restrict pixels, \
     int line_size, int h) \
{ \
    if (((uint64_t) pixels & 0x7) == 0) { \
        OPKIND (ldq, ALOAD16, STORE); \
    } else { \
        OPKIND (uldq, ULOAD16, STORE); \
    } \
}
|
|
225
|
12932
|
226 #define PIXOP(OPNAME,STORE) \
|
|
227 MAKE_OP (OPNAME, 8, o, OP8, STORE); \
|
|
228 MAKE_OP (OPNAME, 8, x, OP8_X2, STORE); \
|
|
229 MAKE_OP (OPNAME, 8, y, OP8_Y2, STORE); \
|
|
230 MAKE_OP (OPNAME, 8, xy, OP8_XY2, STORE); \
|
|
231 MAKE_OP (OPNAME, 16, o, OP16, STORE); \
|
|
232 MAKE_OP (OPNAME, 16, x, OP16_X2, STORE); \
|
|
233 MAKE_OP (OPNAME, 16, y, OP16_Y2, STORE); \
|
|
234 MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE);
|
9857
|
235
|
12932
|
236 #define STORE(l,b) stq (l, b)
|
|
237 PIXOP (put, STORE);
|
9857
|
238 #undef STORE
|
12932
|
239 #define STORE(l,b) stq (avg2 (l, ldq (b)), b);
|
|
240 PIXOP (avg, STORE);
|
9857
|
241
|
|
242 mpeg2_mc_t mpeg2_mc_alpha = {
|
|
243 { MC_put_o_16_alpha, MC_put_x_16_alpha,
|
|
244 MC_put_y_16_alpha, MC_put_xy_16_alpha,
|
|
245 MC_put_o_8_alpha, MC_put_x_8_alpha,
|
|
246 MC_put_y_8_alpha, MC_put_xy_8_alpha },
|
|
247 { MC_avg_o_16_alpha, MC_avg_x_16_alpha,
|
|
248 MC_avg_y_16_alpha, MC_avg_xy_16_alpha,
|
|
249 MC_avg_o_8_alpha, MC_avg_x_8_alpha,
|
|
250 MC_avg_y_8_alpha, MC_avg_xy_8_alpha }
|
|
251 };
|
|
252
|
|
253 #endif
|