Mercurial > mplayer.hg
comparison libmpeg2/motion_comp_alpha.c @ 9857:89b48bc6c441
Importing libmpeg2 from mpeg2dec-0.3.1
author | arpi |
---|---|
date | Sun, 06 Apr 2003 16:41:49 +0000 |
parents | |
children | d0a8810e155c |
comparison
equal
deleted
inserted
replaced
9856:08496327b7ec | 9857:89b48bc6c441 |
---|---|
1 /* | |
2 * motion_comp_alpha.c | |
3 * Copyright (C) 2002 Falk Hueffner <falk@debian.org> | |
4 * | |
5 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
6 * See http://libmpeg2.sourceforge.net/ for updates. | |
7 * | |
8 * mpeg2dec is free software; you can redistribute it and/or modify | |
9 * it under the terms of the GNU General Public License as published by | |
10 * the Free Software Foundation; either version 2 of the License, or | |
11 * (at your option) any later version. | |
12 * | |
13 * mpeg2dec is distributed in the hope that it will be useful, | |
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 * GNU General Public License for more details. | |
17 * | |
18 * You should have received a copy of the GNU General Public License | |
19 * along with this program; if not, write to the Free Software | |
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
21 */ | |
22 | |
23 #include "config.h" | |
24 | |
25 #ifdef ARCH_ALPHA | |
26 | |
27 #include <inttypes.h> | |
28 | |
29 #include "mpeg2.h" | |
30 #include "mpeg2_internal.h" | |
31 #include "alpha_asm.h" | |
32 | |
/*
 * Byte-wise average of the eight bytes packed in a and b, rounding
 * halves up.
 *
 * Relies on (x + y + 1) / 2 == (x | y) - ((x ^ y) >> 1).  The XOR is
 * masked with BYTE_VEC(0xfe) before shifting (BYTE_VEC comes from
 * alpha_asm.h; presumably it replicates the byte across the quadword)
 * so that no bit is shifted into the neighbouring byte.
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    const uint64_t either = a | b;
    const uint64_t differ = (a ^ b) & BYTE_VEC(0xfe);

    return either - (differ >> 1);
}
37 | |
// Load two unaligned quadwords (16 bytes) starting at addr into ret_l
// and ret_r.  Three ldq_u reads at addr, addr + 8 and addr + 16 are
// combined pairwise with extql/extqh.  This macro only works if addr is
// actually unaligned.
//
// addr is expanded several times, so it must be free of side effects.
// Parameters are parenthesized and the temporaries carry a uload16_
// prefix so that arbitrary caller expressions and names are safe.
#define ULOAD16(ret_l, ret_r, addr) \
    do { \
        uint64_t uload16_q0 = ldq_u((addr) + 0); \
        uint64_t uload16_q1 = ldq_u((addr) + 8); \
        uint64_t uload16_q2 = ldq_u((addr) + 16); \
        (ret_l) = extql(uload16_q0, addr) | extqh(uload16_q1, addr); \
        (ret_r) = extql(uload16_q1, addr) | extqh(uload16_q2, addr); \
    } while (0)
48 | |
// Load two aligned quadwords (16 bytes) from addr into ret_l and ret_r
// using ldq at addr and addr + 8.
//
// addr is expanded twice, so it must be free of side effects.  The
// parameters are parenthesized so arbitrary expressions may be passed.
#define ALOAD16(ret_l, ret_r, addr) \
    do { \
        (ret_l) = ldq(addr); \
        (ret_r) = ldq((addr) + 8); \
    } while (0)
55 | |
// Straight transfer of an 8-byte-wide block: each pass LOADs one
// quadword from pixels, STOREs it to block, and advances both pointers
// by line_size.
//
// Expands to a bare do/while loop that uses pixels, block, line_size and
// h from the enclosing scope.  The body runs once before --h is tested,
// so h is expected to be at least 1.  LOAD16 is not used here; it is
// accepted so that every OP* macro takes the same three arguments.
#define OP8(LOAD, LOAD16, STORE) \
    do { \
        const uint64_t row8 = LOAD(pixels); \
        STORE(row8, block); \
        block += line_size; \
        pixels += line_size; \
    } while (--h)
62 | |
// Straight transfer of a 16-byte-wide block: each pass fetches two
// quadwords with LOAD16 and STOREs them at block and block + 8, then
// advances both pointers by line_size.
//
// Expands to a bare do/while loop that uses pixels, block, line_size and
// h from the enclosing scope.  The body runs once before --h is tested,
// so h is expected to be at least 1.  LOAD is not used here.
#define OP16(LOAD, LOAD16, STORE) \
    do { \
        uint64_t left8, right8; \
        LOAD16(left8, right8, pixels); \
        STORE(left8, block); \
        STORE(right8, block + 8); \
        block += line_size; \
        pixels += line_size; \
    } while (--h)
72 | |
// Horizontal half-sample filter for an 8-byte-wide block.
//
// Each pass LOADs one quadword, builds a second quadword that is the
// first shifted right by one byte with pixels[8] placed in the top byte,
// and STOREs avg2() of the two.  Both pointers then advance by
// line_size.
//
// Expands to a bare do/while loop over pixels, block, line_size and h
// from the enclosing scope; h is expected to be at least 1.  LOAD16 is
// not used here.
#define OP8_X2(LOAD, LOAD16, STORE) \
    do { \
        const uint64_t cur8 = LOAD(pixels); \
        const uint64_t ninth = pixels[8]; \
        const uint64_t nxt8 = (cur8 >> 8) | (ninth << 56); \
        STORE(avg2(cur8, nxt8), block); \
        block += line_size; \
        pixels += line_size; \
    } while (--h)
83 | |
// Horizontal half-sample filter for a 16-byte-wide block.
//
// Each pass fetches two quadwords with LOAD16.  The left quadword is
// averaged with itself shifted right by one byte, the vacated top byte
// being filled from the low byte of the right quadword.  The right
// quadword is treated the same way with pixels[16] as the fill byte.
// Results are STOREd at block and block + 8.
//
// Expands to a bare do/while loop over pixels, block, line_size and h
// from the enclosing scope; h is expected to be at least 1.  LOAD is not
// used here.
#define OP16_X2(LOAD, LOAD16, STORE) \
    do { \
        uint64_t left8, right8, left_nxt, right_nxt; \
        \
        LOAD16(left8, right8, pixels); \
        left_nxt = (left8 >> 8) | (right8 << 56); \
        right_nxt = (right8 >> 8) | ((uint64_t) pixels[16] << 56); \
        STORE(avg2(left8, left_nxt), block); \
        STORE(avg2(right8, right_nxt), block + 8); \
        block += line_size; \
        pixels += line_size; \
    } while (--h)
95 | |
// Vertical half-sample filter for an 8-byte-wide block.
//
// Two consecutive source rows are kept in "upper" and "lower"; each pass
// STOREs avg2() of them and then slides down by one row.  The next row
// is loaded one pass ahead of the store that needs it.  When h reaches 0
// line_size is set to 0, so on the final pass pixels stops advancing and
// the look-ahead LOAD re-reads the current row instead of a row further
// down.
//
// Uses and modifies pixels, block, line_size and h from the enclosing
// scope (line_size is 0 afterwards).  h is expected to be at least 1.
// LOAD16 is not used here.
#define OP8_Y2(LOAD, LOAD16, STORE) \
    do { \
        uint64_t upper, lower; \
        \
        upper = LOAD(pixels); \
        pixels += line_size; \
        lower = LOAD(pixels); \
        do { \
            const uint64_t blended = avg2(upper, lower); \
            \
            upper = lower; \
            if (--h == 0) { \
                line_size = 0; \
            } \
            pixels += line_size; \
            lower = LOAD(pixels); \
            STORE(blended, block); \
            block += line_size; \
        } while (h); \
    } while (0)
112 | |
// Vertical half-sample filter for a 16-byte-wide block.
//
// Two consecutive source rows (left and right quadword each) are kept in
// upper_* and lower_*; each pass STOREs avg2() of the pair at block and
// block + 8 and then slides down by one row.  The next row is fetched
// with LOAD16 one pass ahead of the store that needs it.  When h reaches
// 0 line_size is set to 0, so on the final pass pixels stops advancing
// and the look-ahead load re-reads the current row instead of a row
// further down.
//
// Uses and modifies pixels, block, line_size and h from the enclosing
// scope (line_size is 0 afterwards).  h is expected to be at least 1.
// LOAD is not used here.
#define OP16_Y2(LOAD, LOAD16, STORE) \
    do { \
        uint64_t upper_l, upper_r, lower_l, lower_r; \
        \
        LOAD16(upper_l, upper_r, pixels); \
        pixels += line_size; \
        LOAD16(lower_l, lower_r, pixels); \
        do { \
            const uint64_t blend_l = avg2(upper_l, lower_l); \
            const uint64_t blend_r = avg2(upper_r, lower_r); \
            \
            upper_l = lower_l; \
            upper_r = lower_r; \
            if (--h == 0) { \
                line_size = 0; \
            } \
            pixels += line_size; \
            LOAD16(lower_l, lower_r, pixels); \
            STORE(blend_l, block); \
            STORE(blend_r, block + 8); \
            block += line_size; \
        } while (h); \
    } while (0)
133 | |
// Combined horizontal + vertical half-sample filter for an 8-byte-wide
// block: every output byte is built from a 2x2 neighbourhood of source
// bytes plus a rounding term of 2, divided by 4.
//
// To keep the arithmetic inside each byte, every source byte is split
// into its upper six bits (pre-shifted right by 2) and its lower two
// bits.  For one row, sum_hi / sum_lo hold the per-byte sum of a byte
// and its right-hand neighbour in those two domains.  Each pass computes
// the same pair for the next row, STOREs
//     sum_hi + row_hi + (((sum_lo + row_lo + 2) >> 2) & 3)
// per byte, and then carries the new row's sums over to the next pass.
// The final "& 3" discards bits shifted in from the adjacent byte.
//
// Uses and modifies pixels, block and h from the enclosing scope and
// reads line_size; h is expected to be at least 1.  LOAD16 is not used.
#define OP8_XY2(LOAD, LOAD16, STORE) \
    do { \
        const uint64_t low2 = BYTE_VEC(0x03); \
        const uint64_t high6 = ~BYTE_VEC(0x03); \
        const uint64_t round2 = BYTE_VEC(0x02); \
        uint64_t cur8, nxt8, sum_hi, sum_lo; \
        \
        cur8 = LOAD(pixels); \
        nxt8 = (cur8 >> 8) | ((uint64_t) pixels[8] << 56); \
        sum_hi = ((cur8 & high6) >> 2) + ((nxt8 & high6) >> 2); \
        sum_lo = (cur8 & low2) + (nxt8 & low2); \
        \
        do { \
            uint64_t row_hi, row_lo; \
            \
            pixels += line_size; \
            cur8 = LOAD(pixels); \
            nxt8 = (cur8 >> 8) | ((uint64_t) pixels[8] << 56); \
            row_hi = ((cur8 & high6) >> 2) + ((nxt8 & high6) >> 2); \
            row_lo = (cur8 & low2) + (nxt8 & low2); \
            \
            STORE(sum_hi + row_hi \
                  + (((sum_lo + row_lo + round2) >> 2) & low2), block); \
            \
            block += line_size; \
            sum_hi = row_hi; \
            sum_lo = row_lo; \
        } while (--h); \
    } while (0)
165 | |
// Combined horizontal + vertical half-sample filter for a 16-byte-wide
// block: every output byte is built from a 2x2 neighbourhood of source
// bytes plus a rounding term of 2, divided by 4.
//
// Each row is fetched as a left and a right quadword with LOAD16.  The
// "shifted by one byte" companion of the left quadword takes its top
// byte from the right quadword; the companion of the right quadword
// takes its top byte from pixels[16].  As in the 8-wide variant, bytes
// are split into upper six bits (pre-shifted right by 2) and lower two
// bits so that sums stay inside a byte; hi_* / lo_* carry the previous
// row's sums and row_hi_* / row_lo_* hold the freshly loaded row's.
// The final "& 3" discards bits shifted in from the adjacent byte.
//
// Uses and modifies pixels, block and h from the enclosing scope and
// reads line_size; h is expected to be at least 1.  LOAD is not used.
#define OP16_XY2(LOAD, LOAD16, STORE) \
    do { \
        const uint64_t low2 = BYTE_VEC(0x03); \
        const uint64_t high6 = ~BYTE_VEC(0x03); \
        const uint64_t round2 = BYTE_VEC(0x02); \
        uint64_t left8, left_nxt, right8, right_nxt; \
        uint64_t hi_l, lo_l, hi_r, lo_r; \
        \
        LOAD16(left8, right8, pixels); \
        left_nxt = (left8 >> 8) | (right8 << 56); \
        right_nxt = (right8 >> 8) | ((uint64_t) pixels[16] << 56); \
        \
        hi_l = ((left8 & high6) >> 2) + ((left_nxt & high6) >> 2); \
        lo_l = (left8 & low2) + (left_nxt & low2); \
        hi_r = ((right8 & high6) >> 2) + ((right_nxt & high6) >> 2); \
        lo_r = (right8 & low2) + (right_nxt & low2); \
        \
        do { \
            uint64_t row_hi_l, row_lo_l, row_hi_r, row_lo_r; \
            \
            pixels += line_size; \
            LOAD16(left8, right8, pixels); \
            left_nxt = (left8 >> 8) | (right8 << 56); \
            right_nxt = (right8 >> 8) | ((uint64_t) pixels[16] << 56); \
            \
            row_hi_l = ((left8 & high6) >> 2) \
                     + ((left_nxt & high6) >> 2); \
            row_lo_l = (left8 & low2) + (left_nxt & low2); \
            row_hi_r = ((right8 & high6) >> 2) \
                     + ((right_nxt & high6) >> 2); \
            row_lo_r = (right8 & low2) + (right_nxt & low2); \
            \
            STORE(hi_l + row_hi_l \
                  + (((lo_l + row_lo_l + round2) >> 2) & low2), block); \
            STORE(hi_r + row_hi_r \
                  + (((lo_r + row_lo_r + round2) >> 2) & low2), \
                  block + 8); \
            \
            block += line_size; \
            hi_l = row_hi_l; \
            lo_l = row_lo_l; \
            hi_r = row_hi_r; \
            lo_r = row_lo_r; \
        } while (--h); \
    } while (0)
212 | |
// Define one static routine named MC_<OPNAME>_<SUFF>_<SIZE>_alpha with
// the signature (block, pixels, line_size, h).
//
// OPKIND is one of the OP* loop macros and STORE the store macro handed
// on to it.  The routine checks the low three bits of the pixels
// address: if any is set, OPKIND is expanded with the unaligned loaders
// (uldq / ULOAD16), otherwise with the aligned ones (ldq / ALOAD16).
// Only the alignment of pixels is examined, not that of block.
//
// The pointer is converted through uintptr_t, the integer type intended
// for holding object addresses, rather than through uint64_t.
#define MAKE_OP(OPNAME, SIZE, SUFF, OPKIND, STORE) \
static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha \
        (uint8_t *restrict block, const uint8_t *restrict pixels, \
         int line_size, int h) \
{ \
    if ((uintptr_t) pixels & 0x7) { \
        OPKIND(uldq, ULOAD16, STORE); \
    } else { \
        OPKIND(ldq, ALOAD16, STORE); \
    } \
}
224 | |
225 #define PIXOP(OPNAME, STORE) \ | |
226 MAKE_OP(OPNAME, 8, o, OP8, STORE); \ | |
227 MAKE_OP(OPNAME, 8, x, OP8_X2, STORE); \ | |
228 MAKE_OP(OPNAME, 8, y, OP8_Y2, STORE); \ | |
229 MAKE_OP(OPNAME, 8, xy, OP8_XY2, STORE); \ | |
230 MAKE_OP(OPNAME, 16, o, OP16, STORE); \ | |
231 MAKE_OP(OPNAME, 16, x, OP16_X2, STORE); \ | |
232 MAKE_OP(OPNAME, 16, y, OP16_Y2, STORE); \ | |
233 MAKE_OP(OPNAME, 16, xy, OP16_XY2, STORE); | |
234 | |
235 #define STORE(l, b) stq(l, b) | |
236 PIXOP(put, STORE); | |
237 | |
238 #undef STORE | |
239 #define STORE(l, b) stq(avg2(l, ldq(b)), b); | |
240 PIXOP(avg, STORE); | |
241 | |
242 mpeg2_mc_t mpeg2_mc_alpha = { | |
243 { MC_put_o_16_alpha, MC_put_x_16_alpha, | |
244 MC_put_y_16_alpha, MC_put_xy_16_alpha, | |
245 MC_put_o_8_alpha, MC_put_x_8_alpha, | |
246 MC_put_y_8_alpha, MC_put_xy_8_alpha }, | |
247 { MC_avg_o_16_alpha, MC_avg_x_16_alpha, | |
248 MC_avg_y_16_alpha, MC_avg_xy_16_alpha, | |
249 MC_avg_o_8_alpha, MC_avg_x_8_alpha, | |
250 MC_avg_y_8_alpha, MC_avg_xy_8_alpha } | |
251 }; | |
252 | |
253 #endif |