comparison sh4/dsputil_align.c @ 1262:82e0e1b9c283 libavcodec

aligned dsputil (for sh4) patch by BERO <bero at geocities dot co dot jp>
author michaelni
date Wed, 14 May 2003 17:46:55 +0000
parents
children 2fa34e615c76
comparison
equal deleted inserted replaced
1261:362947395f5c 1262:82e0e1b9c283
1 /*
2 * aligned/packed access motion
3 *
4 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21
#include <stdint.h>

#include "../avcodec.h"
#include "../dsputil.h"
24
25
26 #define LP(p) *(uint32_t*)(p)
27
28
29 #define BYTE_VEC(c) ((c)*0x01010101UL)
30
31 #define UNPACK(ph,pl,tt0,tt1) do { \
32 uint32_t t0,t1; t0=tt0;t1=tt1; \
33 ph = ( (t0 & ~BYTE_VEC(0x03))>>2) + ( (t1 & ~BYTE_VEC(0x03))>>2); \
34 pl = (t0 & BYTE_VEC(0x03)) + (t1 & BYTE_VEC(0x03)); } while(0)
35
36 #define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC(0x02))>>2) & BYTE_VEC(0x03))
37 #define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC(0x01))>>2) & BYTE_VEC(0x03))
38
39 /* little endian */
40 #define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) )
41 #define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs+1))) )
42 /* big
43 #define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) )
44 #define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) )
45 */
46
47
/* Store operations plugged in as OP by the loop macros below:
   "put" overwrites dest, "avg" blends the new value into dest with a
   byte-wise rounded average (rnd_avg2 is defined just below; macros are
   expanded at the use site, so the later definition is fine). */
#define put(d,s) d = s
#define avg(d,s) d = rnd_avg2(s,d)
50
/* Byte-wise rounded average: (a + b + 1) >> 1 for each of the four packed
   bytes, with no carry leaking between bytes. */
static inline uint32_t rnd_avg2(uint32_t a, uint32_t b)
{
    /* 0xFEFEFEFE == ~BYTE_VEC(0x01): drop each byte's LSB before halving
       the disagreeing bits so the result rounds up. */
    uint32_t half_diff = ((a ^ b) & 0xFEFEFEFEUL) >> 1;
    return (a | b) - half_diff;
}
55
/* Byte-wise truncating average: (a + b) >> 1 for each of the four packed
   bytes, with no carry leaking between bytes. */
static inline uint32_t no_rnd_avg2(uint32_t a, uint32_t b)
{
    /* 0xFEFEFEFE == ~BYTE_VEC(0x01): common bits plus half the (LSB-
       stripped) differing bits rounds the result down. */
    uint32_t half_diff = ((a ^ b) & 0xFEFEFEFEUL) >> 1;
    return (a & b) + half_diff;
}
60
61
/* Copy/average one 4-byte-wide column when ref is misaligned by ofs bytes:
   back ref up to the previous word boundary, then MERGE1 re-extracts the
   4 bytes starting ofs into the aligned 8-byte window, one row per pass.
   Expects ref, dest, stride, height and the OP store macro in scope. */
#define OP_C4(ofs) \
    ref-=ofs; \
    do { \
        OP(LP(dest),MERGE1(LP(ref),LP(ref+4),ofs)); \
        ref+=stride; \
        dest+=stride; \
    } while(--height)

/* Fully aligned 4-byte-wide case (ofs == 0): one aligned load per row. */
#define OP_C40() \
    do { \
        OP(LP(dest),LP(ref)); \
        ref+=stride; \
        dest+=stride; \
    } while(--height)
76
77
#define OP put

/* Copy a 4-pixel-wide block of `height` rows from ref to dest.
   Dispatches on ref's low two address bits so the inner loop can use
   aligned 32-bit loads.  NOTE(review): presumably called from the
   included qpel.c; no caller is visible in this file. */
static void put_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
{
    /* (int)ref would be an implementation-defined, truncating cast on
       64-bit targets; uintptr_t makes the low-bit test well defined. */
    switch ((uintptr_t)ref & 3) {
    case 0: OP_C40(); return;
    case 1: OP_C4(1); return;
    case 2: OP_C4(2); return;
    case 3: OP_C4(3); return;
    }
}
89
#undef OP
#define OP avg

/* Average a 4-pixel-wide block of `height` rows into dest (dest =
   rounded average of dest and ref), dispatching on ref's alignment.
   NOTE(review): presumably called from the included qpel.c; no caller
   is visible in this file. */
static void avg_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
{
    /* uintptr_t instead of (int): pointer-to-int is implementation-
       defined and truncates on LP64; only the low 2 bits are needed. */
    switch ((uintptr_t)ref & 3) {
    case 0: OP_C40(); return;
    case 1: OP_C4(1); return;
    case 2: OP_C4(2); return;
    case 3: OP_C4(3); return;
    }
}
102
#undef OP

/* Straight copy/average of an 8- or 16-byte-wide row block, ref
   misaligned by ofs bytes: consecutive aligned loads are MERGE1'd to
   re-extract the misaligned bytes, reusing each load for two outputs.
   NOTE(review): the avg2 parameter is unused here; it is kept only so
   all OP_* macros share the same DEFFUNC invocation shape. */
#define OP_C(ofs,sz,avg2) \
{ \
    ref-=ofs; \
    do { \
        uint32_t t0,t1; \
        t0 = LP(ref+0); \
        t1 = LP(ref+4); \
        OP(LP(dest+0), MERGE1(t0,t1,ofs)); \
        t0 = LP(ref+8); \
        OP(LP(dest+4), MERGE1(t1,t0,ofs)); \
        if (sz==16) { \
            t1 = LP(ref+12); \
            OP(LP(dest+8), MERGE1(t0,t1,ofs)); \
            t0 = LP(ref+16); \
            OP(LP(dest+12), MERGE1(t1,t0,ofs)); \
        } \
        ref+=stride; \
        dest+= stride; \
    } while(--height); \
}
125
/* aligned */
/* Straight copy/average, ref already 4-byte aligned: plain word loads,
   no merging.  avg2 is unused (kept for DEFFUNC shape). */
#define OP_C0(sz,avg2) \
{ \
    do { \
        OP(LP(dest+0), LP(ref+0)); \
        OP(LP(dest+4), LP(ref+4)); \
        if (sz==16) { \
            OP(LP(dest+8), LP(ref+8)); \
            OP(LP(dest+12), LP(ref+12)); \
        } \
        ref+=stride; \
        dest+= stride; \
    } while(--height); \
}
140
/* Horizontal half-pel interpolation, ref misaligned by ofs bytes:
   MERGE1 yields the word at x, MERGE2 the word at x+1, and avg2
   (rnd_avg2 or no_rnd_avg2) averages them byte-wise per output word. */
#define OP_X(ofs,sz,avg2) \
{ \
    ref-=ofs; \
    do { \
        uint32_t t0,t1; \
        t0 = LP(ref+0); \
        t1 = LP(ref+4); \
        OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
        t0 = LP(ref+8); \
        OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
        if (sz==16) { \
            t1 = LP(ref+12); \
            OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
            t0 = LP(ref+16); \
            OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
        } \
        ref+=stride; \
        dest+= stride; \
    } while(--height); \
}
161
/* aligned */
/* Vertical half-pel interpolation, aligned case: t0..t3 hold the
   previous row's words; each pass loads the next row, stores
   avg2(prev, cur), and rolls the registers forward, so every source
   row is loaded exactly once. */
#define OP_Y0(sz,avg2) \
{ \
    uint32_t t0,t1,t2,t3,t; \
\
    t0 = LP(ref+0); \
    t1 = LP(ref+4); \
    if (sz==16) { \
        t2 = LP(ref+8); \
        t3 = LP(ref+12); \
    } \
    do { \
        ref += stride; \
\
        t = LP(ref+0); \
        OP(LP(dest+0), avg2(t0,t)); t0 = t; \
        t = LP(ref+4); \
        OP(LP(dest+4), avg2(t1,t)); t1 = t; \
        if (sz==16) { \
            t = LP(ref+8); \
            OP(LP(dest+8), avg2(t2,t)); t2 = t; \
            t = LP(ref+12); \
            OP(LP(dest+12), avg2(t3,t)); t3 = t; \
        } \
        dest+= stride; \
    } while(--height); \
}
189
/* Vertical half-pel interpolation, ref misaligned by ofs bytes: like
   OP_Y0 but each row's words are first re-extracted from aligned loads
   with MERGE1.  t0..t3 carry the previous (merged) row; w0/w1 shuttle
   the raw aligned loads. */
#define OP_Y(ofs,sz,avg2) \
{ \
    uint32_t t0,t1,t2,t3,t,w0,w1; \
\
    ref-=ofs; \
    w0 = LP(ref+0); \
    w1 = LP(ref+4); \
    t0 = MERGE1(w0,w1,ofs); \
    w0 = LP(ref+8); \
    t1 = MERGE1(w1,w0,ofs); \
    if (sz==16) { \
        w1 = LP(ref+12); \
        t2 = MERGE1(w0,w1,ofs); \
        w0 = LP(ref+16); \
        t3 = MERGE1(w1,w0,ofs); \
    } \
    do { \
        ref += stride; \
\
        w0 = LP(ref+0); \
        w1 = LP(ref+4); \
        t = MERGE1(w0,w1,ofs); \
        OP(LP(dest+0), avg2(t0,t)); t0 = t; \
        w0 = LP(ref+8); \
        t = MERGE1(w1,w0,ofs); \
        OP(LP(dest+4), avg2(t1,t)); t1 = t; \
        if (sz==16) { \
            w1 = LP(ref+12); \
            t = MERGE1(w0,w1,ofs); \
            OP(LP(dest+8), avg2(t2,t)); t2 = t; \
            w0 = LP(ref+16); \
            t = MERGE1(w1,w0,ofs); \
            OP(LP(dest+12), avg2(t3,t)); t3 = t; \
        } \
        dest+=stride; \
    } while(--height); \
}
227
/* Aligned variants reuse the general macros with ofs == 0 (the merges
   then collapse to the 0/“x+1” byte extractions). */
#define OP_X0(sz,avg2) OP_X(0,sz,avg2)
#define OP_XY0(sz,PACK) OP_XY(0,sz,PACK)

/* 2D half-pel interpolation: per byte, the 4-pixel average of
   (x,y), (x+1,y), (x,y+1), (x+1,y+1).  Each row is pre-split with
   UNPACK into carry-safe high/low parts — a0..a7 hold the previous
   row's parts, t2/t3 the current row's — and PACK (rnd_PACK or
   no_rnd_PACK) recombines them, so each row is unpacked only once. */
#define OP_XY(ofs,sz,PACK) \
{ \
    uint32_t t2,t3,w0,w1; \
    uint32_t a0,a1,a2,a3,a4,a5,a6,a7; \
\
    ref -= ofs; \
    w0 = LP(ref+0); \
    w1 = LP(ref+4); \
    UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
    w0 = LP(ref+8); \
    UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
    if (sz==16) { \
        w1 = LP(ref+12); \
        UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
        w0 = LP(ref+16); \
        UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
    } \
    do { \
        ref+=stride; \
        w0 = LP(ref+0); \
        w1 = LP(ref+4); \
        UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
        OP(LP(dest+0),PACK(a0,a1,t2,t3)); \
        a0 = t2; a1 = t3; \
        w0 = LP(ref+8); \
        UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
        OP(LP(dest+4),PACK(a2,a3,t2,t3)); \
        a2 = t2; a3 = t3; \
        if (sz==16) { \
            w1 = LP(ref+12); \
            UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
            OP(LP(dest+8),PACK(a4,a5,t2,t3)); \
            a4 = t2; a5 = t3; \
            w0 = LP(ref+16); \
            UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
            OP(LP(dest+12),PACK(a6,a7,t2,t3)); \
            a6 = t2; a7 = t3; \
        } \
        dest+=stride; \
    } while(--height); \
}
271
/* Expand one motion-comp function named <op>_<rnd>_pixels<sz>_<xy>().
   The body dispatches on ref's misalignment (its low 2 address bits)
   to the matching OP_N loop; case 0 takes the fully aligned OP_N##0
   fast path.  avgfunc selects rnd_/no_rnd_ avg2 or PACK.
   Fixed: cast ref via uintptr_t instead of (int) — pointer-to-int is
   implementation-defined and truncates on 64-bit targets. */
#define DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \
static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \
                                            const int stride, int height) \
{ \
    switch ((uintptr_t)ref & 3) { \
    case 0:OP_N##0(sz,rnd##_##avgfunc); return; \
    case 1:OP_N(1,sz,rnd##_##avgfunc); return; \
    case 2:OP_N(2,sz,rnd##_##avgfunc); return; \
    case 3:OP_N(3,sz,rnd##_##avgfunc); return; \
    } \
}
283
#define OP put

/* put variants: o = straight copy, x/y = horizontal/vertical half-pel,
   xy = 2D half-pel; each in rounding and no-rounding flavours, for 8-
   and 16-pixel widths.  "o" needs no no_rnd variant (nothing to round);
   aliases for those names are defined below. */
DEFFUNC(put,   rnd,o,8,OP_C,avg2)
DEFFUNC(put,   rnd,x,8,OP_X,avg2)
DEFFUNC(put,no_rnd,x,8,OP_X,avg2)
DEFFUNC(put,   rnd,y,8,OP_Y,avg2)
DEFFUNC(put,no_rnd,y,8,OP_Y,avg2)
DEFFUNC(put,   rnd,xy,8,OP_XY,PACK)
DEFFUNC(put,no_rnd,xy,8,OP_XY,PACK)
DEFFUNC(put,   rnd,o,16,OP_C,avg2)
DEFFUNC(put,   rnd,x,16,OP_X,avg2)
DEFFUNC(put,no_rnd,x,16,OP_X,avg2)
DEFFUNC(put,   rnd,y,16,OP_Y,avg2)
DEFFUNC(put,no_rnd,y,16,OP_Y,avg2)
DEFFUNC(put,   rnd,xy,16,OP_XY,PACK)
DEFFUNC(put,no_rnd,xy,16,OP_XY,PACK)

#undef OP
#define OP avg

/* avg variants: identical loops, but OP blends into dest via rnd_avg2
   instead of overwriting. */
DEFFUNC(avg,   rnd,o,8,OP_C,avg2)
DEFFUNC(avg,   rnd,x,8,OP_X,avg2)
DEFFUNC(avg,no_rnd,x,8,OP_X,avg2)
DEFFUNC(avg,   rnd,y,8,OP_Y,avg2)
DEFFUNC(avg,no_rnd,y,8,OP_Y,avg2)
DEFFUNC(avg,   rnd,xy,8,OP_XY,PACK)
DEFFUNC(avg,no_rnd,xy,8,OP_XY,PACK)
DEFFUNC(avg,   rnd,o,16,OP_C,avg2)
DEFFUNC(avg,   rnd,x,16,OP_X,avg2)
DEFFUNC(avg,no_rnd,x,16,OP_X,avg2)
DEFFUNC(avg,   rnd,y,16,OP_Y,avg2)
DEFFUNC(avg,no_rnd,y,16,OP_Y,avg2)
DEFFUNC(avg,   rnd,xy,16,OP_XY,PACK)
DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
318
#undef OP

/* The no-rounding straight copy is byte-identical to the rounding one
   (no interpolation happens, so there is nothing to round): alias the
   names instead of instantiating duplicate functions. */
#define put_no_rnd_pixels8_o put_rnd_pixels8_o
#define put_no_rnd_pixels16_o put_rnd_pixels16_o
#define avg_no_rnd_pixels8_o avg_rnd_pixels8_o
#define avg_no_rnd_pixels16_o avg_rnd_pixels16_o

/* *_c names — NOTE(review): presumably the identifiers qpel.c expects
   for its building blocks; confirm against qpel.c. */
#define put_pixels8_c put_rnd_pixels8_o
#define put_pixels16_c put_rnd_pixels16_o
#define avg_pixels8_c avg_rnd_pixels8_o
#define avg_pixels16_c avg_rnd_pixels16_o
#define put_no_rnd_pixels8_c put_rnd_pixels8_o
#define put_no_rnd_pixels16_c put_rnd_pixels16_o
#define avg_no_rnd_pixels8_c avg_rnd_pixels8_o
#define avg_no_rnd_pixels16_c avg_rnd_pixels16_o

#define QPEL

#ifdef QPEL

/* Textual include: qpel.c builds the qpel/h264/mspel/gmc functions on
   top of the pixel primitives defined above. */
#include "qpel.c"

#endif
342
/* Install the aligned-access motion-compensation functions into the
   DSPContext tables.  First index: 0 = 16-pixel wide, 1 = 8-pixel wide
   (2 = 4-pixel for h264 qpel).  Second index of the pixels tables:
   0..3 = o, x, y, xy.  avctx is unused here. */
void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
{
    c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
    c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
    c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
    c->put_pixels_tab[0][3] = put_rnd_pixels16_xy;
    c->put_pixels_tab[1][0] = put_rnd_pixels8_o;
    c->put_pixels_tab[1][1] = put_rnd_pixels8_x;
    c->put_pixels_tab[1][2] = put_rnd_pixels8_y;
    c->put_pixels_tab[1][3] = put_rnd_pixels8_xy;

    c->put_no_rnd_pixels_tab[0][0] = put_no_rnd_pixels16_o;
    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x;
    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y;
    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy;
    c->put_no_rnd_pixels_tab[1][0] = put_no_rnd_pixels8_o;
    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x;
    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y;
    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy;

    c->avg_pixels_tab[0][0] = avg_rnd_pixels16_o;
    c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x;
    c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y;
    c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy;
    c->avg_pixels_tab[1][0] = avg_rnd_pixels8_o;
    c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x;
    c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y;
    c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy;

    c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_o;
    c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x;
    c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y;
    c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy;
    c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_o;
    c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
    c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
    c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;

#ifdef QPEL

/* Fill one 16-entry quarter-pel table: entry index is the (dy,dx)
   quarter-pel position, mcDXDY.  The mc*_c functions come from the
   included qpel.c. */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    /* h264 chroma MC: 8/4/2-pixel widths. */
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;

    /* WMV2/mspel half-pel table. */
    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    /* global motion compensation */
    c->gmc1 = gmc1_c;
    c->gmc = gmc_c;

#endif
}