comparison sh4/qpel.c @ 5520:c16a59ef6a86 libavcodec

* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
author romansh
date Thu, 09 Aug 2007 17:01:15 +0000
parents 6c66ddbb054f
children bd11ee6ad834
comparison
equal deleted inserted replaced
5519:b790f8c0ee24 5520:c16a59ef6a86
19 * You should have received a copy of the GNU Lesser General Public 19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software 20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */ 22 */
23 23
24 #define LD(adr) *(uint32_t*)(adr)
25
26 #define PIXOP2(OPNAME, OP) \ 24 #define PIXOP2(OPNAME, OP) \
27 /*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 25 /*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
28 {\ 26 {\
29 do {\ 27 do {\
30 OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ 28 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
31 OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ 29 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
32 src1+=src_stride1; \ 30 src1+=src_stride1; \
33 src2+=src_stride2; \ 31 src2+=src_stride2; \
34 dst+=dst_stride; \ 32 dst+=dst_stride; \
35 } while(--h); \ 33 } while(--h); \
36 }\ 34 }\
37 \ 35 \
38 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 36 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
39 {\ 37 {\
40 do {\ 38 do {\
41 OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ 39 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
42 OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ 40 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
43 src1+=src_stride1; \ 41 src1+=src_stride1; \
44 src2+=src_stride2; \ 42 src2+=src_stride2; \
45 dst+=dst_stride; \ 43 dst+=dst_stride; \
46 } while(--h); \ 44 } while(--h); \
47 }\ 45 }\
48 \ 46 \
49 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 47 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
50 {\ 48 {\
51 do {\ 49 do {\
52 OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ 50 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
53 src1+=src_stride1; \ 51 src1+=src_stride1; \
54 src2+=src_stride2; \ 52 src2+=src_stride2; \
55 dst+=dst_stride; \ 53 dst+=dst_stride; \
56 } while(--h); \ 54 } while(--h); \
57 }\ 55 }\
58 \ 56 \
59 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 57 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
60 {\ 58 {\
61 do {\ 59 do {\
62 OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ 60 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
63 OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ 61 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
64 OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \ 62 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
65 OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \ 63 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
66 src1+=src_stride1; \ 64 src1+=src_stride1; \
67 src2+=src_stride2; \ 65 src2+=src_stride2; \
68 dst+=dst_stride; \ 66 dst+=dst_stride; \
69 } while(--h); \ 67 } while(--h); \
70 }\ 68 }\
71 \ 69 \
72 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 70 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
73 {\ 71 {\
74 do {\ 72 do {\
75 OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ 73 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
76 OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ 74 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
77 OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \ 75 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
78 OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \ 76 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
79 src1+=src_stride1; \ 77 src1+=src_stride1; \
80 src2+=src_stride2; \ 78 src2+=src_stride2; \
81 dst+=dst_stride; \ 79 dst+=dst_stride; \
82 } while(--h); \ 80 } while(--h); \
83 }*/\ 81 }*/\
93 }\ 91 }\
94 \ 92 \
95 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 93 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
96 {\ 94 {\
97 do {\ 95 do {\
98 OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ 96 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
99 src1+=src_stride1; \ 97 src1+=src_stride1; \
100 src2+=src_stride2; \ 98 src2+=src_stride2; \
101 dst+=dst_stride; \ 99 dst+=dst_stride; \
102 } while(--h); \ 100 } while(--h); \
103 }\ 101 }\
104 \ 102 \
105 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 103 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
106 {\ 104 {\
107 do {\ 105 do {\
108 OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \ 106 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
109 OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ 107 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
110 OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \ 108 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
111 OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \ 109 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
112 src1+=src_stride1; \ 110 src1+=src_stride1; \
113 src2+=src_stride2; \ 111 src2+=src_stride2; \
114 dst+=dst_stride; \ 112 dst+=dst_stride; \
115 } while(--h); \ 113 } while(--h); \
116 }\ 114 }\
117 \ 115 \
118 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 116 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
119 {\ 117 {\
120 do {\ 118 do {\
121 OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ 119 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
122 OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ 120 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
123 OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \ 121 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
124 OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \ 122 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
125 src1+=src_stride1; \ 123 src1+=src_stride1; \
126 src2+=src_stride2; \ 124 src2+=src_stride2; \
127 dst+=dst_stride; \ 125 dst+=dst_stride; \
128 } while(--h); \ 126 } while(--h); \
129 }\ 127 }\
130 \ 128 \
131 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 129 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
132 {\ 130 {\
133 do { /* only src2 aligned */\ 131 do { /* only src2 aligned */\
134 OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \ 132 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
135 OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ 133 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
136 src1+=src_stride1; \ 134 src1+=src_stride1; \
137 src2+=src_stride2; \ 135 src2+=src_stride2; \
138 dst+=dst_stride; \ 136 dst+=dst_stride; \
139 } while(--h); \ 137 } while(--h); \
140 }\ 138 }\
141 \ 139 \
142 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 140 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
143 {\ 141 {\
144 do {\ 142 do {\
145 OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ 143 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
146 OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ 144 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
147 src1+=src_stride1; \ 145 src1+=src_stride1; \
148 src2+=src_stride2; \ 146 src2+=src_stride2; \
149 dst+=dst_stride; \ 147 dst+=dst_stride; \
150 } while(--h); \ 148 } while(--h); \
151 }\ 149 }\
245 } \ 243 } \
246 \ 244 \
247 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 245 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
248 do { \ 246 do { \
249 uint32_t a0,a1,a2,a3; /* src1 only not aligned */\ 247 uint32_t a0,a1,a2,a3; /* src1 only not aligned */\
250 UNPACK(a0,a1,LD32(src1),LP(src2)); \ 248 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
251 UNPACK(a2,a3,LP(src3),LP(src4)); \ 249 UNPACK(a2,a3,LP(src3),LP(src4)); \
252 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ 250 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
253 UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ 251 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
254 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 252 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
255 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ 253 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
256 src1+=src_stride1;\ 254 src1+=src_stride1;\
257 src2+=src_stride2;\ 255 src2+=src_stride2;\
258 src3+=src_stride3;\ 256 src3+=src_stride3;\
262 } \ 260 } \
263 \ 261 \
264 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 262 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
265 do { \ 263 do { \
266 uint32_t a0,a1,a2,a3; \ 264 uint32_t a0,a1,a2,a3; \
267 UNPACK(a0,a1,LD32(src1),LP(src2)); \ 265 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
268 UNPACK(a2,a3,LP(src3),LP(src4)); \ 266 UNPACK(a2,a3,LP(src3),LP(src4)); \
269 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ 267 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
270 UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ 268 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
271 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 269 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
272 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ 270 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
273 src1+=src_stride1;\ 271 src1+=src_stride1;\
274 src2+=src_stride2;\ 272 src2+=src_stride2;\
275 src3+=src_stride3;\ 273 src3+=src_stride3;\
325 } \ 323 } \
326 \ 324 \
327 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 325 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
328 do { /* src1 is unaligned */\ 326 do { /* src1 is unaligned */\
329 uint32_t a0,a1,a2,a3; \ 327 uint32_t a0,a1,a2,a3; \
330 UNPACK(a0,a1,LD32(src1),LP(src2)); \ 328 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
331 UNPACK(a2,a3,LP(src3),LP(src4)); \ 329 UNPACK(a2,a3,LP(src3),LP(src4)); \
332 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ 330 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
333 UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ 331 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
334 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 332 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
335 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ 333 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
336 UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \ 334 UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
337 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ 335 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
338 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ 336 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
339 UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \ 337 UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
340 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ 338 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
341 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ 339 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
342 src1+=src_stride1;\ 340 src1+=src_stride1;\
343 src2+=src_stride2;\ 341 src2+=src_stride2;\
344 src3+=src_stride3;\ 342 src3+=src_stride3;\
348 } \ 346 } \
349 \ 347 \
350 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 348 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
351 do { \ 349 do { \
352 uint32_t a0,a1,a2,a3; \ 350 uint32_t a0,a1,a2,a3; \
353 UNPACK(a0,a1,LD32(src1),LP(src2)); \ 351 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
354 UNPACK(a2,a3,LP(src3),LP(src4)); \ 352 UNPACK(a2,a3,LP(src3),LP(src4)); \
355 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ 353 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
356 UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ 354 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
357 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 355 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
358 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ 356 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
359 UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \ 357 UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
360 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ 358 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
361 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ 359 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
362 UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \ 360 UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
363 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ 361 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
364 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ 362 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
365 src1+=src_stride1;\ 363 src1+=src_stride1;\
366 src2+=src_stride2;\ 364 src2+=src_stride2;\
367 src3+=src_stride3;\ 365 src3+=src_stride3;\