Mercurial > libavcodec.hg
comparison sh4/qpel.c @ 5520:c16a59ef6a86 libavcodec
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
author | romansh |
---|---|
date | Thu, 09 Aug 2007 17:01:15 +0000 |
parents | 6c66ddbb054f |
children | bd11ee6ad834 |
comparison
equal
deleted
inserted
replaced
5519:b790f8c0ee24 | 5520:c16a59ef6a86 |
---|---|
19 * You should have received a copy of the GNU Lesser General Public | 19 * You should have received a copy of the GNU Lesser General Public |
20 * License along with FFmpeg; if not, write to the Free Software | 20 * License along with FFmpeg; if not, write to the Free Software |
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 */ | 22 */ |
23 | 23 |
24 #define LD(adr) *(uint32_t*)(adr) | |
25 | |
26 #define PIXOP2(OPNAME, OP) \ | 24 #define PIXOP2(OPNAME, OP) \ |
27 /*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ | 25 /*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ |
28 {\ | 26 {\ |
29 do {\ | 27 do {\ |
30 OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ | 28 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \ |
31 OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ | 29 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \ |
32 src1+=src_stride1; \ | 30 src1+=src_stride1; \ |
33 src2+=src_stride2; \ | 31 src2+=src_stride2; \ |
34 dst+=dst_stride; \ | 32 dst+=dst_stride; \ |
35 } while(--h); \ | 33 } while(--h); \ |
36 }\ | 34 }\ |
37 \ | 35 \ |
38 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ | 36 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ |
39 {\ | 37 {\ |
40 do {\ | 38 do {\ |
41 OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ | 39 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \ |
42 OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ | 40 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \ |
43 src1+=src_stride1; \ | 41 src1+=src_stride1; \ |
44 src2+=src_stride2; \ | 42 src2+=src_stride2; \ |
45 dst+=dst_stride; \ | 43 dst+=dst_stride; \ |
46 } while(--h); \ | 44 } while(--h); \ |
47 }\ | 45 }\ |
48 \ | 46 \ |
49 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ | 47 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ |
50 {\ | 48 {\ |
51 do {\ | 49 do {\ |
52 OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ | 50 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \ |
53 src1+=src_stride1; \ | 51 src1+=src_stride1; \ |
54 src2+=src_stride2; \ | 52 src2+=src_stride2; \ |
55 dst+=dst_stride; \ | 53 dst+=dst_stride; \ |
56 } while(--h); \ | 54 } while(--h); \ |
57 }\ | 55 }\ |
58 \ | 56 \ |
59 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ | 57 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ |
60 {\ | 58 {\ |
61 do {\ | 59 do {\ |
62 OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ | 60 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \ |
63 OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ | 61 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \ |
64 OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \ | 62 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \ |
65 OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \ | 63 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \ |
66 src1+=src_stride1; \ | 64 src1+=src_stride1; \ |
67 src2+=src_stride2; \ | 65 src2+=src_stride2; \ |
68 dst+=dst_stride; \ | 66 dst+=dst_stride; \ |
69 } while(--h); \ | 67 } while(--h); \ |
70 }\ | 68 }\ |
71 \ | 69 \ |
72 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ | 70 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ |
73 {\ | 71 {\ |
74 do {\ | 72 do {\ |
75 OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ | 73 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \ |
76 OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ | 74 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \ |
77 OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \ | 75 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \ |
78 OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \ | 76 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \ |
79 src1+=src_stride1; \ | 77 src1+=src_stride1; \ |
80 src2+=src_stride2; \ | 78 src2+=src_stride2; \ |
81 dst+=dst_stride; \ | 79 dst+=dst_stride; \ |
82 } while(--h); \ | 80 } while(--h); \ |
83 }*/\ | 81 }*/\ |
93 }\ | 91 }\ |
94 \ | 92 \ |
95 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ | 93 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ |
96 {\ | 94 {\ |
97 do {\ | 95 do {\ |
98 OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ | 96 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ |
99 src1+=src_stride1; \ | 97 src1+=src_stride1; \ |
100 src2+=src_stride2; \ | 98 src2+=src_stride2; \ |
101 dst+=dst_stride; \ | 99 dst+=dst_stride; \ |
102 } while(--h); \ | 100 } while(--h); \ |
103 }\ | 101 }\ |
104 \ | 102 \ |
105 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ | 103 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ |
106 {\ | 104 {\ |
107 do {\ | 105 do {\ |
108 OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \ | 106 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ |
109 OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ | 107 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ |
110 OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \ | 108 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \ |
111 OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \ | 109 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \ |
112 src1+=src_stride1; \ | 110 src1+=src_stride1; \ |
113 src2+=src_stride2; \ | 111 src2+=src_stride2; \ |
114 dst+=dst_stride; \ | 112 dst+=dst_stride; \ |
115 } while(--h); \ | 113 } while(--h); \ |
116 }\ | 114 }\ |
117 \ | 115 \ |
118 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ | 116 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ |
119 {\ | 117 {\ |
120 do {\ | 118 do {\ |
121 OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ | 119 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ |
122 OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ | 120 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ |
123 OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \ | 121 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \ |
124 OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \ | 122 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \ |
125 src1+=src_stride1; \ | 123 src1+=src_stride1; \ |
126 src2+=src_stride2; \ | 124 src2+=src_stride2; \ |
127 dst+=dst_stride; \ | 125 dst+=dst_stride; \ |
128 } while(--h); \ | 126 } while(--h); \ |
129 }\ | 127 }\ |
130 \ | 128 \ |
131 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ | 129 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ |
132 {\ | 130 {\ |
133 do { /* only src2 aligned */\ | 131 do { /* only src2 aligned */\ |
134 OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \ | 132 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ |
135 OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ | 133 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ |
136 src1+=src_stride1; \ | 134 src1+=src_stride1; \ |
137 src2+=src_stride2; \ | 135 src2+=src_stride2; \ |
138 dst+=dst_stride; \ | 136 dst+=dst_stride; \ |
139 } while(--h); \ | 137 } while(--h); \ |
140 }\ | 138 }\ |
141 \ | 139 \ |
142 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ | 140 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ |
143 {\ | 141 {\ |
144 do {\ | 142 do {\ |
145 OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ | 143 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ |
146 OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ | 144 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ |
147 src1+=src_stride1; \ | 145 src1+=src_stride1; \ |
148 src2+=src_stride2; \ | 146 src2+=src_stride2; \ |
149 dst+=dst_stride; \ | 147 dst+=dst_stride; \ |
150 } while(--h); \ | 148 } while(--h); \ |
151 }\ | 149 }\ |
245 } \ | 243 } \ |
246 \ | 244 \ |
247 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 245 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
248 do { \ | 246 do { \ |
249 uint32_t a0,a1,a2,a3; /* src1 only not aligned */\ | 247 uint32_t a0,a1,a2,a3; /* src1 only not aligned */\ |
250 UNPACK(a0,a1,LD32(src1),LP(src2)); \ | 248 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ |
251 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 249 UNPACK(a2,a3,LP(src3),LP(src4)); \ |
252 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ | 250 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ |
253 UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ | 251 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ |
254 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 252 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ |
255 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ | 253 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ |
256 src1+=src_stride1;\ | 254 src1+=src_stride1;\ |
257 src2+=src_stride2;\ | 255 src2+=src_stride2;\ |
258 src3+=src_stride3;\ | 256 src3+=src_stride3;\ |
262 } \ | 260 } \ |
263 \ | 261 \ |
264 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 262 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
265 do { \ | 263 do { \ |
266 uint32_t a0,a1,a2,a3; \ | 264 uint32_t a0,a1,a2,a3; \ |
267 UNPACK(a0,a1,LD32(src1),LP(src2)); \ | 265 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ |
268 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 266 UNPACK(a2,a3,LP(src3),LP(src4)); \ |
269 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ | 267 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ |
270 UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ | 268 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ |
271 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 269 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ |
272 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ | 270 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ |
273 src1+=src_stride1;\ | 271 src1+=src_stride1;\ |
274 src2+=src_stride2;\ | 272 src2+=src_stride2;\ |
275 src3+=src_stride3;\ | 273 src3+=src_stride3;\ |
325 } \ | 323 } \ |
326 \ | 324 \ |
327 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 325 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
328 do { /* src1 is unaligned */\ | 326 do { /* src1 is unaligned */\ |
329 uint32_t a0,a1,a2,a3; \ | 327 uint32_t a0,a1,a2,a3; \ |
330 UNPACK(a0,a1,LD32(src1),LP(src2)); \ | 328 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ |
331 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 329 UNPACK(a2,a3,LP(src3),LP(src4)); \ |
332 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ | 330 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ |
333 UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ | 331 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ |
334 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 332 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ |
335 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ | 333 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ |
336 UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \ | 334 UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \ |
337 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ | 335 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ |
338 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ | 336 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ |
339 UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \ | 337 UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \ |
340 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ | 338 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ |
341 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ | 339 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ |
342 src1+=src_stride1;\ | 340 src1+=src_stride1;\ |
343 src2+=src_stride2;\ | 341 src2+=src_stride2;\ |
344 src3+=src_stride3;\ | 342 src3+=src_stride3;\ |
348 } \ | 346 } \ |
349 \ | 347 \ |
350 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 348 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
351 do { \ | 349 do { \ |
352 uint32_t a0,a1,a2,a3; \ | 350 uint32_t a0,a1,a2,a3; \ |
353 UNPACK(a0,a1,LD32(src1),LP(src2)); \ | 351 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ |
354 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 352 UNPACK(a2,a3,LP(src3),LP(src4)); \ |
355 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ | 353 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ |
356 UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ | 354 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ |
357 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 355 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ |
358 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ | 356 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ |
359 UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \ | 357 UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \ |
360 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ | 358 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ |
361 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ | 359 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ |
362 UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \ | 360 UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \ |
363 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ | 361 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ |
364 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ | 362 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ |
365 src1+=src_stride1;\ | 363 src1+=src_stride1;\ |
366 src2+=src_stride2;\ | 364 src2+=src_stride2;\ |
367 src3+=src_stride3;\ | 365 src3+=src_stride3;\ |