Mercurial > libavcodec
comparison of sh4/qpel.c @ changeset 1262:82e0e1b9c283 (libavcodec)
aligned dsputil (for sh4) patch by BERO <bero at geocities dot co dot jp>
author:   michaelni
date:     Wed, 14 May 2003 17:46:55 +0000
parents:  (none)
children: 2fa34e615c76
comparison legend: equal | deleted | inserted | replaced
comparing 1261:362947395f5c with 1262:82e0e1b9c283
/*
   This is optimized for SH, which has post-increment addressing (*p++).
   On some CPUs, indexed addressing (p[n]) may be faster than post-increment (*p++).
*/
5 | |
6 #define LD(adr) *(uint32_t*)(adr) | |
7 | |
/*
 * Pixel-averaging helper generator, instantiated once per operation
 * (put/avg) via OPNAME and OP.
 *   - _l2 variants average two sources, _l4 variants average four
 *     (via the external UNPACK / rnd_PACK / no_rnd_PACK helpers).
 *   - _aligned variants require all source pointers 32-bit aligned (LP);
 *     _aligned2 variants require only src2 aligned (src1 read via LD32);
 *     _aligned1 variants swap the arguments into the _aligned2 form;
 *     _aligned0 (l4) variants allow src1 unaligned.
 *   - no_rnd variants use truncating instead of rounding averages.
 * Fix applied (review): in _pixels16_l4_aligned and _pixels16_l4_aligned0
 * the second word was stored to dst+8 instead of dst+4 (bytes 4..7 never
 * written, bytes 8..11 written twice); compare the correct
 * _no_rnd_pixels16_l4_aligned below.
 */
#define PIXOP2(OPNAME, OP) \
/*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),no_rnd_avg2(LD32(src1  ),LD32(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg2(LD32(src1  ),LD32(src2  )) ); \
        OP(LP(dst+4),rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg2(LD32(src1  ),LD32(src2  )) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),no_rnd_avg2(LD32(src1  ),LD32(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \
        OP(LP(dst+8),no_rnd_avg2(LD32(src1+8),LD32(src2+8)) ); \
        OP(LP(dst+12),no_rnd_avg2(LD32(src1+12),LD32(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg2(LD32(src1  ),LD32(src2  )) ); \
        OP(LP(dst+4),rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \
        OP(LP(dst+8),rnd_avg2(LD32(src1+8),LD32(src2+8)) ); \
        OP(LP(dst+12),rnd_avg2(LD32(src1+12),LD32(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}*/\
\
static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg2(LP(src1  ),LP(src2  )) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg2(LD32(src1  ),LP(src2  )) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),no_rnd_avg2(LD32(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LP(src2+4)) ); \
        OP(LP(dst+8),no_rnd_avg2(LD32(src1+8),LP(src2+8)) ); \
        OP(LP(dst+12),no_rnd_avg2(LD32(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg2(LD32(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),rnd_avg2(LD32(src1+4),LP(src2+4)) ); \
        OP(LP(dst+8),rnd_avg2(LD32(src1+8),LP(src2+8)) ); \
        OP(LP(dst+12),rnd_avg2(LD32(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do { /* only src2 aligned */\
        OP(LP(dst  ),no_rnd_avg2(LD32(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg2(LD32(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),rnd_avg2(LD32(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),no_rnd_avg2(LP(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg2(LP(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg2(LP(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),rnd_avg2(LP(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),no_rnd_avg2(LP(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg2(LP(src1+4),LP(src2+4)) ); \
        OP(LP(dst+8),no_rnd_avg2(LP(src1+8),LP(src2+8)) ); \
        OP(LP(dst+12),no_rnd_avg2(LP(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg2(LP(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),rnd_avg2(LP(src1+4),LP(src2+4)) ); \
        OP(LP(dst+8),rnd_avg2(LP(src1+8),LP(src2+8)) ); \
        OP(LP(dst+12),rnd_avg2(LP(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; /* src1 only not aligned */\
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); /* was dst+8: second word goes to dst+4 */ \
        UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
        UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
        OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
        UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
        OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
        UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
        OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
        UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
        OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { /* src1 is unaligned */\
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); /* was dst+8: second word goes to dst+4 */ \
        UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
        UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
        OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
        UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
        OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
        UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
        OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
        UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
        OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \

/* Instantiate the pixel helpers for the two supported operations:
 *   avg: destination = rounded average of destination and computed value
 *   put: destination = computed value
 * The op_* macros are scoped to these two expansions only. */
#define op_avg(a, b) a = rnd_avg2(a,b)
#define op_put(a, b) a = b

PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
363 | |
/* Rounded averages of two and four values.  Arguments are fully
 * parenthesized so that expression arguments (e.g. avg2(x|y, z))
 * expand with the intended precedence. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
366 | |
367 | |
/* One-pass GMC with a single 16th-pel motion vector (MPEG-4 gmc1):
 * each output pixel is the bilinear blend of the 2x2 source neighbourhood
 * weighted by A..D (which sum to 256), plus 'rounder', shifted down by 8.
 * Processes an 8-pixel-wide block over h rows. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);

    do {
        int x;
        uint8_t *top = src;           /* current row */
        uint8_t *bot = src + stride;  /* row below */

        for (x = 0; x < 8; x++)
            dst[x] = (A * top[x]     + B * top[x + 1] +
                      C * bot[x]     + D * bot[x + 1] + rounder) >> 8;

        dst += stride;
        src += stride;
    } while (--h);
}
400 | |
/*
 * Global motion compensation with an affine vector field (MPEG-4 GMC).
 * The source position for column x of row y starts at (ox,oy) in 16.16
 * fixed point and advances by (dxx,dyx) per column and (dxy,dyy) per row.
 * Each pixel is bilinearly interpolated with 'shift'-bit sub-pel precision
 * and rounding constant r; positions outside [0,width)x[0,height) fall back
 * to edge-clamped sampling via clip() (defined elsewhere in dsputil).
 * Writes an 8-pixel-wide, h-row block into dst.
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;  /* sub-pel denominator */

    width--;   /* from here on, width/height are the last valid indices */
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            /* vx>>16 is the position in 1/s pixel units; the low 'shift'
               bits are the sub-pel fraction, the rest the integer coord. */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2x2 bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically clamped: interpolate horizontally only */
                    index= src_x + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally clamped: interpolate vertically only */
                    index= clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* clamped in both directions: nearest edge pixel */
                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
/*
 * H.264 chroma motion compensation for 2-, 4- and 8-pixel-wide blocks,
 * instantiated per operation via OPNAME/OP.  (x,y) is the eighth-pel
 * offset within the 2x2 source neighbourhood; A..D are bilinear weights
 * summing to 64 (OP handles the +32 >> 6 normalisation).  Rows are
 * streamed with alternating temporaries (t0/t1 top row, t2/t3 bottom row)
 * so every source byte is loaded exactly once.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    do {\
        int t0,t1,t2,t3; \
        uint8_t *s0 = src; \
        uint8_t *s1 = src+stride; \
        t0 = *s0++; t2 = *s1++; \
        t1 = *s0++; t3 = *s1++; \
        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
        t0 = *s0++; t2 = *s1++; \
        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
        dst+= stride;\
        src+= stride;\
    }while(--h);\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    do {\
        int t0,t1,t2,t3; \
        uint8_t *s0 = src; \
        uint8_t *s1 = src+stride; \
        t0 = *s0++; t2 = *s1++; \
        t1 = *s0++; t3 = *s1++; \
        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
        t0 = *s0++; t2 = *s1++; \
        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
        t1 = *s0++; t3 = *s1++; \
        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
        t0 = *s0++; t2 = *s1++; \
        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
        dst+= stride;\
        src+= stride;\
    }while(--h);\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    do {\
        int t0,t1,t2,t3; \
        uint8_t *s0 = src; \
        uint8_t *s1 = src+stride; \
        t0 = *s0++; t2 = *s1++; \
        t1 = *s0++; t3 = *s1++; \
        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
        t0 = *s0++; t2 = *s1++; \
        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
        t1 = *s0++; t3 = *s1++; \
        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
        t0 = *s0++; t2 = *s1++; \
        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
        t1 = *s0++; t3 = *s1++; \
        OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\
        t0 = *s0++; t2 = *s1++; \
        OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\
        t1 = *s0++; t3 = *s1++; \
        OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\
        t0 = *s0++; t2 = *s1++; \
        OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\
        dst+= stride;\
        src+= stride;\
    }while(--h);\
}
540 | |
/* Instantiate the chroma MC functions.  The interpolation sum carries a
 * 6-bit fixed-point scale: put normalises with (+32)>>6, avg additionally
 * takes the rounded average with the existing destination pixel. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_ , op_put)
H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put
548 | |
549 /* not yet optimized */ | |
/* Copy h rows of 4 bytes from src to dst, one 32-bit word per row. */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;
    for (row = h; row > 0; row--) {
        ST32(dst, LD32(src));
        dst += dstStride;
        src += srcStride;
    }
}
560 | |
/* Copy h rows of 8 bytes from src to dst, two 32-bit words per row. */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;
    for (row = h; row > 0; row--) {
        ST32(dst    , LD32(src    ));
        ST32(dst + 4, LD32(src + 4));
        dst += dstStride;
        src += srcStride;
    }
}
572 | |
/* Copy h rows of 16 bytes from src to dst, four 32-bit words per row. */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;
    for (row = h; row > 0; row--) {
        ST32(dst     , LD32(src     ));
        ST32(dst +  4, LD32(src +  4));
        ST32(dst +  8, LD32(src +  8));
        ST32(dst + 12, LD32(src + 12));
        dst += dstStride;
        src += srcStride;
    }
}
586 | |
/* Copy h rows of 17 bytes from src to dst: four 32-bit words plus the
 * trailing odd byte (used by the 16-wide qpel filters, which need one
 * extra source column). */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;
    for (row = h; row > 0; row--) {
        ST32(dst     , LD32(src     ));
        ST32(dst +  4, LD32(src +  4));
        ST32(dst +  8, LD32(src +  8));
        ST32(dst + 12, LD32(src + 12));
        dst[16] = src[16];
        dst += dstStride;
        src += srcStride;
    }
}
601 | |
/* Copy h rows of 9 bytes from src to dst: two 32-bit words plus the
 * trailing odd byte (used by the 8-wide qpel filters, which need one
 * extra source column). */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;
    for (row = h; row > 0; row--) {
        ST32(dst    , LD32(src    ));
        ST32(dst + 4, LD32(src + 4));
        dst[8] = src[8];
        dst += dstStride;
        src += srcStride;
    }
}
614 /* end not optimized */ | |
615 | |
616 #define QPEL_MC(r, OPNAME, RND, OP) \ | |
617 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ | |
618 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
619 do {\ | |
620 uint8_t *s = src; \ | |
621 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ | |
622 src0= *s++;\ | |
623 src1= *s++;\ | |
624 src2= *s++;\ | |
625 src3= *s++;\ | |
626 src4= *s++;\ | |
627 OP(dst[0], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ | |
628 src5= *s++;\ | |
629 OP(dst[1], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ | |
630 src6= *s++;\ | |
631 OP(dst[2], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ | |
632 src7= *s++;\ | |
633 OP(dst[3], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ | |
634 src8= *s++;\ | |
635 OP(dst[4], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ | |
636 OP(dst[5], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ | |
637 OP(dst[6], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ | |
638 OP(dst[7], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ | |
639 dst+=dstStride;\ | |
640 src+=srcStride;\ | |
641 }while(--h);\ | |
642 }\ | |
643 \ | |
644 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
645 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
646 int w=8;\ | |
647 do{\ | |
648 uint8_t *s = src, *d=dst;\ | |
649 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ | |
650 src0 = *s; s+=srcStride; \ | |
651 src1 = *s; s+=srcStride; \ | |
652 src2 = *s; s+=srcStride; \ | |
653 src3 = *s; s+=srcStride; \ | |
654 src4 = *s; s+=srcStride; \ | |
655 OP(*d, (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));d+=dstStride;\ | |
656 src5 = *s; s+=srcStride; \ | |
657 OP(*d, (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));d+=dstStride;\ | |
658 src6 = *s; s+=srcStride; \ | |
659 OP(*d, (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));d+=dstStride;\ | |
660 src7 = *s; s+=srcStride; \ | |
661 OP(*d, (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));d+=dstStride;\ | |
662 src8 = *s; \ | |
663 OP(*d, (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));d+=dstStride;\ | |
664 OP(*d, (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));d+=dstStride;\ | |
665 OP(*d, (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));d+=dstStride;\ | |
666 OP(*d, (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ | |
667 dst++;\ | |
668 src++;\ | |
669 }while(--w);\ | |
670 }\ | |
671 \ | |
672 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ | |
673 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
674 do {\ | |
675 uint8_t *s = src;\ | |
676 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ | |
677 int src9,src10,src11,src12,src13,src14,src15,src16;\ | |
678 src0= *s++;\ | |
679 src1= *s++;\ | |
680 src2= *s++;\ | |
681 src3= *s++;\ | |
682 src4= *s++;\ | |
683 OP(dst[ 0], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ | |
684 src5= *s++;\ | |
685 OP(dst[ 1], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ | |
686 src6= *s++;\ | |
687 OP(dst[ 2], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ | |
688 src7= *s++;\ | |
689 OP(dst[ 3], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ | |
690 src8= *s++;\ | |
691 OP(dst[ 4], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ | |
692 src9= *s++;\ | |
693 OP(dst[ 5], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ | |
694 src10= *s++;\ | |
695 OP(dst[ 6], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ | |
696 src11= *s++;\ | |
697 OP(dst[ 7], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ | |
698 src12= *s++;\ | |
699 OP(dst[ 8], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ | |
700 src13= *s++;\ | |
701 OP(dst[ 9], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ | |
702 src14= *s++;\ | |
703 OP(dst[10], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ | |
704 src15= *s++;\ | |
705 OP(dst[11], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ | |
706 src16= *s++;\ | |
707 OP(dst[12], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ | |
708 OP(dst[13], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ | |
709 OP(dst[14], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ | |
710 OP(dst[15], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ | |
711 dst+=dstStride;\ | |
712 src+=srcStride;\ | |
713 }while(--h);\ | |
714 }\ | |
715 \ | |
716 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
717 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
718 int w=16;\ | |
719 do {\ | |
720 uint8_t *s = src, *d=dst;\ | |
721 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ | |
722 int src9,src10,src11,src12,src13,src14,src15,src16;\ | |
723 src0 = *s; s+=srcStride; \ | |
724 src1 = *s; s+=srcStride; \ | |
725 src2 = *s; s+=srcStride; \ | |
726 src3 = *s; s+=srcStride; \ | |
727 src4 = *s; s+=srcStride; \ | |
728 OP(*d, (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));d+=dstStride;\ | |
729 src5 = *s; s+=srcStride; \ | |
730 OP(*d, (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));d+=dstStride;\ | |
731 src6 = *s; s+=srcStride; \ | |
732 OP(*d, (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));d+=dstStride;\ | |
733 src7 = *s; s+=srcStride; \ | |
734 OP(*d, (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));d+=dstStride;\ | |
735 src8 = *s; s+=srcStride; \ | |
736 OP(*d, (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));d+=dstStride;\ | |
737 src9 = *s; s+=srcStride; \ | |
738 OP(*d, (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));d+=dstStride;\ | |
739 src10 = *s; s+=srcStride; \ | |
740 OP(*d, (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));d+=dstStride;\ | |
741 src11 = *s; s+=srcStride; \ | |
742 OP(*d, (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));d+=dstStride;\ | |
743 src12 = *s; s+=srcStride; \ | |
744 OP(*d, (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));d+=dstStride;\ | |
745 src13 = *s; s+=srcStride; \ | |
746 OP(*d, (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));d+=dstStride;\ | |
747 src14 = *s; s+=srcStride; \ | |
748 OP(*d, (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));d+=dstStride;\ | |
749 src15 = *s; s+=srcStride; \ | |
750 OP(*d, (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));d+=dstStride;\ | |
751 src16 = *s; \ | |
752 OP(*d, (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));d+=dstStride;\ | |
753 OP(*d, (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));d+=dstStride;\ | |
754 OP(*d, (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));d+=dstStride;\ | |
755 OP(*d, (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ | |
756 dst++;\ | |
757 src++;\ | |
758 }while(--w);\ | |
759 }\ | |
760 \ | |
761 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ | |
762 OPNAME ## pixels8_c(dst, src, stride, 8);\ | |
763 }\ | |
764 \ | |
765 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ | |
766 uint8_t half[64];\ | |
767 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ | |
768 OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\ | |
769 }\ | |
770 \ | |
771 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ | |
772 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ | |
773 }\ | |
774 \ | |
775 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ | |
776 uint8_t half[64];\ | |
777 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ | |
778 OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\ | |
779 }\ | |
780 \ | |
781 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ | |
782 uint8_t full[16*9];\ | |
783 uint8_t half[64];\ | |
784 copy_block9(full, src, 16, stride, 9);\ | |
785 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ | |
786 OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\ | |
787 }\ | |
788 \ | |
789 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ | |
790 uint8_t full[16*9];\ | |
791 copy_block9(full, src, 16, stride, 9);\ | |
792 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ | |
793 }\ | |
794 \ | |
795 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ | |
796 uint8_t full[16*9];\ | |
797 uint8_t half[64];\ | |
798 copy_block9(full, src, 16, stride, 9);\ | |
799 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ | |
800 OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\ | |
801 }\ | |
802 static void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
803 uint8_t full[16*9];\ | |
804 uint8_t halfH[72];\ | |
805 uint8_t halfV[64];\ | |
806 uint8_t halfHV[64];\ | |
807 copy_block9(full, src, 16, stride, 9);\ | |
808 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
809 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ | |
810 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
811 OPNAME ## pixels8_l4_aligned(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ | |
812 }\ | |
813 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ | |
814 uint8_t full[16*9];\ | |
815 uint8_t halfH[72];\ | |
816 uint8_t halfHV[64];\ | |
817 copy_block9(full, src, 16, stride, 9);\ | |
818 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
819 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\ | |
820 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
821 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
822 }\ | |
823 static void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
824 uint8_t full[16*9];\ | |
825 uint8_t halfH[72];\ | |
826 uint8_t halfV[64];\ | |
827 uint8_t halfHV[64];\ | |
828 copy_block9(full, src, 16, stride, 9);\ | |
829 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
830 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ | |
831 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
832 OPNAME ## pixels8_l4_aligned0(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ | |
833 }\ | |
834 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ | |
835 uint8_t full[16*9];\ | |
836 uint8_t halfH[72];\ | |
837 uint8_t halfHV[64];\ | |
838 copy_block9(full, src, 16, stride, 9);\ | |
839 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
840 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
841 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
842 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
843 }\ | |
844 static void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
845 uint8_t full[16*9];\ | |
846 uint8_t halfH[72];\ | |
847 uint8_t halfV[64];\ | |
848 uint8_t halfHV[64];\ | |
849 copy_block9(full, src, 16, stride, 9);\ | |
850 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
851 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ | |
852 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
853 OPNAME ## pixels8_l4_aligned(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ | |
854 }\ | |
855 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ | |
856 uint8_t full[16*9];\ | |
857 uint8_t halfH[72];\ | |
858 uint8_t halfHV[64];\ | |
859 copy_block9(full, src, 16, stride, 9);\ | |
860 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
861 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\ | |
862 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
863 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
864 }\ | |
865 static void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
866 uint8_t full[16*9];\ | |
867 uint8_t halfH[72];\ | |
868 uint8_t halfV[64];\ | |
869 uint8_t halfHV[64];\ | |
870 copy_block9(full, src, 16, stride, 9);\ | |
871 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ | |
872 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ | |
873 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
874 OPNAME ## pixels8_l4_aligned0(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ | |
875 }\ | |
876 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ | |
877 uint8_t full[16*9];\ | |
878 uint8_t halfH[72];\ | |
879 uint8_t halfHV[64];\ | |
880 copy_block9(full, src, 16, stride, 9);\ | |
881 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
882 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
883 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
884 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
885 }\ | |
886 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ | |
887 uint8_t halfH[72];\ | |
888 uint8_t halfHV[64];\ | |
889 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ | |
890 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
891 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
892 }\ | |
893 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ | |
894 uint8_t halfH[72];\ | |
895 uint8_t halfHV[64];\ | |
896 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ | |
897 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
898 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
899 }\ | |
900 static void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
901 uint8_t full[16*9];\ | |
902 uint8_t halfH[72];\ | |
903 uint8_t halfV[64];\ | |
904 uint8_t halfHV[64];\ | |
905 copy_block9(full, src, 16, stride, 9);\ | |
906 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
907 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ | |
908 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
909 OPNAME ## pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);\ | |
910 }\ | |
911 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ | |
912 uint8_t full[16*9];\ | |
913 uint8_t halfH[72];\ | |
914 copy_block9(full, src, 16, stride, 9);\ | |
915 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
916 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\ | |
917 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
918 }\ | |
919 static void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
920 uint8_t full[16*9];\ | |
921 uint8_t halfH[72];\ | |
922 uint8_t halfV[64];\ | |
923 uint8_t halfHV[64];\ | |
924 copy_block9(full, src, 16, stride, 9);\ | |
925 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
926 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ | |
927 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
928 OPNAME ## pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);\ | |
929 }\ | |
930 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ | |
931 uint8_t full[16*9];\ | |
932 uint8_t halfH[72];\ | |
933 copy_block9(full, src, 16, stride, 9);\ | |
934 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
935 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
936 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
937 }\ | |
938 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ | |
939 uint8_t halfH[72];\ | |
940 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ | |
941 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
942 }\ | |
943 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ | |
944 OPNAME ## pixels16_c(dst, src, stride, 16);\ | |
945 }\ | |
946 \ | |
947 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ | |
948 uint8_t half[256];\ | |
949 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ | |
950 OPNAME ## pixels16_l2_aligned2(dst, src, half, stride, stride, 16, 16);\ | |
951 }\ | |
952 \ | |
953 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ | |
954 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ | |
955 }\ | |
956 \ | |
957 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ | |
958 uint8_t half[256];\ | |
959 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ | |
960 OPNAME ## pixels16_l2_aligned2(dst, src+1, half, stride, stride, 16, 16);\ | |
961 }\ | |
962 \ | |
963 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ | |
964 uint8_t full[24*17];\ | |
965 uint8_t half[256];\ | |
966 copy_block17(full, src, 24, stride, 17);\ | |
967 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ | |
968 OPNAME ## pixels16_l2_aligned(dst, full, half, stride, 24, 16, 16);\ | |
969 }\ | |
970 \ | |
971 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ | |
972 uint8_t full[24*17];\ | |
973 copy_block17(full, src, 24, stride, 17);\ | |
974 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ | |
975 }\ | |
976 \ | |
977 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ | |
978 uint8_t full[24*17];\ | |
979 uint8_t half[256];\ | |
980 copy_block17(full, src, 24, stride, 17);\ | |
981 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ | |
982 OPNAME ## pixels16_l2_aligned(dst, full+24, half, stride, 24, 16, 16);\ | |
983 }\ | |
984 static void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
985 uint8_t full[24*17];\ | |
986 uint8_t halfH[272];\ | |
987 uint8_t halfV[256];\ | |
988 uint8_t halfHV[256];\ | |
989 copy_block17(full, src, 24, stride, 17);\ | |
990 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
991 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ | |
992 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
993 OPNAME ## pixels16_l4_aligned(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ | |
994 }\ | |
995 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ | |
996 uint8_t full[24*17];\ | |
997 uint8_t halfH[272];\ | |
998 uint8_t halfHV[256];\ | |
999 copy_block17(full, src, 24, stride, 17);\ | |
1000 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1001 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\ | |
1002 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1003 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
1004 }\ | |
1005 static void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1006 uint8_t full[24*17];\ | |
1007 uint8_t halfH[272];\ | |
1008 uint8_t halfV[256];\ | |
1009 uint8_t halfHV[256];\ | |
1010 copy_block17(full, src, 24, stride, 17);\ | |
1011 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1012 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ | |
1013 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1014 OPNAME ## pixels16_l4_aligned0(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ | |
1015 }\ | |
1016 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1017 uint8_t full[24*17];\ | |
1018 uint8_t halfH[272];\ | |
1019 uint8_t halfHV[256];\ | |
1020 copy_block17(full, src, 24, stride, 17);\ | |
1021 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1022 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
1023 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1024 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
1025 }\ | |
1026 static void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1027 uint8_t full[24*17];\ | |
1028 uint8_t halfH[272];\ | |
1029 uint8_t halfV[256];\ | |
1030 uint8_t halfHV[256];\ | |
1031 copy_block17(full, src, 24, stride, 17);\ | |
1032 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1033 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ | |
1034 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1035 OPNAME ## pixels16_l4_aligned(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ | |
1036 }\ | |
1037 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1038 uint8_t full[24*17];\ | |
1039 uint8_t halfH[272];\ | |
1040 uint8_t halfHV[256];\ | |
1041 copy_block17(full, src, 24, stride, 17);\ | |
1042 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1043 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\ | |
1044 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1045 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
1046 }\ | |
1047 static void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1048 uint8_t full[24*17];\ | |
1049 uint8_t halfH[272];\ | |
1050 uint8_t halfV[256];\ | |
1051 uint8_t halfHV[256];\ | |
1052 copy_block17(full, src, 24, stride, 17);\ | |
1053 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ | |
1054 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ | |
1055 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1056 OPNAME ## pixels16_l4_aligned0(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ | |
1057 }\ | |
1058 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1059 uint8_t full[24*17];\ | |
1060 uint8_t halfH[272];\ | |
1061 uint8_t halfHV[256];\ | |
1062 copy_block17(full, src, 24, stride, 17);\ | |
1063 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1064 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
1065 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1066 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
1067 }\ | |
1068 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1069 uint8_t halfH[272];\ | |
1070 uint8_t halfHV[256];\ | |
1071 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ | |
1072 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1073 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
1074 }\ | |
1075 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1076 uint8_t halfH[272];\ | |
1077 uint8_t halfHV[256];\ | |
1078 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ | |
1079 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1080 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
1081 }\ | |
1082 static void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1083 uint8_t full[24*17];\ | |
1084 uint8_t halfH[272];\ | |
1085 uint8_t halfV[256];\ | |
1086 uint8_t halfHV[256];\ | |
1087 copy_block17(full, src, 24, stride, 17);\ | |
1088 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1089 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ | |
1090 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1091 OPNAME ## pixels16_l2_aligned(dst, halfV, halfHV, stride, 16, 16, 16);\ | |
1092 }\ | |
1093 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1094 uint8_t full[24*17];\ | |
1095 uint8_t halfH[272];\ | |
1096 copy_block17(full, src, 24, stride, 17);\ | |
1097 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1098 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\ | |
1099 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
1100 }\ | |
1101 static void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1102 uint8_t full[24*17];\ | |
1103 uint8_t halfH[272];\ | |
1104 uint8_t halfV[256];\ | |
1105 uint8_t halfHV[256];\ | |
1106 copy_block17(full, src, 24, stride, 17);\ | |
1107 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1108 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ | |
1109 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1110 OPNAME ## pixels16_l2_aligned(dst, halfV, halfHV, stride, 16, 16, 16);\ | |
1111 }\ | |
1112 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1113 uint8_t full[24*17];\ | |
1114 uint8_t halfH[272];\ | |
1115 copy_block17(full, src, 24, stride, 17);\ | |
1116 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1117 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
1118 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
1119 }\ | |
1120 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ | |
1121 uint8_t halfH[272];\ | |
1122 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ | |
1123 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
1124 } | |
1125 | |
/* Pixel-store macros plugged into QPEL_MC.  Each receives in `b` the raw
 * 6-tap filter sum (filter gain 32, hence the >>5); `cm` is the clipping
 * lookup table (cropTbl + MAX_NEG_CROP) that clamps the result to the
 * 8-bit range.  The "+16" bias rounds to nearest before the shift; the
 * "+15" bias is the no-rounding variant used for no_rnd prediction. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) /* rounded average with existing dst */
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1) /* no-rounding average variant */
#define op_put(a, b) a = cm[((b) + 16)>>5] /* plain rounded store */
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5] /* plain store, no-rounding variant */

/* Instantiate the qpel MC families: put_*, put_no_rnd_* and avg_*. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)
//QPEL_MC(1, avg_no_rnd , _ , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
1139 | |
1140 #if 1 | |
/*
 * H264_LOWPASS(OPNAME, OP, OP2) generates the three half-sample
 * interpolation primitives (horizontal, vertical, 2-D) that the H.264
 * quarter-pel motion-compensation functions are assembled from.
 * The filter is the 6-tap (1,-5,20,20,-5,1) kernel.
 *   OPNAME - name prefix pasted onto each generated function
 *   OP     - stores one filtered value to an 8-bit destination; supplied
 *            at instantiation time and expected to clip via `cm` and
 *            remove the filter gain
 *   OP2    - like OP but for the hv pass, whose input was filtered twice
 *            and therefore carries a squared gain
 * The inner loops are fully unrolled for sizes 4, 8 and 16, and the
 * pixel walks use post-increment addressing, which is cheap on SH-4
 * (see the note at the top of this file).
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
/* Horizontal pass: for each of h rows, filter w pixels; reads two \
 * pixels of margin on each side of the row (s starts at src-2). */\
static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
do {\
int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
uint8_t *s = src-2;\
srcB = *s++;\
srcA = *s++;\
src0 = *s++;\
src1 = *s++;\
src2 = *s++;\
src3 = *s++;\
OP(dst[0], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
src4 = *s++;\
OP(dst[1], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
src5 = *s++;\
OP(dst[2], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
src6 = *s++;\
OP(dst[3], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
if (w>4) { /* unrolled continuation for w==8 */ \
int src7,src8,src9,src10; \
src7 = *s++;\
OP(dst[4], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
src8 = *s++;\
OP(dst[5], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
src9 = *s++;\
OP(dst[6], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
src10 = *s++;\
OP(dst[7], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
if (w>8) { /* unrolled continuation for w==16 */ \
int src11,src12,src13,src14,src15,src16,src17,src18; \
src11 = *s++;\
OP(dst[8] , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
src12 = *s++;\
OP(dst[9] , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
src13 = *s++;\
OP(dst[10], (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
src14 = *s++;\
OP(dst[11], (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
src15 = *s++;\
OP(dst[12], (src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
src16 = *s++;\
OP(dst[13], (src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
src17 = *s++;\
OP(dst[14], (src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
src18 = *s++;\
OP(dst[15], (src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
} \
} \
dst+=dstStride;\
src+=srcStride;\
}while(--h);\
}\
\
/* Vertical pass: for each of w columns, filter h pixels; reads two rows \
 * of margin above and below (s starts at src-2*srcStride). */\
static inline void OPNAME ## h264_qpel_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
do{\
int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
uint8_t *s = src-2*srcStride,*d=dst;\
srcB = *s; s+=srcStride;\
srcA = *s; s+=srcStride;\
src0 = *s; s+=srcStride;\
src1 = *s; s+=srcStride;\
src2 = *s; s+=srcStride;\
src3 = *s; s+=srcStride;\
OP(*d, (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));d+=dstStride;\
src4 = *s; s+=srcStride;\
OP(*d, (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));d+=dstStride;\
src5 = *s; s+=srcStride;\
OP(*d, (src2+src3)*20 - (src1+src4)*5 + (src0+src5));d+=dstStride;\
src6 = *s; s+=srcStride;\
OP(*d, (src3+src4)*20 - (src2+src5)*5 + (src1+src6));d+=dstStride;\
if (h>4) { /* unrolled continuation for h==8 */ \
int src7,src8,src9,src10; \
src7 = *s; s+=srcStride;\
OP(*d, (src4+src5)*20 - (src3+src6)*5 + (src2+src7));d+=dstStride;\
src8 = *s; s+=srcStride;\
OP(*d, (src5+src6)*20 - (src4+src7)*5 + (src3+src8));d+=dstStride;\
src9 = *s; s+=srcStride;\
OP(*d, (src6+src7)*20 - (src5+src8)*5 + (src4+src9));d+=dstStride;\
src10 = *s; s+=srcStride;\
OP(*d, (src7+src8)*20 - (src6+src9)*5 + (src5+src10));d+=dstStride;\
if (h>8) { /* unrolled continuation for h==16 */ \
int src11,src12,src13,src14,src15,src16,src17,src18; \
src11 = *s; s+=srcStride;\
OP(*d , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));d+=dstStride;\
src12 = *s; s+=srcStride;\
OP(*d , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));d+=dstStride;\
src13 = *s; s+=srcStride;\
OP(*d, (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));d+=dstStride;\
src14 = *s; s+=srcStride;\
OP(*d, (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));d+=dstStride;\
src15 = *s; s+=srcStride;\
OP(*d, (src12+src13)*20 - (src11+src14)*5 + (src10+src15));d+=dstStride;\
src16 = *s; s+=srcStride;\
OP(*d, (src13+src14)*20 - (src12+src15)*5 + (src11+src16));d+=dstStride;\
src17 = *s; s+=srcStride;\
OP(*d, (src14+src15)*20 - (src13+src16)*5 + (src12+src17));d+=dstStride;\
src18 = *s; s+=srcStride;\
OP(*d, (src15+src16)*20 - (src14+src17)*5 + (src13+src18));d+=dstStride;\
} \
} \
dst++;\
src++;\
}while(--w);\
}\
\
/* 2-D pass: horizontal filter into the 16-bit scratch buffer tmp \
 * (h+5 rows: 2 above and 2 below the block, unclipped sums), then a \
 * vertical filter over tmp, stored through OP2. */\
static inline void OPNAME ## h264_qpel_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride,int w,int h){\
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
int i;\
src -= 2*srcStride;\
i= h+5; \
do {\
int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
uint8_t *s = src-2;\
srcB = *s++;\
srcA = *s++;\
src0 = *s++;\
src1 = *s++;\
src2 = *s++;\
src3 = *s++;\
tmp[0] = ((src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
src4 = *s++;\
tmp[1] = ((src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
src5 = *s++;\
tmp[2] = ((src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
src6 = *s++;\
tmp[3] = ((src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
if (w>4) { /* unrolled continuation for w==8 */ \
int src7,src8,src9,src10; \
src7 = *s++;\
tmp[4] = ((src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
src8 = *s++;\
tmp[5] = ((src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
src9 = *s++;\
tmp[6] = ((src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
src10 = *s++;\
tmp[7] = ((src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
if (w>8) { /* unrolled continuation for w==16 */ \
int src11,src12,src13,src14,src15,src16,src17,src18; \
src11 = *s++;\
tmp[8] = ((src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
src12 = *s++;\
tmp[9] = ((src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
src13 = *s++;\
tmp[10] = ((src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
src14 = *s++;\
tmp[11] = ((src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
src15 = *s++;\
tmp[12] = ((src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
src16 = *s++;\
tmp[13] = ((src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
src17 = *s++;\
tmp[14] = ((src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
src18 = *s++;\
tmp[15] = ((src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
} \
} \
tmp+=tmpStride;\
src+=srcStride;\
}while(--i);\
/* tmp is now h+5 rows past its start; rewind to row 2 so that the \
 * vertical pass's s = tmp-2*tmpStride lands on row 0. */\
tmp -= tmpStride*(h+5-2);\
i = w; \
do {\
int tmpB,tmpA,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6;\
int16_t *s = tmp-2*tmpStride; \
uint8_t *d=dst;\
tmpB = *s; s+=tmpStride;\
tmpA = *s; s+=tmpStride;\
tmp0 = *s; s+=tmpStride;\
tmp1 = *s; s+=tmpStride;\
tmp2 = *s; s+=tmpStride;\
tmp3 = *s; s+=tmpStride;\
OP2(*d, (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));d+=dstStride;\
tmp4 = *s; s+=tmpStride;\
OP2(*d, (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));d+=dstStride;\
tmp5 = *s; s+=tmpStride;\
OP2(*d, (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));d+=dstStride;\
tmp6 = *s; s+=tmpStride;\
OP2(*d, (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));d+=dstStride;\
if (h>4) { /* unrolled continuation for h==8 */ \
int tmp7,tmp8,tmp9,tmp10; \
tmp7 = *s; s+=tmpStride;\
OP2(*d, (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));d+=dstStride;\
tmp8 = *s; s+=tmpStride;\
OP2(*d, (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));d+=dstStride;\
tmp9 = *s; s+=tmpStride;\
OP2(*d, (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));d+=dstStride;\
tmp10 = *s; s+=tmpStride;\
OP2(*d, (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));d+=dstStride;\
if (h>8) { /* unrolled continuation for h==16 */ \
int tmp11,tmp12,tmp13,tmp14,tmp15,tmp16,tmp17,tmp18; \
tmp11 = *s; s+=tmpStride;\
OP2(*d , (tmp8 +tmp9 )*20 - (tmp7 +tmp10)*5 + (tmp6 +tmp11));d+=dstStride;\
tmp12 = *s; s+=tmpStride;\
OP2(*d , (tmp9 +tmp10)*20 - (tmp8 +tmp11)*5 + (tmp7 +tmp12));d+=dstStride;\
tmp13 = *s; s+=tmpStride;\
OP2(*d, (tmp10+tmp11)*20 - (tmp9 +tmp12)*5 + (tmp8 +tmp13));d+=dstStride;\
tmp14 = *s; s+=tmpStride;\
OP2(*d, (tmp11+tmp12)*20 - (tmp10+tmp13)*5 + (tmp9 +tmp14));d+=dstStride;\
tmp15 = *s; s+=tmpStride;\
OP2(*d, (tmp12+tmp13)*20 - (tmp11+tmp14)*5 + (tmp10+tmp15));d+=dstStride;\
tmp16 = *s; s+=tmpStride;\
OP2(*d, (tmp13+tmp14)*20 - (tmp12+tmp15)*5 + (tmp11+tmp16));d+=dstStride;\
tmp17 = *s; s+=tmpStride;\
OP2(*d, (tmp14+tmp15)*20 - (tmp13+tmp16)*5 + (tmp12+tmp17));d+=dstStride;\
tmp18 = *s; s+=tmpStride;\
OP2(*d, (tmp15+tmp16)*20 - (tmp14+tmp17)*5 + (tmp13+tmp18));d+=dstStride;\
} \
} \
dst++;\
tmp++;\
}while(--i);\
}\
\
/* Fixed-size wrappers used by the H264_MC-generated functions below. */\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,4,4); \
}\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,8,8); \
}\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,16,16); \
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,4,4); \
}\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,8,8); \
}\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,16,16); \
}\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,4,4); \
}\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,8,8); \
}\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,16,16); \
}\
1384 | |
/*
 * H264_MC(OPNAME, SIZE): expands to the 16 H.264 quarter-pel luma motion
 * compensation entry points OPNAME##h264_qpel##SIZE##_mcXY_c for one block
 * size (SIZE x SIZE).  X and Y are the quarter-pel fractional offsets
 * (0..3) in the horizontal and vertical direction.  Each quarter-pel
 * position is assembled from half-pel planes:
 *   mc00                 plain SIZE x SIZE copy / average of the source
 *   mc20 / mc02          pure horizontal / vertical half-pel lowpass
 *   mc10, mc30           H half-pel averaged with src resp. src+1
 *   mc01, mc03           V half-pel averaged with full_mid resp. full_mid+SIZE
 *   mc22                 2D half-pel (H filter into 16-bit tmp, then V filter)
 *   mc11, mc31, mc13, mc33  average of an H plane and a V plane
 *   mc21, mc23           average of an H plane and the 2D (HV) plane
 *   mc12, mc32           average of a V plane and the 2D (HV) plane
 * The vertical filter needs 2 rows above and 3 rows below the block, hence
 * the SIZE*(SIZE+5) "full" staging buffers filled via copy_block from
 * src - stride*2, with full_mid pointing at the block's first real row.
 * NOTE(review): the _l2_aligned/_l2_aligned2 combiners are the sh4
 * alignment-specialized averaging helpers defined earlier in this file;
 * confirm their alignment preconditions against the stack buffers used here.
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
1521 | |
/*
 * Per-pixel store operators plugged into the lowpass templates above:
 *   op_put / op_avg   clip an intermediate with 5 extra bits of headroom
 *                     via cm[((b) + 16) >> 5]
 *   op2_put / op2_avg clip an intermediate with 10 extra bits of headroom
 *                     (the 2D HV pass) via cm[((b) + 512) >> 10]
 * The avg variants additionally round-average with the pixel already in dst.
 */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

/* Instantiate put_* and avg_* qpel functions for block sizes 4, 8 and 16. */
H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

/* The operator macros are local to the instantiations above. */
#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif /* NOTE(review): closes a conditional that begins before this chunk */
1542 | |
1543 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ | |
1544 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
1545 | |
1546 do{ | |
1547 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9; | |
1548 uint8_t *s = src; | |
1549 src_1 = s[-1]; | |
1550 src0 = *s++; | |
1551 src1 = *s++; | |
1552 src2 = *s++; | |
1553 dst[0]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; | |
1554 src3 = *s++; | |
1555 dst[1]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; | |
1556 src4 = *s++; | |
1557 dst[2]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; | |
1558 src5 = *s++; | |
1559 dst[3]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; | |
1560 src6 = *s++; | |
1561 dst[4]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; | |
1562 src7 = *s++; | |
1563 dst[5]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; | |
1564 src8 = *s++; | |
1565 dst[6]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; | |
1566 src9 = *s++; | |
1567 dst[7]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; | |
1568 dst+=dstStride; | |
1569 src+=srcStride; | |
1570 }while(--h); | |
1571 } | |
1572 | |
1573 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ | |
1574 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
1575 | |
1576 do{ | |
1577 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9; | |
1578 uint8_t *s = src,*d = dst; | |
1579 src_1 = *(s-srcStride); | |
1580 src0 = *s; s+=srcStride; | |
1581 src1 = *s; s+=srcStride; | |
1582 src2 = *s; s+=srcStride; | |
1583 *d= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; d+=dstStride; | |
1584 src3 = *s; s+=srcStride; | |
1585 *d= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; d+=dstStride; | |
1586 src4 = *s; s+=srcStride; | |
1587 *d= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; d+=dstStride; | |
1588 src5 = *s; s+=srcStride; | |
1589 *d= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; d+=dstStride; | |
1590 src6 = *s; s+=srcStride; | |
1591 *d= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; d+=dstStride; | |
1592 src7 = *s; s+=srcStride; | |
1593 *d= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; d+=dstStride; | |
1594 src8 = *s; s+=srcStride; | |
1595 *d= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; d+=dstStride; | |
1596 src9 = *s; | |
1597 *d= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; d+=dstStride; | |
1598 src++; | |
1599 dst++; | |
1600 }while(--w); | |
1601 } | |
1602 | |
/* 8x8 mspel MC, fraction (0,0): plain full-pel block copy. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}
1606 | |
/* 8x8 mspel MC, fraction (1/4,0): rounded average of the source block
 * and the horizontal half-pel lowpass result. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);
}
1612 | |
/* 8x8 mspel MC, fraction (1/2,0): horizontal half-pel lowpass only. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1616 | |
/* 8x8 mspel MC, fraction (3/4,0): rounded average of the source shifted
 * one pixel right and the horizontal half-pel lowpass result. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);
}
1622 | |
/* 8x8 mspel MC, fraction (0,1/2): vertical half-pel lowpass only. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1626 | |
/* 8x8 mspel MC, fraction (1/4,1/2): rounded average of the vertical
 * half-pel plane and the 2D (H then V) half-pel plane. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[88];   /* 11 horizontally filtered rows (one above the block) */
    uint8_t vbuf[64];   /* vertical half-pel plane */
    uint8_t hvbuf[64];  /* 2D half-pel plane */

    wmv2_mspel8_h_lowpass(hbuf, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vbuf, src, 8, stride, 8);
    /* vertical pass over the filtered rows, skipping the extra top row */
    wmv2_mspel8_v_lowpass(hvbuf, hbuf + 8, 8, 8, 8);
    put_pixels8_l2_aligned(dst, vbuf, hvbuf, stride, 8, 8, 8);
}
/* 8x8 mspel MC, fraction (3/4,1/2): rounded average of the vertical
 * half-pel plane taken one pixel right and the 2D half-pel plane. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[88];   /* 11 horizontally filtered rows (one above the block) */
    uint8_t vbuf[64];   /* vertical half-pel plane of src+1 */
    uint8_t hvbuf[64];  /* 2D half-pel plane */

    wmv2_mspel8_h_lowpass(hbuf, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vbuf, src + 1, 8, stride, 8);
    /* vertical pass over the filtered rows, skipping the extra top row */
    wmv2_mspel8_v_lowpass(hvbuf, hbuf + 8, 8, 8, 8);
    put_pixels8_l2_aligned(dst, vbuf, hvbuf, stride, 8, 8, 8);
}
/* 8x8 mspel MC, fraction (1/2,1/2): horizontal half-pel filter over 11
 * rows, then the vertical half-pel filter on top of it. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[88];   /* 11 horizontally filtered rows (one above the block) */

    wmv2_mspel8_h_lowpass(hbuf, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, hbuf + 8, stride, 8, 8);
}