Mercurial > mplayer.hg
comparison libmpeg2/libmpeg2_changes.diff @ 27573:1645692c116e
Sync diff with libmpeg2 update.
author | diego |
---|---|
date | Sat, 13 Sep 2008 17:33:04 +0000 |
parents | 7c3d535aba22 |
children | 83d915449a10 |
comparison
equal
deleted
inserted
replaced
27572:da2271c341ee | 27573:1645692c116e |
---|---|
1 --- libmpeg2/cpu_accel.c 2006-06-16 20:12:26.000000000 +0200 | 1 --- libmpeg2/cpu_accel.c 2006-06-16 20:12:26.000000000 +0200 |
2 +++ libmpeg2/cpu_accel.c 2006-06-16 20:12:50.000000000 +0200 | 2 +++ libmpeg2/cpu_accel.c 2006-06-16 20:12:50.000000000 +0200 |
3 @@ -22,6 +26,7 @@ | 3 @@ -29,9 +33,13 @@ |
4 */ | |
5 | |
6 #include "config.h" | |
7 +#include "cpudetect.h" | |
8 | |
9 #include <inttypes.h> | |
10 | |
11 @@ -30,9 +35,17 @@ | |
12 #include "mpeg2_internal.h" | |
13 | |
14 #ifdef ACCEL_DETECT | |
15 -#ifdef ARCH_X86 | |
16 +#if defined(ARCH_X86) || defined(ARCH_X86_64) | |
17 + | |
18 +/* MPlayer imports libmpeg2 as decoder, which detects MMX / 3DNow! | |
19 + * instructions via assembly. However, it is regarded as duplicated work | |
20 + * in MPlayer, so that we enforce using MPlayer's implementation. | |
21 + */ | |
22 +#define MPLAYER_CPUDETECT | |
23 + | |
24 static inline uint32_t arch_accel (void) | |
25 { | |
26 +#if !defined(MPLAYER_CPUDETECT) | |
27 uint32_t eax, ebx, ecx, edx; | |
28 int AMD; | |
29 uint32_t caps; | |
30 @@ -107,8 +120,22 @@ | |
31 caps |= MPEG2_ACCEL_X86_MMXEXT; | |
32 | |
33 return caps; | |
34 +#else /* MPLAYER_CPUDETECT: Use MPlayer's CPU capability property. */ | |
35 + caps = 0; | |
36 + if (gCpuCaps.hasMMX) | |
37 + caps |= MPEG2_ACCEL_X86_MMX; | |
38 + if (gCpuCaps.hasSSE2) | |
39 + caps |= MPEG2_ACCEL_X86_SSE2; | |
40 + if (gCpuCaps.hasMMX2) | |
41 + caps |= MPEG2_ACCEL_X86_MMXEXT; | |
42 + if (gCpuCaps.has3DNow) | |
43 + caps |= MPEG2_ACCEL_X86_3DNOW; | |
44 + | |
45 + return caps; | |
46 + | |
47 +#endif /* MPLAYER_CPUDETECT */ | |
48 } | |
49 -#endif /* ARCH_X86 */ | |
50 +#endif /* ARCH_X86 || ARCH_X86_64 */ | |
51 | |
52 #if defined(ARCH_PPC) || defined(ARCH_SPARC) | |
53 #include <signal.h> | |
54 @@ -214,7 +241,7 @@ | |
55 | |
56 accel = 0; | |
57 #ifdef ACCEL_DETECT | |
58 -#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) | |
59 +#if defined (ARCH_X86) || defined (ARCH_X86_64) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) | |
60 accel = arch_accel (); | |
61 #endif | |
62 #endif | |
63 --- libmpeg2/cpu_state.c 2006-06-16 20:12:26.000000000 +0200 | |
64 +++ libmpeg2/cpu_state.c 2006-06-16 20:12:50.000000000 +0200 | |
65 @@ -29,14 +33,14 @@ | |
66 #include "mpeg2.h" | |
67 #include "attributes.h" | 4 #include "attributes.h" |
68 #include "mpeg2_internal.h" | 5 #include "mpeg2_internal.h" |
69 -#ifdef ARCH_X86 | 6 |
70 +#if defined(ARCH_X86) || defined(ARCH_X86_64) | 7 +#include "cpudetect.h" |
71 #include "mmx.h" | 8 + |
72 #endif | 9 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
73 | 10 static inline uint32_t arch_accel (uint32_t accel) |
74 void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL; | |
75 void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL; | |
76 | |
77 -#ifdef ARCH_X86 | |
78 +#if defined(ARCH_X86) || defined(ARCH_X86_64) | |
79 static void state_restore_mmx (cpu_state_t * state) | |
80 { | 11 { |
81 emms (); | 12 +/* Use MPlayer CPU detection instead of libmpeg2 variant. */ |
82 @@ -44,18 +48,18 @@ | 13 +#if 0 |
83 #endif | 14 if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT)) |
84 | 15 accel |= MPEG2_ACCEL_X86_MMX; |
85 #ifdef ARCH_PPC | 16 |
86 -#ifdef HAVE_ALTIVEC_H /* gnu */ | 17 @@ -124,6 +132,21 @@ |
87 -#define LI(a,b) "li " #a "," #b "\n\t" | 18 #endif /* ACCEL_DETECT */ |
88 -#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" | 19 |
89 -#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t" | 20 return accel; |
90 -#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t" | 21 + |
91 -#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t" | 22 +#else /* 0 */ |
92 -#else /* apple */ | 23 + accel = 0; |
93 +#if defined(__APPLE_CC__) /* apple */ | 24 + if (gCpuCaps.hasMMX) |
94 #define LI(a,b) "li r" #a "," #b "\n\t" | 25 + accel |= MPEG2_ACCEL_X86_MMX; |
95 #define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t" | 26 + if (gCpuCaps.hasSSE2) |
96 #define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t" | 27 + accel |= MPEG2_ACCEL_X86_SSE2; |
97 #define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t" | 28 + if (gCpuCaps.hasMMX2) |
98 #define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t" | 29 + accel |= MPEG2_ACCEL_X86_MMXEXT; |
99 +#else /* gnu */ | 30 + if (gCpuCaps.has3DNow) |
100 +#define LI(a,b) "li " #a "," #b "\n\t" | 31 + accel |= MPEG2_ACCEL_X86_3DNOW; |
101 +#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" | 32 + |
102 +#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t" | 33 + return accel; |
103 +#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t" | 34 + |
104 +#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t" | 35 +#endif /* 0 */ |
105 #endif | 36 } |
106 | 37 #endif /* ARCH_X86 || ARCH_X86_64 */ |
107 static void state_save_altivec (cpu_state_t * state) | 38 |
108 @@ -115,7 +119,7 @@ | |
109 | |
110 void mpeg2_cpu_state_init (uint32_t accel) | |
111 { | |
112 -#ifdef ARCH_X86 | |
113 +#if defined(ARCH_X86) || defined(ARCH_X86_64) | |
114 if (accel & MPEG2_ACCEL_X86_MMX) { | |
115 mpeg2_cpu_state_restore = state_restore_mmx; | |
116 } | |
117 --- libmpeg2/decode.c 2006-06-16 20:12:26.000000000 +0200 | 39 --- libmpeg2/decode.c 2006-06-16 20:12:26.000000000 +0200 |
118 +++ libmpeg2/decode.c 2006-06-16 20:12:50.000000000 +0200 | 40 +++ libmpeg2/decode.c 2006-06-16 20:12:50.000000000 +0200 |
119 @@ -351,6 +355,15 @@ | 41 @@ -345,6 +349,15 @@ |
120 fbuf->buf[1] = buf[1]; | 42 fbuf->buf[1] = buf[1]; |
121 fbuf->buf[2] = buf[2]; | 43 fbuf->buf[2] = buf[2]; |
122 fbuf->id = id; | 44 fbuf->id = id; |
123 + // HACK! FIXME! At first I frame, copy pointers to prediction frame too! | 45 + // HACK! FIXME! At first I frame, copy pointers to prediction frame too! |
124 + if (mpeg2dec->custom_fbuf && !mpeg2dec->fbuf[1]->buf[0]){ | 46 + if (mpeg2dec->custom_fbuf && !mpeg2dec->fbuf[1]->buf[0]){ |
125 + mpeg2dec->fbuf[1]->buf[0]=buf[0]; | 47 + mpeg2dec->fbuf[1]->buf[0]=buf[0]; |
126 + mpeg2dec->fbuf[1]->buf[1]=buf[1]; | 48 + mpeg2dec->fbuf[1]->buf[1]=buf[1]; |
127 + mpeg2dec->fbuf[1]->buf[2]=buf[2]; | 49 + mpeg2dec->fbuf[1]->buf[2]=buf[2]; |
128 + mpeg2dec->fbuf[1]->id=NULL; | 50 + mpeg2dec->fbuf[1]->id=NULL; |
129 + } | 51 + } |
130 +// printf("libmpeg2: FBUF 0:%p 1:%p 2:%p\n", | 52 +// printf("libmpeg2: FBUF 0:%p 1:%p 2:%p\n", |
131 +// mpeg2dec->fbuf[0]->buf[0],mpeg2dec->fbuf[1]->buf[0],mpeg2dec->fbuf[2]->buf[0]); | 53 +// mpeg2dec->fbuf[0]->buf[0],mpeg2dec->fbuf[1]->buf[0],mpeg2dec->fbuf[2]->buf[0]); |
132 } | 54 } |
133 | 55 |
134 void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf) | 56 void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf) |
135 --- libmpeg2/header.c 2006-06-16 20:12:26.000000000 +0200 | 57 --- libmpeg2/header.c 2006-06-16 20:12:26.000000000 +0200 |
136 +++ libmpeg2/header.c 2006-06-16 20:12:50.000000000 +0200 | 58 +++ libmpeg2/header.c 2006-06-16 20:12:50.000000000 +0200 |
142 + memset(&mpeg2dec->fbuf_alloc[1].fbuf, 0, sizeof(mpeg2_fbuf_t)); | 64 + memset(&mpeg2dec->fbuf_alloc[1].fbuf, 0, sizeof(mpeg2_fbuf_t)); |
143 + memset(&mpeg2dec->fbuf_alloc[2].fbuf, 0, sizeof(mpeg2_fbuf_t)); | 65 + memset(&mpeg2dec->fbuf_alloc[2].fbuf, 0, sizeof(mpeg2_fbuf_t)); |
144 mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf; | 66 mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf; |
145 mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf; | 67 mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf; |
146 mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf; | 68 mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf; |
147 @@ -553,6 +560,7 @@ | 69 @@ -872,6 +879,7 @@ |
148 if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { | 70 mpeg2dec->scaled[idx] = decoder->q_scale_type; |
149 picture->nb_fields = (buffer[3] & 2) ? 3 : 2; | |
150 flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; | |
151 + flags |= (buffer[3] & 2) ? PIC_FLAG_REPEAT_FIRST_FIELD : 0; | |
152 } else | |
153 picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2; | |
154 break; | |
155 @@ -801,6 +809,7 @@ | |
156 mpeg2dec->scaled[index] = mpeg2dec->q_scale_type; | |
157 for (i = 0; i < 32; i++) { | 71 for (i = 0; i < 32; i++) { |
158 k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1); | 72 k = decoder->q_scale_type ? non_linear_scale[i] : (i << 1); |
159 + decoder->quantizer_scales[i] = k; | 73 + decoder->quantizer_scales[i] = k; |
160 for (j = 0; j < 64; j++) | 74 for (j = 0; j < 64; j++) |
161 decoder->quantizer_prescale[index][i][j] = | 75 decoder->quantizer_prescale[idx][i][j] = |
162 k * mpeg2dec->quantizer_matrix[index][j]; | 76 k * mpeg2dec->quantizer_matrix[idx][j]; |
163 --- libmpeg2/idct.c (revision 26652) | 77 --- libmpeg2/idct.c (revision 26652) |
164 +++ libmpeg2/idct.c (working copy) | 78 +++ libmpeg2/idct.c (working copy) |
165 @@ -235,30 +239,40 @@ | 79 @@ -235,34 +239,40 @@ |
166 | 80 |
167 void mpeg2_idct_init (uint32_t accel) | 81 void mpeg2_idct_init (uint32_t accel) |
168 { | 82 { |
169 -#ifdef ARCH_X86 | 83 -#ifdef ARCH_X86 |
170 +#ifdef HAVE_SSE2 | 84 +#ifdef HAVE_SSE2 |
171 + if (accel & MPEG2_ACCEL_X86_SSE2) { | 85 if (accel & MPEG2_ACCEL_X86_SSE2) { |
172 + mpeg2_idct_copy = mpeg2_idct_copy_sse2; | 86 mpeg2_idct_copy = mpeg2_idct_copy_sse2; |
173 + mpeg2_idct_add = mpeg2_idct_add_sse2; | 87 mpeg2_idct_add = mpeg2_idct_add_sse2; |
174 + mpeg2_idct_mmx_init (); | 88 mpeg2_idct_mmx_init (); |
89 - } else if (accel & MPEG2_ACCEL_X86_MMXEXT) { | |
175 + } else | 90 + } else |
176 +#elif HAVE_MMX2 | 91 +#elif HAVE_MMX2 |
177 if (accel & MPEG2_ACCEL_X86_MMXEXT) { | 92 + if (accel & MPEG2_ACCEL_X86_MMXEXT) { |
178 mpeg2_idct_copy = mpeg2_idct_copy_mmxext; | 93 mpeg2_idct_copy = mpeg2_idct_copy_mmxext; |
179 mpeg2_idct_add = mpeg2_idct_add_mmxext; | 94 mpeg2_idct_add = mpeg2_idct_add_mmxext; |
180 mpeg2_idct_mmx_init (); | 95 mpeg2_idct_mmx_init (); |
181 - } else if (accel & MPEG2_ACCEL_X86_MMX) { | 96 - } else if (accel & MPEG2_ACCEL_X86_MMX) { |
182 + } else | 97 + } else |
206 +#elif ARCH_ALPHA | 121 +#elif ARCH_ALPHA |
207 + if (accel & MPEG2_ACCEL_ALPHA) { | 122 + if (accel & MPEG2_ACCEL_ALPHA) { |
208 int i; | 123 int i; |
209 | 124 |
210 mpeg2_idct_copy = mpeg2_idct_copy_alpha; | 125 mpeg2_idct_copy = mpeg2_idct_copy_alpha; |
211 --- libmpeg2/idct_mmx.c 2006-06-16 20:12:26.000000000 +0200 | |
212 +++ libmpeg2/idct_mmx.c 2006-06-16 20:12:50.000000000 +0200 | |
213 @@ -23,7 +27,7 @@ | |
214 | |
215 #include "config.h" | |
216 | |
217 -#ifdef ARCH_X86 | |
218 +#if defined(ARCH_X86) || defined(ARCH_X86_64) | |
219 | |
220 #include <inttypes.h> | |
221 | |
222 --- libmpeg2/motion_comp.c 2006-06-16 20:12:26.000000000 +0200 | 126 --- libmpeg2/motion_comp.c 2006-06-16 20:12:26.000000000 +0200 |
223 +++ libmpeg2/motion_comp.c 2006-06-16 20:12:50.000000000 +0200 | 127 +++ libmpeg2/motion_comp.c 2006-06-16 20:12:50.000000000 +0200 |
224 @@ -33,16 +37,22 @@ | 128 @@ -33,16 +37,22 @@ |
225 | 129 |
226 void mpeg2_mc_init (uint32_t accel) | 130 void mpeg2_mc_init (uint32_t accel) |
246 -#ifdef ARCH_PPC | 150 -#ifdef ARCH_PPC |
247 +#ifdef HAVE_ALTIVEC | 151 +#ifdef HAVE_ALTIVEC |
248 if (accel & MPEG2_ACCEL_PPC_ALTIVEC) | 152 if (accel & MPEG2_ACCEL_PPC_ALTIVEC) |
249 mpeg2_mc = mpeg2_mc_altivec; | 153 mpeg2_mc = mpeg2_mc_altivec; |
250 else | 154 else |
251 @@ -52,11 +62,21 @@ | 155 @@ -52,15 +62,20 @@ |
252 mpeg2_mc = mpeg2_mc_alpha; | 156 mpeg2_mc = mpeg2_mc_alpha; |
253 else | 157 else |
254 #endif | 158 #endif |
255 -#ifdef ARCH_SPARC | 159 -#ifdef ARCH_SPARC |
256 +#ifdef HAVE_VIS | 160 +#ifdef HAVE_VIS |
257 if (accel & MPEG2_ACCEL_SPARC_VIS) | 161 if (accel & MPEG2_ACCEL_SPARC_VIS) |
258 mpeg2_mc = mpeg2_mc_vis; | 162 mpeg2_mc = mpeg2_mc_vis; |
259 else | 163 else |
260 #endif | 164 #endif |
261 +#ifdef ARCH_ARM | 165 #ifdef ARCH_ARM |
166 - if (accel & MPEG2_ACCEL_ARM) { | |
262 +#ifdef HAVE_IWMMXT | 167 +#ifdef HAVE_IWMMXT |
263 + if (accel & MPEG2_ACCEL_ARM_IWMMXT) | 168 + if (accel & MPEG2_ACCEL_ARM_IWMMXT) |
264 + mpeg2_mc = mpeg2_mc_iwmmxt; | 169 + mpeg2_mc = mpeg2_mc_iwmmxt; |
265 + else | 170 + else |
266 +#endif | 171 +#endif |
267 + if (accel & MPEG2_ACCEL_ARM) | 172 + if (accel & MPEG2_ACCEL_ARM) |
268 + mpeg2_mc = mpeg2_mc_arm; | 173 mpeg2_mc = mpeg2_mc_arm; |
174 - } else | |
269 + else | 175 + else |
270 +#endif | 176 #endif |
271 mpeg2_mc = mpeg2_mc_c; | 177 mpeg2_mc = mpeg2_mc_c; |
272 } | 178 } |
273 | |
274 --- libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:26.000000000 +0200 | |
275 +++ libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:50.000000000 +0200 | |
276 @@ -23,7 +27,7 @@ | |
277 | |
278 #include "config.h" | |
279 | |
280 -#ifdef ARCH_X86 | |
281 +#if defined(ARCH_X86) || defined(ARCH_X86_64) | |
282 | |
283 #include <inttypes.h> | |
284 | |
285 --- include/mpeg2.h 2006-06-16 20:12:26.000000000 +0200 | 179 --- include/mpeg2.h 2006-06-16 20:12:26.000000000 +0200 |
286 +++ libmpeg2/mpeg2.h 2006-06-16 20:12:50.000000000 +0200 | 180 +++ libmpeg2/mpeg2.h 2006-06-16 20:12:50.000000000 +0200 |
287 @@ -82,6 +86,7 @@ | 181 @@ -164,6 +168,7 @@ |
288 #define PIC_FLAG_COMPOSITE_DISPLAY 32 | |
289 #define PIC_FLAG_SKIP 64 | |
290 #define PIC_FLAG_TAGS 128 | |
291 +#define PIC_FLAG_REPEAT_FIRST_FIELD 256 | |
292 #define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000 | |
293 | |
294 typedef struct mpeg2_picture_s { | |
295 @@ -154,11 +159,14 @@ | |
296 #define MPEG2_ACCEL_X86_MMX 1 | |
297 #define MPEG2_ACCEL_X86_3DNOW 2 | |
298 #define MPEG2_ACCEL_X86_MMXEXT 4 | |
299 +#define MPEG2_ACCEL_X86_SSE2 8 | |
300 #define MPEG2_ACCEL_PPC_ALTIVEC 1 | |
301 #define MPEG2_ACCEL_ALPHA 1 | |
302 #define MPEG2_ACCEL_ALPHA_MVI 2 | |
303 #define MPEG2_ACCEL_SPARC_VIS 1 | 182 #define MPEG2_ACCEL_SPARC_VIS 1 |
304 #define MPEG2_ACCEL_SPARC_VIS2 2 | 183 #define MPEG2_ACCEL_SPARC_VIS2 2 |
305 +#define MPEG2_ACCEL_ARM 1 | 184 #define MPEG2_ACCEL_ARM 1 |
306 +#define MPEG2_ACCEL_ARM_IWMMXT 2 | 185 +#define MPEG2_ACCEL_ARM_IWMMXT 2 |
307 #define MPEG2_ACCEL_DETECT 0x80000000 | 186 #define MPEG2_ACCEL_DETECT 0x80000000 |
308 | 187 |
309 uint32_t mpeg2_accel (uint32_t accel); | 188 uint32_t mpeg2_accel (uint32_t accel); |
310 --- libmpeg2/mpeg2_internal.h 2006-06-16 20:12:26.000000000 +0200 | 189 --- libmpeg2/mpeg2_internal.h 2006-06-16 20:12:26.000000000 +0200 |
311 +++ libmpeg2/mpeg2_internal.h 2006-06-16 20:12:50.000000000 +0200 | 190 +++ libmpeg2/mpeg2_internal.h 2006-06-16 20:12:50.000000000 +0200 |
312 @@ -144,6 +148,11 @@ | 191 @@ -152,6 +156,11 @@ |
313 int second_field; | 192 |
314 | 193 /* XXX: stuff due to xine shit */ |
315 int mpeg1; | 194 int8_t q_scale_type; |
316 + | 195 + |
317 + int quantizer_scales[32]; | 196 + int quantizer_scales[32]; |
318 + int quantizer_scale; | 197 + int quantizer_scale; |
319 + char* quant_store; | 198 + char* quant_store; |
320 + int quant_stride; | 199 + int quant_stride; |
321 }; | 200 }; |
322 | 201 |
323 typedef struct { | 202 typedef struct { |
324 @@ -214,6 +223,9 @@ | 203 @@ -223,6 +232,9 @@ |
325 int8_t q_scale_type, scaled[4]; | 204 //int8_t q_scale_type, scaled[4]; |
326 uint8_t quantizer_matrix[4][64]; | 205 uint8_t quantizer_matrix[4][64]; |
327 uint8_t new_quantizer_matrix[4][64]; | 206 uint8_t new_quantizer_matrix[4][64]; |
328 + | 207 + |
329 + unsigned char *pending_buffer; | 208 + unsigned char *pending_buffer; |
330 + int pending_length; | 209 + int pending_length; |
331 }; | 210 }; |
332 | 211 |
333 typedef struct { | 212 typedef struct { |
334 @@ -298,3 +313,5 @@ | 213 @@ -313,5 +325,6 @@ |
335 extern mpeg2_mc_t mpeg2_mc_altivec; | |
336 extern mpeg2_mc_t mpeg2_mc_alpha; | 214 extern mpeg2_mc_t mpeg2_mc_alpha; |
337 extern mpeg2_mc_t mpeg2_mc_vis; | 215 extern mpeg2_mc_t mpeg2_mc_vis; |
338 +extern mpeg2_mc_t mpeg2_mc_arm; | 216 extern mpeg2_mc_t mpeg2_mc_arm; |
339 +extern mpeg2_mc_t mpeg2_mc_iwmmxt; | 217 +extern mpeg2_mc_t mpeg2_mc_iwmmxt; |
218 | |
219 #endif /* LIBMPEG2_MPEG2_INTERNAL_H */ | |
340 --- libmpeg2/slice.c 2006-06-16 20:12:26.000000000 +0200 | 220 --- libmpeg2/slice.c 2006-06-16 20:12:26.000000000 +0200 |
341 +++ libmpeg2/slice.c 2006-06-16 20:12:50.000000000 +0200 | 221 +++ libmpeg2/slice.c 2006-06-16 20:12:50.000000000 +0200 |
342 @@ -142,6 +146,7 @@ | 222 @@ -142,6 +146,7 @@ |
343 | 223 |
344 quantizer_scale_code = UBITS (bit_buf, 5); | 224 quantizer_scale_code = UBITS (bit_buf, 5); |
345 DUMPBITS (bit_buf, bits, 5); | 225 DUMPBITS (bit_buf, bits, 5); |
346 + decoder->quantizer_scale = decoder->quantizer_scales[quantizer_scale_code]; | 226 + decoder->quantizer_scale = decoder->quantizer_scales[quantizer_scale_code]; |
347 | 227 |
348 decoder->quantizer_matrix[0] = | 228 decoder->quantizer_matrix[0] = |
349 decoder->quantizer_prescale[0][quantizer_scale_code]; | 229 decoder->quantizer_prescale[0][quantizer_scale_code]; |
350 @@ -1564,6 +1569,18 @@ | 230 @@ -1564,6 +1569,24 @@ |
351 | 231 |
352 #define NEXT_MACROBLOCK \ | 232 #define NEXT_MACROBLOCK \ |
353 do { \ | 233 do { \ |
354 + if(decoder->quant_store) { \ | 234 + if(decoder->quant_store) { \ |
355 + if (decoder->picture_structure == TOP_FIELD) \ | 235 + if (decoder->picture_structure == TOP_FIELD) \ |
356 + decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \ | 236 + decoder->quant_store[2 * decoder->quant_stride \ |
357 + +(decoder->offset>>4)] = decoder->quantizer_scale; \ | 237 + * (decoder->v_offset >> 4) \ |
358 + else if (decoder->picture_structure == BOTTOM_FIELD) \ | 238 + + (decoder->offset >> 4)] \ |
359 + decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \ | 239 + = decoder->quantizer_scale; \ |
360 + + decoder->quant_stride \ | 240 + else if (decoder->picture_structure == BOTTOM_FIELD) \ |
361 + +(decoder->offset>>4)] = decoder->quantizer_scale; \ | 241 + decoder->quant_store[2 * decoder->quant_stride \ |
362 + else \ | 242 + * (decoder->v_offset >> 4) \ |
363 + decoder->quant_store[decoder->quant_stride*(decoder->v_offset>>4) \ | 243 + + decoder->quant_stride \ |
364 + +(decoder->offset>>4)] = decoder->quantizer_scale; \ | 244 + + (decoder->offset >> 4)] \ |
365 + } \ | 245 + = decoder->quantizer_scale; \ |
246 + else \ | |
247 + decoder->quant_store[decoder->quant_stride \ | |
248 + * (decoder->v_offset >> 4) \ | |
249 + + (decoder->offset >> 4)] \ | |
250 + = decoder->quantizer_scale; \ | |
251 + } \ | |
366 decoder->offset += 16; \ | 252 decoder->offset += 16; \ |
367 if (decoder->offset == decoder->width) { \ | 253 if (decoder->offset == decoder->width) { \ |
368 do { /* just so we can use the break statement */ \ | 254 do { /* just so we can use the break statement */ \ |
369 @@ -1587,6 +1604,12 @@ | |
370 } \ | |
371 } while (0) | |
372 | |
373 +static void motion_dummy (mpeg2_decoder_t * const decoder, | |
374 + motion_t * const motion, | |
375 + mpeg2_mc_fct * const * const table) | |
376 +{ | |
377 +} | |
378 + | |
379 void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], | |
380 uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) | |
381 { | |
382 @@ -1644,7 +1667,9 @@ | |
383 | |
384 if (decoder->mpeg1) { | |
385 decoder->motion_parser[0] = motion_zero_420; | |
386 + decoder->motion_parser[MC_FIELD] = motion_dummy; | |
387 decoder->motion_parser[MC_FRAME] = motion_mp1; | |
388 + decoder->motion_parser[MC_DMV] = motion_dummy; | |
389 decoder->motion_parser[4] = motion_reuse_420; | |
390 } else if (decoder->picture_structure == FRAME_PICTURE) { | |
391 if (decoder->chroma_format == 0) { | |
392 --- libmpeg2/idct_altivec.c 2004/08/02 11:26:43 12933 | |
393 +++ libmpeg2/idct_altivec.c 2005/05/15 20:11:34 15484 | |
394 @@ -41,7 +41,7 @@ | |
395 typedef vector signed int vector_s32_t; | |
396 typedef vector unsigned int vector_u32_t; | |
397 | |
398 -#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) | |
399 +#if defined(HAVE_ALTIVEC_H) && !defined(__APPLE_CC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) | |
400 /* work around gcc <3.3 vec_mergel bug */ | |
401 static inline vector_s16_t my_vec_mergel (vector_s16_t const A, | |
402 vector_s16_t const B) | |
403 Index: libmpeg2/motion_comp_arm.c | |
404 =================================================================== | |
405 --- libmpeg2/motion_comp_arm.c (revision 0) | |
406 +++ libmpeg2/motion_comp_arm.c (revision 0) | |
407 @@ -0,0 +1,187 @@ | |
408 +/* | |
409 + * motion_comp_arm.c | |
410 + * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> | |
411 + * | |
412 + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
413 + * See http://libmpeg2.sourceforge.net/ for updates. | |
414 + * | |
415 + * mpeg2dec is free software; you can redistribute it and/or modify | |
416 + * it under the terms of the GNU General Public License as published by | |
417 + * the Free Software Foundation; either version 2 of the License, or | |
418 + * (at your option) any later version. | |
419 + * | |
420 + * mpeg2dec is distributed in the hope that it will be useful, | |
421 + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
422 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
423 + * GNU General Public License for more details. | |
424 + * | |
425 + * You should have received a copy of the GNU General Public License | |
426 + * along with this program; if not, write to the Free Software | |
427 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
428 + */ | |
429 + | |
430 +#include "config.h" | |
431 + | |
432 +#ifdef ARCH_ARM | |
433 + | |
434 +#include <inttypes.h> | |
435 + | |
436 +#include "mpeg2.h" | |
437 +#include "attributes.h" | |
438 +#include "mpeg2_internal.h" | |
439 + | |
440 +#define avg2(a,b) ((a+b+1)>>1) | |
441 +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) | |
442 + | |
443 +#define predict_o(i) (ref[i]) | |
444 +#define predict_x(i) (avg2 (ref[i], ref[i+1])) | |
445 +#define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) | |
446 +#define predict_xy(i) (avg4 (ref[i], ref[i+1], \ | |
447 + (ref+stride)[i], (ref+stride)[i+1])) | |
448 + | |
449 +#define put(predictor,i) dest[i] = predictor (i) | |
450 +#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i]) | |
451 + | |
452 +/* mc function template */ | |
453 + | |
454 +#define MC_FUNC(op,xy) \ | |
455 +static void inline MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \ | |
456 + const int stride, int height) \ | |
457 +{ \ | |
458 + do { \ | |
459 + op (predict_##xy, 0); \ | |
460 + op (predict_##xy, 1); \ | |
461 + op (predict_##xy, 2); \ | |
462 + op (predict_##xy, 3); \ | |
463 + op (predict_##xy, 4); \ | |
464 + op (predict_##xy, 5); \ | |
465 + op (predict_##xy, 6); \ | |
466 + op (predict_##xy, 7); \ | |
467 + op (predict_##xy, 8); \ | |
468 + op (predict_##xy, 9); \ | |
469 + op (predict_##xy, 10); \ | |
470 + op (predict_##xy, 11); \ | |
471 + op (predict_##xy, 12); \ | |
472 + op (predict_##xy, 13); \ | |
473 + op (predict_##xy, 14); \ | |
474 + op (predict_##xy, 15); \ | |
475 + ref += stride; \ | |
476 + dest += stride; \ | |
477 + } while (--height); \ | |
478 +} \ | |
479 +static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \ | |
480 + const int stride, int height) \ | |
481 +{ \ | |
482 + do { \ | |
483 + op (predict_##xy, 0); \ | |
484 + op (predict_##xy, 1); \ | |
485 + op (predict_##xy, 2); \ | |
486 + op (predict_##xy, 3); \ | |
487 + op (predict_##xy, 4); \ | |
488 + op (predict_##xy, 5); \ | |
489 + op (predict_##xy, 6); \ | |
490 + op (predict_##xy, 7); \ | |
491 + ref += stride; \ | |
492 + dest += stride; \ | |
493 + } while (--height); \ | |
494 +} \ | |
495 +/* definitions of the actual mc functions */ | |
496 + | |
497 +MC_FUNC (put,o) | |
498 +MC_FUNC (avg,o) | |
499 +MC_FUNC (put,x) | |
500 +MC_FUNC (avg,x) | |
501 +MC_FUNC (put,y) | |
502 +MC_FUNC (avg,y) | |
503 +MC_FUNC (put,xy) | |
504 +MC_FUNC (avg,xy) | |
505 + | |
506 + | |
507 +extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref, | |
508 + int stride, int height); | |
509 + | |
510 +extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref, | |
511 + int stride, int height); | |
512 + | |
513 + | |
514 +static void MC_put_y_16_arm (uint8_t * dest, const uint8_t * ref, | |
515 + int stride, int height) | |
516 +{ | |
517 + MC_put_y_16_c(dest, ref, stride, height); | |
518 +} | |
519 + | |
520 +static void MC_put_xy_16_arm (uint8_t * dest, const uint8_t * ref, | |
521 + int stride, int height) | |
522 +{ | |
523 + MC_put_xy_16_c(dest, ref, stride, height); | |
524 +} | |
525 + | |
526 +extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref, | |
527 + int stride, int height); | |
528 + | |
529 +extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref, | |
530 + int stride, int height); | |
531 + | |
532 +static void MC_put_y_8_arm (uint8_t * dest, const uint8_t * ref, | |
533 + int stride, int height) | |
534 +{ | |
535 + MC_put_y_8_c(dest, ref, stride, height); | |
536 +} | |
537 + | |
538 +static void MC_put_xy_8_arm (uint8_t * dest, const uint8_t * ref, | |
539 + int stride, int height) | |
540 +{ | |
541 + MC_put_xy_8_c(dest, ref, stride, height); | |
542 +} | |
543 + | |
544 +static void MC_avg_o_16_arm (uint8_t * dest, const uint8_t * ref, | |
545 + int stride, int height) | |
546 +{ | |
547 + MC_avg_o_16_c(dest, ref, stride, height); | |
548 +} | |
549 + | |
550 +static void MC_avg_x_16_arm (uint8_t * dest, const uint8_t * ref, | |
551 + int stride, int height) | |
552 +{ | |
553 + MC_avg_x_16_c(dest, ref, stride, height); | |
554 +} | |
555 + | |
556 +static void MC_avg_y_16_arm (uint8_t * dest, const uint8_t * ref, | |
557 + int stride, int height) | |
558 +{ | |
559 + MC_avg_y_16_c(dest, ref, stride, height); | |
560 +} | |
561 + | |
562 +static void MC_avg_xy_16_arm (uint8_t * dest, const uint8_t * ref, | |
563 + int stride, int height) | |
564 +{ | |
565 + MC_avg_xy_16_c(dest, ref, stride, height); | |
566 +} | |
567 + | |
568 +static void MC_avg_o_8_arm (uint8_t * dest, const uint8_t * ref, | |
569 + int stride, int height) | |
570 +{ | |
571 + MC_avg_o_8_c(dest, ref, stride, height); | |
572 +} | |
573 + | |
574 +static void MC_avg_x_8_arm (uint8_t * dest, const uint8_t * ref, | |
575 + int stride, int height) | |
576 +{ | |
577 + MC_avg_x_8_c(dest, ref, stride, height); | |
578 +} | |
579 + | |
580 +static void MC_avg_y_8_arm (uint8_t * dest, const uint8_t * ref, | |
581 + int stride, int height) | |
582 +{ | |
583 + MC_avg_y_8_c(dest, ref, stride, height); | |
584 +} | |
585 + | |
586 +static void MC_avg_xy_8_arm (uint8_t * dest, const uint8_t * ref, | |
587 + int stride, int height) | |
588 +{ | |
589 + MC_avg_xy_8_c(dest, ref, stride, height); | |
590 +} | |
591 + | |
592 +MPEG2_MC_EXTERN (arm) | |
593 + | |
594 +#endif | |
595 Index: libmpeg2/motion_comp_arm_s.S | |
596 =================================================================== | |
597 --- libmpeg2/motion_comp_arm_s.S (revision 0) | |
598 +++ libmpeg2/motion_comp_arm_s.S (revision 0) | |
599 @@ -0,0 +1,322 @@ | |
600 +@ motion_comp_arm_s.S | |
601 +@ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> | |
602 +@ | |
603 +@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
604 +@ See http://libmpeg2.sourceforge.net/ for updates. | |
605 +@ | |
606 +@ mpeg2dec is free software; you can redistribute it and/or modify | |
607 +@ it under the terms of the GNU General Public License as published by | |
608 +@ the Free Software Foundation; either version 2 of the License, or | |
609 +@ (at your option) any later version. | |
610 +@ | |
611 +@ mpeg2dec is distributed in the hope that it will be useful, | |
612 +@ but WITHOUT ANY WARRANTY; without even the implied warranty of | |
613 +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
614 +@ GNU General Public License for more details. | |
615 +@ | |
616 +@ You should have received a copy of the GNU General Public License | |
617 +@ along with this program; if not, write to the Free Software | |
618 +@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
619 + | |
620 + .text | |
621 + | |
622 +@ ---------------------------------------------------------------- | |
623 + .align | |
624 + .global MC_put_o_16_arm | |
625 +MC_put_o_16_arm: | |
626 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) | |
627 + pld [r1] | |
628 + stmfd sp!, {r4-r11, lr} @ R14 is also called LR | |
629 + and r4, r1, #3 | |
630 + adr r5, MC_put_o_16_arm_align_jt | |
631 + add r5, r5, r4, lsl #2 | |
632 + ldr pc, [r5] | |
633 + | |
634 +MC_put_o_16_arm_align0: | |
635 + ldmia r1, {r4-r7} | |
636 + add r1, r1, r2 | |
637 + pld [r1] | |
638 + stmia r0, {r4-r7} | |
639 + subs r3, r3, #1 | |
640 + add r0, r0, r2 | |
641 + bne MC_put_o_16_arm_align0 | |
642 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. | |
643 + | |
644 +.macro PROC shift | |
645 + ldmia r1, {r4-r8} | |
646 + add r1, r1, r2 | |
647 + mov r9, r4, lsr #(\shift) | |
648 + pld [r1] | |
649 + mov r10, r5, lsr #(\shift) | |
650 + orr r9, r9, r5, lsl #(32-\shift) | |
651 + mov r11, r6, lsr #(\shift) | |
652 + orr r10, r10, r6, lsl #(32-\shift) | |
653 + mov r12, r7, lsr #(\shift) | |
654 + orr r11, r11, r7, lsl #(32-\shift) | |
655 + orr r12, r12, r8, lsl #(32-\shift) | |
656 + stmia r0, {r9-r12} | |
657 + subs r3, r3, #1 | |
658 + add r0, r0, r2 | |
659 +.endm | |
660 + | |
661 +MC_put_o_16_arm_align1: | |
662 + and r1, r1, #0xFFFFFFFC | |
663 +1: PROC(8) | |
664 + bne 1b | |
665 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. | |
666 +MC_put_o_16_arm_align2: | |
667 + and r1, r1, #0xFFFFFFFC | |
668 +1: PROC(16) | |
669 + bne 1b | |
670 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. | |
671 +MC_put_o_16_arm_align3: | |
672 + and r1, r1, #0xFFFFFFFC | |
673 +1: PROC(24) | |
674 + bne 1b | |
675 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. | |
676 +MC_put_o_16_arm_align_jt: | |
677 + .word MC_put_o_16_arm_align0 | |
678 + .word MC_put_o_16_arm_align1 | |
679 + .word MC_put_o_16_arm_align2 | |
680 + .word MC_put_o_16_arm_align3 | |
681 + | |
682 +@ ---------------------------------------------------------------- | |
683 + .align | |
684 + .global MC_put_o_8_arm | |
685 +MC_put_o_8_arm: | |
686 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) -- copies 8 bytes per row from r1 to r0; r2 is added to both pointers after each row, r3 counts rows | |
687 + pld [r1] @ prefetch hint for the first source row | |
688 + stmfd sp!, {r4-r10, lr} @ save r4-r10 and the return address (R14 is also called LR) | |
689 + and r4, r1, #3 @ r4 = source pointer offset within its word (0..3) | |
690 + adr r5, MC_put_o_8_arm_align_jt @ r5 = address of the jump table at the end of this routine | |
691 + add r5, r5, r4, lsl #2 @ step to the 4-byte table entry for that offset | |
692 + ldr pc, [r5] @ jump to the matching MC_put_o_8_arm_alignN loop | |
693 +MC_put_o_8_arm_align0: @ offset 0: plain two-word copy per row | |
694 + ldmia r1, {r4-r5} @ load the 8 source bytes of this row | |
695 + add r1, r1, r2 @ advance the source pointer by r2 | |
696 + pld [r1] @ prefetch hint for the next row | |
697 + stmia r0, {r4-r5} @ store the row | |
698 + add r0, r0, r2 @ advance the destination pointer by r2 | |
699 + subs r3, r3, #1 @ one row done; sets Z when the count reaches zero | |
700 + bne MC_put_o_8_arm_align0 @ loop until the count is zero | |
701 + ldmfd sp!, {r4-r10, pc} @@ restore r4-r10 and return: PC is loaded with the saved LR content. | |
702 +@ PROC8: copies one 8-byte row from a word-aligned r1, shifting the data right by the given number of bits. Uses r4-r6, r9, r10; leaves the flags from subs for the caller's bne. | |
703 +.macro PROC8 shift | |
704 + ldmia r1, {r4-r6} @ load three consecutive words starting at r1 | |
705 + add r1, r1, r2 @ advance the source pointer by r2 | |
706 + mov r9, r4, lsr #(\shift) @ first output word starts as r4 shifted right | |
707 + pld [r1] @ prefetch hint for the next row | |
708 + mov r10, r5, lsr #(\shift) @ second output word starts as r5 shifted right | |
709 + orr r9, r9, r5, lsl #(32-\shift) @ fill the vacated top bits of r9 from r5 | |
710 + orr r10, r10, r6, lsl #(32-\shift) @ fill the vacated top bits of r10 from r6 | |
711 + stmia r0, {r9-r10} @ store the realigned row | |
712 + subs r3, r3, #1 @ one row done; the Z flag is tested by the bne after the macro | |
713 + add r0, r0, r2 @ advance the destination pointer (add leaves the flags from subs untouched) | |
714 +.endm | |
715 + | |
716 +MC_put_o_8_arm_align1: @ table entry 1: source offset 1, realign by 8 bits | |
717 + and r1, r1, #0xFFFFFFFC @ clear the two low bits so r1 is word aligned | |
718 +1: PROC8(8) | |
719 + bne 1b @ repeat while the row count left by PROC8 is nonzero | |
720 + ldmfd sp!, {r4-r10, pc} @@ restore r4-r10 and return: PC is loaded with the saved LR content. | |
721 + | |
722 +MC_put_o_8_arm_align2: @ table entry 2: source offset 2, realign by 16 bits | |
723 + and r1, r1, #0xFFFFFFFC @ clear the two low bits so r1 is word aligned | |
724 +1: PROC8(16) | |
725 + bne 1b @ repeat while the row count left by PROC8 is nonzero | |
726 + ldmfd sp!, {r4-r10, pc} @@ restore r4-r10 and return: PC is loaded with the saved LR content. | |
727 + | |
728 +MC_put_o_8_arm_align3: @ table entry 3: source offset 3, realign by 24 bits | |
729 + and r1, r1, #0xFFFFFFFC @ clear the two low bits so r1 is word aligned | |
730 +1: PROC8(24) | |
731 + bne 1b @ repeat while the row count left by PROC8 is nonzero | |
732 + ldmfd sp!, {r4-r10, pc} @@ restore r4-r10 and return: PC is loaded with the saved LR content. | |
733 + | |
734 +MC_put_o_8_arm_align_jt: @ jump table indexed by (source pointer & 3) | |
735 + .word MC_put_o_8_arm_align0 | |
736 + .word MC_put_o_8_arm_align1 | |
737 + .word MC_put_o_8_arm_align2 | |
738 + .word MC_put_o_8_arm_align3 | |
739 + | |
740 +@ ---------------------------------------------------------------- | |
741 +.macro AVG_PW rW1, rW2 | |
742 + mov \rW2, \rW2, lsl #24 @ keep only the low 8 bits of rW2, moved to the top byte | |
743 + orr \rW2, \rW2, \rW1, lsr #8 @ rW2 = rW1 shifted right by 8 bits with that byte on top | |
744 + eor r9, \rW1, \rW2 @ r9 = bits in which the two words differ (r9 is clobbered) | |
745 + and \rW2, \rW1, \rW2 @ rW2 = bits common to both words | |
746 + and r10, r9, r12 @ mask the difference with r12 (callers set r12 to the complement of r11) so the shift below cannot leak between bytes | |
747 + add \rW2, \rW2, r10, lsr #1 @ common bits + half of the masked difference (r10 is clobbered) | |
748 + and r10, r9, r11 @ mask the difference with r11 (callers load 0x01010101 into r11): low bit of each byte | |
749 + add \rW2, \rW2, r10 @ add that low bit back, so each byte of rW2 is the average rounded up | |
750 +.endm | |
751 + | |
752 + .align | |
753 + .global MC_put_x_16_arm | |
754 +MC_put_x_16_arm: | |
755 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) -- per row, stores 16 bytes at r0, each word being the AVG_PW average of the source data and the same data shifted right by 8 bits; r2 is added to both pointers per row, r3 counts rows | |
756 + pld [r1] @ prefetch hint for the first source row | |
757 + stmfd sp!, {r4-r11,lr} @ save r4-r11 and the return address (R14 is also called LR) | |
758 + and r4, r1, #3 @ r4 = source pointer offset within its word (0..3) | |
759 + adr r5, MC_put_x_16_arm_align_jt @ r5 = address of the table at the end of this routine | |
760 + ldr r11, [r5] @ first table word is the constant 0x01010101 used by AVG_PW | |
761 + mvn r12, r11 @ r12 = bitwise complement of r11, also used by AVG_PW | |
762 + add r5, r5, r4, lsl #2 @ step to the 4-byte entry for that offset | |
763 + ldr pc, [r5, #4] @ the extra 4 skips the constant word; jump to the matching alignN loop | |
764 +@ ADJ_ALIGN_QW: shifts the five-word sequence R0..R4 right by the given number of bits, each of R0..R3 taking its vacated top bits from the following word. | |
765 +.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4 | |
766 + mov \R0, \R0, lsr #(\shift) | |
767 + orr \R0, \R0, \R1, lsl #(32 - \shift) | |
768 + mov \R1, \R1, lsr #(\shift) | |
769 + orr \R1, \R1, \R2, lsl #(32 - \shift) | |
770 + mov \R2, \R2, lsr #(\shift) | |
771 + orr \R2, \R2, \R3, lsl #(32 - \shift) | |
772 + mov \R3, \R3, lsr #(\shift) | |
773 + orr \R3, \R3, \R4, lsl #(32 - \shift) | |
774 + mov \R4, \R4, lsr #(\shift) | |
775 +@ and \R4, \R4, #0xFF (left disabled: the AVG_PW that consumes this register keeps only its low 8 bits via lsl #24) | |
776 +.endm | |
777 + | |
778 +MC_put_x_16_arm_align0: @ table entry 0: source already word aligned | |
779 + ldmia r1, {r4-r8} @ load five consecutive words; only the low 8 bits of r8 survive AVG_PW | |
780 + add r1, r1, r2 @ advance the source pointer by r2 | |
781 + pld [r1] @ prefetch hint for the next row | |
782 + AVG_PW r7, r8 | |
783 + AVG_PW r6, r7 | |
784 + AVG_PW r5, r6 | |
785 + AVG_PW r4, r5 | |
786 + stmia r0, {r5-r8} @ results were built last word first, so each AVG_PW read unmodified inputs; r5-r8 hold the output row | |
787 + subs r3, r3, #1 @ one row done; sets Z when the count reaches zero | |
788 + add r0, r0, r2 @ advance the destination pointer (flags from subs are preserved) | |
789 + bne MC_put_x_16_arm_align0 @ loop until the count is zero | |
790 + ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return: PC is loaded with the saved LR content. | |
791 +MC_put_x_16_arm_align1: @ table entry 1: source offset 1, realign by 8 bits | |
792 + and r1, r1, #0xFFFFFFFC @ clear the two low bits so r1 is word aligned | |
793 +1: ldmia r1, {r4-r8} @ load five consecutive aligned words | |
794 + add r1, r1, r2 @ advance the source pointer by r2 | |
795 + pld [r1] @ prefetch hint for the next row | |
796 + ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8 | |
797 + AVG_PW r7, r8 | |
798 + AVG_PW r6, r7 | |
799 + AVG_PW r5, r6 | |
800 + AVG_PW r4, r5 | |
801 + stmia r0, {r5-r8} @ r5-r8 hold the output row | |
802 + subs r3, r3, #1 @ one row done; sets Z when the count reaches zero | |
803 + add r0, r0, r2 @ advance the destination pointer (flags from subs are preserved) | |
804 + bne 1b @ loop until the count is zero | |
805 + ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return: PC is loaded with the saved LR content. | |
806 +MC_put_x_16_arm_align2: @ table entry 2: source offset 2, realign by 16 bits | |
807 + and r1, r1, #0xFFFFFFFC @ clear the two low bits so r1 is word aligned | |
808 +1: ldmia r1, {r4-r8} @ load five consecutive aligned words | |
809 + add r1, r1, r2 @ advance the source pointer by r2 | |
810 + pld [r1] @ prefetch hint for the next row | |
811 + ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8 | |
812 + AVG_PW r7, r8 | |
813 + AVG_PW r6, r7 | |
814 + AVG_PW r5, r6 | |
815 + AVG_PW r4, r5 | |
816 + stmia r0, {r5-r8} @ r5-r8 hold the output row | |
817 + subs r3, r3, #1 @ one row done; sets Z when the count reaches zero | |
818 + add r0, r0, r2 @ advance the destination pointer (flags from subs are preserved) | |
819 + bne 1b @ loop until the count is zero | |
820 + ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return: PC is loaded with the saved LR content. | |
821 +MC_put_x_16_arm_align3: @ table entry 3: source offset 3, realign by 24 bits | |
822 + and r1, r1, #0xFFFFFFFC @ clear the two low bits so r1 is word aligned | |
823 +1: ldmia r1, {r4-r8} @ load five consecutive aligned words | |
824 + add r1, r1, r2 @ advance the source pointer by r2 | |
825 + pld [r1] @ prefetch hint for the next row | |
826 + ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8 | |
827 + AVG_PW r7, r8 | |
828 + AVG_PW r6, r7 | |
829 + AVG_PW r5, r6 | |
830 + AVG_PW r4, r5 | |
831 + stmia r0, {r5-r8} @ r5-r8 hold the output row | |
832 + subs r3, r3, #1 @ one row done; sets Z when the count reaches zero | |
833 + add r0, r0, r2 @ advance the destination pointer (flags from subs are preserved) | |
834 + bne 1b @ loop until the count is zero | |
835 + ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return: PC is loaded with the saved LR content. | |
836 +MC_put_x_16_arm_align_jt: @ word 0 is the AVG_PW constant; words 1-4 are the entries for (source pointer & 3) | |
837 + .word 0x01010101 | |
838 + .word MC_put_x_16_arm_align0 | |
839 + .word MC_put_x_16_arm_align1 | |
840 + .word MC_put_x_16_arm_align2 | |
841 + .word MC_put_x_16_arm_align3 | |
842 + | |
843 +@ ---------------------------------------------------------------- | |
844 + .align | |
845 + .global MC_put_x_8_arm | |
846 +MC_put_x_8_arm: | |
847 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) -- per row, stores 8 bytes at r0, each word being the AVG_PW average of the source data and the same data shifted right by 8 bits; r2 is added to both pointers per row, r3 counts rows | |
848 + pld [r1] @ prefetch hint for the first source row | |
849 + stmfd sp!, {r4-r11,lr} @ save r4-r11 and the return address (R14 is also called LR) | |
850 + and r4, r1, #3 @ r4 = source pointer offset within its word (0..3) | |
851 + adr r5, MC_put_x_8_arm_align_jt @ r5 = address of the table at the end of this routine | |
852 + ldr r11, [r5] @ first table word is the constant 0x01010101 used by AVG_PW | |
853 + mvn r12, r11 @ r12 = bitwise complement of r11, also used by AVG_PW | |
854 + add r5, r5, r4, lsl #2 @ step to the 4-byte entry for that offset | |
855 + ldr pc, [r5, #4] @ the extra 4 skips the constant word; jump to the matching alignN loop | |
856 +@ ADJ_ALIGN_DW: shifts the three-word sequence R0..R2 right by the given number of bits, each of R0 and R1 taking its vacated top bits from the following word. | |
857 +.macro ADJ_ALIGN_DW shift, R0, R1, R2 | |
858 + mov \R0, \R0, lsr #(\shift) | |
859 + orr \R0, \R0, \R1, lsl #(32 - \shift) | |
860 + mov \R1, \R1, lsr #(\shift) | |
861 + orr \R1, \R1, \R2, lsl #(32 - \shift) | |
862 + mov \R2, \R2, lsr #(\shift) | |
863 +@ and \R4, \R4, #0xFF (disabled; names R4, which is not a parameter of this macro -- presumably R2 was meant; AVG_PW keeps only the low 8 bits of that register anyway) | |
864 +.endm | |
865 + | |
866 +MC_put_x_8_arm_align0: @ table entry 0: source already word aligned | |
867 + ldmia r1, {r4-r6} @ load three consecutive words; only the low 8 bits of r6 survive AVG_PW | |
868 + add r1, r1, r2 @ advance the source pointer by r2 | |
869 + pld [r1] @ prefetch hint for the next row | |
870 + AVG_PW r5, r6 | |
871 + AVG_PW r4, r5 | |
872 + stmia r0, {r5-r6} @ results were built last word first, so each AVG_PW read unmodified inputs; r5-r6 hold the output row | |
873 + subs r3, r3, #1 @ one row done; sets Z when the count reaches zero | |
874 + add r0, r0, r2 @ advance the destination pointer (flags from subs are preserved) | |
875 + bne MC_put_x_8_arm_align0 @ loop until the count is zero | |
876 + ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return: PC is loaded with the saved LR content. | |
877 +MC_put_x_8_arm_align1: @ table entry 1: source offset 1, realign by 8 bits | |
878 + and r1, r1, #0xFFFFFFFC @ clear the two low bits so r1 is word aligned | |
879 +1: ldmia r1, {r4-r6} @ load three consecutive aligned words | |
880 + add r1, r1, r2 @ advance the source pointer by r2 | |
881 + pld [r1] @ prefetch hint for the next row | |
882 + ADJ_ALIGN_DW 8, r4, r5, r6 | |
883 + AVG_PW r5, r6 | |
884 + AVG_PW r4, r5 | |
885 + stmia r0, {r5-r6} @ r5-r6 hold the output row | |
886 + subs r3, r3, #1 @ one row done; sets Z when the count reaches zero | |
887 + add r0, r0, r2 @ advance the destination pointer (flags from subs are preserved) | |
888 + bne 1b @ loop until the count is zero | |
889 + ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return: PC is loaded with the saved LR content. | |
890 +MC_put_x_8_arm_align2: @ table entry 2: source offset 2, realign by 16 bits | |
891 + and r1, r1, #0xFFFFFFFC @ clear the two low bits so r1 is word aligned | |
892 +1: ldmia r1, {r4-r6} @ load three consecutive aligned words | |
893 + add r1, r1, r2 @ advance the source pointer by r2 | |
894 + pld [r1] @ prefetch hint for the next row | |
895 + ADJ_ALIGN_DW 16, r4, r5, r6 | |
896 + AVG_PW r5, r6 | |
897 + AVG_PW r4, r5 | |
898 + stmia r0, {r5-r6} @ r5-r6 hold the output row | |
899 + subs r3, r3, #1 @ one row done; sets Z when the count reaches zero | |
900 + add r0, r0, r2 @ advance the destination pointer (flags from subs are preserved) | |
901 + bne 1b @ loop until the count is zero | |
902 + ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return: PC is loaded with the saved LR content. | |
903 +MC_put_x_8_arm_align3: @ table entry 3: source offset 3, realign by 24 bits | |
904 + and r1, r1, #0xFFFFFFFC @ clear the two low bits so r1 is word aligned | |
905 +1: ldmia r1, {r4-r6} @ load three consecutive aligned words | |
906 + add r1, r1, r2 @ advance the source pointer by r2 | |
907 + pld [r1] @ prefetch hint for the next row | |
908 + ADJ_ALIGN_DW 24, r4, r5, r6 | |
909 + AVG_PW r5, r6 | |
910 + AVG_PW r4, r5 | |
911 + stmia r0, {r5-r6} @ r5-r6 hold the output row | |
912 + subs r3, r3, #1 @ one row done; sets Z when the count reaches zero | |
913 + add r0, r0, r2 @ advance the destination pointer (flags from subs are preserved) | |
914 + bne 1b @ loop until the count is zero | |
915 + ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return: PC is loaded with the saved LR content. | |
916 +MC_put_x_8_arm_align_jt: @ word 0 is the AVG_PW constant; words 1-4 are the entries for (source pointer & 3) | |
917 + .word 0x01010101 | |
918 + .word MC_put_x_8_arm_align0 | |
919 + .word MC_put_x_8_arm_align1 | |
920 + .word MC_put_x_8_arm_align2 | |
921 + .word MC_put_x_8_arm_align3 | |
922 Index: libmpeg2/motion_comp_iwmmxt.c | 255 Index: libmpeg2/motion_comp_iwmmxt.c |
923 =================================================================== | 256 =================================================================== |
924 --- libmpeg2/motion_comp_iwmmxt.c (revision 0) | 257 --- libmpeg2/motion_comp_iwmmxt.c (revision 0) |
925 +++ libmpeg2/motion_comp_iwmmxt.c (revision 0) | 258 +++ libmpeg2/motion_comp_iwmmxt.c (revision 0) |
926 @@ -0,0 +1,59 @@ | 259 @@ -0,0 +1,59 @@ |