Mercurial > mplayer.hg
changeset 27569:7c3d535aba22
Remove version string from name of local changes diff file.
author | diego |
---|---|
date | Sat, 13 Sep 2008 14:23:45 +0000 |
parents | e5095880bc12 |
children | aa795bfca46f |
files | libmpeg2/libmpeg-0.4.1.diff libmpeg2/libmpeg2_changes.diff |
diffstat | 2 files changed, 985 insertions(+), 985 deletions(-) [+] |
line wrap: on
line diff
--- a/libmpeg2/libmpeg-0.4.1.diff Sat Sep 13 13:41:47 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,985 +0,0 @@ ---- libmpeg2/cpu_accel.c 2006-06-16 20:12:26.000000000 +0200 -+++ libmpeg2/cpu_accel.c 2006-06-16 20:12:50.000000000 +0200 -@@ -22,6 +26,7 @@ - */ - - #include "config.h" -+#include "cpudetect.h" - - #include <inttypes.h> - -@@ -30,9 +35,17 @@ - #include "mpeg2_internal.h" - - #ifdef ACCEL_DETECT --#ifdef ARCH_X86 -+#if defined(ARCH_X86) || defined(ARCH_X86_64) -+ -+/* MPlayer imports libmpeg2 as decoder, which detects MMX / 3DNow! -+ * instructions via assembly. However, it is regarded as duplicated work -+ * in MPlayer, so that we enforce using MPlayer's implementation. -+ */ -+#define MPLAYER_CPUDETECT -+ - static inline uint32_t arch_accel (void) - { -+#if !defined(MPLAYER_CPUDETECT) - uint32_t eax, ebx, ecx, edx; - int AMD; - uint32_t caps; -@@ -107,8 +120,22 @@ - caps |= MPEG2_ACCEL_X86_MMXEXT; - - return caps; -+#else /* MPLAYER_CPUDETECT: Use MPlayer's CPU capability property. */ -+ caps = 0; -+ if (gCpuCaps.hasMMX) -+ caps |= MPEG2_ACCEL_X86_MMX; -+ if (gCpuCaps.hasSSE2) -+ caps |= MPEG2_ACCEL_X86_SSE2; -+ if (gCpuCaps.hasMMX2) -+ caps |= MPEG2_ACCEL_X86_MMXEXT; -+ if (gCpuCaps.has3DNow) -+ caps |= MPEG2_ACCEL_X86_3DNOW; -+ -+ return caps; -+ -+#endif /* MPLAYER_CPUDETECT */ - } --#endif /* ARCH_X86 */ -+#endif /* ARCH_X86 || ARCH_X86_64 */ - - #if defined(ARCH_PPC) || defined(ARCH_SPARC) - #include <signal.h> -@@ -214,7 +241,7 @@ - - accel = 0; - #ifdef ACCEL_DETECT --#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) -+#if defined (ARCH_X86) || defined (ARCH_X86_64) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) - accel = arch_accel (); - #endif - #endif ---- libmpeg2/cpu_state.c 2006-06-16 20:12:26.000000000 +0200 -+++ libmpeg2/cpu_state.c 2006-06-16 20:12:50.000000000 +0200 -@@ -29,14 +33,14 @@ - #include "mpeg2.h" - #include "attributes.h" - #include "mpeg2_internal.h" --#ifdef ARCH_X86 -+#if defined(ARCH_X86) || defined(ARCH_X86_64) - #include "mmx.h" - #endif - - void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL; - void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL; - --#ifdef ARCH_X86 -+#if defined(ARCH_X86) || defined(ARCH_X86_64) - static void state_restore_mmx (cpu_state_t * state) - { - emms (); -@@ -44,18 +48,18 @@ - #endif - - #ifdef ARCH_PPC --#ifdef HAVE_ALTIVEC_H /* gnu */ --#define LI(a,b) "li " #a "," #b "\n\t" --#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" --#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t" --#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t" --#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t" --#else /* apple */ -+#if defined(__APPLE_CC__) /* apple */ - #define LI(a,b) "li r" #a "," #b "\n\t" - #define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t" - #define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t" - #define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t" - #define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t" -+#else /* gnu */ -+#define LI(a,b) "li " #a "," #b "\n\t" -+#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" -+#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t" -+#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t" -+#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t" - #endif - - static void state_save_altivec (cpu_state_t * state) -@@ -115,7 +119,7 @@ - - void mpeg2_cpu_state_init (uint32_t accel) - { --#ifdef ARCH_X86 -+#if defined(ARCH_X86) || defined(ARCH_X86_64) - if (accel & MPEG2_ACCEL_X86_MMX) { - mpeg2_cpu_state_restore = state_restore_mmx; - } ---- libmpeg2/decode.c 2006-06-16 20:12:26.000000000 +0200 -+++ libmpeg2/decode.c 2006-06-16 20:12:50.000000000 +0200 -@@ -351,6 +355,15 @@ - fbuf->buf[1] = buf[1]; - fbuf->buf[2] = buf[2]; - fbuf->id = id; -+ // HACK! FIXME! At first I frame, copy pointers to prediction frame too! -+ if (mpeg2dec->custom_fbuf && !mpeg2dec->fbuf[1]->buf[0]){ -+ mpeg2dec->fbuf[1]->buf[0]=buf[0]; -+ mpeg2dec->fbuf[1]->buf[1]=buf[1]; -+ mpeg2dec->fbuf[1]->buf[2]=buf[2]; -+ mpeg2dec->fbuf[1]->id=NULL; -+ } -+// printf("libmpeg2: FBUF 0:%p 1:%p 2:%p\n", -+// mpeg2dec->fbuf[0]->buf[0],mpeg2dec->fbuf[1]->buf[0],mpeg2dec->fbuf[2]->buf[0]); - } - - void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf) ---- libmpeg2/header.c 2006-06-16 20:12:26.000000000 +0200 -+++ libmpeg2/header.c 2006-06-16 20:12:50.000000000 +0200 -@@ -100,6 +104,9 @@ - mpeg2dec->decoder.convert = NULL; - mpeg2dec->decoder.convert_id = NULL; - mpeg2dec->picture = mpeg2dec->pictures; -+ memset(&mpeg2dec->fbuf_alloc[0].fbuf, 0, sizeof(mpeg2_fbuf_t)); -+ memset(&mpeg2dec->fbuf_alloc[1].fbuf, 0, sizeof(mpeg2_fbuf_t)); -+ memset(&mpeg2dec->fbuf_alloc[2].fbuf, 0, sizeof(mpeg2_fbuf_t)); - mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf; - mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf; - mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf; -@@ -553,6 +560,7 @@ - if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { - picture->nb_fields = (buffer[3] & 2) ? 3 : 2; - flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; -+ flags |= (buffer[3] & 2) ? PIC_FLAG_REPEAT_FIRST_FIELD : 0; - } else - picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2; - break; -@@ -801,6 +809,7 @@ - mpeg2dec->scaled[index] = mpeg2dec->q_scale_type; - for (i = 0; i < 32; i++) { - k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1); -+ decoder->quantizer_scales[i] = k; - for (j = 0; j < 64; j++) - decoder->quantizer_prescale[index][i][j] = - k * mpeg2dec->quantizer_matrix[index][j]; ---- libmpeg2/idct.c (revision 26652) -+++ libmpeg2/idct.c (working copy) -@@ -235,30 +239,40 @@ - - void mpeg2_idct_init (uint32_t accel) - { --#ifdef ARCH_X86 -+#ifdef HAVE_SSE2 -+ if (accel & MPEG2_ACCEL_X86_SSE2) { -+ mpeg2_idct_copy = mpeg2_idct_copy_sse2; -+ mpeg2_idct_add = mpeg2_idct_add_sse2; -+ mpeg2_idct_mmx_init (); -+ } else -+#elif HAVE_MMX2 - if (accel & MPEG2_ACCEL_X86_MMXEXT) { - mpeg2_idct_copy = mpeg2_idct_copy_mmxext; - mpeg2_idct_add = mpeg2_idct_add_mmxext; - mpeg2_idct_mmx_init (); -- } else if (accel & MPEG2_ACCEL_X86_MMX) { -+ } else -+#elif HAVE_MMX -+ if (accel & MPEG2_ACCEL_X86_MMX) { - mpeg2_idct_copy = mpeg2_idct_copy_mmx; - mpeg2_idct_add = mpeg2_idct_add_mmx; - mpeg2_idct_mmx_init (); - } else - #endif --#ifdef ARCH_PPC -+#ifdef HAVE_ALTIVEC - if (accel & MPEG2_ACCEL_PPC_ALTIVEC) { - mpeg2_idct_copy = mpeg2_idct_copy_altivec; - mpeg2_idct_add = mpeg2_idct_add_altivec; - mpeg2_idct_altivec_init (); - } else - #endif --#ifdef ARCH_ALPHA -+#ifdef HAVE_VIS - if (accel & MPEG2_ACCEL_ALPHA_MVI) { - mpeg2_idct_copy = mpeg2_idct_copy_mvi; - mpeg2_idct_add = mpeg2_idct_add_mvi; - mpeg2_idct_alpha_init (); -- } else if (accel & MPEG2_ACCEL_ALPHA) { -+ } else -+#elif ARCH_ALPHA -+ if (accel & MPEG2_ACCEL_ALPHA) { - int i; - - mpeg2_idct_copy = mpeg2_idct_copy_alpha; ---- libmpeg2/idct_mmx.c 2006-06-16 20:12:26.000000000 +0200 -+++ libmpeg2/idct_mmx.c 2006-06-16 20:12:50.000000000 +0200 -@@ -23,7 +27,7 @@ - - #include "config.h" - --#ifdef ARCH_X86 -+#if defined(ARCH_X86) || defined(ARCH_X86_64) - - #include <inttypes.h> - ---- libmpeg2/motion_comp.c 2006-06-16 20:12:26.000000000 +0200 -+++ libmpeg2/motion_comp.c 2006-06-16 20:12:50.000000000 +0200 -@@ -33,16 +37,22 @@ - - void mpeg2_mc_init (uint32_t accel) - { --#ifdef ARCH_X86 -+#ifdef HAVE_MMX2 - if (accel & MPEG2_ACCEL_X86_MMXEXT) - mpeg2_mc = mpeg2_mc_mmxext; -- else if (accel & MPEG2_ACCEL_X86_3DNOW) -+ else -+#endif -+#ifdef HAVE_3DNOW -+ if (accel & MPEG2_ACCEL_X86_3DNOW) - mpeg2_mc = mpeg2_mc_3dnow; -- else if (accel & MPEG2_ACCEL_X86_MMX) -+ else -+#endif -+#ifdef HAVE_MMX -+ if (accel & MPEG2_ACCEL_X86_MMX) - mpeg2_mc = mpeg2_mc_mmx; - else - #endif --#ifdef ARCH_PPC -+#ifdef HAVE_ALTIVEC - if (accel & MPEG2_ACCEL_PPC_ALTIVEC) - mpeg2_mc = mpeg2_mc_altivec; - else -@@ -52,11 +62,21 @@ - mpeg2_mc = mpeg2_mc_alpha; - else - #endif --#ifdef ARCH_SPARC -+#ifdef HAVE_VIS - if (accel & MPEG2_ACCEL_SPARC_VIS) - mpeg2_mc = mpeg2_mc_vis; - else - #endif -+#ifdef ARCH_ARM -+#ifdef HAVE_IWMMXT -+ if (accel & MPEG2_ACCEL_ARM_IWMMXT) -+ mpeg2_mc = mpeg2_mc_iwmmxt; -+ else -+#endif -+ if (accel & MPEG2_ACCEL_ARM) -+ mpeg2_mc = mpeg2_mc_arm; -+ else -+#endif - mpeg2_mc = mpeg2_mc_c; - } - ---- libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:26.000000000 +0200 -+++ libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:50.000000000 +0200 -@@ -23,7 +27,7 @@ - - #include "config.h" - --#ifdef ARCH_X86 -+#if defined(ARCH_X86) || defined(ARCH_X86_64) - - #include <inttypes.h> - ---- include/mpeg2.h 2006-06-16 20:12:26.000000000 +0200 -+++ libmpeg2/mpeg2.h 2006-06-16 20:12:50.000000000 +0200 -@@ -82,6 +86,7 @@ - #define PIC_FLAG_COMPOSITE_DISPLAY 32 - #define PIC_FLAG_SKIP 64 - #define PIC_FLAG_TAGS 128 -+#define PIC_FLAG_REPEAT_FIRST_FIELD 256 - #define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000 - - typedef struct mpeg2_picture_s { -@@ -154,11 +159,14 @@ - #define MPEG2_ACCEL_X86_MMX 1 - #define MPEG2_ACCEL_X86_3DNOW 2 - #define MPEG2_ACCEL_X86_MMXEXT 4 -+#define MPEG2_ACCEL_X86_SSE2 8 - #define MPEG2_ACCEL_PPC_ALTIVEC 1 - #define MPEG2_ACCEL_ALPHA 1 - #define MPEG2_ACCEL_ALPHA_MVI 2 - #define MPEG2_ACCEL_SPARC_VIS 1 - #define MPEG2_ACCEL_SPARC_VIS2 2 -+#define MPEG2_ACCEL_ARM 1 -+#define MPEG2_ACCEL_ARM_IWMMXT 2 - #define MPEG2_ACCEL_DETECT 0x80000000 - - uint32_t mpeg2_accel (uint32_t accel); ---- libmpeg2/mpeg2_internal.h 2006-06-16 20:12:26.000000000 +0200 -+++ libmpeg2/mpeg2_internal.h 2006-06-16 20:12:50.000000000 +0200 -@@ -144,6 +148,11 @@ - int second_field; - - int mpeg1; -+ -+ int quantizer_scales[32]; -+ int quantizer_scale; -+ char* quant_store; -+ int quant_stride; - }; - - typedef struct { -@@ -214,6 +223,9 @@ - int8_t q_scale_type, scaled[4]; - uint8_t quantizer_matrix[4][64]; - uint8_t new_quantizer_matrix[4][64]; -+ -+ unsigned char *pending_buffer; -+ int pending_length; - }; - - typedef struct { -@@ -298,3 +313,5 @@ - extern mpeg2_mc_t mpeg2_mc_altivec; - extern mpeg2_mc_t mpeg2_mc_alpha; - extern mpeg2_mc_t mpeg2_mc_vis; -+extern mpeg2_mc_t mpeg2_mc_arm; -+extern mpeg2_mc_t mpeg2_mc_iwmmxt; ---- libmpeg2/slice.c 2006-06-16 20:12:26.000000000 +0200 -+++ libmpeg2/slice.c 2006-06-16 20:12:50.000000000 +0200 -@@ -142,6 +146,7 @@ - - quantizer_scale_code = UBITS (bit_buf, 5); - DUMPBITS (bit_buf, bits, 5); -+ decoder->quantizer_scale = decoder->quantizer_scales[quantizer_scale_code]; - - decoder->quantizer_matrix[0] = - decoder->quantizer_prescale[0][quantizer_scale_code]; -@@ -1564,6 +1569,18 @@ - - #define NEXT_MACROBLOCK \ - do { \ -+ if(decoder->quant_store) { \ -+ if (decoder->picture_structure == TOP_FIELD) \ -+ decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \ -+ +(decoder->offset>>4)] = decoder->quantizer_scale; \ -+ else if (decoder->picture_structure == BOTTOM_FIELD) \ -+ decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \ -+ + decoder->quant_stride \ -+ +(decoder->offset>>4)] = decoder->quantizer_scale; \ -+ else \ -+ decoder->quant_store[decoder->quant_stride*(decoder->v_offset>>4) \ -+ +(decoder->offset>>4)] = decoder->quantizer_scale; \ -+ } \ - decoder->offset += 16; \ - if (decoder->offset == decoder->width) { \ - do { /* just so we can use the break statement */ \ -@@ -1587,6 +1604,12 @@ - } \ - } while (0) - -+static void motion_dummy (mpeg2_decoder_t * const decoder, -+ motion_t * const motion, -+ mpeg2_mc_fct * const * const table) -+{ -+} -+ - void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], - uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) - { -@@ -1644,7 +1667,9 @@ - - if (decoder->mpeg1) { - decoder->motion_parser[0] = motion_zero_420; -+ decoder->motion_parser[MC_FIELD] = motion_dummy; - decoder->motion_parser[MC_FRAME] = motion_mp1; -+ decoder->motion_parser[MC_DMV] = motion_dummy; - decoder->motion_parser[4] = motion_reuse_420; - } else if (decoder->picture_structure == FRAME_PICTURE) { - if (decoder->chroma_format == 0) { ---- libmpeg2/idct_altivec.c 2004/08/02 11:26:43 12933 -+++ libmpeg2/idct_altivec.c 2005/05/15 20:11:34 15484 -@@ -41,7 +41,7 @@ - typedef vector signed int vector_s32_t; - typedef vector unsigned int vector_u32_t; - --#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) -+#if defined(HAVE_ALTIVEC_H) && !defined(__APPLE_CC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) - /* work around gcc <3.3 vec_mergel bug */ - static inline vector_s16_t my_vec_mergel (vector_s16_t const A, - vector_s16_t const B) -Index: libmpeg2/motion_comp_arm.c -=================================================================== ---- libmpeg2/motion_comp_arm.c (revision 0) -+++ libmpeg2/motion_comp_arm.c (revision 0) -@@ -0,0 +1,187 @@ -+/* -+ * motion_comp_arm.c -+ * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> -+ * -+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. -+ * See http://libmpeg2.sourceforge.net/ for updates. -+ * -+ * mpeg2dec is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * mpeg2dec is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ */ -+ -+#include "config.h" -+ -+#ifdef ARCH_ARM -+ -+#include <inttypes.h> -+ -+#include "mpeg2.h" -+#include "attributes.h" -+#include "mpeg2_internal.h" -+ -+#define avg2(a,b) ((a+b+1)>>1) -+#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) -+ -+#define predict_o(i) (ref[i]) -+#define predict_x(i) (avg2 (ref[i], ref[i+1])) -+#define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) -+#define predict_xy(i) (avg4 (ref[i], ref[i+1], \ -+ (ref+stride)[i], (ref+stride)[i+1])) -+ -+#define put(predictor,i) dest[i] = predictor (i) -+#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i]) -+ -+/* mc function template */ -+ -+#define MC_FUNC(op,xy) \ -+static void inline MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \ -+ const int stride, int height) \ -+{ \ -+ do { \ -+ op (predict_##xy, 0); \ -+ op (predict_##xy, 1); \ -+ op (predict_##xy, 2); \ -+ op (predict_##xy, 3); \ -+ op (predict_##xy, 4); \ -+ op (predict_##xy, 5); \ -+ op (predict_##xy, 6); \ -+ op (predict_##xy, 7); \ -+ op (predict_##xy, 8); \ -+ op (predict_##xy, 9); \ -+ op (predict_##xy, 10); \ -+ op (predict_##xy, 11); \ -+ op (predict_##xy, 12); \ -+ op (predict_##xy, 13); \ -+ op (predict_##xy, 14); \ -+ op (predict_##xy, 15); \ -+ ref += stride; \ -+ dest += stride; \ -+ } while (--height); \ -+} \ -+static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \ -+ const int stride, int height) \ -+{ \ -+ do { \ -+ op (predict_##xy, 0); \ -+ op (predict_##xy, 1); \ -+ op (predict_##xy, 2); \ -+ op (predict_##xy, 3); \ -+ op (predict_##xy, 4); \ -+ op (predict_##xy, 5); \ -+ op (predict_##xy, 6); \ -+ op (predict_##xy, 7); \ -+ ref += stride; \ -+ dest += stride; \ -+ } while (--height); \ -+} \ -+/* definitions of the actual mc functions */ -+ -+MC_FUNC (put,o) -+MC_FUNC (avg,o) -+MC_FUNC (put,x) -+MC_FUNC (avg,x) -+MC_FUNC (put,y) -+MC_FUNC (avg,y) -+MC_FUNC (put,xy) -+MC_FUNC (avg,xy) -+ -+ -+extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height); -+ -+extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height); -+ -+ -+static void MC_put_y_16_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_put_y_16_c(dest, ref, stride, height); -+} -+ -+static void MC_put_xy_16_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_put_xy_16_c(dest, ref, stride, height); -+} -+ -+extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height); -+ -+extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height); -+ -+static void MC_put_y_8_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_put_y_8_c(dest, ref, stride, height); -+} -+ -+static void MC_put_xy_8_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_put_xy_8_c(dest, ref, stride, height); -+} -+ -+static void MC_avg_o_16_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_avg_o_16_c(dest, ref, stride, height); -+} -+ -+static void MC_avg_x_16_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_avg_x_16_c(dest, ref, stride, height); -+} -+ -+static void MC_avg_y_16_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_avg_y_16_c(dest, ref, stride, height); -+} -+ -+static void MC_avg_xy_16_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_avg_xy_16_c(dest, ref, stride, height); -+} -+ -+static void MC_avg_o_8_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_avg_o_8_c(dest, ref, stride, height); -+} -+ -+static void MC_avg_x_8_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_avg_x_8_c(dest, ref, stride, height); -+} -+ -+static void MC_avg_y_8_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_avg_y_8_c(dest, ref, stride, height); -+} -+ -+static void MC_avg_xy_8_arm (uint8_t * dest, const uint8_t * ref, -+ int stride, int height) -+{ -+ MC_avg_xy_8_c(dest, ref, stride, height); -+} -+ -+MPEG2_MC_EXTERN (arm) -+ -+#endif -Index: libmpeg2/motion_comp_arm_s.S -=================================================================== ---- libmpeg2/motion_comp_arm_s.S (revision 0) -+++ libmpeg2/motion_comp_arm_s.S (revision 0) -@@ -0,0 +1,322 @@ -+@ motion_comp_arm_s.S -+@ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> -+@ -+@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder. -+@ See http://libmpeg2.sourceforge.net/ for updates. -+@ -+@ mpeg2dec is free software; you can redistribute it and/or modify -+@ it under the terms of the GNU General Public License as published by -+@ the Free Software Foundation; either version 2 of the License, or -+@ (at your option) any later version. -+@ -+@ mpeg2dec is distributed in the hope that it will be useful, -+@ but WITHOUT ANY WARRANTY; without even the implied warranty of -+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+@ GNU General Public License for more details. -+@ -+@ You should have received a copy of the GNU General Public License -+@ along with this program; if not, write to the Free Software -+@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+ .text -+ -+@ ---------------------------------------------------------------- -+ .align -+ .global MC_put_o_16_arm -+MC_put_o_16_arm: -+ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) -+ pld [r1] -+ stmfd sp!, {r4-r11, lr} @ R14 is also called LR -+ and r4, r1, #3 -+ adr r5, MC_put_o_16_arm_align_jt -+ add r5, r5, r4, lsl #2 -+ ldr pc, [r5] -+ -+MC_put_o_16_arm_align0: -+ ldmia r1, {r4-r7} -+ add r1, r1, r2 -+ pld [r1] -+ stmia r0, {r4-r7} -+ subs r3, r3, #1 -+ add r0, r0, r2 -+ bne MC_put_o_16_arm_align0 -+ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. -+ -+.macro PROC shift -+ ldmia r1, {r4-r8} -+ add r1, r1, r2 -+ mov r9, r4, lsr #(\shift) -+ pld [r1] -+ mov r10, r5, lsr #(\shift) -+ orr r9, r9, r5, lsl #(32-\shift) -+ mov r11, r6, lsr #(\shift) -+ orr r10, r10, r6, lsl #(32-\shift) -+ mov r12, r7, lsr #(\shift) -+ orr r11, r11, r7, lsl #(32-\shift) -+ orr r12, r12, r8, lsl #(32-\shift) -+ stmia r0, {r9-r12} -+ subs r3, r3, #1 -+ add r0, r0, r2 -+.endm -+ -+MC_put_o_16_arm_align1: -+ and r1, r1, #0xFFFFFFFC -+1: PROC(8) -+ bne 1b -+ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. -+MC_put_o_16_arm_align2: -+ and r1, r1, #0xFFFFFFFC -+1: PROC(16) -+ bne 1b -+ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. -+MC_put_o_16_arm_align3: -+ and r1, r1, #0xFFFFFFFC -+1: PROC(24) -+ bne 1b -+ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. -+MC_put_o_16_arm_align_jt: -+ .word MC_put_o_16_arm_align0 -+ .word MC_put_o_16_arm_align1 -+ .word MC_put_o_16_arm_align2 -+ .word MC_put_o_16_arm_align3 -+ -+@ ---------------------------------------------------------------- -+ .align -+ .global MC_put_o_8_arm -+MC_put_o_8_arm: -+ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) -+ pld [r1] -+ stmfd sp!, {r4-r10, lr} @ R14 is also called LR -+ and r4, r1, #3 -+ adr r5, MC_put_o_8_arm_align_jt -+ add r5, r5, r4, lsl #2 -+ ldr pc, [r5] -+MC_put_o_8_arm_align0: -+ ldmia r1, {r4-r5} -+ add r1, r1, r2 -+ pld [r1] -+ stmia r0, {r4-r5} -+ add r0, r0, r2 -+ subs r3, r3, #1 -+ bne MC_put_o_8_arm_align0 -+ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. -+ -+.macro PROC8 shift -+ ldmia r1, {r4-r6} -+ add r1, r1, r2 -+ mov r9, r4, lsr #(\shift) -+ pld [r1] -+ mov r10, r5, lsr #(\shift) -+ orr r9, r9, r5, lsl #(32-\shift) -+ orr r10, r10, r6, lsl #(32-\shift) -+ stmia r0, {r9-r10} -+ subs r3, r3, #1 -+ add r0, r0, r2 -+.endm -+ -+MC_put_o_8_arm_align1: -+ and r1, r1, #0xFFFFFFFC -+1: PROC8(8) -+ bne 1b -+ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. -+ -+MC_put_o_8_arm_align2: -+ and r1, r1, #0xFFFFFFFC -+1: PROC8(16) -+ bne 1b -+ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. -+ -+MC_put_o_8_arm_align3: -+ and r1, r1, #0xFFFFFFFC -+1: PROC8(24) -+ bne 1b -+ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. -+ -+MC_put_o_8_arm_align_jt: -+ .word MC_put_o_8_arm_align0 -+ .word MC_put_o_8_arm_align1 -+ .word MC_put_o_8_arm_align2 -+ .word MC_put_o_8_arm_align3 -+ -+@ ---------------------------------------------------------------- -+.macro AVG_PW rW1, rW2 -+ mov \rW2, \rW2, lsl #24 -+ orr \rW2, \rW2, \rW1, lsr #8 -+ eor r9, \rW1, \rW2 -+ and \rW2, \rW1, \rW2 -+ and r10, r9, r12 -+ add \rW2, \rW2, r10, lsr #1 -+ and r10, r9, r11 -+ add \rW2, \rW2, r10 -+.endm -+ -+ .align -+ .global MC_put_x_16_arm -+MC_put_x_16_arm: -+ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) -+ pld [r1] -+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR -+ and r4, r1, #3 -+ adr r5, MC_put_x_16_arm_align_jt -+ ldr r11, [r5] -+ mvn r12, r11 -+ add r5, r5, r4, lsl #2 -+ ldr pc, [r5, #4] -+ -+.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4 -+ mov \R0, \R0, lsr #(\shift) -+ orr \R0, \R0, \R1, lsl #(32 - \shift) -+ mov \R1, \R1, lsr #(\shift) -+ orr \R1, \R1, \R2, lsl #(32 - \shift) -+ mov \R2, \R2, lsr #(\shift) -+ orr \R2, \R2, \R3, lsl #(32 - \shift) -+ mov \R3, \R3, lsr #(\shift) -+ orr \R3, \R3, \R4, lsl #(32 - \shift) -+ mov \R4, \R4, lsr #(\shift) -+@ and \R4, \R4, #0xFF -+.endm -+ -+MC_put_x_16_arm_align0: -+ ldmia r1, {r4-r8} -+ add r1, r1, r2 -+ pld [r1] -+ AVG_PW r7, r8 -+ AVG_PW r6, r7 -+ AVG_PW r5, r6 -+ AVG_PW r4, r5 -+ stmia r0, {r5-r8} -+ subs r3, r3, #1 -+ add r0, r0, r2 -+ bne MC_put_x_16_arm_align0 -+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. -+MC_put_x_16_arm_align1: -+ and r1, r1, #0xFFFFFFFC -+1: ldmia r1, {r4-r8} -+ add r1, r1, r2 -+ pld [r1] -+ ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8 -+ AVG_PW r7, r8 -+ AVG_PW r6, r7 -+ AVG_PW r5, r6 -+ AVG_PW r4, r5 -+ stmia r0, {r5-r8} -+ subs r3, r3, #1 -+ add r0, r0, r2 -+ bne 1b -+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. -+MC_put_x_16_arm_align2: -+ and r1, r1, #0xFFFFFFFC -+1: ldmia r1, {r4-r8} -+ add r1, r1, r2 -+ pld [r1] -+ ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8 -+ AVG_PW r7, r8 -+ AVG_PW r6, r7 -+ AVG_PW r5, r6 -+ AVG_PW r4, r5 -+ stmia r0, {r5-r8} -+ subs r3, r3, #1 -+ add r0, r0, r2 -+ bne 1b -+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. -+MC_put_x_16_arm_align3: -+ and r1, r1, #0xFFFFFFFC -+1: ldmia r1, {r4-r8} -+ add r1, r1, r2 -+ pld [r1] -+ ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8 -+ AVG_PW r7, r8 -+ AVG_PW r6, r7 -+ AVG_PW r5, r6 -+ AVG_PW r4, r5 -+ stmia r0, {r5-r8} -+ subs r3, r3, #1 -+ add r0, r0, r2 -+ bne 1b -+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. -+MC_put_x_16_arm_align_jt: -+ .word 0x01010101 -+ .word MC_put_x_16_arm_align0 -+ .word MC_put_x_16_arm_align1 -+ .word MC_put_x_16_arm_align2 -+ .word MC_put_x_16_arm_align3 -+ -+@ ---------------------------------------------------------------- -+ .align -+ .global MC_put_x_8_arm -+MC_put_x_8_arm: -+ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) -+ pld [r1] -+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR -+ and r4, r1, #3 -+ adr r5, MC_put_x_8_arm_align_jt -+ ldr r11, [r5] -+ mvn r12, r11 -+ add r5, r5, r4, lsl #2 -+ ldr pc, [r5, #4] -+ -+.macro ADJ_ALIGN_DW shift, R0, R1, R2 -+ mov \R0, \R0, lsr #(\shift) -+ orr \R0, \R0, \R1, lsl #(32 - \shift) -+ mov \R1, \R1, lsr #(\shift) -+ orr \R1, \R1, \R2, lsl #(32 - \shift) -+ mov \R2, \R2, lsr #(\shift) -+@ and \R4, \R4, #0xFF -+.endm -+ -+MC_put_x_8_arm_align0: -+ ldmia r1, {r4-r6} -+ add r1, r1, r2 -+ pld [r1] -+ AVG_PW r5, r6 -+ AVG_PW r4, r5 -+ stmia r0, {r5-r6} -+ subs r3, r3, #1 -+ add r0, r0, r2 -+ bne MC_put_x_8_arm_align0 -+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. -+MC_put_x_8_arm_align1: -+ and r1, r1, #0xFFFFFFFC -+1: ldmia r1, {r4-r6} -+ add r1, r1, r2 -+ pld [r1] -+ ADJ_ALIGN_DW 8, r4, r5, r6 -+ AVG_PW r5, r6 -+ AVG_PW r4, r5 -+ stmia r0, {r5-r6} -+ subs r3, r3, #1 -+ add r0, r0, r2 -+ bne 1b -+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. -+MC_put_x_8_arm_align2: -+ and r1, r1, #0xFFFFFFFC -+1: ldmia r1, {r4-r6} -+ add r1, r1, r2 -+ pld [r1] -+ ADJ_ALIGN_DW 16, r4, r5, r6 -+ AVG_PW r5, r6 -+ AVG_PW r4, r5 -+ stmia r0, {r5-r6} -+ subs r3, r3, #1 -+ add r0, r0, r2 -+ bne 1b -+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. -+MC_put_x_8_arm_align3: -+ and r1, r1, #0xFFFFFFFC -+1: ldmia r1, {r4-r6} -+ add r1, r1, r2 -+ pld [r1] -+ ADJ_ALIGN_DW 24, r4, r5, r6 -+ AVG_PW r5, r6 -+ AVG_PW r4, r5 -+ stmia r0, {r5-r6} -+ subs r3, r3, #1 -+ add r0, r0, r2 -+ bne 1b -+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. -+MC_put_x_8_arm_align_jt: -+ .word 0x01010101 -+ .word MC_put_x_8_arm_align0 -+ .word MC_put_x_8_arm_align1 -+ .word MC_put_x_8_arm_align2 -+ .word MC_put_x_8_arm_align3 -Index: libmpeg2/motion_comp_iwmmxt.c -=================================================================== ---- libmpeg2/motion_comp_iwmmxt.c (revision 0) -+++ libmpeg2/motion_comp_iwmmxt.c (revision 0) -@@ -0,0 +1,59 @@ -+/* -+ * motion_comp_iwmmxt.c -+ * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> -+ * -+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. -+ * See http://libmpeg2.sourceforge.net/ for updates. -+ * -+ * mpeg2dec is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * mpeg2dec is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ */ -+ -+#include "config.h" -+ -+#if defined(ARCH_ARM) && defined(HAVE_IWMMXT) -+ -+#include <inttypes.h> -+ -+#include "mpeg2.h" -+#include "attributes.h" -+#include "mpeg2_internal.h" -+ -+/* defined in libavcodec */ -+ -+extern void put_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void put_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void put_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void put_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void put_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void put_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void put_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void put_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void avg_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void avg_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void avg_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void avg_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void avg_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void avg_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void avg_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+extern void avg_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); -+ -+mpeg2_mc_t mpeg2_mc_iwmmxt = { -+ {put_pixels16_iwmmxt, put_pixels16_x2_iwmmxt, put_pixels16_y2_iwmmxt, put_pixels16_xy2_iwmmxt, -+ put_pixels8_iwmmxt, put_pixels8_x2_iwmmxt, put_pixels8_y2_iwmmxt, put_pixels8_xy2_iwmmxt}, \ -+ {avg_pixels16_iwmmxt, avg_pixels16_x2_iwmmxt, avg_pixels16_y2_iwmmxt, avg_pixels16_xy2_iwmmxt, -+ avg_pixels8_iwmmxt, avg_pixels8_x2_iwmmxt, avg_pixels8_y2_iwmmxt, avg_pixels8_xy2_iwmmxt}, \ -+}; -+ -+#endif /* defined(ARCH_ARM) && defined(HAVE_IWMMXT) */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpeg2/libmpeg2_changes.diff Sat Sep 13 14:23:45 2008 +0000 @@ -0,0 +1,985 @@ +--- libmpeg2/cpu_accel.c 2006-06-16 20:12:26.000000000 +0200 ++++ libmpeg2/cpu_accel.c 2006-06-16 20:12:50.000000000 +0200 +@@ -22,6 +26,7 @@ + */ + + #include "config.h" ++#include "cpudetect.h" + + #include <inttypes.h> + +@@ -30,9 +35,17 @@ + #include "mpeg2_internal.h" + + #ifdef ACCEL_DETECT +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) ++ ++/* MPlayer imports libmpeg2 as decoder, which detects MMX / 3DNow! ++ * instructions via assembly. However, it is regarded as duplicated work ++ * in MPlayer, so that we enforce using MPlayer's implementation. ++ */ ++#define MPLAYER_CPUDETECT ++ + static inline uint32_t arch_accel (void) + { ++#if !defined(MPLAYER_CPUDETECT) + uint32_t eax, ebx, ecx, edx; + int AMD; + uint32_t caps; +@@ -107,8 +120,22 @@ + caps |= MPEG2_ACCEL_X86_MMXEXT; + + return caps; ++#else /* MPLAYER_CPUDETECT: Use MPlayer's CPU capability property. */ ++ caps = 0; ++ if (gCpuCaps.hasMMX) ++ caps |= MPEG2_ACCEL_X86_MMX; ++ if (gCpuCaps.hasSSE2) ++ caps |= MPEG2_ACCEL_X86_SSE2; ++ if (gCpuCaps.hasMMX2) ++ caps |= MPEG2_ACCEL_X86_MMXEXT; ++ if (gCpuCaps.has3DNow) ++ caps |= MPEG2_ACCEL_X86_3DNOW; ++ ++ return caps; ++ ++#endif /* MPLAYER_CPUDETECT */ + } +-#endif /* ARCH_X86 */ ++#endif /* ARCH_X86 || ARCH_X86_64 */ + + #if defined(ARCH_PPC) || defined(ARCH_SPARC) + #include <signal.h> +@@ -214,7 +241,7 @@ + + accel = 0; + #ifdef ACCEL_DETECT +-#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) ++#if defined (ARCH_X86) || defined (ARCH_X86_64) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) + accel = arch_accel (); + #endif + #endif +--- libmpeg2/cpu_state.c 2006-06-16 20:12:26.000000000 +0200 ++++ libmpeg2/cpu_state.c 2006-06-16 20:12:50.000000000 +0200 +@@ -29,14 +33,14 @@ + #include "mpeg2.h" + #include "attributes.h" + #include "mpeg2_internal.h" +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + #include "mmx.h" + #endif + + void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL; + void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL; + +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + static void state_restore_mmx (cpu_state_t * state) + { + emms (); +@@ -44,18 +48,18 @@ + #endif + + #ifdef ARCH_PPC +-#ifdef HAVE_ALTIVEC_H /* gnu */ +-#define LI(a,b) "li " #a "," #b "\n\t" +-#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" +-#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t" +-#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t" +-#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t" +-#else /* apple */ ++#if defined(__APPLE_CC__) /* apple */ + #define LI(a,b) "li r" #a "," #b "\n\t" + #define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t" + #define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t" + #define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t" + #define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t" ++#else /* gnu */ ++#define LI(a,b) "li " #a "," #b "\n\t" ++#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" ++#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t" ++#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t" ++#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t" + #endif + + static void state_save_altivec (cpu_state_t * state) +@@ -115,7 +119,7 @@ + + void mpeg2_cpu_state_init (uint32_t accel) + { +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + if (accel & MPEG2_ACCEL_X86_MMX) { + mpeg2_cpu_state_restore = state_restore_mmx; + } +--- libmpeg2/decode.c 2006-06-16 20:12:26.000000000 +0200 ++++ libmpeg2/decode.c 2006-06-16 20:12:50.000000000 +0200 +@@ -351,6 +355,15 @@ + fbuf->buf[1] = buf[1]; + fbuf->buf[2] = buf[2]; + fbuf->id = id; ++ // HACK! FIXME! At first I frame, copy pointers to prediction frame too! ++ if (mpeg2dec->custom_fbuf && !mpeg2dec->fbuf[1]->buf[0]){ ++ mpeg2dec->fbuf[1]->buf[0]=buf[0]; ++ mpeg2dec->fbuf[1]->buf[1]=buf[1]; ++ mpeg2dec->fbuf[1]->buf[2]=buf[2]; ++ mpeg2dec->fbuf[1]->id=NULL; ++ } ++// printf("libmpeg2: FBUF 0:%p 1:%p 2:%p\n", ++// mpeg2dec->fbuf[0]->buf[0],mpeg2dec->fbuf[1]->buf[0],mpeg2dec->fbuf[2]->buf[0]); + } + + void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf) +--- libmpeg2/header.c 2006-06-16 20:12:26.000000000 +0200 ++++ libmpeg2/header.c 2006-06-16 20:12:50.000000000 +0200 +@@ -100,6 +104,9 @@ + mpeg2dec->decoder.convert = NULL; + mpeg2dec->decoder.convert_id = NULL; + mpeg2dec->picture = mpeg2dec->pictures; ++ memset(&mpeg2dec->fbuf_alloc[0].fbuf, 0, sizeof(mpeg2_fbuf_t)); ++ memset(&mpeg2dec->fbuf_alloc[1].fbuf, 0, sizeof(mpeg2_fbuf_t)); ++ memset(&mpeg2dec->fbuf_alloc[2].fbuf, 0, sizeof(mpeg2_fbuf_t)); + mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf; + mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf; + mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf; +@@ -553,6 +560,7 @@ + if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { + picture->nb_fields = (buffer[3] & 2) ? 3 : 2; + flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; ++ flags |= (buffer[3] & 2) ? PIC_FLAG_REPEAT_FIRST_FIELD : 0; + } else + picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2; + break; +@@ -801,6 +809,7 @@ + mpeg2dec->scaled[index] = mpeg2dec->q_scale_type; + for (i = 0; i < 32; i++) { + k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1); ++ decoder->quantizer_scales[i] = k; + for (j = 0; j < 64; j++) + decoder->quantizer_prescale[index][i][j] = + k * mpeg2dec->quantizer_matrix[index][j]; +--- libmpeg2/idct.c (revision 26652) ++++ libmpeg2/idct.c (working copy) +@@ -235,30 +239,40 @@ + + void mpeg2_idct_init (uint32_t accel) + { +-#ifdef ARCH_X86 ++#ifdef HAVE_SSE2 ++ if (accel & MPEG2_ACCEL_X86_SSE2) { ++ mpeg2_idct_copy = mpeg2_idct_copy_sse2; ++ mpeg2_idct_add = mpeg2_idct_add_sse2; ++ mpeg2_idct_mmx_init (); ++ } else ++#elif HAVE_MMX2 + if (accel & MPEG2_ACCEL_X86_MMXEXT) { + mpeg2_idct_copy = mpeg2_idct_copy_mmxext; + mpeg2_idct_add = mpeg2_idct_add_mmxext; + mpeg2_idct_mmx_init (); +- } else if (accel & MPEG2_ACCEL_X86_MMX) { ++ } else ++#elif HAVE_MMX ++ if (accel & MPEG2_ACCEL_X86_MMX) { + mpeg2_idct_copy = mpeg2_idct_copy_mmx; + mpeg2_idct_add = mpeg2_idct_add_mmx; + mpeg2_idct_mmx_init (); + } else + #endif +-#ifdef ARCH_PPC ++#ifdef HAVE_ALTIVEC + if (accel & MPEG2_ACCEL_PPC_ALTIVEC) { + mpeg2_idct_copy = mpeg2_idct_copy_altivec; + mpeg2_idct_add = mpeg2_idct_add_altivec; + mpeg2_idct_altivec_init (); + } else + #endif +-#ifdef ARCH_ALPHA ++#ifdef HAVE_VIS + if (accel & MPEG2_ACCEL_ALPHA_MVI) { + mpeg2_idct_copy = mpeg2_idct_copy_mvi; + mpeg2_idct_add = mpeg2_idct_add_mvi; + mpeg2_idct_alpha_init (); +- } else if (accel & MPEG2_ACCEL_ALPHA) { ++ } else ++#elif ARCH_ALPHA ++ if (accel & MPEG2_ACCEL_ALPHA) { + int i; + + mpeg2_idct_copy = mpeg2_idct_copy_alpha; +--- libmpeg2/idct_mmx.c 2006-06-16 20:12:26.000000000 +0200 ++++ libmpeg2/idct_mmx.c 2006-06-16 20:12:50.000000000 +0200 +@@ -23,7 +27,7 @@ + + #include "config.h" + +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + + #include <inttypes.h> + +--- libmpeg2/motion_comp.c 2006-06-16 20:12:26.000000000 +0200 ++++ libmpeg2/motion_comp.c 2006-06-16 20:12:50.000000000 +0200 +@@ -33,16 +37,22 @@ + + void mpeg2_mc_init (uint32_t accel) + { +-#ifdef ARCH_X86 ++#ifdef HAVE_MMX2 + if (accel & MPEG2_ACCEL_X86_MMXEXT) + mpeg2_mc = mpeg2_mc_mmxext; +- else if (accel & MPEG2_ACCEL_X86_3DNOW) ++ else ++#endif ++#ifdef HAVE_3DNOW ++ if (accel & MPEG2_ACCEL_X86_3DNOW) + mpeg2_mc = mpeg2_mc_3dnow; +- else if (accel & MPEG2_ACCEL_X86_MMX) ++ else ++#endif ++#ifdef HAVE_MMX ++ if (accel & MPEG2_ACCEL_X86_MMX) + mpeg2_mc = mpeg2_mc_mmx; + else + #endif +-#ifdef ARCH_PPC ++#ifdef HAVE_ALTIVEC + if (accel & MPEG2_ACCEL_PPC_ALTIVEC) + mpeg2_mc = mpeg2_mc_altivec; + else +@@ -52,11 +62,21 @@ + mpeg2_mc = mpeg2_mc_alpha; + else + #endif +-#ifdef ARCH_SPARC ++#ifdef HAVE_VIS + if (accel & MPEG2_ACCEL_SPARC_VIS) + mpeg2_mc = mpeg2_mc_vis; + else + #endif ++#ifdef ARCH_ARM ++#ifdef HAVE_IWMMXT ++ if (accel & MPEG2_ACCEL_ARM_IWMMXT) ++ mpeg2_mc = mpeg2_mc_iwmmxt; ++ else ++#endif ++ if (accel & MPEG2_ACCEL_ARM) ++ mpeg2_mc = mpeg2_mc_arm; ++ else ++#endif + mpeg2_mc = mpeg2_mc_c; + } + +--- libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:26.000000000 +0200 ++++ libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:50.000000000 +0200 +@@ -23,7 +27,7 @@ + + #include "config.h" + +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + + #include <inttypes.h> + +--- include/mpeg2.h 2006-06-16 20:12:26.000000000 +0200 ++++ libmpeg2/mpeg2.h 2006-06-16 20:12:50.000000000 +0200 +@@ -82,6 +86,7 @@ + #define PIC_FLAG_COMPOSITE_DISPLAY 32 + #define PIC_FLAG_SKIP 64 + #define PIC_FLAG_TAGS 128 ++#define PIC_FLAG_REPEAT_FIRST_FIELD 256 + #define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000 + + typedef struct mpeg2_picture_s { +@@ -154,11 +159,14 @@ + #define MPEG2_ACCEL_X86_MMX 1 + #define MPEG2_ACCEL_X86_3DNOW 2 + #define MPEG2_ACCEL_X86_MMXEXT 4 ++#define MPEG2_ACCEL_X86_SSE2 8 + #define MPEG2_ACCEL_PPC_ALTIVEC 1 + #define MPEG2_ACCEL_ALPHA 1 + #define MPEG2_ACCEL_ALPHA_MVI 2 + #define MPEG2_ACCEL_SPARC_VIS 1 + #define MPEG2_ACCEL_SPARC_VIS2 2 ++#define MPEG2_ACCEL_ARM 1 ++#define MPEG2_ACCEL_ARM_IWMMXT 2 + #define MPEG2_ACCEL_DETECT 0x80000000 + + uint32_t mpeg2_accel (uint32_t accel); +--- libmpeg2/mpeg2_internal.h 2006-06-16 20:12:26.000000000 +0200 ++++ libmpeg2/mpeg2_internal.h 2006-06-16 20:12:50.000000000 +0200 +@@ -144,6 +148,11 @@ + int second_field; + + int mpeg1; ++ ++ int quantizer_scales[32]; ++ int quantizer_scale; ++ char* quant_store; ++ int quant_stride; + }; + + typedef struct { +@@ -214,6 +223,9 @@ + int8_t q_scale_type, scaled[4]; + uint8_t quantizer_matrix[4][64]; + uint8_t new_quantizer_matrix[4][64]; ++ ++ unsigned char *pending_buffer; ++ int pending_length; + }; + + typedef struct { +@@ -298,3 +313,5 @@ + extern mpeg2_mc_t mpeg2_mc_altivec; + extern mpeg2_mc_t mpeg2_mc_alpha; + extern mpeg2_mc_t mpeg2_mc_vis; ++extern mpeg2_mc_t mpeg2_mc_arm; ++extern mpeg2_mc_t mpeg2_mc_iwmmxt; +--- libmpeg2/slice.c 2006-06-16 20:12:26.000000000 +0200 ++++ libmpeg2/slice.c 2006-06-16 20:12:50.000000000 +0200 +@@ -142,6 +146,7 @@ + + quantizer_scale_code = UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, 5); ++ decoder->quantizer_scale = decoder->quantizer_scales[quantizer_scale_code]; + + decoder->quantizer_matrix[0] = + decoder->quantizer_prescale[0][quantizer_scale_code]; +@@ -1564,6 +1569,18 @@ + + #define NEXT_MACROBLOCK \ + do { \ ++ if(decoder->quant_store) { \ ++ if (decoder->picture_structure == TOP_FIELD) \ ++ decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \ ++ +(decoder->offset>>4)] = decoder->quantizer_scale; \ ++ else if (decoder->picture_structure == BOTTOM_FIELD) \ ++ decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \ ++ + decoder->quant_stride \ ++ +(decoder->offset>>4)] = decoder->quantizer_scale; \ ++ else \ ++ decoder->quant_store[decoder->quant_stride*(decoder->v_offset>>4) \ ++ +(decoder->offset>>4)] = decoder->quantizer_scale; \ ++ } \ + decoder->offset += 16; \ + if (decoder->offset == decoder->width) { \ + do { /* just so we can use the break statement */ \ +@@ -1587,6 +1604,12 @@ + } \ + } while (0) + ++static void motion_dummy (mpeg2_decoder_t * const decoder, ++ motion_t * const motion, ++ mpeg2_mc_fct * const * const table) ++{ ++} ++ + void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], + uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) + { +@@ -1644,7 +1667,9 @@ + + if (decoder->mpeg1) { + decoder->motion_parser[0] = motion_zero_420; ++ decoder->motion_parser[MC_FIELD] = motion_dummy; + decoder->motion_parser[MC_FRAME] = motion_mp1; ++ decoder->motion_parser[MC_DMV] = motion_dummy; + decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->picture_structure == FRAME_PICTURE) { + if (decoder->chroma_format == 0) { +--- libmpeg2/idct_altivec.c 2004/08/02 11:26:43 12933 ++++ libmpeg2/idct_altivec.c 2005/05/15 20:11:34 15484 +@@ -41,7 +41,7 @@ + typedef vector signed int vector_s32_t; + typedef vector unsigned int vector_u32_t; + +-#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) ++#if defined(HAVE_ALTIVEC_H) && !defined(__APPLE_CC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) + /* work around gcc <3.3 vec_mergel bug */ + static inline vector_s16_t my_vec_mergel (vector_s16_t const A, + vector_s16_t const B) +Index: libmpeg2/motion_comp_arm.c +=================================================================== +--- libmpeg2/motion_comp_arm.c (revision 0) ++++ libmpeg2/motion_comp_arm.c (revision 0) +@@ -0,0 +1,187 @@ ++/* ++ * motion_comp_arm.c ++ * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> ++ * ++ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. ++ * See http://libmpeg2.sourceforge.net/ for updates. ++ * ++ * mpeg2dec is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * mpeg2dec is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include "config.h" ++ ++#ifdef ARCH_ARM ++ ++#include <inttypes.h> ++ ++#include "mpeg2.h" ++#include "attributes.h" ++#include "mpeg2_internal.h" ++ ++#define avg2(a,b) ((a+b+1)>>1) ++#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) ++ ++#define predict_o(i) (ref[i]) ++#define predict_x(i) (avg2 (ref[i], ref[i+1])) ++#define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) ++#define predict_xy(i) (avg4 (ref[i], ref[i+1], \ ++ (ref+stride)[i], (ref+stride)[i+1])) ++ ++#define put(predictor,i) dest[i] = predictor (i) ++#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i]) ++ ++/* mc function template */ ++ ++#define MC_FUNC(op,xy) \ ++static void inline MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \ ++ const int stride, int height) \ ++{ \ ++ do { \ ++ op (predict_##xy, 0); \ ++ op (predict_##xy, 1); \ ++ op (predict_##xy, 2); \ ++ op (predict_##xy, 3); \ ++ op (predict_##xy, 4); \ ++ op (predict_##xy, 5); \ ++ op (predict_##xy, 6); \ ++ op (predict_##xy, 7); \ ++ op (predict_##xy, 8); \ ++ op (predict_##xy, 9); \ ++ op (predict_##xy, 10); \ ++ op (predict_##xy, 11); \ ++ op (predict_##xy, 12); \ ++ op (predict_##xy, 13); \ ++ op (predict_##xy, 14); \ ++ op (predict_##xy, 15); \ ++ ref += stride; \ ++ dest += stride; \ ++ } while (--height); \ ++} \ ++static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \ ++ const int stride, int height) \ ++{ \ ++ do { \ ++ op (predict_##xy, 0); \ ++ op (predict_##xy, 1); \ ++ op (predict_##xy, 2); \ ++ op (predict_##xy, 3); \ ++ op (predict_##xy, 4); \ ++ op (predict_##xy, 5); \ ++ op (predict_##xy, 6); \ ++ op (predict_##xy, 7); \ ++ ref += stride; \ ++ dest += stride; \ ++ } while (--height); \ ++} \ ++/* definitions of the actual mc functions */ ++ ++MC_FUNC (put,o) ++MC_FUNC (avg,o) ++MC_FUNC (put,x) ++MC_FUNC (avg,x) ++MC_FUNC (put,y) ++MC_FUNC (avg,y) ++MC_FUNC (put,xy) ++MC_FUNC (avg,xy) ++ ++ ++extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height); ++ ++extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height); ++ ++ ++static void MC_put_y_16_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_put_y_16_c(dest, ref, stride, height); ++} ++ ++static void MC_put_xy_16_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_put_xy_16_c(dest, ref, stride, height); ++} ++ ++extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height); ++ ++extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height); ++ ++static void MC_put_y_8_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_put_y_8_c(dest, ref, stride, height); ++} ++ ++static void MC_put_xy_8_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_put_xy_8_c(dest, ref, stride, height); ++} ++ ++static void MC_avg_o_16_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_avg_o_16_c(dest, ref, stride, height); ++} ++ ++static void MC_avg_x_16_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_avg_x_16_c(dest, ref, stride, height); ++} ++ ++static void MC_avg_y_16_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_avg_y_16_c(dest, ref, stride, height); ++} ++ ++static void MC_avg_xy_16_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_avg_xy_16_c(dest, ref, stride, height); ++} ++ ++static void MC_avg_o_8_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_avg_o_8_c(dest, ref, stride, height); ++} ++ ++static void MC_avg_x_8_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_avg_x_8_c(dest, ref, stride, height); ++} ++ ++static void MC_avg_y_8_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_avg_y_8_c(dest, ref, stride, height); ++} ++ ++static void MC_avg_xy_8_arm (uint8_t * dest, const uint8_t * ref, ++ int stride, int height) ++{ ++ MC_avg_xy_8_c(dest, ref, stride, height); ++} ++ ++MPEG2_MC_EXTERN (arm) ++ ++#endif +Index: libmpeg2/motion_comp_arm_s.S +=================================================================== +--- libmpeg2/motion_comp_arm_s.S (revision 0) ++++ libmpeg2/motion_comp_arm_s.S (revision 0) +@@ -0,0 +1,322 @@ ++@ motion_comp_arm_s.S ++@ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> ++@ ++@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder. ++@ See http://libmpeg2.sourceforge.net/ for updates. ++@ ++@ mpeg2dec is free software; you can redistribute it and/or modify ++@ it under the terms of the GNU General Public License as published by ++@ the Free Software Foundation; either version 2 of the License, or ++@ (at your option) any later version. ++@ ++@ mpeg2dec is distributed in the hope that it will be useful, ++@ but WITHOUT ANY WARRANTY; without even the implied warranty of ++@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++@ GNU General Public License for more details. ++@ ++@ You should have received a copy of the GNU General Public License ++@ along with this program; if not, write to the Free Software ++@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ ++ .text ++ ++@ ---------------------------------------------------------------- ++ .align ++ .global MC_put_o_16_arm ++MC_put_o_16_arm: ++ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) ++ pld [r1] ++ stmfd sp!, {r4-r11, lr} @ R14 is also called LR ++ and r4, r1, #3 ++ adr r5, MC_put_o_16_arm_align_jt ++ add r5, r5, r4, lsl #2 ++ ldr pc, [r5] ++ ++MC_put_o_16_arm_align0: ++ ldmia r1, {r4-r7} ++ add r1, r1, r2 ++ pld [r1] ++ stmia r0, {r4-r7} ++ subs r3, r3, #1 ++ add r0, r0, r2 ++ bne MC_put_o_16_arm_align0 ++ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. ++ ++.macro PROC shift ++ ldmia r1, {r4-r8} ++ add r1, r1, r2 ++ mov r9, r4, lsr #(\shift) ++ pld [r1] ++ mov r10, r5, lsr #(\shift) ++ orr r9, r9, r5, lsl #(32-\shift) ++ mov r11, r6, lsr #(\shift) ++ orr r10, r10, r6, lsl #(32-\shift) ++ mov r12, r7, lsr #(\shift) ++ orr r11, r11, r7, lsl #(32-\shift) ++ orr r12, r12, r8, lsl #(32-\shift) ++ stmia r0, {r9-r12} ++ subs r3, r3, #1 ++ add r0, r0, r2 ++.endm ++ ++MC_put_o_16_arm_align1: ++ and r1, r1, #0xFFFFFFFC ++1: PROC(8) ++ bne 1b ++ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. ++MC_put_o_16_arm_align2: ++ and r1, r1, #0xFFFFFFFC ++1: PROC(16) ++ bne 1b ++ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. ++MC_put_o_16_arm_align3: ++ and r1, r1, #0xFFFFFFFC ++1: PROC(24) ++ bne 1b ++ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. ++MC_put_o_16_arm_align_jt: ++ .word MC_put_o_16_arm_align0 ++ .word MC_put_o_16_arm_align1 ++ .word MC_put_o_16_arm_align2 ++ .word MC_put_o_16_arm_align3 ++ ++@ ---------------------------------------------------------------- ++ .align ++ .global MC_put_o_8_arm ++MC_put_o_8_arm: ++ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) ++ pld [r1] ++ stmfd sp!, {r4-r10, lr} @ R14 is also called LR ++ and r4, r1, #3 ++ adr r5, MC_put_o_8_arm_align_jt ++ add r5, r5, r4, lsl #2 ++ ldr pc, [r5] ++MC_put_o_8_arm_align0: ++ ldmia r1, {r4-r5} ++ add r1, r1, r2 ++ pld [r1] ++ stmia r0, {r4-r5} ++ add r0, r0, r2 ++ subs r3, r3, #1 ++ bne MC_put_o_8_arm_align0 ++ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. ++ ++.macro PROC8 shift ++ ldmia r1, {r4-r6} ++ add r1, r1, r2 ++ mov r9, r4, lsr #(\shift) ++ pld [r1] ++ mov r10, r5, lsr #(\shift) ++ orr r9, r9, r5, lsl #(32-\shift) ++ orr r10, r10, r6, lsl #(32-\shift) ++ stmia r0, {r9-r10} ++ subs r3, r3, #1 ++ add r0, r0, r2 ++.endm ++ ++MC_put_o_8_arm_align1: ++ and r1, r1, #0xFFFFFFFC ++1: PROC8(8) ++ bne 1b ++ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. ++ ++MC_put_o_8_arm_align2: ++ and r1, r1, #0xFFFFFFFC ++1: PROC8(16) ++ bne 1b ++ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. ++ ++MC_put_o_8_arm_align3: ++ and r1, r1, #0xFFFFFFFC ++1: PROC8(24) ++ bne 1b ++ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. ++ ++MC_put_o_8_arm_align_jt: ++ .word MC_put_o_8_arm_align0 ++ .word MC_put_o_8_arm_align1 ++ .word MC_put_o_8_arm_align2 ++ .word MC_put_o_8_arm_align3 ++ ++@ ---------------------------------------------------------------- ++.macro AVG_PW rW1, rW2 ++ mov \rW2, \rW2, lsl #24 ++ orr \rW2, \rW2, \rW1, lsr #8 ++ eor r9, \rW1, \rW2 ++ and \rW2, \rW1, \rW2 ++ and r10, r9, r12 ++ add \rW2, \rW2, r10, lsr #1 ++ and r10, r9, r11 ++ add \rW2, \rW2, r10 ++.endm ++ ++ .align ++ .global MC_put_x_16_arm ++MC_put_x_16_arm: ++ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) ++ pld [r1] ++ stmfd sp!, {r4-r11,lr} @ R14 is also called LR ++ and r4, r1, #3 ++ adr r5, MC_put_x_16_arm_align_jt ++ ldr r11, [r5] ++ mvn r12, r11 ++ add r5, r5, r4, lsl #2 ++ ldr pc, [r5, #4] ++ ++.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4 ++ mov \R0, \R0, lsr #(\shift) ++ orr \R0, \R0, \R1, lsl #(32 - \shift) ++ mov \R1, \R1, lsr #(\shift) ++ orr \R1, \R1, \R2, lsl #(32 - \shift) ++ mov \R2, \R2, lsr #(\shift) ++ orr \R2, \R2, \R3, lsl #(32 - \shift) ++ mov \R3, \R3, lsr #(\shift) ++ orr \R3, \R3, \R4, lsl #(32 - \shift) ++ mov \R4, \R4, lsr #(\shift) ++@ and \R4, \R4, #0xFF ++.endm ++ ++MC_put_x_16_arm_align0: ++ ldmia r1, {r4-r8} ++ add r1, r1, r2 ++ pld [r1] ++ AVG_PW r7, r8 ++ AVG_PW r6, r7 ++ AVG_PW r5, r6 ++ AVG_PW r4, r5 ++ stmia r0, {r5-r8} ++ subs r3, r3, #1 ++ add r0, r0, r2 ++ bne MC_put_x_16_arm_align0 ++ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. ++MC_put_x_16_arm_align1: ++ and r1, r1, #0xFFFFFFFC ++1: ldmia r1, {r4-r8} ++ add r1, r1, r2 ++ pld [r1] ++ ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8 ++ AVG_PW r7, r8 ++ AVG_PW r6, r7 ++ AVG_PW r5, r6 ++ AVG_PW r4, r5 ++ stmia r0, {r5-r8} ++ subs r3, r3, #1 ++ add r0, r0, r2 ++ bne 1b ++ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. ++MC_put_x_16_arm_align2: ++ and r1, r1, #0xFFFFFFFC ++1: ldmia r1, {r4-r8} ++ add r1, r1, r2 ++ pld [r1] ++ ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8 ++ AVG_PW r7, r8 ++ AVG_PW r6, r7 ++ AVG_PW r5, r6 ++ AVG_PW r4, r5 ++ stmia r0, {r5-r8} ++ subs r3, r3, #1 ++ add r0, r0, r2 ++ bne 1b ++ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. ++MC_put_x_16_arm_align3: ++ and r1, r1, #0xFFFFFFFC ++1: ldmia r1, {r4-r8} ++ add r1, r1, r2 ++ pld [r1] ++ ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8 ++ AVG_PW r7, r8 ++ AVG_PW r6, r7 ++ AVG_PW r5, r6 ++ AVG_PW r4, r5 ++ stmia r0, {r5-r8} ++ subs r3, r3, #1 ++ add r0, r0, r2 ++ bne 1b ++ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. ++MC_put_x_16_arm_align_jt: ++ .word 0x01010101 ++ .word MC_put_x_16_arm_align0 ++ .word MC_put_x_16_arm_align1 ++ .word MC_put_x_16_arm_align2 ++ .word MC_put_x_16_arm_align3 ++ ++@ ---------------------------------------------------------------- ++ .align ++ .global MC_put_x_8_arm ++MC_put_x_8_arm: ++ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) ++ pld [r1] ++ stmfd sp!, {r4-r11,lr} @ R14 is also called LR ++ and r4, r1, #3 ++ adr r5, MC_put_x_8_arm_align_jt ++ ldr r11, [r5] ++ mvn r12, r11 ++ add r5, r5, r4, lsl #2 ++ ldr pc, [r5, #4] ++ ++.macro ADJ_ALIGN_DW shift, R0, R1, R2 ++ mov \R0, \R0, lsr #(\shift) ++ orr \R0, \R0, \R1, lsl #(32 - \shift) ++ mov \R1, \R1, lsr #(\shift) ++ orr \R1, \R1, \R2, lsl #(32 - \shift) ++ mov \R2, \R2, lsr #(\shift) ++@ and \R4, \R4, #0xFF ++.endm ++ ++MC_put_x_8_arm_align0: ++ ldmia r1, {r4-r6} ++ add r1, r1, r2 ++ pld [r1] ++ AVG_PW r5, r6 ++ AVG_PW r4, r5 ++ stmia r0, {r5-r6} ++ subs r3, r3, #1 ++ add r0, r0, r2 ++ bne MC_put_x_8_arm_align0 ++ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. ++MC_put_x_8_arm_align1: ++ and r1, r1, #0xFFFFFFFC ++1: ldmia r1, {r4-r6} ++ add r1, r1, r2 ++ pld [r1] ++ ADJ_ALIGN_DW 8, r4, r5, r6 ++ AVG_PW r5, r6 ++ AVG_PW r4, r5 ++ stmia r0, {r5-r6} ++ subs r3, r3, #1 ++ add r0, r0, r2 ++ bne 1b ++ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. ++MC_put_x_8_arm_align2: ++ and r1, r1, #0xFFFFFFFC ++1: ldmia r1, {r4-r6} ++ add r1, r1, r2 ++ pld [r1] ++ ADJ_ALIGN_DW 16, r4, r5, r6 ++ AVG_PW r5, r6 ++ AVG_PW r4, r5 ++ stmia r0, {r5-r6} ++ subs r3, r3, #1 ++ add r0, r0, r2 ++ bne 1b ++ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. ++MC_put_x_8_arm_align3: ++ and r1, r1, #0xFFFFFFFC ++1: ldmia r1, {r4-r6} ++ add r1, r1, r2 ++ pld [r1] ++ ADJ_ALIGN_DW 24, r4, r5, r6 ++ AVG_PW r5, r6 ++ AVG_PW r4, r5 ++ stmia r0, {r5-r6} ++ subs r3, r3, #1 ++ add r0, r0, r2 ++ bne 1b ++ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. ++MC_put_x_8_arm_align_jt: ++ .word 0x01010101 ++ .word MC_put_x_8_arm_align0 ++ .word MC_put_x_8_arm_align1 ++ .word MC_put_x_8_arm_align2 ++ .word MC_put_x_8_arm_align3 +Index: libmpeg2/motion_comp_iwmmxt.c +=================================================================== +--- libmpeg2/motion_comp_iwmmxt.c (revision 0) ++++ libmpeg2/motion_comp_iwmmxt.c (revision 0) +@@ -0,0 +1,59 @@ ++/* ++ * motion_comp_iwmmxt.c ++ * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> ++ * ++ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. ++ * See http://libmpeg2.sourceforge.net/ for updates. ++ * ++ * mpeg2dec is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * mpeg2dec is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include "config.h" ++ ++#if defined(ARCH_ARM) && defined(HAVE_IWMMXT) ++ ++#include <inttypes.h> ++ ++#include "mpeg2.h" ++#include "attributes.h" ++#include "mpeg2_internal.h" ++ ++/* defined in libavcodec */ ++ ++extern void put_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void put_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void put_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void put_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void put_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void put_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void put_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void put_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void avg_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void avg_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void avg_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void avg_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void avg_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void avg_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void avg_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++extern void avg_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); ++ ++mpeg2_mc_t mpeg2_mc_iwmmxt = { ++ {put_pixels16_iwmmxt, put_pixels16_x2_iwmmxt, put_pixels16_y2_iwmmxt, put_pixels16_xy2_iwmmxt, ++ put_pixels8_iwmmxt, put_pixels8_x2_iwmmxt, put_pixels8_y2_iwmmxt, put_pixels8_xy2_iwmmxt}, \ ++ {avg_pixels16_iwmmxt, avg_pixels16_x2_iwmmxt, avg_pixels16_y2_iwmmxt, avg_pixels16_xy2_iwmmxt, ++ avg_pixels8_iwmmxt, avg_pixels8_x2_iwmmxt, avg_pixels8_y2_iwmmxt, avg_pixels8_xy2_iwmmxt}, \ ++}; ++ ++#endif /* defined(ARCH_ARM) && defined(HAVE_IWMMXT) */