Mercurial > mplayer.hg
view libmpeg2/libmpeg-0.4.1.diff @ 23349:ec03399a42bb
little alignment fixes
author | ptt |
---|---|
date | Mon, 21 May 2007 16:01:04 +0000 |
parents | e42491f6fa84 |
children | d198ab45f7c9 |
line wrap: on
line source
--- include/attributes.h 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/attributes.h 2006-06-16 20:12:50.000000000 +0200 @@ -25,7 +29,7 @@ #ifdef ATTRIBUTE_ALIGNED_MAX #define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < align) ? ATTRIBUTE_ALIGNED_MAX : align))) #else -#define ATTR_ALIGN(align) +#define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((16 < align) ? 16 : align))) #endif #ifdef HAVE_BUILTIN_EXPECT --- libmpeg2/cpu_accel.c 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/cpu_accel.c 2006-06-16 20:12:50.000000000 +0200 @@ -22,6 +26,7 @@ */ #include "config.h" +#include "cpudetect.h" #include <inttypes.h> @@ -30,9 +35,17 @@ #include "mpeg2_internal.h" #ifdef ACCEL_DETECT -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) + +/* MPlayer imports libmpeg2 as decoder, which detects MMX / 3DNow! + * instructions via assembly. However, it is regarded as duplicaed work + * in MPlayer, so that we enforce to use MPlayer's implementation. + */ +#define USE_MPLAYER_CPUDETECT + static inline uint32_t arch_accel (void) { +#if !defined(USE_MPLAYER_CPUDETECT) uint32_t eax, ebx, ecx, edx; int AMD; uint32_t caps; @@ -105,10 +118,24 @@ caps |= MPEG2_ACCEL_X86_MMXEXT; return caps; +#else /* USE_MPLAYER_CPUDETECT: Use MPlayer's cpu capability property */ + caps = 0; + if (gCpuCaps.hasMMX) + caps |= MPEG2_ACCEL_X86_MMX; + if (gCpuCaps.hasSSE2) + caps |= MPEG2_ACCEL_X86_SSE2; + if (gCpuCaps.hasMMX2) + caps |= MPEG2_ACCEL_X86_MMXEXT; + if (gCpuCaps.has3DNow) + caps |= MPEG2_ACCEL_X86_3DNOW; + + return caps; + +#endif /* USE_MPLAYER_CPUDETECT */ } -#endif /* ARCH_X86 */ +#endif /* ARCH_X86 || ARCH_X86_64 */ -#if defined(ARCH_PPC) || defined(ARCH_SPARC) +#if defined(ARCH_PPC) || (defined(ARCH_SPARC) && defined(HAVE_VIS)) #include <signal.h> #include <setjmp.h> @@ -166,10 +166,10 @@ canjump = 1; -#ifdef HAVE_ALTIVEC_H /* gnu */ -#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t" -#else /* apple */ +#if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ ) /* apple */ #define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t" +#else /* gnu */ +#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t" #endif asm volatile ("mtspr 256, %0\n\t" VAND (0, 0, 0) @@ -195,6 +222,7 @@ #ifdef ARCH_ALPHA static inline uint32_t arch_accel (void) { +#ifdef CAN_COMPILE_ALPHA_MVI uint64_t no_mvi; asm volatile ("amask %1, %0" @@ -202,6 +230,9 @@ : "rI" (256)); /* AMASK_MVI */ return no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA | MPEG2_ACCEL_ALPHA_MVI); +#else + return MPEG2_ACCEL_ALPHA; +#endif } #endif /* ARCH_ALPHA */ #endif /* ACCEL_DETECT */ @@ -212,7 +243,7 @@ accel = 0; #ifdef ACCEL_DETECT -#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) +#if defined (ARCH_X86) || defined (ARCH_X86_64) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) accel = arch_accel (); #endif #endif --- libmpeg2/cpu_state.c 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/cpu_state.c 2006-06-16 20:12:50.000000000 +0200 @@ -29,14 +33,14 @@ #include "mpeg2.h" #include "attributes.h" #include "mpeg2_internal.h" -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #include "mmx.h" #endif void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL; void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void state_restore_mmx (cpu_state_t * state) { emms (); @@ -48,18 +48,18 @@ #endif -#ifdef ARCH_PPC +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC) -#ifdef HAVE_ALTIVEC_H /* gnu */ -#define LI(a,b) "li " #a "," #b "\n\t" -#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" -#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t" -#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t" -#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t" -#else /* apple */ +#if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ ) /* apple */ #define LI(a,b) "li r" #a "," #b "\n\t" #define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t" #define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t" #define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t" #define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t" +#else /* gnu */ +#define LI(a,b) "li " #a "," #b "\n\t" +#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" +#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t" +#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t" +#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t" #endif static void state_save_altivec (cpu_state_t * state) @@ -115,12 +119,12 @@ void mpeg2_cpu_state_init (uint32_t accel) { -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) if (accel & MPEG2_ACCEL_X86_MMX) { mpeg2_cpu_state_restore = state_restore_mmx; } #endif -#ifdef ARCH_PPC +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC) if (accel & MPEG2_ACCEL_PPC_ALTIVEC) { mpeg2_cpu_state_save = state_save_altivec; mpeg2_cpu_state_restore = state_restore_altivec; --- libmpeg2/decode.c 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/decode.c 2006-06-16 20:12:50.000000000 +0200 @@ -351,6 +355,15 @@ fbuf->buf[1] = buf[1]; fbuf->buf[2] = buf[2]; fbuf->id = id; + // HACK! FIXME! At first I frame, copy pointers to prediction frame too! + if (mpeg2dec->custom_fbuf && !mpeg2dec->fbuf[1]->buf[0]){ + mpeg2dec->fbuf[1]->buf[0]=buf[0]; + mpeg2dec->fbuf[1]->buf[1]=buf[1]; + mpeg2dec->fbuf[1]->buf[2]=buf[2]; + mpeg2dec->fbuf[1]->id=NULL; + } +// printf("libmpeg2: FBUF 0:%p 1:%p 2:%p\n", +// mpeg2dec->fbuf[0]->buf[0],mpeg2dec->fbuf[1]->buf[0],mpeg2dec->fbuf[2]->buf[0]); } void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf) --- libmpeg2/header.c 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/header.c 2006-06-16 20:12:50.000000000 +0200 @@ -100,6 +104,9 @@ mpeg2dec->decoder.convert = NULL; mpeg2dec->decoder.convert_id = NULL; mpeg2dec->picture = mpeg2dec->pictures; + memset(&mpeg2dec->fbuf_alloc[0].fbuf, 0, sizeof(mpeg2_fbuf_t)); + memset(&mpeg2dec->fbuf_alloc[1].fbuf, 0, sizeof(mpeg2_fbuf_t)); + memset(&mpeg2dec->fbuf_alloc[2].fbuf, 0, sizeof(mpeg2_fbuf_t)); mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf; mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf; mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf; @@ -551,6 +558,7 @@ if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { picture->nb_fields = (buffer[3] & 2) ? 3 : 2; flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; + flags |= (buffer[3] & 2) ? PIC_FLAG_REPEAT_FIRST_FIELD : 0; } else picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2; break; @@ -799,6 +807,7 @@ mpeg2dec->scaled[index] = mpeg2dec->q_scale_type; for (i = 0; i < 32; i++) { k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1); + decoder->quantizer_scales[i] = k; for (j = 0; j < 64; j++) decoder->quantizer_prescale[index][i][j] = k * mpeg2dec->quantizer_matrix[index][j]; --- libmpeg2/idct.c 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/idct.c 2006-06-16 20:12:50.000000000 +0200 @@ -239,12 +239,15 @@ void mpeg2_idct_init (uint32_t accel) { -#ifdef ARCH_X86 +#ifdef HAVE_MMX2 if (accel & MPEG2_ACCEL_X86_MMXEXT) { mpeg2_idct_copy = mpeg2_idct_copy_mmxext; mpeg2_idct_add = mpeg2_idct_add_mmxext; mpeg2_idct_mmx_init (); - } else if (accel & MPEG2_ACCEL_X86_MMX) { + } else +#endif +#ifdef HAVE_MMX + if (accel & MPEG2_ACCEL_X86_MMX) { mpeg2_idct_copy = mpeg2_idct_copy_mmx; mpeg2_idct_add = mpeg2_idct_add_mmx; mpeg2_idct_mmx_init (); @@ -254,11 +261,14 @@ } else #endif #ifdef ARCH_ALPHA +#ifdef CAN_COMPILE_ALPHA_MVI if (accel & MPEG2_ACCEL_ALPHA_MVI) { mpeg2_idct_copy = mpeg2_idct_copy_mvi; mpeg2_idct_add = mpeg2_idct_add_mvi; mpeg2_idct_alpha_init (); - } else if (accel & MPEG2_ACCEL_ALPHA) { + } else +#endif + if (accel & MPEG2_ACCEL_ALPHA) { int i; mpeg2_idct_copy = mpeg2_idct_copy_alpha; --- libmpeg2/idct_alpha.c 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/idct_alpha.c 2006-06-16 20:12:50.000000000 +0200 @@ -157,6 +161,7 @@ block[8*7] = (a0 - b0) >> 17; } +#ifdef CAN_COMPILE_ALPHA_MVI void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride) { uint64_t clampmask; @@ -289,6 +294,7 @@ stq (p7, dest + 7 * stride); } } +#endif void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, const int stride) { --- libmpeg2/idct_mmx.c 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/idct_mmx.c 2006-06-16 20:12:50.000000000 +0200 @@ -23,7 +27,7 @@ #include "config.h" -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #include <inttypes.h> --- libmpeg2/motion_comp.c 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/motion_comp.c 2006-06-16 20:12:50.000000000 +0200 @@ -37,16 +37,22 @@ void mpeg2_mc_init (uint32_t accel) { -#ifdef ARCH_X86 +#ifdef HAVE_MMX2 if (accel & MPEG2_ACCEL_X86_MMXEXT) mpeg2_mc = mpeg2_mc_mmxext; - else if (accel & MPEG2_ACCEL_X86_3DNOW) + else +#endif +#ifdef HAVE_3DNOW + if (accel & MPEG2_ACCEL_X86_3DNOW) mpeg2_mc = mpeg2_mc_3dnow; - else if (accel & MPEG2_ACCEL_X86_MMX) + else +#endif +#ifdef HAVE_MMX + if (accel & MPEG2_ACCEL_X86_MMX) mpeg2_mc = mpeg2_mc_mmx; else #endif -#ifdef ARCH_PPC +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC) if (accel & MPEG2_ACCEL_PPC_ALTIVEC) mpeg2_mc = mpeg2_mc_altivec; else @@ -52,7 +62,7 @@ mpeg2_mc = mpeg2_mc_alpha; else #endif -#ifdef ARCH_SPARC +#if defined(ARCH_SPARC) && defined(HAVE_VIS) if (accel & MPEG2_ACCEL_SPARC_VIS) mpeg2_mc = mpeg2_mc_vis; else @@ -67,6 +67,16 @@ mpeg2_mc = mpeg2_mc_vis; else #endif +#ifdef ARCH_ARM + if (1 /*accel & MPEG2_ACCEL_ARM*/) { +#ifdef HAVE_IWMMXT + if (1 /*accel & MPEG2_ACCEL_ARM_IWMMXT*/) + mpeg2_mc = mpeg2_mc_iwmmxt; + else +#endif + mpeg2_mc = mpeg2_mc_arm; + } else +#endif mpeg2_mc = mpeg2_mc_c; } --- libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:50.000000000 +0200 @@ -23,7 +27,7 @@ #include "config.h" -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #include <inttypes.h> --- include/mpeg2.h 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/mpeg2.h 2006-06-16 20:12:50.000000000 +0200 @@ -82,6 +86,7 @@ #define PIC_FLAG_COMPOSITE_DISPLAY 32 #define PIC_FLAG_SKIP 64 #define PIC_FLAG_TAGS 128 +#define PIC_FLAG_REPEAT_FIRST_FIELD 256 #define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000 typedef struct mpeg2_picture_s { @@ -154,6 +159,7 @@ #define MPEG2_ACCEL_X86_MMX 1 #define MPEG2_ACCEL_X86_3DNOW 2 #define MPEG2_ACCEL_X86_MMXEXT 4 +#define MPEG2_ACCEL_X86_SSE2 8 #define MPEG2_ACCEL_PPC_ALTIVEC 1 #define MPEG2_ACCEL_ALPHA 1 #define MPEG2_ACCEL_ALPHA_MVI 2 --- libmpeg2/mpeg2_internal.h 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/mpeg2_internal.h 2006-06-16 20:12:50.000000000 +0200 @@ -144,6 +148,12 @@ int second_field; int mpeg1; + + /* for MPlayer: */ + int quantizer_scales[32]; + int quantizer_scale; + char* quant_store; + int quant_stride; }; typedef struct { @@ -214,6 +224,10 @@ int8_t q_scale_type, scaled[4]; uint8_t quantizer_matrix[4][64]; uint8_t new_quantizer_matrix[4][64]; + + /* for MPlayer: */ + unsigned char *pending_buffer; + int pending_length; }; typedef struct { @@ -312,3 +312,5 @@ extern mpeg2_mc_t mpeg2_mc_altivec; extern mpeg2_mc_t mpeg2_mc_alpha; extern mpeg2_mc_t mpeg2_mc_vis; +extern mpeg2_mc_t mpeg2_mc_arm; +extern mpeg2_mc_t mpeg2_mc_iwmmxt; --- libmpeg2/slice.c 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/slice.c 2006-06-16 20:12:50.000000000 +0200 @@ -142,6 +146,7 @@ quantizer_scale_code = UBITS (bit_buf, 5); DUMPBITS (bit_buf, bits, 5); + decoder->quantizer_scale = decoder->quantizer_scales[quantizer_scale_code]; decoder->quantizer_matrix[0] = decoder->quantizer_prescale[0][quantizer_scale_code]; @@ -1568,6 +1569,18 @@ #define NEXT_MACROBLOCK \ do { \ + if(decoder->quant_store) { \ + if (decoder->picture_structure == TOP_FIELD) \ + decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \ + +(decoder->offset>>4)] = decoder->quantizer_scale; \ + else if (decoder->picture_structure == BOTTOM_FIELD) \ + decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \ + + decoder->quant_stride \ + +(decoder->offset>>4)] = decoder->quantizer_scale; \ + else \ + decoder->quant_store[decoder->quant_stride*(decoder->v_offset>>4) \ + +(decoder->offset>>4)] = decoder->quantizer_scale; \ + } \ decoder->offset += 16; \ if (decoder->offset == decoder->width) { \ do { /* just so we can use the break statement */ \ @@ -1604,6 +1604,12 @@ } \ } while (0) +static void motion_dummy (mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ +} + void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) { @@ -1661,7 +1667,9 @@ if (decoder->mpeg1) { decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FIELD] = motion_dummy; decoder->motion_parser[MC_FRAME] = motion_mp1; + decoder->motion_parser[MC_DMV] = motion_dummy; decoder->motion_parser[4] = motion_reuse_420; } else if (decoder->picture_structure == FRAME_PICTURE) { if (decoder->chroma_format == 0) { --- libmpeg2/idct.c 2006-06-16 20:12:26.000000000 +0200 +++ libmpeg2/idct.c 2006-06-16 20:12:50.000000000 +0200 @@ -253,7 +253,7 @@ mpeg2_idct_mmx_init (); } else #endif -#ifdef ARCH_PPC +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC) if (accel & MPEG2_ACCEL_PPC_ALTIVEC) { mpeg2_idct_copy = mpeg2_idct_copy_altivec; mpeg2_idct_add = mpeg2_idct_add_altivec; --- libmpeg2/idct_altivec.c 2004/08/02 11:26:43 12933 +++ libmpeg2/idct_altivec.c 2005/05/15 20:11:34 15484 @@ -41,7 +41,7 @@ typedef vector signed int vector_s32_t; typedef vector unsigned int vector_u32_t; -#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) +#if defined( HAVE_ALTIVEC_H ) && !defined( __APPLE_ALTIVEC__ ) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) /* work around gcc <3.3 vec_mergel bug */ static inline vector_s16_t my_vec_mergel (vector_s16_t const A, vector_s16_t const B) @@ -56,10 +56,10 @@ #define vec_mergel my_vec_mergel #endif -#ifdef HAVE_ALTIVEC_H /* gnu */ -#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h} -#else /* apple */ +#if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ ) /* apple */ #define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h) +#else /* gnu */ +#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h} #endif static const vector_s16_t constants ATTR_ALIGN(16) = Index: libmpeg2/motion_comp_arm.c =================================================================== --- libmpeg2/motion_comp_arm.c (revision 0) +++ libmpeg2/motion_comp_arm.c (revision 0) @@ -0,0 +1,187 @@ +/* + * motion_comp_arm.c + * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_ARM + +#include <inttypes.h> + +#include "mpeg2.h" +#include "attributes.h" +#include "mpeg2_internal.h" + +#define avg2(a,b) ((a+b+1)>>1) +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) + +#define predict_o(i) (ref[i]) +#define predict_x(i) (avg2 (ref[i], ref[i+1])) +#define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) +#define predict_xy(i) (avg4 (ref[i], ref[i+1], \ + (ref+stride)[i], (ref+stride)[i+1])) + +#define put(predictor,i) dest[i] = predictor (i) +#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i]) + +/* mc function template */ + +#define MC_FUNC(op,xy) \ +static void inline MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \ + const int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + op (predict_##xy, 8); \ + op (predict_##xy, 9); \ + op (predict_##xy, 10); \ + op (predict_##xy, 11); \ + op (predict_##xy, 12); \ + op (predict_##xy, 13); \ + op (predict_##xy, 14); \ + op (predict_##xy, 15); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} \ +static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \ + const int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} \ +/* definitions of the actual mc functions */ + +MC_FUNC (put,o) +MC_FUNC (avg,o) +MC_FUNC (put,x) +MC_FUNC (avg,x) +MC_FUNC (put,y) +MC_FUNC (avg,y) +MC_FUNC (put,xy) +MC_FUNC (avg,xy) + + +extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); + +extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); + + +static void MC_put_y_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_y_16_c(dest, ref, stride, height); +} + +static void MC_put_xy_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_xy_16_c(dest, ref, stride, height); +} + +extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); + +extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); + +static void MC_put_y_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_y_8_c(dest, ref, stride, height); +} + +static void MC_put_xy_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_xy_8_c(dest, ref, stride, height); +} + +static void MC_avg_o_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_o_16_c(dest, ref, stride, height); +} + +static void MC_avg_x_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_x_16_c(dest, ref, stride, height); +} + +static void MC_avg_y_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_y_16_c(dest, ref, stride, height); +} + +static void MC_avg_xy_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_16_c(dest, ref, stride, height); +} + +static void MC_avg_o_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_o_8_c(dest, ref, stride, height); +} + +static void MC_avg_x_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_x_8_c(dest, ref, stride, height); +} + +static void MC_avg_y_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_y_8_c(dest, ref, stride, height); +} + +static void MC_avg_xy_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_8_c(dest, ref, stride, height); +} + +MPEG2_MC_EXTERN (arm) + +#endif Index: libmpeg2/motion_comp_arm_s.S =================================================================== --- libmpeg2/motion_comp_arm_s.S (revision 0) +++ libmpeg2/motion_comp_arm_s.S (revision 0) @@ -0,0 +1,322 @@ +@ motion_comp_arm_s.S +@ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> +@ +@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder. +@ See http://libmpeg2.sourceforge.net/ for updates. +@ +@ mpeg2dec is free software; you can redistribute it and/or modify +@ it under the terms of the GNU General Public License as published by +@ the Free Software Foundation; either version 2 of the License, or +@ (at your option) any later version. +@ +@ mpeg2dec is distributed in the hope that it will be useful, +@ but WITHOUT ANY WARRANTY; without even the implied warranty of +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +@ GNU General Public License for more details. +@ +@ You should have received a copy of the GNU General Public License +@ along with this program; if not, write to the Free Software +@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + .text + +@ ---------------------------------------------------------------- + .align + .global MC_put_o_16_arm +MC_put_o_16_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) + pld [r1] + stmfd sp!, {r4-r11, lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_o_16_arm_align_jt + add r5, r5, r4, lsl #2 + ldr pc, [r5] + +MC_put_o_16_arm_align0: + ldmia r1, {r4-r7} + add r1, r1, r2 + pld [r1] + stmia r0, {r4-r7} + subs r3, r3, #1 + add r0, r0, r2 + bne MC_put_o_16_arm_align0 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. + +.macro PROC shift + ldmia r1, {r4-r8} + add r1, r1, r2 + mov r9, r4, lsr #(\shift) + pld [r1] + mov r10, r5, lsr #(\shift) + orr r9, r9, r5, lsl #(32-\shift) + mov r11, r6, lsr #(\shift) + orr r10, r10, r6, lsl #(32-\shift) + mov r12, r7, lsr #(\shift) + orr r11, r11, r7, lsl #(32-\shift) + orr r12, r12, r8, lsl #(32-\shift) + stmia r0, {r9-r12} + subs r3, r3, #1 + add r0, r0, r2 +.endm + +MC_put_o_16_arm_align1: + and r1, r1, #0xFFFFFFFC +1: PROC(8) + bne 1b + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. +MC_put_o_16_arm_align2: + and r1, r1, #0xFFFFFFFC +1: PROC(16) + bne 1b + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. +MC_put_o_16_arm_align3: + and r1, r1, #0xFFFFFFFC +1: PROC(24) + bne 1b + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. +MC_put_o_16_arm_align_jt: + .word MC_put_o_16_arm_align0 + .word MC_put_o_16_arm_align1 + .word MC_put_o_16_arm_align2 + .word MC_put_o_16_arm_align3 + +@ ---------------------------------------------------------------- + .align + .global MC_put_o_8_arm +MC_put_o_8_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) + pld [r1] + stmfd sp!, {r4-r10, lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_o_8_arm_align_jt + add r5, r5, r4, lsl #2 + ldr pc, [r5] +MC_put_o_8_arm_align0: + ldmia r1, {r4-r5} + add r1, r1, r2 + pld [r1] + stmia r0, {r4-r5} + add r0, r0, r2 + subs r3, r3, #1 + bne MC_put_o_8_arm_align0 + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. + +.macro PROC8 shift + ldmia r1, {r4-r6} + add r1, r1, r2 + mov r9, r4, lsr #(\shift) + pld [r1] + mov r10, r5, lsr #(\shift) + orr r9, r9, r5, lsl #(32-\shift) + orr r10, r10, r6, lsl #(32-\shift) + stmia r0, {r9-r10} + subs r3, r3, #1 + add r0, r0, r2 +.endm + +MC_put_o_8_arm_align1: + and r1, r1, #0xFFFFFFFC +1: PROC8(8) + bne 1b + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. + +MC_put_o_8_arm_align2: + and r1, r1, #0xFFFFFFFC +1: PROC8(16) + bne 1b + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. + +MC_put_o_8_arm_align3: + and r1, r1, #0xFFFFFFFC +1: PROC8(24) + bne 1b + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. + +MC_put_o_8_arm_align_jt: + .word MC_put_o_8_arm_align0 + .word MC_put_o_8_arm_align1 + .word MC_put_o_8_arm_align2 + .word MC_put_o_8_arm_align3 + +@ ---------------------------------------------------------------- +.macro AVG_PW rW1, rW2 + mov \rW2, \rW2, lsl #24 + orr \rW2, \rW2, \rW1, lsr #8 + eor r9, \rW1, \rW2 + and \rW2, \rW1, \rW2 + and r10, r9, r12 + add \rW2, \rW2, r10, lsr #1 + and r10, r9, r11 + add \rW2, \rW2, r10 +.endm + + .align + .global MC_put_x_16_arm +MC_put_x_16_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_x_16_arm_align_jt + ldr r11, [r5] + mvn r12, r11 + add r5, r5, r4, lsl #2 + ldr pc, [r5, #4] + +.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4 + mov \R0, \R0, lsr #(\shift) + orr \R0, \R0, \R1, lsl #(32 - \shift) + mov \R1, \R1, lsr #(\shift) + orr \R1, \R1, \R2, lsl #(32 - \shift) + mov \R2, \R2, lsr #(\shift) + orr \R2, \R2, \R3, lsl #(32 - \shift) + mov \R3, \R3, lsr #(\shift) + orr \R3, \R3, \R4, lsl #(32 - \shift) + mov \R4, \R4, lsr #(\shift) +@ and \R4, \R4, #0xFF +.endm + +MC_put_x_16_arm_align0: + ldmia r1, {r4-r8} + add r1, r1, r2 + pld [r1] + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} + subs r3, r3, #1 + add r0, r0, r2 + bne MC_put_x_16_arm_align0 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_16_arm_align1: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r8} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_16_arm_align2: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r8} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_16_arm_align3: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r8} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_16_arm_align_jt: + .word 0x01010101 + .word MC_put_x_16_arm_align0 + .word MC_put_x_16_arm_align1 + .word MC_put_x_16_arm_align2 + .word MC_put_x_16_arm_align3 + +@ ---------------------------------------------------------------- + .align + .global MC_put_x_8_arm +MC_put_x_8_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_x_8_arm_align_jt + ldr r11, [r5] + mvn r12, r11 + add r5, r5, r4, lsl #2 + ldr pc, [r5, #4] + +.macro ADJ_ALIGN_DW shift, R0, R1, R2 + mov \R0, \R0, lsr #(\shift) + orr \R0, \R0, \R1, lsl #(32 - \shift) + mov \R1, \R1, lsr #(\shift) + orr \R1, \R1, \R2, lsl #(32 - \shift) + mov \R2, \R2, lsr #(\shift) +@ and \R4, \R4, #0xFF +.endm + +MC_put_x_8_arm_align0: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne MC_put_x_8_arm_align0 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_8_arm_align1: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DW 8, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_8_arm_align2: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DW 16, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_8_arm_align3: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DW 24, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_8_arm_align_jt: + .word 0x01010101 + .word MC_put_x_8_arm_align0 + .word MC_put_x_8_arm_align1 + .word MC_put_x_8_arm_align2 + .word MC_put_x_8_arm_align3 Index: libmpeg2/motion_comp_iwmmxt.c =================================================================== --- libmpeg2/motion_comp_iwmmxt.c (revision 0) +++ libmpeg2/motion_comp_iwmmxt.c (revision 0) @@ -0,0 +1,61 @@ +/* + * motion_comp_iwmmxt.c + * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_ARM +#ifdef HAVE_IWMMXT + +#include <inttypes.h> + +#include "mpeg2.h" +#include "attributes.h" +#include "mpeg2_internal.h" + +/* defined in libavcodec */ + +extern void put_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void put_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void put_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void put_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void put_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void put_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void put_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void put_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void avg_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void avg_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void avg_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void avg_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void avg_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void avg_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void avg_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); +extern void avg_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height); + +mpeg2_mc_t mpeg2_mc_iwmmxt = { + {put_pixels16_iwmmxt, put_pixels16_x2_iwmmxt, put_pixels16_y2_iwmmxt, put_pixels16_xy2_iwmmxt, + put_pixels8_iwmmxt, put_pixels8_x2_iwmmxt, put_pixels8_y2_iwmmxt, put_pixels8_xy2_iwmmxt}, \ + {avg_pixels16_iwmmxt, avg_pixels16_x2_iwmmxt, avg_pixels16_y2_iwmmxt, avg_pixels16_xy2_iwmmxt, + avg_pixels8_iwmmxt, avg_pixels8_x2_iwmmxt, avg_pixels8_y2_iwmmxt, avg_pixels8_xy2_iwmmxt}, \ +}; + +#endif +#endif Index: libmpeg2/Makefile =================================================================== --- libmpeg2/Makefile (revision 23253) +++ libmpeg2/Makefile (working copy) @@ -15,5 +15,6 @@ SRCS_COMMON-$(TARGET_ALTIVEC) += motion_comp_altivec.c idct_altivec.c SRCS_COMMON-$(TARGET_VIS) += motion_comp_vis.c SRCS_COMMON-$(TARGET_ARCH_ALPHA) += idct_alpha.c motion_comp_alpha.c +SRCS_COMMON-$(TARGET_ARCH_ARMV4L) += motion_comp_arm.c motion_comp_iwmmxt.c motion_comp_arm_s.S include ../mpcommon.mak