Mercurial > libavcodec.hg
view mathops.h @ 4988:689490842cf5 libavcodec
factor sum_abs_dctelem out of dct_sad, and simd it.
sum_abs_dctelem_* alone:
core2: c=186 mmx2=39 sse2=21 ssse3=13 (cycles)
k8: c=163 mmx2=33 sse2=31
p4: c=370 mmx2=60 sse2=60
dct_sad including sum_abs_dctelem_*:
core2: c=405 mmx2=258 sse2=240 ssse3=232
k8: c=624 mmx2=394 sse2=392
p4: c=849 mmx2=556 sse2=556
author | lorenm |
---|---|
date | Sat, 12 May 2007 02:41:25 +0000 |
parents | d6f83e2f8804 |
children | 4394344397d8 |
line wrap: on
line source
/*
 * simple math operations
 * Copyright (c) 2001, 2002 Fabrice Bellard.
 * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef MATHOPS_H
#define MATHOPS_H

/*
 * Pull in the architecture-specific header, if one is selected.
 * Each generic definition below is wrapped in #ifndef, so anything the
 * architecture header already defined under the same name is kept as is.
 */
#ifdef ARCH_X86_32

#include "i386/mathops.h"

#elif defined(ARCH_ARMV4L)

#include "armv4l/mathops.h"

#elif defined(ARCH_PPC)

#include "ppc/mathops.h"

#endif

/* generic implementation */

/*
 * MULL(a, b): 64-bit product of a and b, shifted right by FRAC_BITS.
 * FRAC_BITS is not defined in this header; it has to be visible wherever
 * the macro is expanded.
 */
#ifndef MULL
#   define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
#endif

/*
 * MULH(a, b): the 64-bit product of a and b shifted right by 32, i.e. its
 * upper 32 bits, returned as int.
 *
 * This is a function rather than the equivalent macro
 *     (((int64_t)(a) * (int64_t)(b)) >> 32)
 * because gcc 3.4 creates an incredibly bloated mess out of the macro form.
 *
 * Note: for a negative product the result depends on >> of a negative
 * int64_t, which the C standard leaves implementation-defined.
 */
#ifndef MULH
static av_always_inline int MULH(int a, int b)
{
    return ((int64_t)(a) * (int64_t)(b)) >> 32;
}
#endif

/* MUL64(a, b): full product of a and b, both widened to int64_t first. */
#ifndef MUL64
#   define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
#endif

/*
 * signed 16x16 -> 32 multiply add accumulate
 *
 * MAC16(rt, ra, rb) performs rt += ra * rb. No cast is applied, so the
 * multiplication happens in the operands' own (promoted) type.
 * rt must be a modifiable lvalue. Both rt and the whole expansion are
 * parenthesized so that an operator written next to the macro call cannot
 * bind into the right-hand side of the assignment.
 */
#ifndef MAC16
#   define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))
#endif

/*
 * signed 16x16 -> 32 multiply
 *
 * MUL16(ra, rb) is the plain product ra * rb; no cast is applied.
 */
#ifndef MUL16
#   define MUL16(ra, rb) ((ra) * (rb))
#endif

#endif /* MATHOPS_H */