view mathops.h @ 4988:689490842cf5 libavcodec

factor sum_abs_dctelem out of dct_sad, and simd it. sum_abs_dctelem_* alone: core2: c=186 mmx2=39 sse2=21 ssse3=13 (cycles) k8: c=163 mmx2=33 sse2=31 p4: c=370 mmx2=60 sse2=60 dct_sad including sum_abs_dctelem_*: core2: c=405 mmx2=258 sse2=240 ssse3=232 k8: c=624 mmx2=394 sse2=392 p4: c=849 mmx2=556 sse2=556
author lorenm
date Sat, 12 May 2007 02:41:25 +0000
parents d6f83e2f8804
children 4394344397d8
line wrap: on
line source

/*
 * simple math operations
 * Copyright (c) 2001, 2002 Fabrice Bellard.
 * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#ifndef MATHOPS_H
#define MATHOPS_H

#ifdef ARCH_X86_32

#include "i386/mathops.h"

#elif defined(ARCH_ARMV4L)

#include "armv4l/mathops.h"

#elif defined(ARCH_PPC)

#include "ppc/mathops.h"

#endif

/* generic implementation */

#ifndef MULL
#   define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
#endif

#ifndef MULH
//gcc 3.4 creates an incredibly bloated mess out of this
//#    define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32)

static av_always_inline int MULH(int a, int b){
    return ((int64_t)(a) * (int64_t)(b))>>32;
}
#endif

#ifndef MUL64
#   define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
#endif

/* signed 16x16 -> 32 multiply add accumulate */
#ifndef MAC16
#   define MAC16(rt, ra, rb) rt += (ra) * (rb)
#endif

/* signed 16x16 -> 32 multiply */
#ifndef MUL16
#   define MUL16(ra, rb) ((ra) * (rb))
#endif

#endif //MATHOPS_H