libavcodec.hg: avr32/mathops.h annotate

annotate avr32/mathops.h @ 12197:fbf4d5b1b664 libavcodec

Remove FF_MM_SSE2/3 flags for CPUs where SSE2/3 code is generally not faster than regular MMX code. One example of such a CPU is the Core1. Instead, set a new flag, FF_MM_SSE2/3SLOW, which can be checked for particular SSE2/3 functions that have been checked specifically on such CPUs and are actually faster than their MMX counterparts. In addition, use this flag to enable particular VP8 and LPC SSE2 functions that are faster than their MMX counterparts. Based on a patch by Loren Merritt <lorenm AT u washington edu>.

author	rbultje
date	Mon, 19 Jul 2010 22:38:23 +0000
parents	25136467a218
children

rev	line source
10065 42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	1 /*
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	2 * Simple math operations
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	3 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	4 *
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	5 * This file is part of FFmpeg.
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	6 *
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	7 * FFmpeg is free software; you can redistribute it and/or
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	8 * modify it under the terms of the GNU Lesser General Public
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	9 * License as published by the Free Software Foundation; either
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	10 * version 2.1 of the License, or (at your option) any later version.
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	11 *
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	12 * FFmpeg is distributed in the hope that it will be useful,
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	15 * Lesser General Public License for more details.
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	16 *
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	17 * You should have received a copy of the GNU Lesser General Public
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	18 * License along with FFmpeg; if not, write to the Free Software
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	20 */
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	21
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	22 #ifndef AVCODEC_AVR32_MATHOPS_H
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	23 #define AVCODEC_AVR32_MATHOPS_H
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	24
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	25 #include <stdint.h>
10080 25136467a218 Add necessary #include for config.h. diego parents: 10065 diff changeset	26 #include "config.h"
10065 42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	27 #include "libavutil/common.h"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	28
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	29 #if HAVE_INLINE_ASM
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	30
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	31 #define MULL MULL
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	32 static inline av_const int MULL(int a, int b, unsigned shift)
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	33 {
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	34 union { int64_t x; int hl[2]; } x;
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	35 __asm__ ("muls.d %0, %1, %2 \n\t"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	36 "lsr %0, %3 \n\t"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	37 "or %0, %0, %m0<<%4 \n\t"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	38 : "=r"(x) : "r"(b), "r"(a), "i"(shift), "i"(32-shift));
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	39 return x.hl[1];
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	40 }
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	41
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	42 #define MULH MULH
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	43 static inline av_const int MULH(int a, int b)
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	44 {
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	45 union { int64_t x; int hl[2]; } x;
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	46 __asm__ ("muls.d %0, %1, %2" : "=r"(x.x) : "r"(a), "r"(b));
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	47 return x.hl[0];
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	48 }
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	49
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	50 #define MUL64 MUL64
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	51 static inline av_const int64_t MUL64(int a, int b)
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	52 {
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	53 int64_t x;
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	54 __asm__ ("muls.d %0, %1, %2" : "=r"(x) : "r"(a), "r"(b));
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	55 return x;
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	56 }
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	57
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	58 static inline av_const int64_t MAC64(int64_t d, int a, int b)
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	59 {
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	60 __asm__ ("macs.d %0, %1, %2" : "+r"(d) : "r"(a), "r"(b));
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	61 return d;
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	62 }
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	63 #define MAC64(d, a, b) ((d) = MAC64(d, a, b))
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	64 #define MLS64(d, a, b) MAC64(d, -(a), b)
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	65
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	66 static inline av_const int MAC16(int d, int a, int b)
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	67 {
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	68 __asm__ ("machh.w %0, %1:b, %2:b" : "+r"(d) : "r"(a), "r"(b));
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	69 return d;
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	70 }
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	71 #define MAC16(d, a, b) ((d) = MAC16(d, a, b))
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	72 #define MLS16(d, a, b) MAC16(d, -(a), b)
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	73
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	74 #define MUL16 MUL16
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	75 static inline av_const int MUL16(int a, int b)
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	76 {
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	77 int d;
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	78 __asm__ ("mulhh.w %0, %1:b, %2:b" : "=r"(d) : "r"(a), "r"(b));
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	79 return d;
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	80 }
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	81
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	82 #define mid_pred mid_pred
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	83 static inline av_const int mid_pred(int a, int b, int c)
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	84 {
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	85 int m;
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	86 __asm__ ("mov %0, %2 \n\t"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	87 "cp.w %1, %2 \n\t"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	88 "movgt %0, %1 \n\t"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	89 "movgt %1, %2 \n\t"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	90 "cp.w %1, %3 \n\t"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	91 "movle %1, %3 \n\t"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	92 "cp.w %0, %1 \n\t"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	93 "movgt %0, %1 \n\t"
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	94 : "=&r"(m), "+r"(a)
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	95 : "r"(b), "r"(c));
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	96 return m;
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	97 }
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	98
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	99 #endif /* HAVE_INLINE_ASM */
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	100
42a126c78744 AVR32: optimised mathops.h mru parents: diff changeset	101 #endif /* AVCODEC_AVR32_MATHOPS_H */

Mercurial > libavcodec.hg

annotate avr32/mathops.h @ 12197:fbf4d5b1b664 libavcodec