annotate arm/mathops.h @ 9672:15276eb66180 libavcodec

LGPL version of ac3_decode_transform_coeffs_ch, ~12.4% faster.
author darkshikari
date Tue, 19 May 2009 21:29:21 +0000
parents 489def16f0c7
children 25136467a218
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
1 /*
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
2 * simple math operations
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
3 * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
15 * Lesser General Public License for more details.
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
16 *
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
20 */
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
21
8359
9281a8a9387a ARM: replace "armv4l" with "arm"
mru
parents: 8201
diff changeset
22 #ifndef AVCODEC_ARM_MATHOPS_H
9281a8a9387a ARM: replace "armv4l" with "arm"
mru
parents: 8201
diff changeset
23 #define AVCODEC_ARM_MATHOPS_H
5163
9ecbfc0c82bf add multiple inclusion guards to headers
mru
parents: 3947
diff changeset
24
8084
8547a4ae101b Add missing headers to pass 'make checkheaders'.
diego
parents: 8031
diff changeset
25 #include <stdint.h>
8547a4ae101b Add missing headers to pass 'make checkheaders'.
diego
parents: 8031
diff changeset
26 #include "libavutil/common.h"
8547a4ae101b Add missing headers to pass 'make checkheaders'.
diego
parents: 8031
diff changeset
27
9141
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
28 #if HAVE_INLINE_ASM
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
29
8112
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
30 # define MULL MULL
8201
c6e2ffef3797 Add shift argument to MULL() macro
mru
parents: 8115
diff changeset
31 static inline av_const int MULL(int a, int b, unsigned shift)
8112
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
32 {
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
33 int lo, hi;
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
34 __asm__("smull %0, %1, %2, %3 \n\t"
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
35 "mov %0, %0, lsr %4 \n\t"
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
36 "add %1, %0, %1, lsl %5 \n\t"
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
37 : "=&r"(lo), "=&r"(hi)
8676
7fcf95230c28 ARM: allow register operands for shifts in MULL()
mru
parents: 8590
diff changeset
38 : "r"(b), "r"(a), "ir"(shift), "ir"(32-shift));
8112
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
39 return hi;
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
40 }
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
41
8113
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
42 #define MULH MULH
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8359
diff changeset
43 #if HAVE_ARMV6
7280
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
44 static inline av_const int MULH(int a, int b)
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
45 {
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
46 int r;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7760
diff changeset
47 __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
7280
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
48 return r;
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
49 }
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
50 #else
8113
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
51 static inline av_const int MULH(int a, int b)
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
52 {
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
53 int lo, hi;
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
54 __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
55 return hi;
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
56 }
7280
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
57 #endif
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
58
7281
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
59 static inline av_const int64_t MUL64(int a, int b)
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
60 {
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
61 union { uint64_t x; unsigned hl[2]; } x;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7760
diff changeset
62 __asm__ ("smull %0, %1, %2, %3"
8111
97b08ce5d507 ARM: mathops.h whitespace cosmetics
mru
parents: 8105
diff changeset
63 : "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b));
7281
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
64 return x.x;
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
65 }
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
66 #define MUL64 MUL64
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
67
7282
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
68 static inline av_const int64_t MAC64(int64_t d, int a, int b)
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
69 {
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
70 union { uint64_t x; unsigned hl[2]; } x = { d };
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7760
diff changeset
71 __asm__ ("smlal %0, %1, %2, %3"
8111
97b08ce5d507 ARM: mathops.h whitespace cosmetics
mru
parents: 8105
diff changeset
72 : "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b));
7282
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
73 return x.x;
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
74 }
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
75 #define MAC64(d, a, b) ((d) = MAC64(d, a, b))
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
76 #define MLS64(d, a, b) MAC64(d, -(a), b)
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
77
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8359
diff changeset
78 #if HAVE_ARMV5TE
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
79
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
/* signed 16x16 -> 32 multiply add accumulate */
/* rt += (low 16 bits of ra) * (low 16 bits of rb), both taken as signed.
 * rt must be a modifiable int lvalue.  The expansion deliberately carries
 * no trailing semicolon: the caller supplies it, so that
 * "if (c) MAC16(x, y, z); else ..." parses as a single statement. */
#   define MAC16(rt, ra, rb)                                            \
    __asm__ ("smlabb %0, %1, %2, %0" : "+r"(rt) : "r"(ra), "r"(rb))
1231a7ddd932 ARM: prettify MAC16() macro
mru
parents: 8113
diff changeset
83
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
84 /* signed 16x16 -> 32 multiply */
8115
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
85 # define MUL16 MUL16
9079
37dd457573a4 ARM: fix missing MUL16() return type
mru
parents: 8677
diff changeset
86 static inline av_const int MUL16(int ra, int rb)
8115
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
87 {
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
88 int rt;
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
89 __asm__ ("smulbb %0, %1, %2" : "=r"(rt) : "r"(ra), "r"(rb));
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
90 return rt;
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
91 }
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
92
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
93 #endif
5163
9ecbfc0c82bf add multiple inclusion guards to headers
mru
parents: 3947
diff changeset
94
8677
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
95 #define mid_pred mid_pred
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
96 static inline av_const int mid_pred(int a, int b, int c)
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
97 {
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
98 int m;
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
99 __asm__ volatile (
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
100 "mov %0, %2 \n\t"
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
101 "cmp %1, %2 \n\t"
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
102 "movgt %0, %1 \n\t"
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
103 "movgt %1, %2 \n\t"
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
104 "cmp %1, %3 \n\t"
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
105 "movle %1, %3 \n\t"
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
106 "cmp %0, %1 \n\t"
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
107 "movgt %0, %1 \n\t"
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
108 : "=&r"(m), "+r"(a)
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
109 : "r"(b), "r"(c));
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
110 return m;
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
111 }
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
112
9141
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
113 #endif /* HAVE_INLINE_ASM */
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
114
8359
9281a8a9387a ARM: replace "armv4l" with "arm"
mru
parents: 8201
diff changeset
115 #endif /* AVCODEC_ARM_MATHOPS_H */