annotate arm/mathops.h @ 10509:cdf5b1ed3500 libavcodec

Add VDPAU hardware accelerated decoding for MPEG-4 ASP which can be used by video players. Original patch by NVIDIA corporation.
author cehoyos
date Tue, 10 Nov 2009 18:52:39 +0000
parents 25136467a218
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
1 /*
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
2 * simple math operations
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
3 * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
15 * Lesser General Public License for more details.
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
16 *
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
20 */
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
21
8359
9281a8a9387a ARM: replace "armv4l" with "arm"
mru
parents: 8201
diff changeset
22 #ifndef AVCODEC_ARM_MATHOPS_H
9281a8a9387a ARM: replace "armv4l" with "arm"
mru
parents: 8201
diff changeset
23 #define AVCODEC_ARM_MATHOPS_H
5163
9ecbfc0c82bf add multiple inclusion guards to headers
mru
parents: 3947
diff changeset
24
8084
8547a4ae101b Add missing headers to pass 'make checkheaders'.
diego
parents: 8031
diff changeset
25 #include <stdint.h>
10080
25136467a218 Add necessary #include for config.h.
diego
parents: 9141
diff changeset
26 #include "config.h"
8084
8547a4ae101b Add missing headers to pass 'make checkheaders'.
diego
parents: 8031
diff changeset
27 #include "libavutil/common.h"
8547a4ae101b Add missing headers to pass 'make checkheaders'.
diego
parents: 8031
diff changeset
28
9141
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
29 #if HAVE_INLINE_ASM
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
30
8112
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
31 # define MULL MULL /* self-define; presumably lets other headers detect this ARM version via #ifndef MULL -- TODO confirm */
8201
c6e2ffef3797 Add shift argument to MULL() macro
mru
parents: 8115
diff changeset
32 static inline av_const int MULL(int a, int b, unsigned shift) /* returns the low 32 bits of the signed 64-bit product of a and b after shifting it right by `shift` */
8112
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
33 {
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
34 int lo, hi; /* low and high 32-bit words of the 64-bit product */
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
35 __asm__("smull %0, %1, %2, %3 \n\t" /* lo:hi = b times a (signed 32x32 -> 64) */
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
36 "mov %0, %0, lsr %4 \n\t" /* lo = lo >> shift (logical shift) */
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
37 "add %1, %0, %1, lsl %5 \n\t" /* hi = lo + (hi << (32 - shift)) */
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
38 : "=&r"(lo), "=&r"(hi) /* early-clobber: both outputs are written while inputs are still needed */
8676
7fcf95230c28 ARM: allow register operands for shifts in MULL()
mru
parents: 8590
diff changeset
39 : "r"(b), "r"(a), "ir"(shift), "ir"(32-shift)); /* NOTE(review): shift == 0 makes the last operand 32 -- confirm callers only pass 1..31 */
8112
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
40 return hi; /* hi now holds the combined, shifted result */
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
41 }
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
42
8113
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
43 #define MULH MULH /* self-define; presumably lets other headers detect this ARM version via #ifndef MULH -- TODO confirm */
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8359
diff changeset
44 #if HAVE_ARMV6 /* single-instruction variant, selected when HAVE_ARMV6 is non-zero */
7280
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
45 static inline av_const int MULH(int a, int b) /* returns the high 32 bits of the signed 64-bit product of a and b */
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
46 {
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
47 int r;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7760
diff changeset
48 __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); /* smmul writes only the most significant word of the product */
7280
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
49 return r;
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
50 }
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
51 #else /* fallback: full 64-bit multiply, keep only the high word */
8113
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
52 static inline av_const int MULH(int a, int b) /* returns the high 32 bits of the signed 64-bit product of a and b */
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
53 {
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
54 int lo, hi; /* lo is produced by smull but never read */
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
55 __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a)); /* lo:hi = b times a; early-clobber keeps the outputs in registers distinct from the inputs */
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
56 return hi;
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
57 }
7280
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
58 #endif /* HAVE_ARMV6 */
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
59
7281
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
60 static inline av_const int64_t MUL64(int a, int b) /* returns the full signed 64-bit product of a and b */
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
61 {
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
62 union { uint64_t x; unsigned hl[2]; } x; /* hl[0] receives the low word and hl[1] the high word, so this layout assumes a little-endian target */
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7760
diff changeset
63 __asm__ ("smull %0, %1, %2, %3" /* hl[0]:hl[1] = a times b (signed 32x32 -> 64) */
8111
97b08ce5d507 ARM: mathops.h whitespace cosmetics
mru
parents: 8105
diff changeset
64 : "=&r"(x.hl[0]), "=&r"(x.hl[1]) : "r"(a), "r"(b)); /* early-clobber (&): before ARMv6, smull destinations must not share a register with the first source, matching MULL/MULH */
7281
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
65 return x.x; /* reinterpret the two words as one 64-bit value */
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
66 }
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
67 #define MUL64 MUL64 /* self-define; presumably lets other headers detect this ARM version via #ifndef MUL64 -- TODO confirm */
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
68
7282
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
69 static inline av_const int64_t MAC64(int64_t d, int a, int b) /* returns d plus the signed 64-bit product of a and b */
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
70 {
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
71 union { uint64_t x; unsigned hl[2]; } x = { d }; /* seed the accumulator with d; hl[0] is used as the low word, hl[1] as the high word (little-endian layout) */
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7760
diff changeset
72 __asm__ ("smlal %0, %1, %2, %3" /* hl[0]:hl[1] += a times b (signed multiply-accumulate long) */
8111
97b08ce5d507 ARM: mathops.h whitespace cosmetics
mru
parents: 8105
diff changeset
73 : "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b)); /* "+r": both accumulator words are read and then overwritten */
7282
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
74 return x.x;
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
75 }
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
76 #define MAC64(d, a, b) ((d) = MAC64(d, a, b)) /* statement-style wrapper: stores the function result back into d; d must be an lvalue and appears twice in the expansion */
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
77 #define MLS64(d, a, b) MAC64(d, -(a), b) /* multiply-subtract: accumulates the product with a negated */
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
78
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8359
diff changeset
79 #if HAVE_ARMV5TE
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
80
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
81 /* signed 16x16 -> 32 multiply add accumulate: rt += (bottom halfword of ra) times (bottom halfword of rb) */
8114
1231a7ddd932 ARM: prettify MAC16() macro
mru
parents: 8113
diff changeset
82 # define MAC16(rt, ra, rb) \
1231a7ddd932 ARM: prettify MAC16() macro
mru
parents: 8113
diff changeset
83 __asm__ ("smlabb %0, %1, %2, %0" : "+r"(rt) : "r"(ra), "r"(rb)); /* rt is both accumulator input and result ("+r"). NOTE(review): the expansion already ends in ';', so "MAC16(...);" adds an empty statement and cannot sit unbraced before an else */
1231a7ddd932 ARM: prettify MAC16() macro
mru
parents: 8113
diff changeset
84
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
85 /* signed 16x16 -> 32 multiply: only the bottom halfword of each argument takes part */
8115
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
86 # define MUL16 MUL16 /* self-define; presumably lets other headers detect this ARM version via #ifndef MUL16 -- TODO confirm */
9079
37dd457573a4 ARM: fix missing MUL16() return type
mru
parents: 8677
diff changeset
87 static inline av_const int MUL16(int ra, int rb) /* returns the 32-bit product of the signed low 16 bits of ra and rb */
8115
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
88 {
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
89 int rt;
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
90 __asm__ ("smulbb %0, %1, %2" : "=r"(rt) : "r"(ra), "r"(rb)); /* smulbb: bottom halfword times bottom halfword */
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
91 return rt;
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
92 }
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
93
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
94 #endif
5163
9ecbfc0c82bf add multiple inclusion guards to headers
mru
parents: 3947
diff changeset
95
8677
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
96 #define mid_pred mid_pred /* self-define; presumably lets other headers detect this ARM version via #ifndef mid_pred -- TODO confirm */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
97 static inline av_const int mid_pred(int a, int b, int c) /* returns the median (middle value) of a, b and c */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
98 {
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
99 int m;
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
100 __asm__ volatile (
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
101 "mov %0, %2 \n\t" /* m = b */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
102 "cmp %1, %2 \n\t" /* compare a with b */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
103 "movgt %0, %1 \n\t" /* if a > b: m = a, so m = max(a, b) */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
104 "movgt %1, %2 \n\t" /* if a > b: a = b, so a = min(a, b) */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
105 "cmp %1, %3 \n\t" /* compare min(a, b) with c */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
106 "movle %1, %3 \n\t" /* if a <= c: a = c, so a = max(min(a, b), c) */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
107 "cmp %0, %1 \n\t" /* compare the two candidates */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
108 "movgt %0, %1 \n\t" /* m = the smaller candidate, which is the median */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
109 : "=&r"(m), "+r"(a) /* m is early-clobber (written before b and c are last read); a is reused as scratch */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
110 : "r"(b), "r"(c) : "cc"); /* "cc": the cmp instructions overwrite the condition flags, so the compiler must not assume they survive */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
111 return m;
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
112 }
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
113
9141
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
114 #endif /* HAVE_INLINE_ASM */
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
115
8359
9281a8a9387a ARM: replace "armv4l" with "arm"
mru
parents: 8201
diff changeset
116 #endif /* AVCODEC_ARM_MATHOPS_H */