annotate arm/mathops.h @ 10952:ea8f891d997d libavcodec

H264 DXVA2 implementation. It allows VLD H264 decoding using DXVA2 (the GPU-assisted decoding API on Windows Vista and Windows 7). It is implemented using the AVHWAccel API. It has been tested successfully for some time in VLC using an NVIDIA card on Windows 7. To compile it, you need the system header dxva2api.h (either from Microsoft or from http://downloads.videolan.org/pub/videolan/testing/contrib/dxva2api.h). The generated libavcodec.dll does not depend directly on any new library, as the necessary objects are provided by the application using FFmpeg.
author fenrir
date Wed, 20 Jan 2010 18:54:51 +0000
parents 25136467a218
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
1 /*
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
2 * simple math operations
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
3 * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
15 * Lesser General Public License for more details.
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
16 *
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3767
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
20 */
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
21
8359
9281a8a9387a ARM: replace "armv4l" with "arm"
mru
parents: 8201
diff changeset
22 #ifndef AVCODEC_ARM_MATHOPS_H
9281a8a9387a ARM: replace "armv4l" with "arm"
mru
parents: 8201
diff changeset
23 #define AVCODEC_ARM_MATHOPS_H
5163
9ecbfc0c82bf add multiple inclusion guards to headers
mru
parents: 3947
diff changeset
24
8084
8547a4ae101b Add missing headers to pass 'make checkheaders'.
diego
parents: 8031
diff changeset
25 #include <stdint.h>
10080
25136467a218 Add necessary #include for config.h.
diego
parents: 9141
diff changeset
26 #include "config.h"
8084
8547a4ae101b Add missing headers to pass 'make checkheaders'.
diego
parents: 8031
diff changeset
27 #include "libavutil/common.h"
8547a4ae101b Add missing headers to pass 'make checkheaders'.
diego
parents: 8031
diff changeset
28
9141
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
29 #if HAVE_INLINE_ASM
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
30
8112
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
31 # define MULL MULL /* presumably lets generic code detect this override with #ifndef MULL -- not visible here, confirm */
8201
c6e2ffef3797 Add shift argument to MULL() macro
mru
parents: 8115
diff changeset
/*
 * MULL(a, b, shift): signed 32x32 -> 64-bit multiply followed by a right
 * shift, returning a 32-bit result.
 *
 * smull leaves the 64-bit product in hi:lo; the next two instructions then
 * compute (lo >> shift) + (hi << (32 - shift)) and that sum is returned.
 * NOTE(review): shift is presumably expected to be in 1..31 -- confirm
 * against callers.
 */
32 static inline av_const int MULL(int a, int b, unsigned shift)
8112
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
33 {
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
34 int lo, hi;
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
35 __asm__("smull %0, %1, %2, %3 \n\t" /* hi:lo = b * a (signed, 64-bit) */
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
36 "mov %0, %0, lsr %4 \n\t" /* lo = lo >> shift (logical) */
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
37 "add %1, %0, %1, lsl %5 \n\t" /* hi = lo + (hi << (32 - shift)) */
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
38 : "=&r"(lo), "=&r"(hi) /* early-clobber: outputs get registers distinct from all inputs */
8676
7fcf95230c28 ARM: allow register operands for shifts in MULL()
mru
parents: 8590
diff changeset
39 : "r"(b), "r"(a), "ir"(shift), "ir"(32-shift)); /* "ir": shift amounts may be immediates or registers */
8112
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
40 return hi;
954dd6e341ce ARM: change MULL() macro to inline function
mru
parents: 8111
diff changeset
41 }
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
42
8113
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
43 #define MULH MULH
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8359
diff changeset
44 #if HAVE_ARMV6
7280
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
/*
 * MULH(a, b): most significant 32 bits of the signed 64-bit product a * b.
 * ARMv6 variant: a single smmul instruction produces the high word directly.
 */
45 static inline av_const int MULH(int a, int b)
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
46 {
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
47 int r;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7760
diff changeset
48 __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
7280
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
49 return r;
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
50 }
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
51 #else
8113
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
/*
 * MULH(a, b): most significant 32 bits of the signed 64-bit product a * b.
 * Fallback variant: smull computes the full product into hi:lo and only the
 * high word is returned; lo is a discarded scratch output.
 */
52 static inline av_const int MULH(int a, int b)
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
53 {
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
54 int lo, hi;
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
55 __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a)); /* "=&r": outputs get registers distinct from the inputs */
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
56 return hi;
aa55fd152068 ARM: change MULH() macro to inline function
mru
parents: 8112
diff changeset
57 }
7280
c8b0366e066f ARM: ARMv6 optimised MULH
mru
parents: 5830
diff changeset
58 #endif
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
59
7281
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
/*
 * MUL64(a, b): full signed 64-bit product of two 32-bit integers, computed
 * with a single smull instruction.
 *
 * The result is bound to one 64-bit operand and the %Q / %R operand
 * modifiers select the registers holding its least / most significant word,
 * so the code does not depend on the in-memory word order of a 64-bit value
 * (a union of two 32-bit halves would only be correct on little-endian
 * targets).
 */
60 static inline av_const int64_t MUL64(int a, int b)
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
61 {
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
62 int64_t x;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7760
diff changeset
63 __asm__ ("smull %Q0, %R0, %1, %2" /* %Q0 = low word, %R0 = high word of x */
8111
97b08ce5d507 ARM: mathops.h whitespace cosmetics
mru
parents: 8105
diff changeset
64 : "=r"(x) : "r"(a), "r"(b));
7281
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
65 return x;
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
66 }
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
67 #define MUL64 MUL64
747908449de0 ARM: optimised MUL64
mru
parents: 7280
diff changeset
68
7282
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
/*
 * MAC64(d, a, b): 64-bit multiply-accumulate, returning d + a * b where
 * a * b is the signed 64-bit product of two 32-bit integers (smlal).
 *
 * The accumulator is bound to one 64-bit operand and the %Q / %R operand
 * modifiers select the registers holding its least / most significant word,
 * so the code does not depend on the in-memory word order of a 64-bit value
 * (a union of two 32-bit halves would only be correct on little-endian
 * targets).
 *
 * The MAC64() macro defined below wraps this function and stores the result
 * back into d; MLS64() subtracts the product instead by negating a.
 */
69 static inline av_const int64_t MAC64(int64_t d, int a, int b)
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
70 {
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
71 int64_t acc = d;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7760
diff changeset
72 __asm__ ("smlal %Q0, %R0, %1, %2" /* %Q0 = low word, %R0 = high word of acc */
8111
97b08ce5d507 ARM: mathops.h whitespace cosmetics
mru
parents: 8105
diff changeset
73 : "+r"(acc) : "r"(a), "r"(b));
7282
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
74 return acc;
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
75 }
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
76 #define MAC64(d, a, b) ((d) = MAC64(d, a, b))
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
77 #define MLS64(d, a, b) MAC64(d, -(a), b)
dc5a334c758b ARM: optimised MAC64 and MLS64
mru
parents: 7281
diff changeset
78
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8359
diff changeset
79 #if HAVE_ARMV5TE
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
80
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
81 /* signed 16x16 -> 32 multiply-accumulate: rt += (low half of ra) * (low half of rb), via smlabb */
/* NOTE(review): the expansion below already ends in ';', so "MAC16(...);" yields an extra empty statement. */
8114
1231a7ddd932 ARM: prettify MAC16() macro
mru
parents: 8113
diff changeset
82 # define MAC16(rt, ra, rb) \
1231a7ddd932 ARM: prettify MAC16() macro
mru
parents: 8113
diff changeset
83 __asm__ ("smlabb %0, %1, %2, %0" : "+r"(rt) : "r"(ra), "r"(rb));
1231a7ddd932 ARM: prettify MAC16() macro
mru
parents: 8113
diff changeset
84
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
85 /* signed 16x16 -> 32 multiply: returns (low half of ra) * (low half of rb), via smulbb */
8115
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
86 # define MUL16 MUL16
9079
37dd457573a4 ARM: fix missing MUL16() return type
mru
parents: 8677
diff changeset
87 static inline av_const int MUL16(int ra, int rb)
8115
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
88 {
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
89 int rt;
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
90 __asm__ ("smulbb %0, %1, %2" : "=r"(rt) : "r"(ra), "r"(rb));
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
91 return rt;
e61cc20bad68 ARM: change MUL16() macro to inline function
mru
parents: 8114
diff changeset
92 }
3733
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
93
d1b5acd0b680 New single instruction math operation header
lu_zero
parents:
diff changeset
94 #endif /* HAVE_ARMV5TE */
5163
9ecbfc0c82bf add multiple inclusion guards to headers
mru
parents: 3947
diff changeset
95
8677
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
96 #define mid_pred mid_pred
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
/*
 * mid_pred(a, b, c): median (middle value) of three integers, computed
 * branch-free with conditional moves:
 *   m = max(a, b); a = min(a, b); a = max(a, c); m = min(m, a).
 * The local copy of a is used as scratch ("+r"); b and c are read-only.
 * The cmp instructions modify the condition flags, so "cc" is listed as a
 * clobber to keep the compiler from assuming the flags survive the asm.
 */
97 static inline av_const int mid_pred(int a, int b, int c)
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
98 {
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
99 int m;
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
100 __asm__ volatile (
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
101 "mov %0, %2 \n\t" /* m = b */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
102 "cmp %1, %2 \n\t" /* a > b ? */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
103 "movgt %0, %1 \n\t" /* ... m = a, so m = max(a, b) */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
104 "movgt %1, %2 \n\t" /* ... a = b, so a = min(a, b) */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
105 "cmp %1, %3 \n\t" /* min(a, b) <= c ? */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
106 "movle %1, %3 \n\t" /* ... a = c, so a = max(min(a, b), c) */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
107 "cmp %0, %1 \n\t" /* m > a ? */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
108 "movgt %0, %1 \n\t" /* ... m = a, so m = min(m, a) = median */
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
109 : "=&r"(m), "+r"(a)
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
110 : "r"(b), "r"(c) : "cc");
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
111 return m;
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
112 }
3c484b73ca73 ARM: optimised mid_pred()
mru
parents: 8676
diff changeset
113
9141
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
114 #endif /* HAVE_INLINE_ASM */
489def16f0c7 ARM: disable inline asm for armcc
mru
parents: 9079
diff changeset
115
8359
9281a8a9387a ARM: replace "armv4l" with "arm"
mru
parents: 8201
diff changeset
116 #endif /* AVCODEC_ARM_MATHOPS_H */