Mercurial > libavcodec.hg
annotate arm/mpegvideo_armv5te_s.S @ 10952:ea8f891d997d libavcodec
H264 DXVA2 implementation
It allows VLD H264 decoding using DXVA2 (the GPU-assisted decoding API under
Windows Vista and Windows 7).
It is implemented using the AVHWAccel API. It has been tested successfully
for some time in VLC using an NVIDIA card on Windows 7.
To compile it, you need the system header dxva2api.h (either from
Microsoft or from http://downloads.videolan.org/pub/videolan/testing/contrib/dxva2api.h).
The generated libavcodec.dll does not depend directly on any new library, as
the necessary objects are supplied by the application using FFmpeg.
author | fenrir |
---|---|
date | Wed, 20 Jan 2010 18:54:51 +0000 |
parents | 9281a8a9387a |
children | 361a5fcb4393 |
rev | line source |
---|---|
8197
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
1 /* |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
2 * Optimization of some functions from mpegvideo.c for armv5te |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
3 * Copyright (c) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net> |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
4 * |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
5 * This file is part of FFmpeg. |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
6 * |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
11 * |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
15 * Lesser General Public License for more details. |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
16 * |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
20 */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
21 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
22 #include "config.h" |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
23 #include "asm.S" |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
24 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
25 /* |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
26 * Special optimized version of dct_unquantize_h263_helper_c, it |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
27 * requires the block to be at least 8-byte aligned, and may process |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
28 * more elements than requested. But it is guaranteed to never |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
29 * process more than 64 elements provided that count argument is <= 64, |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
30 * so it is safe. This function is optimized for a common distribution |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
31 * of values for nCoeffs (they are mostly a multiple of 8 plus one or |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
32 * two extra elements). So this function processes data as 8 elements |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
33 * per loop iteration and contains optional 2 elements processing in |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
34 * the end. |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
35 * |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
36 * Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770) |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
37 */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
38 function ff_dct_unquantize_h263_armv5te, export=1 /* in: r0 = pointer to 16-bit coefficients, r1 = multiplier (low halfword is used), r2 = offset applied with each coefficient's sign, r3 = element count */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
39 push {r4-r9,lr} /* save the registers used below; lr is also used as a scratch register */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
40 mov ip, #0 /* ip = 0, the constant subtracted by the rsbs sign tests */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
41 subs r3, r3, #2 /* r3 = count - 2 */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
42 ble 2f /* count <= 2: skip the 8-element loop and handle a single pair */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
43 ldrd r4, [r0, #0] /* r4, r5 = first four coefficients, two halfwords per register */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
44 1: /* main loop: eight coefficients per iteration */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
45 ldrd r6, [r0, #8] /* r6, r7 = the next four coefficients */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
46 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
47 rsbs r9, ip, r4, asr #16 /* r9 = top halfword of r4, sign-extended; flags reflect its sign */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
48 addgt r9, r2, #0 /* positive: accumulator = +r2 (flags are left untouched) */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
49 rsblt r9, r2, #0 /* negative: accumulator = -r2 */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
50 smlatbne r9, r4, r1, r9 /* non-zero: r9 = top(r4) * low(r1) + accumulator; a zero coefficient stays zero */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
51 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
52 rsbs lr, ip, r5, asr #16 /* same steps for the top halfword of r5, result in lr */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
53 addgt lr, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
54 rsblt lr, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
55 smlatbne lr, r5, r1, lr |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
56 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
57 rsbs r8, ip, r4, asl #16 /* test the bottom halfword of r4 by shifting it into the top bits */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
58 addgt r8, r2, #0 /* positive: accumulator = +r2 */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
59 rsblt r8, r2, #0 /* negative: accumulator = -r2 */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
60 smlabbne r4, r4, r1, r8 /* non-zero: r4 = low(r4) * low(r1) + accumulator; otherwise the low halfword of r4 is already 0 */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
61 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
62 rsbs r8, ip, r5, asl #16 /* same steps for the bottom halfword of r5 */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
63 addgt r8, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
64 rsblt r8, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
65 smlabbne r5, r5, r1, r8 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
66 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
67 strh r4, [r0], #2 /* store the four results as halfwords; each store advances r0 by 2 bytes */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
68 strh r9, [r0], #2 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
69 strh r5, [r0], #2 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
70 strh lr, [r0], #2 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
71 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
72 rsbs r9, ip, r6, asr #16 /* second half of the iteration: the same four steps applied to r6 and r7 */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
73 addgt r9, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
74 rsblt r9, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
75 smlatbne r9, r6, r1, r9 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
76 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
77 rsbs lr, ip, r7, asr #16 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
78 addgt lr, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
79 rsblt lr, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
80 smlatbne lr, r7, r1, lr |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
81 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
82 rsbs r8, ip, r6, asl #16 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
83 addgt r8, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
84 rsblt r8, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
85 smlabbne r6, r6, r1, r8 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
86 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
87 rsbs r8, ip, r7, asl #16 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
88 addgt r8, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
89 rsblt r8, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
90 smlabbne r7, r7, r1, r8 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
91 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
92 strh r6, [r0], #2 /* store the second group of four results */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
93 strh r9, [r0], #2 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
94 strh r7, [r0], #2 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
95 strh lr, [r0], #2 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
96 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
97 subs r3, r3, #8 /* eight coefficients done; flags drive the next two instructions */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
98 ldrgtd r4, [r0, #0] /* load data early to avoid load/use pipeline stall */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
99 bgt 1b /* loop while r3 is still positive */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
100 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
101 adds r3, r3, #2 /* undo the initial bias of 2 to get the remaining count */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
102 pople {r4-r9,pc} /* nothing left: restore registers and return */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
103 2: /* tail: always processes exactly two coefficients */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
104 ldrsh r9, [r0, #0] /* r9 = first coefficient, sign-extended */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
105 ldrsh lr, [r0, #2] /* lr = second coefficient, sign-extended */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
106 mov r8, r2 /* default accumulator = +r2 */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
107 cmp r9, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
108 rsblt r8, r2, #0 /* negative coefficient: accumulator = -r2 */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
109 smlabbne r9, r9, r1, r8 /* non-zero: r9 = r9 * low(r1) + accumulator */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
110 mov r8, r2 /* same sequence for the second coefficient */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
111 cmp lr, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
112 rsblt r8, r2, #0 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
113 smlabbne lr, lr, r1, r8 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
114 strh r9, [r0], #2 /* write both results back */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
115 strh lr, [r0], #2 |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
116 pop {r4-r9,pc} /* restore registers and return */ |
06acc3ab4bdc
ARM: move dct_unquantize_h263_*_armv5te asm to separate file
mru
parents:
diff
changeset
|
117 .endfunc |