annotate arm/vp56dsp_neon.S @ 12483:0159a19bfff7 libavcodec

aacdec: Rework channel mapping compatibility hacks. For a PCE based configuration map the channels solely based on tags. For an indexed configuration map the channels solely based on position. This works with all known exotic samples including al17, elem_id0, bad_concat, and lfe_is_sce.
author alexc
date Fri, 10 Sep 2010 18:01:48 +0000
parents 1c6d78234e67
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11666
1c6d78234e67 ARM: NEON optimised VP6 edge filter
mru
parents:
diff changeset
/*
 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
1c6d78234e67 ARM: NEON optimised VP6 edge filter
mru
parents:
diff changeset
20
1c6d78234e67 ARM: NEON optimised VP6 edge filter
mru
parents:
diff changeset
21 #include "asm.S"
1c6d78234e67 ARM: NEON optimised VP6 edge filter
mru
parents:
diff changeset
22
1c6d78234e67 ARM: NEON optimised VP6 edge filter
mru
parents:
diff changeset
@ vp6_edge_filter: adjust the two pixel rows/columns adjacent to an edge.
@
@ In:   q8  = p[-2*s]   q9  = p[-s]   q10 = p[0]   q11 = p[s]   (u8 lanes)
@       r2  = t
@ Out:  d18, low half of d19 = new p[-s]  (byte lanes 0-11)
@       d20, low half of d21 = new p[0]   (byte lanes 0-11)
@ Clobbers: q0-q3, q8, q12-q15; q9/q10 are overwritten with the result.
@           q11 and q4-q7 are not written.
@
@ Only 12 lanes are processed.  Lanes 0-7 use q registers; lanes 8-11 use
@ the d-register instruction that follows each q-register one.  The upper
@ halves of d19 and d21 hold no meaningful data on exit.
@
@ Per lane, in 16-bit arithmetic:
@       v    = (3*(p[0]-p[-s]) + p[-2*s]-p[s] + 4) >> 3
@       V    = |v|,  sign = v >> 15            (0 or -1)
@       if !((unsigned)(V-t-1) >= (unsigned)(t-1))
@               v = ((2*t - V) + sign) ^ sign  (i.e. 2*t-V with v's sign)
@       p[-s] = sat_u8(p[-s] + v)
@       p[0]  = sat_u8(p[0]  - v)
.macro  vp6_edge_filter
        vdup.16         q3,  r2                 @ t
        vmov.i16        q13, #1
        vsubl.u8        q0,  d20, d18           @ p[   0] - p[-s]
        vsubl.u8        q1,  d16, d22           @ p[-2*s] - p[ s]
        vsubl.u8        q14, d21, d19           @ lanes 8-15: p[0] - p[-s]
        vsubl.u8        q15, d17, d23           @ lanes 8-15: p[-2*s] - p[s]
        vadd.i16        q2,  q0,  q0            @ 2*(p[0]-p[-s])
        vadd.i16        d29, d28, d28           @ from here on only lanes 8-11 are kept
        vadd.i16        q0,  q0,  q1            @   p[0]-p[-s] + p[-2*s]-p[s]
        vadd.i16        d28, d28, d30
        vadd.i16        q0,  q0,  q2            @ 3*(p[0]-p[-s]) + p[-2*s]-p[s]
        vadd.i16        d28, d28, d29
        vrshr.s16       q0,  q0,  #3            @ v  (rounding shift: +4 then >>3)
        vrshr.s16       d28, d28, #3
        vsub.i16        q8,  q3,  q13           @ t-1  (p[-2*s] is dead by now)
        vabs.s16        q1,  q0                 @ V
        vshr.s16        q2,  q0,  #15           @ s  (sign mask of v)
        vabs.s16        d30, d28
        vshr.s16        d29, d28, #15
        vsub.i16        q12, q1,  q3            @ V-t
        vsub.i16        d31, d30, d6
        vsub.i16        q12, q12, q13           @ V-t-1
        vsub.i16        d31, d31, d26
        vcge.u16        q12, q12, q8            @ V-t-1 >= t-1  (unsigned) -> keep v
        vcge.u16        d31, d31, d16
        vadd.i16        q13, q3,  q3            @ 2*t
        vadd.i16        d16, d6,  d6            @ d16 (t-1) already consumed above
        vsub.i16        q13, q13, q1            @ 2*t - V
        vsub.i16        d16, d16, d30
        vadd.i16        q13, q13, q2            @ += s
        vadd.i16        d16, d16, d29
        veor            q13, q13, q2            @ ^= s
        veor            d16, d16, d29
        vbif            q0,  q13, q12           @ where mask is clear, v = adjusted value
        vbif            d28, d16, d31
        vmovl.u8        q1,  d20                @ widen p[0]
        vmovl.u8        q15, d21
        vaddw.u8        q2,  q0,  d18           @ p[-s] + v
        vaddw.u8        q3,  q14, d19
        vsub.i16        q1,  q1,  q0            @ p[0] - v
        vsub.i16        d30, d30, d28
        vqmovun.s16     d18, q2                 @ saturate back to u8
        vqmovun.s16     d19, q3
        vqmovun.s16     d20, q1
        vqmovun.s16     d21, q15
.endm
1c6d78234e67 ARM: NEON optimised VP6 edge filter
mru
parents:
diff changeset
70
1c6d78234e67 ARM: NEON optimised VP6 edge filter
mru
parents:
diff changeset
@ ff_vp6_edge_filter_ver_neon
@
@ In:   r0 = address of p[0] (first pixel of the row just after the edge)
@       r1 = s, distance in bytes between consecutive rows
@       r2 = t (consumed by vp6_edge_filter)
@
@ Loads 16 bytes from each of the rows p[-2*s], p[-s], p[0], p[s], runs
@ vp6_edge_filter, and writes back the first 12 bytes of rows p[-s] and
@ p[0].  r0 and r1 are modified.  NEON clobbers are those of vp6_edge_filter
@ plus q11.
function ff_vp6_edge_filter_ver_neon, export=1
        sub             r0,  r0,  r1,  lsl #1   @ r0 = &p[-2*s]
        vld1.8          {q8},  [r0], r1         @ p[-2*s]
        vld1.8          {q9},  [r0], r1         @ p[-s]
        vld1.8          {q10}, [r0], r1         @ p[0]
        vld1.8          {q11}, [r0]             @ p[s]
        vp6_edge_filter
        sub             r0,  r0,  r1,  lsl #1   @ from &p[s] back to &p[-s]
        sub             r1,  r1,  #8            @ row step after the 8-byte store has advanced r0
        vst1.8          {d18},    [r0]!         @ p[-s], bytes 0-7
        vst1.32         {d19[0]}, [r0], r1      @ p[-s], bytes 8-11; r0 -> &p[0]
        vst1.8          {d20},    [r0]!         @ p[0],  bytes 0-7
        vst1.32         {d21[0]}, [r0]          @ p[0],  bytes 8-11
        bx              lr
endfunc
1c6d78234e67 ARM: NEON optimised VP6 edge filter
mru
parents:
diff changeset
86
1c6d78234e67 ARM: NEON optimised VP6 edge filter
mru
parents:
diff changeset
@ ff_vp6_edge_filter_hor_neon
@
@ In:   r0 = address of p[0] in the first of 12 rows
@       r1 = distance in bytes between consecutive rows
@       r2 = t (consumed by vp6_edge_filter)
@
@ For each of 12 rows, loads the 4 bytes p[-2], p[-1], p[0], p[1], transposes
@ them so that each q register holds one column, runs vp6_edge_filter (with
@ s = 1 byte), and stores the new p[-1], p[0] pair back to each row.
@ r0 and r3 are modified.  NEON clobbers are those of vp6_edge_filter plus q11.
@
@ The upper 32-bit lanes of d17, d19, d21 and d23 are never loaded; they map
@ to byte lanes 12-15 after the transpose, whose results are never stored.
function ff_vp6_edge_filter_hor_neon, export=1
        sub             r3,  r0,  #1            @ r3 = &p[-1], store pointer
        sub             r0,  r0,  #2            @ r0 = &p[-2], load pointer
        vld1.32         {d16[0]}, [r0], r1      @ rows 0-3
        vld1.32         {d18[0]}, [r0], r1
        vld1.32         {d20[0]}, [r0], r1
        vld1.32         {d22[0]}, [r0], r1
        vld1.32         {d16[1]}, [r0], r1      @ rows 4-7
        vld1.32         {d18[1]}, [r0], r1
        vld1.32         {d20[1]}, [r0], r1
        vld1.32         {d22[1]}, [r0], r1
        vld1.32         {d17[0]}, [r0], r1      @ rows 8-11
        vld1.32         {d19[0]}, [r0], r1
        vld1.32         {d21[0]}, [r0], r1
        vld1.32         {d23[0]}, [r0], r1
        vtrn.8          q8,  q9                 @ 4x4 byte transposes:
        vtrn.8          q10, q11                @ afterwards q8 = p[-2], q9 = p[-1],
        vtrn.16         q8,  q10                @ q10 = p[0], q11 = p[1],
        vtrn.16         q9,  q11                @ with byte lane n holding row n
        vp6_edge_filter
        vtrn.8          q9,  q10                @ (p[-1], p[0]) pairs: even rows in q9, odd rows in q10
        vst1.16         {d18[0]}, [r3], r1      @ row 0
        vst1.16         {d20[0]}, [r3], r1      @ row 1
        vst1.16         {d18[1]}, [r3], r1      @ row 2
        vst1.16         {d20[1]}, [r3], r1      @ row 3
        vst1.16         {d18[2]}, [r3], r1      @ row 4
        vst1.16         {d20[2]}, [r3], r1      @ row 5
        vst1.16         {d18[3]}, [r3], r1      @ row 6
        vst1.16         {d20[3]}, [r3], r1      @ row 7
        vst1.16         {d19[0]}, [r3], r1      @ row 8
        vst1.16         {d21[0]}, [r3], r1      @ row 9
        vst1.16         {d19[1]}, [r3], r1      @ row 10
        vst1.16         {d21[1]}, [r3], r1      @ row 11
        bx              lr
endfunc