Mercurial > libavcodec.hg
comparison arm/vp56dsp_neon.S @ 11666:1c6d78234e67 libavcodec
ARM: NEON optimised VP6 edge filter
author | mru |
---|---|
date | Fri, 30 Apr 2010 21:30:27 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
11665:85ee3d14b906 | 11666:1c6d78234e67 |
---|---|
1 /* | |
2 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> | |
3 * | |
4 * This file is part of FFmpeg. | |
5 * | |
6 * FFmpeg is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2.1 of the License, or (at your option) any later version. | |
10 * | |
11 * FFmpeg is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with FFmpeg; if not, write to the Free Software | |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 */ | |
20 | |
21 #include "asm.S" | |
22 | |
/*
 * vp6_edge_filter: filter core shared by the two entry points in this file.
 *
 * Inputs (as prepared by the callers):
 *   q8  (d16,d17)  p[-2*s]
 *   q9  (d18,d19)  p[-s]
 *   q10 (d20,d21)  p[0]
 *   q11 (d22,d23)  p[s]
 *   r2             t, broadcast to every 16-bit lane of q3
 *
 * Outputs:
 *   d18      p[-s] + v for lanes 0-7, narrowed with unsigned saturation
 *   d19      same for lanes 8-11; the upper four lanes are not meaningful
 *   d20      p[0] - v for lanes 0-7, narrowed with unsigned saturation
 *   d21      same for lanes 8-11; the upper four lanes are p[0] unchanged
 *
 * Lanes 0-7 are processed in q registers and lanes 8-11 in d registers,
 * with the two instruction streams interleaved.
 * Overwrites q0-q3, q8-q10 and q12-q15. q11 and r2 are only read.
 */
23 .macro vp6_edge_filter | |
24 vdup.16 q3, r2 @ t | |
25 vmov.i16 q13, #1 | |
/* Widening subtracts: q0/q1 hold lanes 0-7, q14/q15 hold lanes 8-15. */
26 vsubl.u8 q0, d20, d18 @ p[ 0] - p[-s] | |
27 vsubl.u8 q1, d16, d22 @ p[-2*s] - p[ s] | |
28 vsubl.u8 q14, d21, d19 | |
29 vsubl.u8 q15, d17, d23 | |
/*
 * From here the second stream works on d28 only (lanes 8-11).
 * d29, the upper half of q14, is reused as scratch, so the differences
 * for lanes 12-15 are dropped.
 */
30 vadd.i16 q2, q0, q0 @ 2*(p[0]-p[-s]) | |
31 vadd.i16 d29, d28, d28 | |
32 vadd.i16 q0, q0, q1 @ p[0]-p[-s] + p[-2*s]-p[s] | |
33 vadd.i16 d28, d28, d30 | |
34 vadd.i16 q0, q0, q2 @ 3*(p[0]-p[-s]) + p[-2*s]-p[s] | |
35 vadd.i16 d28, d28, d29 | |
/* Rounding arithmetic shift right by 3 yields v. */
36 vrshr.s16 q0, q0, #3 @ v | |
37 vrshr.s16 d28, d28, #3 | |
/* q8 (p[-2*s]) was fully consumed by the subtracts above and is reused for t-1. */
38 vsub.i16 q8, q3, q13 @ t-1 | |
/* V = |v|, s = v >> 15 (0 for non-negative lanes, all ones for negative lanes). */
39 vabs.s16 q1, q0 @ V | |
40 vshr.s16 q2, q0, #15 @ s | |
41 vabs.s16 d30, d28 | |
42 vshr.s16 d29, d28, #15 | |
/*
 * Per-lane mask: all ones where V-t-1 >= t-1 as an unsigned compare.
 * d6 and d26 are the low halves of q3 (t) and q13 (1); d16 is the low half of q8 (t-1).
 */
43 vsub.i16 q12, q1, q3 @ V-t | |
44 vsub.i16 d31, d30, d6 | |
45 vsub.i16 q12, q12, q13 @ V-t-1 | |
46 vsub.i16 d31, d31, d26 | |
47 vcge.u16 q12, q12, q8 @ V-t-1 >= t-1 | |
48 vcge.u16 d31, d31, d16 | |
/*
 * Alternative value ((2*t - V) + s) ^ s.
 * q13 (the constant 1) and d16 (t-1) are no longer needed after the
 * compares above, so both are overwritten here.
 */
49 vadd.i16 q13, q3, q3 @ 2*t | |
50 vadd.i16 d16, d6, d6 | |
51 vsub.i16 q13, q13, q1 @ 2*t - V | |
52 vsub.i16 d16, d16, d30 | |
53 vadd.i16 q13, q13, q2 @ += s | |
54 vadd.i16 d16, d16, d29 | |
55 veor q13, q13, q2 @ ^= s | |
56 veor d16, d16, d29 | |
/* vbif: keep v where the mask is set, otherwise insert the alternative value. */
57 vbif q0, q13, q12 | |
58 vbif d28, d16, d31 | |
/*
 * Apply the adjusted v in 16-bit precision: q2/q3 = p[-s] + v, q1/q15 = p[0] - v.
 * q14 is d28:d29, so the upper four lanes of q3 are built from scratch data.
 */
59 vmovl.u8 q1, d20 | |
60 vmovl.u8 q15, d21 | |
61 vaddw.u8 q2, q0, d18 | |
62 vaddw.u8 q3, q14, d19 | |
63 vsub.i16 q1, q1, q0 | |
64 vsub.i16 d30, d30, d28 | |
/* Narrow back to bytes with unsigned saturation. */
65 vqmovun.s16 d18, q2 | |
66 vqmovun.s16 d19, q3 | |
67 vqmovun.s16 d20, q1 | |
68 vqmovun.s16 d21, q15 | |
69 .endm | |
70 | |
/*
 * ff_vp6_edge_filter_ver_neon
 *
 * Filters across four rows that are r1 bytes apart.
 *
 * Register use as read from the code (the C prototype is not visible
 * here; confirm against the caller):
 *   r0  address of row p[0]
 *   r1  distance in bytes between consecutive rows (s)
 *   r2  t, consumed by vp6_edge_filter
 *
 * Loads 16 bytes from each of rows p[-2*s], p[-s], p[0] and p[s], then
 * writes 12 bytes back to each of rows p[-s] and p[0].
 * r0 and r1 are modified; nothing is pushed on the stack.
 */
71 function ff_vp6_edge_filter_ver_neon, export=1 | |
/* Step back two rows so the loads start at p[-2*s]. */
72 sub r0, r0, r1, lsl #1 | |
73 vld1.8 {q8}, [r0], r1 @ p[-2*s] | |
74 vld1.8 {q9}, [r0], r1 @ p[-s] | |
75 vld1.8 {q10}, [r0], r1 @ p[0] | |
76 vld1.8 {q11}, [r0] @ p[s] | |
77 vp6_edge_filter | |
/* The last load had no writeback, so r0 is at row p[s]; move back to row p[-s]. */
78 sub r0, r0, r1, lsl #1 | |
/* Each 8-byte store advances r0 by 8, so r1-8 then lands on the start of the next row. */
79 sub r1, r1, #8 | |
/* Row p[-s]: 8 bytes from d18, then 4 bytes from the low word of d19. */
80 vst1.8 {d18}, [r0]! | |
81 vst1.32 {d19[0]}, [r0], r1 | |
/* Row p[0]: 8 bytes from d20, then 4 bytes from the low word of d21. */
82 vst1.8 {d20}, [r0]! | |
83 vst1.32 {d21[0]}, [r0] | |
84 bx lr | |
85 endfunc | |
86 | |
/*
 * ff_vp6_edge_filter_hor_neon
 *
 * Filters across four horizontally adjacent bytes in each of 12
 * consecutive rows.
 *
 * Register use as read from the code (the C prototype is not visible
 * here; confirm against the caller):
 *   r0  address of byte p[0] in the first row
 *   r1  distance in bytes between consecutive rows
 *   r2  t, consumed by vp6_edge_filter
 *
 * Reads bytes p[-2] .. p[1] of every row and writes back p[-1] and p[0].
 * r0 and r3 are modified; nothing is pushed on the stack.
 */
87 function ff_vp6_edge_filter_hor_neon, export=1 | |
/* r3 = store pointer (p[-1] of the first row); r0 = load pointer (p[-2]). */
88 sub r3, r0, #1 | |
89 sub r0, r0, #2 | |
/*
 * One 32-bit load per row, rotating through q8..q11 so that each 32-bit
 * lane position collects four consecutive rows.
 * The upper lanes d17[1], d19[1], d21[1] and d23[1] are never loaded;
 * they feed filter lanes 12-15, whose results are not stored.
 */
90 vld1.32 {d16[0]}, [r0], r1 | |
91 vld1.32 {d18[0]}, [r0], r1 | |
92 vld1.32 {d20[0]}, [r0], r1 | |
93 vld1.32 {d22[0]}, [r0], r1 | |
94 vld1.32 {d16[1]}, [r0], r1 | |
95 vld1.32 {d18[1]}, [r0], r1 | |
96 vld1.32 {d20[1]}, [r0], r1 | |
97 vld1.32 {d22[1]}, [r0], r1 | |
98 vld1.32 {d17[0]}, [r0], r1 | |
99 vld1.32 {d19[0]}, [r0], r1 | |
100 vld1.32 {d21[0]}, [r0], r1 | |
101 vld1.32 {d23[0]}, [r0], r1 | |
/*
 * 4x4 byte transpose inside every 32-bit lane.
 * Afterwards byte i of q8/q9/q10/q11 is p[-2]/p[-1]/p[0]/p[1] of row i,
 * which is the layout vp6_edge_filter expects.
 */
102 vtrn.8 q8, q9 | |
103 vtrn.8 q10, q11 | |
104 vtrn.16 q8, q10 | |
105 vtrn.16 q9, q11 | |
106 vp6_edge_filter | |
/*
 * Interleave the filtered p[-1] (q9) and p[0] (q10) bytes.
 * Each 16-bit lane then holds the byte pair of one row: even rows in
 * q9, odd rows in q10.
 */
107 vtrn.8 q9, q10 | |
/* Two bytes per row, alternating between q9 and q10 lanes, rows 0 to 11. */
108 vst1.16 {d18[0]}, [r3], r1 | |
109 vst1.16 {d20[0]}, [r3], r1 | |
110 vst1.16 {d18[1]}, [r3], r1 | |
111 vst1.16 {d20[1]}, [r3], r1 | |
112 vst1.16 {d18[2]}, [r3], r1 | |
113 vst1.16 {d20[2]}, [r3], r1 | |
114 vst1.16 {d18[3]}, [r3], r1 | |
115 vst1.16 {d20[3]}, [r3], r1 | |
116 vst1.16 {d19[0]}, [r3], r1 | |
117 vst1.16 {d21[0]}, [r3], r1 | |
118 vst1.16 {d19[1]}, [r3], r1 | |
119 vst1.16 {d21[1]}, [r3], r1 | |
120 bx lr | |
121 endfunc |