annotate arm/h264pred_neon.S @ 12483:0159a19bfff7 libavcodec

aacdec: Rework channel mapping compatibility hacks. For a PCE based configuration map the channels solely based on tags. For an indexed configuration map the channels solely based on position. This works with all known exotic samples including al17, elem_id0, bad_concat, and lfe_is_sce.
author alexc
date Fri, 10 Sep 2010 18:01:48 +0000
parents 361a5fcb4393
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
1 /*
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
3 *
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
4 * This file is part of FFmpeg.
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
5 *
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
10 *
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
14 * Lesser General Public License for more details.
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
15 *
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
19 */
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
20
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
21 #include "asm.S"
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
22
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ldcol.8 rd, rs, rt, n=8, hi=0
 *
 * Gather single bytes from memory into lanes of the D register rd.
 * Every load reads one byte at [rs] and then advances rs by rt, so with
 * rt = row stride this collects a vertical column of pixels.
 *
 *   n == 8 (default) : lanes 0-7 are loaded (8 loads)
 *   n != 8, hi == 0  : only lanes 0-3 are loaded (4 loads)
 *   n != 8, hi == 1  : only lanes 4-7 are loaded (4 loads)
 *
 * Lanes that are not loaded keep whatever rd held before.
 * On exit rs has been advanced by rt once per byte loaded.
 */
.macro ldcol.8 rd, rs, rt, n=8, hi=0
.if \n == 8 || \hi == 0
        vld1.8          {\rd[0]}, [\rs], \rt
        vld1.8          {\rd[1]}, [\rs], \rt
        vld1.8          {\rd[2]}, [\rs], \rt
        vld1.8          {\rd[3]}, [\rs], \rt
.endif
.if \n == 8 || \hi == 1
        vld1.8          {\rd[4]}, [\rs], \rt
        vld1.8          {\rd[5]}, [\rs], \rt
        vld1.8          {\rd[6]}, [\rs], \rt
        vld1.8          {\rd[7]}, [\rs], \rt
.endif
.endm
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
37
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * add16x8 dq, dl, dh, rl, rh
 *
 * Sum the 16 unsigned bytes held in the two D registers rl and rh.
 *   dq     : Q register receiving the widened (u16) pairwise sums
 *   dl, dh : D registers that are reduced further; every caller in this
 *            file passes the low and high halves of dq
 * Result: when dl/dh are the halves of dq, all four u16 lanes of dl hold
 * the total of the 16 input bytes.
 */
.macro add16x8 dq, dl, dh, rl, rh
        vaddl.u8        \dq, \rl, \rh           @ 8 x u16: rl[i] + rh[i]
        vadd.u16        \dl, \dl, \dh           @ 4 partial sums
        vpadd.u16       \dl, \dl, \dl           @ 2 partial sums (duplicated)
        vpadd.u16       \dl, \dl, \dl           @ total in every lane
.endm
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
44
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred16x16_128_dc_neon
 *
 * Fill a 16x16 block with the constant byte value 128.
 *   r0 : address of the first row to write (stored with a :128 hint,
 *        i.e. 16-byte aligned)
 *   r1 : byte offset from one row to the next
 * NOTE(review): presumably called from C as (uint8_t *src, int stride);
 * confirm against the prototype.
 *
 * Only the fill value is prepared here; the rows are written by the
 * shared store loop at .L_pred16x16_dc_end (inside ff_pred16x16_dc_neon),
 * which also performs the return.
 */
function ff_pred16x16_128_dc_neon, export=1
        vmov.i8         q0,  #128               @ all 16 lanes = 128
        b               .L_pred16x16_dc_end
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
49
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred16x16_top_dc_neon
 *
 * Fill a 16x16 block with the rounded mean of the 16 bytes in the row
 * directly above it: (sum + 8) >> 4.
 *   r0 : address of the first row to write (16-byte aligned)
 *   r1 : byte offset from one row to the next
 * Clobbers r2, q0; r0/r3 are consumed by the shared store loop.
 *
 * Finishes by branching to .L_pred16x16_dc_end (inside
 * ff_pred16x16_dc_neon), which writes the rows and returns.
 */
function ff_pred16x16_top_dc_neon, export=1
        sub             r2,  r0,  r1            @ r2 -> row above the block
        vld1.8          {q0},     [r2,:128]     @ 16 top neighbours
        add16x8         q0,  d0,  d1,  d0,  d1  @ d0 lanes = sum of the 16 bytes
        vrshrn.u16      d0,  q0,  #4            @ rounding shift: (sum + 8) >> 4
        vdup.8          q0,  d0[0]              @ broadcast the mean to 16 lanes
        b               .L_pred16x16_dc_end
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
58
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred16x16_left_dc_neon
 *
 * Fill a 16x16 block with the rounded mean of the 16 bytes in the column
 * directly to its left: (sum + 8) >> 4.
 *   r0 : address of the first row to write (16-byte aligned)
 *   r1 : byte offset from one row to the next
 * Clobbers r2, q0; r0/r3 are consumed by the shared store loop.
 *
 * Finishes by branching to .L_pred16x16_dc_end (inside
 * ff_pred16x16_dc_neon), which writes the rows and returns.
 */
function ff_pred16x16_left_dc_neon, export=1
        sub             r2,  r0,  #1            @ r2 -> byte left of row 0
        ldcol.8         d0,  r2,  r1            @ left neighbours, rows 0-7
        ldcol.8         d1,  r2,  r1            @ left neighbours, rows 8-15
        add16x8         q0,  d0,  d1,  d0,  d1  @ d0 lanes = sum of the 16 bytes
        vrshrn.u16      d0,  q0,  #4            @ rounding shift: (sum + 8) >> 4
        vdup.8          q0,  d0[0]              @ broadcast the mean to 16 lanes
        b               .L_pred16x16_dc_end
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
68
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred16x16_dc_neon
 *
 * Fill a 16x16 block with the rounded mean of its 16 top and 16 left
 * neighbour bytes: (sum + 16) >> 5.
 *   r0 : address of the first row to write (16-byte aligned)
 *   r1 : byte offset from one row to the next
 * Clobbers r0 (advanced past the block), r2, r3, q0, q1, flags.
 *
 * The tail starting at .L_pred16x16_dc_end is a shared entry point: the
 * 128/top/left DC variants branch here with the fill value already
 * broadcast in q0. Do not move or rename the label without updating them.
 */
function ff_pred16x16_dc_neon, export=1
        sub             r2,  r0,  r1            @ r2 -> row above the block
        vld1.8          {q0},     [r2,:128]     @ 16 top neighbours
        sub             r2,  r0,  #1            @ r2 -> byte left of row 0
        ldcol.8         d2,  r2,  r1            @ left neighbours, rows 0-7
        ldcol.8         d3,  r2,  r1            @ left neighbours, rows 8-15
        vaddl.u8        q0,  d0,  d1            @ widen + add the top halves
        vaddl.u8        q1,  d2,  d3            @ widen + add the left halves
        vadd.u16        q0,  q0,  q1
        vadd.u16        d0,  d0,  d1
        vpadd.u16       d0,  d0,  d0
        vpadd.u16       d0,  d0,  d0            @ every lane = sum of all 32 bytes
        vrshrn.u16      d0,  q0,  #5            @ rounding shift: (sum + 16) >> 5
        vdup.8          q0,  d0[0]              @ broadcast the mean to 16 lanes
.L_pred16x16_dc_end:
        mov             r3,  #8                 @ 8 iterations x 2 rows = 16 rows
6:      vst1.8          {q0},     [r0,:128], r1
        vst1.8          {q0},     [r0,:128], r1
        subs            r3,  r3,  #1
        bne             6b
        bx              lr
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
91
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred16x16_hor_neon
 *
 * Horizontal prediction for a 16x16 block: each row is filled with the
 * byte immediately to the left of that row.
 *   r0 : address of the first row to write (16-byte aligned)
 *   r1 : byte offset from one row to the next
 * Clobbers r0 (advanced past the block), r2, r3, q0, flags.
 */
function ff_pred16x16_hor_neon, export=1
        mov             r3,  #16                @ row counter
        sub             r2,  r0,  #1            @ r2 -> left neighbour of row 0
1:      vld1.8          {d0[],d1[]},[r2], r1    @ replicate that byte to 16 lanes
        vst1.8          {q0},     [r0,:128], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
101
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred16x16_vert_neon
 *
 * Vertical prediction for a 16x16 block: the 16 bytes of the row directly
 * above the block are copied into each of its 16 rows.
 *   r0 : address of the first row to write (16-byte aligned)
 *   r1 : byte offset from one row to the next
 * Clobbers r0 (advanced past the block), r3, q0, flags.
 */
function ff_pred16x16_vert_neon, export=1
        mov             r3,  #8                 @ 8 iterations x 2 rows = 16 rows
        sub             r0,  r0,  r1            @ step back to the row above
        vld1.8          {q0},     [r0,:128], r1 @ load it; r0 is back on row 0
1:      vst1.8          {q0},     [r0,:128], r1
        vst1.8          {q0},     [r0,:128], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
112
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred16x16_plane_neon
 *
 * Plane prediction for a 16x16 block.
 *   r0 : address of the first row to write (16-byte aligned)
 *   r1 : byte offset from one row to the next
 * Clobbers r0 (advanced past the block), r2, r3, q0-q3, q8, flags.
 *
 * Notation used in the comments: top[x] is the byte in the row above the
 * block at column x, left[y] the byte in the column left of the block at
 * row y; index -1 is the shared top-left corner byte.
 *
 *   H = sum_{i=0..7} (i+1) * (top[8+i]  - top[6-i])
 *   V = sum_{i=0..7} (i+1) * (left[8+i] - left[6-i])
 *   b = (5*H + 32) >> 6,  c = (5*V + 32) >> 6
 *   a = 16 * (top[15] + left[15])
 *   out[y][x] = saturate_u8((a + 16 + b*(x-7) + c*(y-7)) >> 5)
 *
 * The instruction order is kept exactly as scheduled; do not reorder
 * without re-checking the d4/d5 lane reuse around the vtrn.16 below.
 */
function ff_pred16x16_plane_neon, export=1
        sub             r3,  r0,  r1            @ r3 -> top[0]
        add             r2,  r3,  #8            @ r2 -> top[8]
        sub             r3,  r3,  #1            @ r3 -> top[-1] (corner)
        vld1.8          {d0},     [r3]          @ d0 = top[-1..6]
        vld1.8          {d2},     [r2,:64], r1  @ d2 = top[8..15]
        ldcol.8         d1,  r3,  r1            @ d1 = left[-1..6]
        add             r3,  r3,  r1            @ skip left[7]
        ldcol.8         d3,  r3,  r1            @ d3 = left[8..15]
        vrev64.8        q0,  q0                 @ d0 = top[6..-1], d1 = left[6..-1]
        vaddl.u8        q8,  d2,  d3            @ q8[i] = top[8+i] + left[8+i]
        vsubl.u8        q2,  d2,  d0            @ q2[i] = top[8+i]  - top[6-i]
        vsubl.u8        q3,  d3,  d1            @ q3[i] = left[8+i] - left[6-i]
        movrel          r3,  p16weight
        vld1.8          {q0},     [r3,:128]     @ q0 = weights 1..8 (16-bit)
        vmul.s16        q2,  q2,  q0
        vmul.s16        q3,  q3,  q0
        vadd.i16        d4,  d4,  d5
        vadd.i16        d5,  d6,  d7
        vpadd.i16       d4,  d4,  d5
        vpadd.i16       d4,  d4,  d4            @ d4 = { H, V, H, V }
        vshl.i16        d5,  d4,  #2
        vaddl.s16       q2,  d4,  d5            @ q2 = { 5H, 5V, 5H, 5V } (32-bit)
        vrshrn.s32      d4,  q2,  #6            @ d4 = { b, c, b, c }
        mov             r3,  #0
        vtrn.16         d4,  d5                 @ d4[0] = b, d5[0] = c
        vadd.i16        d2,  d4,  d5            @ d2[0] = b + c
        vshl.i16        d3,  d2,  #3
        vrev64.16       d16, d17                @ d16[0] = top[15] + left[15]
        vsub.i16        d3,  d3,  d2            @ d3[0] = 7 * (b + c)
        vadd.i16        d16, d16, d0            @ +1 (d0[0] is weight 1)
        vshl.i16        d2,  d16, #4            @ d2[0] = a + 16
        vsub.i16        d2,  d2,  d3            @ d2[0] = a + 16 - 7*(b + c)
        vshl.i16        d3,  d4,  #4            @ d3[0] = 16 * b
        vext.16         q0,  q0,  q0,  #7       @ rotate weights: { 8, 1, ..., 7 }
        vsub.i16        d6,  d5,  d3            @ d6[0] = c - 16*b
        vmov.16         d0[0], r3               @ q0 = { 0, 1, ..., 7 }
        vmul.i16        q0,  q0,  d4[0]         @ q0 = { 0, b, 2b, ..., 7b }
        vdup.16         q1,  d2[0]
        vdup.16         q2,  d4[0]
        vdup.16         q3,  d6[0]
        vshl.i16        q2,  q2,  #3            @ q2 = 8*b (step to columns 8-15)
        vadd.i16        q1,  q1,  q0            @ q1 = row 0, columns 0-7 (pre-shift)
        vadd.i16        q3,  q3,  q2            @ q3 = c - 8*b (step to next row)
        mov             r3,  #16                @ row counter
1:
        vqshrun.s16     d0,  q1,  #5            @ columns 0-7, saturated to u8
        vadd.i16        q1,  q1,  q2
        vqshrun.s16     d1,  q1,  #5            @ columns 8-15
        vadd.i16        q1,  q1,  q3
        vst1.8          {q0},     [r0,:128], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
168
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * p16weight: the 16-bit multipliers 1..8 used by the plane predictors
 * (ff_pred16x16_plane_neon and ff_pred8x8_plane_neon) to weight the edge
 * differences. Both readers load it with a :128 alignment hint, so the
 * table must stay 16-byte aligned.
 */
        .section        .rodata
        .align          4                       @ power-of-two on ARM: 16 bytes
p16weight:
        .short          1, 2, 3, 4, 5, 6, 7, 8

        .text
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
175
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_hor_neon
 *
 * Horizontal prediction for an 8x8 block: each row is filled with the
 * byte immediately to the left of that row.
 *   r0 : address of the first row to write (8-byte aligned, :64 hint)
 *   r1 : byte offset from one row to the next
 * Clobbers r0 (advanced past the block), r2, r3, d0, flags.
 */
function ff_pred8x8_hor_neon, export=1
        mov             r3,  #8                 @ row counter
        sub             r2,  r0,  #1            @ r2 -> left neighbour of row 0
1:      vld1.8          {d0[]},   [r2], r1      @ replicate that byte to 8 lanes
        vst1.8          {d0},     [r0,:64], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
185
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_vert_neon
 *
 * Vertical prediction for an 8x8 block: the 8 bytes of the row directly
 * above the block are copied into each of its 8 rows.
 *   r0 : address of the first row to write (8-byte aligned, :64 hint)
 *   r1 : byte offset from one row to the next
 * Clobbers r0 (advanced past the block), r3, d0, flags.
 */
function ff_pred8x8_vert_neon, export=1
        mov             r3,  #4                 @ 4 iterations x 2 rows = 8 rows
        sub             r0,  r0,  r1            @ step back to the row above
        vld1.8          {d0},     [r0,:64], r1  @ load it; r0 is back on row 0
1:      vst1.8          {d0},     [r0,:64], r1
        vst1.8          {d0},     [r0,:64], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
196
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_plane_neon
 *
 * Plane prediction for an 8x8 block.
 *   r0 : address of the first row to write (8-byte aligned, :64 hint)
 *   r1 : byte offset from one row to the next
 * Clobbers r0 (advanced past the block), r2, r3, q0-q3, q8, flags.
 *
 * Notation used in the comments: top[x] is the byte in the row above the
 * block at column x, left[y] the byte in the column left of the block at
 * row y; index -1 is the shared top-left corner byte.
 *
 *   H = sum_{i=0..3} (i+1) * (top[4+i]  - top[2-i])
 *   V = sum_{i=0..3} (i+1) * (left[4+i] - left[2-i])
 *   b = (17*H + 16) >> 5,  c = (17*V + 16) >> 5
 *   a = 16 * (top[7] + left[7])
 *   out[y][x] = saturate_u8((a + 16 + b*(x-3) + c*(y-3)) >> 5)
 *
 * The instruction order is kept exactly as scheduled; do not reorder
 * without re-checking the d4/d5 lane reuse around the vtrn.16 below.
 */
function ff_pred8x8_plane_neon, export=1
        sub             r3,  r0,  r1            @ r3 -> top[0]
        add             r2,  r3,  #4            @ r2 -> top[4]
        sub             r3,  r3,  #1            @ r3 -> top[-1] (corner)
        vld1.32         {d0[0]},  [r3]          @ d0 low  = top[-1..2]
        vld1.32         {d2[0]},  [r2,:32], r1  @ d2 low  = top[4..7]
        ldcol.8         d0,  r3,  r1,  4,  hi=1 @ d0 high = left[-1..2]
        add             r3,  r3,  r1            @ skip left[3]
        ldcol.8         d3,  r3,  r1,  4        @ d3 low  = left[4..7]
        vaddl.u8        q8,  d2,  d3            @ d16[i] = top[4+i] + left[4+i]
        vrev32.8        d0,  d0                 @ d0 = top[2..-1], left[2..-1]
        vtrn.32         d2,  d3                 @ d2 = top[4..7], left[4..7]
        vsubl.u8        q2,  d2,  d0            @ d4 = top diffs, d5 = left diffs
        movrel          r3,  p16weight
        vld1.16         {q0},     [r3,:128]     @ q0 = weights 1..8 (d0 = 1..4)
        vmul.s16        d4,  d4,  d0
        vmul.s16        d5,  d5,  d0
        vpadd.i16       d4,  d4,  d5
        vpaddl.s16      d4,  d4                 @ d4 = { H, V } (32-bit)
        vshl.i32        d5,  d4,  #4
        vadd.s32        d4,  d4,  d5            @ d4 = { 17H, 17V }
        vrshrn.s32      d4,  q2,  #5            @ d4[0] = b, d4[1] = c
        mov             r3,  #0
        vtrn.16         d4,  d5                 @ d4[0] = b, d5[0] = c
        vadd.i16        d2,  d4,  d5            @ d2[0] = b + c
        vshl.i16        d3,  d2,  #2
        vrev64.16       d16, d16                @ d16[0] = top[7] + left[7]
        vsub.i16        d3,  d3,  d2            @ d3[0] = 3 * (b + c)
        vadd.i16        d16, d16, d0            @ +1 (d0[0] is weight 1)
        vshl.i16        d2,  d16, #4            @ d2[0] = a + 16
        vsub.i16        d2,  d2,  d3            @ d2[0] = a + 16 - 3*(b + c)
        vshl.i16        d3,  d4,  #3            @ d3[0] = 8 * b
        vext.16         q0,  q0,  q0,  #7       @ rotate weights: { 8, 1, ..., 7 }
        vsub.i16        d6,  d5,  d3            @ d6[0] = c - 8*b
        vmov.16         d0[0], r3               @ q0 = { 0, 1, ..., 7 }
        vmul.i16        q0,  q0,  d4[0]         @ q0 = { 0, b, 2b, ..., 7b }
        vdup.16         q1,  d2[0]
        vdup.16         q2,  d4[0]
        vdup.16         q3,  d6[0]
        vshl.i16        q2,  q2,  #3            @ q2 = 8*b
        vadd.i16        q1,  q1,  q0            @ q1 = row 0, columns 0-7 (pre-shift)
        vadd.i16        q3,  q3,  q2            @ q3 = c (step to next row)
        mov             r3,  #8                 @ row counter
1:
        vqshrun.s16     d0,  q1,  #5            @ shift and saturate to u8
        vadd.i16        q1,  q1,  q3
        vst1.8          {d0},     [r0,:64], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
248
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_128_dc_neon
 *
 * Fill an 8x8 block with the constant byte value 128.
 *   r0 : address of the first row to write (8-byte aligned)
 *   r1 : byte offset from one row to the next
 *
 * q0 is set as a whole so that both d0 (rows 0-3) and d1 (rows 4-7) hold
 * the fill value expected by the shared store loop at .L_pred8x8_dc_end
 * (inside ff_pred8x8_dc_neon), which writes the rows and returns.
 */
function ff_pred8x8_128_dc_neon, export=1
        vmov.i8         q0,  #128               @ d0 = d1 = 8 x 128
        b               .L_pred8x8_dc_end
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
253
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_top_dc_neon
 *
 * DC prediction for an 8x8 block from the row above only. The left four
 * columns get the rounded mean of top[0..3], the right four columns the
 * rounded mean of top[4..7]; each mean is (sum + 2) >> 2. All eight rows
 * are identical.
 *   r0 : address of the first row to write (8-byte aligned)
 *   r1 : byte offset from one row to the next
 * Clobbers r2, q0; finishes in the shared store loop at
 * .L_pred8x8_dc_end (inside ff_pred8x8_dc_neon), which also returns.
 */
function ff_pred8x8_top_dc_neon, export=1
        sub             r2,  r0,  r1            @ r2 -> row above the block
        vld1.8          {d0},     [r2,:64]      @ 8 top neighbours
        vpaddl.u8       d0,  d0                 @ 4 x u16 pair sums
        vpadd.u16       d0,  d0,  d0            @ { sum0-3, sum4-7, sum0-3, sum4-7 }
        vrshrn.u16      d0,  q0,  #2            @ d0[0] = left mean, d0[1] = right mean
        vdup.8          d1,  d0[1]
        vdup.8          d0,  d0[0]
        vtrn.32         d0,  d1                 @ d0 = d1 = { left x4, right x4 }
        b               .L_pred8x8_dc_end
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
265
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_left_dc_neon
 *
 * DC prediction for an 8x8 block from the left column only. Rows 0-3 are
 * filled with the rounded mean of left[0..3], rows 4-7 with the rounded
 * mean of left[4..7]; each mean is (sum + 2) >> 2.
 *   r0 : address of the first row to write (8-byte aligned)
 *   r1 : byte offset from one row to the next
 * Clobbers r2, q0; finishes in the shared store loop at
 * .L_pred8x8_dc_end (inside ff_pred8x8_dc_neon), which also returns.
 */
function ff_pred8x8_left_dc_neon, export=1
        sub             r2,  r0,  #1            @ r2 -> byte left of row 0
        ldcol.8         d0,  r2,  r1            @ 8 left neighbours
        vpaddl.u8       d0,  d0                 @ 4 x u16 pair sums
        vpadd.u16       d0,  d0,  d0            @ { sum0-3, sum4-7, sum0-3, sum4-7 }
        vrshrn.u16      d0,  q0,  #2            @ d0[0] = upper mean, d0[1] = lower mean
        vdup.8          d1,  d0[1]              @ d1 -> rows 4-7
        vdup.8          d0,  d0[0]              @ d0 -> rows 0-3
        b               .L_pred8x8_dc_end
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
276
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_dc_neon
 *
 * DC prediction for an 8x8 block, computed separately per 4x4 quadrant.
 * With T0 = sum top[0..3], T1 = sum top[4..7], L0 = sum left[0..3],
 * L1 = sum left[4..7]:
 *   top-left     = (T0 + L0 + 4) >> 3
 *   top-right    = (T1 + 2) >> 2
 *   bottom-left  = (L1 + 2) >> 2
 *   bottom-right = (T1 + L1 + 4) >> 3
 *   r0 : address of the first row to write (8-byte aligned)
 *   r1 : byte offset from one row to the next
 * Clobbers r0 (advanced by four rows), r2, r3, q0-q2, flags.
 *
 * The tail starting at .L_pred8x8_dc_end is a shared entry point: other
 * 8x8 DC variants branch here with d0 = pattern for rows 0-3 and
 * d1 = pattern for rows 4-7. Do not move or rename the label without
 * updating them.
 */
function ff_pred8x8_dc_neon, export=1
        sub             r2,  r0,  r1            @ r2 -> row above the block
        vld1.8          {d0},     [r2,:64]      @ d0 = top[0..7]
        sub             r2,  r0,  #1            @ r2 -> byte left of row 0
        ldcol.8         d1,  r2,  r1            @ d1 = left[0..7]
        vtrn.32         d0,  d1                 @ d0 = top0-3,left0-3  d1 = top4-7,left4-7
        vpaddl.u8       q0,  q0                 @ u16 pair sums
        vpadd.u16       d0,  d0,  d1            @ d0 = { T0, L0, T1, L1 }
        vpadd.u16       d1,  d0,  d0            @ d1 = { T0+L0, T1+L1, T0+L0, T1+L1 }
        vrshrn.u16      d2,  q0,  #3            @ d2[4], d2[5]: two-edge means
        vrshrn.u16      d3,  q0,  #2            @ d3[0..3]: single-edge means
        vdup.8          d0,  d2[4]              @ top-left quadrant
        vdup.8          d1,  d3[3]              @ bottom-left quadrant
        vdup.8          d4,  d3[2]              @ top-right quadrant
        vdup.8          d5,  d2[5]              @ bottom-right quadrant
        vtrn.32         q0,  q2                 @ d0 = TL x4, TR x4   d1 = BL x4, BR x4
.L_pred8x8_dc_end:
        mov             r3,  #4                 @ 4 iterations, one upper + one lower row each
        add             r2,  r0,  r1,  lsl #2   @ r2 -> row 4
6:      vst1.8          {d0},     [r0,:64], r1  @ rows 0-3
        vst1.8          {d1},     [r2,:64], r1  @ rows 4-7
        subs            r3,  r3,  #1
        bne             6b
        bx              lr
endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
302
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_l0t_dc_neon
 * Variant that reads the 8 bytes at r0 - r1 ("row") but asks ldcol.8 for
 * only 4 column bytes starting at r0 - 1 ("col"). It prepares d0 (pattern
 * for lines 0..3) and d1 (pattern for lines 4..7) and branches to the
 * shared store tail .L_pred8x8_dc_end in ff_pred8x8_dc_neon, which also
 * performs the return.
 * NOTE(review): ldcol.8 is a macro defined outside this chunk; with the
 * count 4 it presumably fills only lanes 0..3 of d1 - confirm. Values
 * derived from d1 lanes 4..7 are never selected by the vdup lanes below.
 * NOTE(review): r0 = block pointer and r1 = line stride are assumed, the
 * C prototype is not visible here.
 */
303 function ff_pred8x8_l0t_dc_neon, export=1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 -> the line one step (r1) before r0 */
304 sub r2, r0, r1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = row[0..7], address asserted 8-byte aligned */
305 vld1.8 {d0}, [r2,:64]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 -> the byte immediately before r0 */
306 sub r2, r0, #1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 lanes 0..3 = col[0..3] (4 lines only) */
307 ldcol.8 d1, r2, r1, 4
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = row[0..3]|col[0..3], d1 = row[4..7]|(old d1 high word) */
308 vtrn.32 d0, d1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* widen to 16 bit while adding adjacent byte pairs */
309 vpaddl.u8 q0, q0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = { sum row[0..3], sum col[0..3], sum row[4..7], (unused) } */
310 vpadd.u16 d0, d0, d1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 lane 0 = sum row[0..3] + sum col[0..3] */
311 vpadd.u16 d1, d0, d0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d2 = sums rounded >> 3 (used for the 8-sample sum in lane 4) */
312 vrshrn.u16 d2, q0, #3
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d3 = sums rounded >> 2 (used for the 4-sample sums in lanes 0 and 2) */
313 vrshrn.u16 d3, q0, #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = mean of row[0..3] and col[0..3] */
314 vdup.8 d0, d2[4]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 = mean of row[0..3] */
315 vdup.8 d1, d3[0]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d4 and d5 = mean of row[4..7] */
316 vdup.8 q2, d3[2]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* merge halves: d0 = d0.lo|d4.lo, d1 = d1.lo|d5.lo */
317 vtrn.32 q0, q2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* store d0/d1 and return via the shared tail */
318 b .L_pred8x8_dc_end
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10623
diff changeset
319 endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
320
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_l00_dc_neon
 * Variant that uses only 4 column bytes gathered by ldcol.8 starting at
 * r0 - 1. d0 (pattern for lines 0..3) becomes the rounded mean of those
 * 4 bytes replicated to all lanes, d1 (pattern for lines 4..7) becomes the
 * constant 128. Stores and the return are done by the shared tail
 * .L_pred8x8_dc_end in ff_pred8x8_dc_neon.
 * NOTE(review): ldcol.8 is a macro defined outside this chunk; with the
 * count 4 it presumably fills lanes 0..3 of d0 - confirm.
 * NOTE(review): r0 = block pointer and r1 = line stride are assumed, the
 * C prototype is not visible here.
 */
321 function ff_pred8x8_l00_dc_neon, export=1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 -> the byte immediately before r0 */
322 sub r2, r0, #1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 lanes 0..3 = col[0..3] */
323 ldcol.8 d0, r2, r1, 4
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* 16-bit lanes 0,1 = col[0]+col[1], col[2]+col[3] */
324 vpaddl.u8 d0, d0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* 16-bit lane 0 = sum col[0..3] */
325 vpadd.u16 d0, d0, d0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* byte lane 0 of d0 = (sum + 2) >> 2; the other lanes are not used */
326 vrshrn.u16 d0, q0, #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 = 128 in every byte: fill value for lines 4..7 */
327 vmov.i8 d1, #128
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* replicate the mean across d0: fill value for lines 0..3 */
328 vdup.8 d0, d0[0]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* store d0/d1 and return via the shared tail */
329 b .L_pred8x8_dc_end
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10623
diff changeset
330 endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
331
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_0lt_dc_neon
 * Variant that reads the 8 bytes at r0 - r1 ("row") and 4 column bytes
 * gathered by ldcol.8 starting at r0 + 4*r1 - 1, i.e. next to lines 4..7
 * ("col" below). It prepares d0 (pattern for lines 0..3) and d1 (pattern
 * for lines 4..7) and branches to the shared store tail .L_pred8x8_dc_end
 * in ff_pred8x8_dc_neon, which also performs the return.
 * NOTE(review): ldcol.8 is a macro defined outside this chunk; with count 4
 * and hi=1 it presumably fills lanes 4..7 of d1 - confirm. The lane
 * comments below rely on that.
 * NOTE(review): r0 = block pointer and r1 = line stride are assumed, the
 * C prototype is not visible here.
 */
332 function ff_pred8x8_0lt_dc_neon, export=1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 -> the line one step (r1) before r0 */
333 sub r2, r0, r1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = row[0..7], address asserted 8-byte aligned */
334 vld1.8 {d0}, [r2,:64]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 = r0 + 4*r1 ... */
335 add r2, r0, r1, lsl #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* ... minus 1: the byte immediately before line 4 */
336 sub r2, r2, #1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 high word = col bytes for lines 4..7 */
337 ldcol.8 d1, r2, r1, 4, hi=1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = row[0..3]|(old d1 low word), d1 = row[4..7]|col */
338 vtrn.32 d0, d1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* widen to 16 bit while adding adjacent byte pairs */
339 vpaddl.u8 q0, q0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = { sum row[0..3], (unused), sum row[4..7], sum col } */
340 vpadd.u16 d0, d0, d1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 lane 1 = sum row[4..7] + sum col */
341 vpadd.u16 d1, d0, d0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d3 = sums rounded >> 2 (4-sample means in lanes 0, 2, 3) */
342 vrshrn.u16 d3, q0, #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d2 = sums rounded >> 3 (8-sample mean in lane 5) */
343 vrshrn.u16 d2, q0, #3
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = mean of row[0..3] */
344 vdup.8 d0, d3[0]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 = mean of col */
345 vdup.8 d1, d3[3]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d4 = mean of row[4..7] */
346 vdup.8 d4, d3[2]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d5 = mean of row[4..7] and col */
347 vdup.8 d5, d2[5]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* merge halves: d0 = d0.lo|d4.lo, d1 = d1.lo|d5.lo */
348 vtrn.32 q0, q2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* store d0/d1 and return via the shared tail */
349 b .L_pred8x8_dc_end
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10623
diff changeset
350 endfunc
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
351
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_0l0_dc_neon
 * Variant that uses only 4 column bytes gathered by ldcol.8 starting at
 * r0 + 4*r1 - 1, i.e. next to lines 4..7. d1 (pattern for lines 4..7)
 * becomes the rounded mean of those 4 bytes replicated to all lanes, d0
 * (pattern for lines 0..3) becomes the constant 128. Stores and the return
 * are done by the shared tail .L_pred8x8_dc_end in ff_pred8x8_dc_neon.
 * NOTE(review): ldcol.8 is a macro defined outside this chunk; with the
 * count 4 it presumably fills lanes 0..3 of d1 - confirm.
 * NOTE(review): r0 = block pointer and r1 = line stride are assumed, the
 * C prototype is not visible here.
 */
352 function ff_pred8x8_0l0_dc_neon, export=1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 = r0 + 4*r1 ... */
353 add r2, r0, r1, lsl #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* ... minus 1: the byte immediately before line 4 */
354 sub r2, r2, #1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 lanes 0..3 = col bytes for lines 4..7 */
355 ldcol.8 d1, r2, r1, 4
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d2 16-bit lanes 0,1 = sums of the two adjacent byte pairs */
356 vpaddl.u8 d2, d1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d2 16-bit lane 0 = sum of the 4 column bytes */
357 vpadd.u16 d2, d2, d2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* byte lane 0 of d1 = (sum + 2) >> 2; the other lanes are not used */
358 vrshrn.u16 d1, q1, #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = 128 in every byte: fill value for lines 0..3 */
359 vmov.i8 d0, #128
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* replicate the mean across d1: fill value for lines 4..7 */
360 vdup.8 d1, d1[0]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* store d0/d1 and return via the shared tail */
361 b .L_pred8x8_dc_end
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10623
diff changeset
362 endfunc