annotate arm/h264pred_neon.S @ 10826:59c35482189e libavcodec

Always initialize bit_depth_luma and bit_depth_chroma in the H264 decoder which allows their usage without checking profile_idc. Patch by Laurent Aimar (fenrir (AT) videolan org)
author jai_menon
date Sat, 09 Jan 2010 07:57:26 +0000
parents f52d07b169b4
children 361a5fcb4393
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10623
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
1 /*
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
3 *
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
4 * This file is part of FFmpeg.
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
5 *
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
10 *
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
14 * Lesser General Public License for more details.
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
15 *
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
19 */
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
20
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
21 #include "asm.S"
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
22
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ldcol.8 rd, rs, rt, n=8, hi=0
@
@ Gather single bytes into the lanes of D register rd.  Each load reads one
@ byte from [rs] and then post-increments rs by rt, so with rt holding a row
@ stride this collects a column of pixels.
@
@   n == 8            lanes 0-7 are loaded (eight loads)
@   n != 8, hi == 0   only lanes 0-3 are loaded
@   n != 8, hi == 1   only lanes 4-7 are loaded
@
@ Lanes that are not loaded keep their previous contents.  rs is modified.
.macro  ldcol.8         rd,  rs,  rt,  n=8,  hi=0
.if \n == 8 || \hi == 0
        vld1.8          {\rd[0]}, [\rs], \rt
        vld1.8          {\rd[1]}, [\rs], \rt
        vld1.8          {\rd[2]}, [\rs], \rt
        vld1.8          {\rd[3]}, [\rs], \rt
.endif
.if \n == 8 || \hi == 1
        vld1.8          {\rd[4]}, [\rs], \rt
        vld1.8          {\rd[5]}, [\rs], \rt
        vld1.8          {\rd[6]}, [\rs], \rt
        vld1.8          {\rd[7]}, [\rs], \rt
.endif
.endm
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
37
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ add16x8 dq, dl, dh, rl, rh
@
@ Horizontal sum of the sixteen unsigned bytes held in D registers rl and rh.
@ dq must be the Q register whose low and high halves are dl and dh.
@ On exit every 16-bit lane of dl holds the total; dh is left holding the
@ intermediate widened sums.
.macro  add16x8         dq,  dl,  dh,  rl,  rh
        vaddl.u8        \dq, \rl, \rh           @ 8 x u16: rl[i] + rh[i]
        vadd.u16        \dl, \dl, \dh           @ fold 8 lanes down to 4
        vpadd.u16       \dl, \dl, \dl           @ pairwise: 4 -> 2
        vpadd.u16       \dl, \dl, \dl           @ pairwise: 2 -> 1 (replicated)
.endm
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
44
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred16x16_128_dc_neon
@ Fills q0 with the constant byte 128 and jumps to the shared 16x16 store
@ loop, which writes q0 to sixteen rows starting at r0 with row step r1.
function ff_pred16x16_128_dc_neon, export=1
        vmov.i8         q0,  #128
        b               .L_pred16x16_dc_end
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
49
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred16x16_top_dc_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ Averages the sixteen bytes of the row at r0 - r1 (rounded, sum >> 4),
@ broadcasts the result to q0 and jumps to the shared 16x16 store loop.
@ Uses r2 as scratch; the row above must be 16-byte aligned (:128 hint).
function ff_pred16x16_top_dc_neon, export=1
        sub             r2,  r0,  r1            @ r2 -> row above the block
        vld1.8          {q0},     [r2,:128]
        add16x8         q0,  d0,  d1,  d0,  d1  @ d0 lanes = sum of 16 bytes
        vrshrn.u16      d0,  q0,  #4            @ (sum + 8) >> 4
        vdup.8          q0,  d0[0]
        b               .L_pred16x16_dc_end
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
58
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred16x16_left_dc_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ Averages the sixteen bytes of the column at r0 - 1 (one byte per row,
@ rounded, sum >> 4), broadcasts the result to q0 and jumps to the shared
@ 16x16 store loop.  Uses r2 as scratch.
function ff_pred16x16_left_dc_neon, export=1
        sub             r2,  r0,  #1            @ r2 -> byte left of row 0
        ldcol.8         d0,  r2,  r1            @ rows 0-7
        ldcol.8         d1,  r2,  r1            @ rows 8-15
        add16x8         q0,  d0,  d1,  d0,  d1  @ d0 lanes = sum of 16 bytes
        vrshrn.u16      d0,  q0,  #4            @ (sum + 8) >> 4
        vdup.8          q0,  d0[0]
        b               .L_pred16x16_dc_end
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
68
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred16x16_dc_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ Averages the sixteen bytes of the row at r0 - r1 together with the sixteen
@ bytes of the column at r0 - 1 (rounded, sum >> 5) and fills the 16x16
@ block with that value.  Uses r2, r3, q0, q1; r0 is advanced by 16 rows.
@
@ The label .L_pred16x16_dc_end is the shared store tail: the other 16x16
@ DC variants branch here with the fill value already replicated in q0.
function ff_pred16x16_dc_neon, export=1
        sub             r2,  r0,  r1            @ r2 -> row above the block
        vld1.8          {q0},     [r2,:128]     @ q0 = 16 top bytes
        sub             r2,  r0,  #1            @ r2 -> byte left of row 0
        ldcol.8         d2,  r2,  r1            @ left column, rows 0-7
        ldcol.8         d3,  r2,  r1            @ left column, rows 8-15
        vaddl.u8        q0,  d0,  d1            @ widen and fold top bytes
        vaddl.u8        q1,  d2,  d3            @ widen and fold left bytes
        vadd.u16        q0,  q0,  q1
        vadd.u16        d0,  d0,  d1
        vpadd.u16       d0,  d0,  d0
        vpadd.u16       d0,  d0,  d0            @ d0 lanes = sum of 32 bytes
        vrshrn.u16      d0,  q0,  #5            @ (sum + 16) >> 5
        vdup.8          q0,  d0[0]
.L_pred16x16_dc_end:
        mov             r3,  #8                 @ 8 iterations x 2 rows
6:      vst1.8          {q0},     [r0,:128], r1
        vst1.8          {q0},     [r0,:128], r1
        subs            r3,  r3,  #1
        bne             6b
        bx              lr
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
91
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred16x16_hor_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ For each of sixteen rows, replicates the byte at (row start - 1) across
@ the 16 bytes of that row.  Uses r2, r3, q0; r0 is advanced by 16 rows.
function ff_pred16x16_hor_neon, export=1
        sub             r2,  r0,  #1            @ r2 -> byte left of row 0
        mov             r3,  #16                @ row counter
1:      vld1.8          {d0[],d1[]},[r2], r1    @ broadcast left byte to q0
        vst1.8          {q0},     [r0,:128], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
101
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred16x16_vert_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ Copies the 16-byte row at r0 - r1 into each of the sixteen rows below it.
@ Uses r3 and q0; r0 is advanced by 16 rows.
function ff_pred16x16_vert_neon, export=1
        sub             r0,  r0,  r1            @ step back to the row above
        vld1.8          {q0},     [r0,:128], r1 @ load it, r0 returns to row 0
        mov             r3,  #8                 @ 8 iterations x 2 rows
1:      vst1.8          {q0},     [r0,:128], r1
        vst1.8          {q0},     [r0,:128], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
112
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred16x16_plane_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ Plane prediction for a 16x16 block.  With top[x] the bytes of the row at
@ r0 - r1 and left[y] the bytes of the column at r0 - 1 (index -1 is the
@ shared corner byte), the code computes
@       H = sum(i = 1..8) i * (top[7 + i]  - top[7 - i])
@       V = sum(i = 1..8) i * (left[7 + i] - left[7 - i])
@       b = (5 * H + 32) >> 6,   c = (5 * V + 32) >> 6
@       a = 16 * (top[15] + left[15] + 1) - 7 * (b + c)
@ and writes sat_u8((a + b * x + c * y) >> 5) for x, y in 0..15.
@ Uses r2, r3, q0-q3, q8; r0 is advanced by 16 rows.
@
@ The 5 * H and 5 * V products are formed in 32-bit lanes: |H| can reach
@ 36 * 255 = 9180, so 4 * H does not fit in a signed 16-bit lane.
function ff_pred16x16_plane_neon, export=1
        sub             r3,  r0,  r1            @ r3 -> row above the block
        add             r2,  r3,  #8            @ r2 -> top[8]
        sub             r3,  r3,  #1            @ r3 -> top[-1] (corner)
        vld1.8          {d0},     [r3]          @ d0 = top[-1..6]
        vld1.8          {d2},     [r2,:64], r1  @ d2 = top[8..15]
        ldcol.8         d1,  r3,  r1            @ d1 = left[-1..6]
        add             r3,  r3,  r1            @ skip left[7]
        ldcol.8         d3,  r3,  r1            @ d3 = left[8..15]
        vrev64.8        q0,  q0                 @ d0 = top[6..-1], d1 = left[6..-1]
        vaddl.u8        q8,  d2,  d3            @ top[8+i] + left[8+i]
        vsubl.u8        q2,  d2,  d0            @ top[8+i]  - top[6-i]
        vsubl.u8        q3,  d3,  d1            @ left[8+i] - left[6-i]
        movrel          r3,  p16weight
        vld1.8          {q0},     [r3,:128]     @ q0 = weights 1..8
        vmul.s16        q2,  q2,  q0
        vmul.s16        q3,  q3,  q0
        vadd.i16        d4,  d4,  d5
        vadd.i16        d5,  d6,  d7
        vpadd.i16       d4,  d4,  d5
        vpadd.i16       d4,  d4,  d4            @ d4 = [H, V, H, V]
        vshll.s16       q3,  d4,  #2            @ q3 = 4 * [H, V, H, V] in s32
        vaddw.s16       q2,  q3,  d4            @ q2 = 5 * [H, V, H, V] in s32
        vrshrn.s32      d4,  q2,  #6            @ d4 = [b, c, b, c]
        mov             r3,  #0
        vtrn.16         d4,  d5                 @ d4[0] = b, d5[0] = c
        vadd.i16        d2,  d4,  d5            @ d2[0] = b + c
        vshl.i16        d3,  d2,  #3            @ 8 * (b + c)
        vrev64.16       d16, d17                @ d16[0] = top[15] + left[15]
        vsub.i16        d3,  d3,  d2            @ 7 * (b + c)
        vadd.i16        d16, d16, d0            @ + 1 (d0[0] is weight 1)
        vshl.i16        d2,  d16, #4
        vsub.i16        d2,  d2,  d3            @ d2[0] = a
        vshl.i16        d3,  d4,  #4            @ d3[0] = 16 * b
        vext.16         q0,  q0,  q0,  #7       @ q0 = [8, 1, 2, ..., 7]
        vsub.i16        d6,  d5,  d3            @ d6[0] = c - 16 * b
        vmov.16         d0[0],    r3            @ q0 = [0, 1, 2, ..., 7]
        vmul.i16        q0,  q0,  d4[0]         @ q0 = b * [0..7]
        vdup.16         q1,  d2[0]              @ q1 = a
        vdup.16         q2,  d4[0]              @ q2 = b
        vdup.16         q3,  d6[0]              @ q3 = c - 16 * b
        vshl.i16        q2,  q2,  #3            @ q2 = 8 * b
        vadd.i16        q1,  q1,  q0            @ q1 = a + b * x, x = 0..7
        vadd.i16        q3,  q3,  q2            @ q3 = c - 8 * b
        mov             r3,  #16                @ row counter
1:
        vqshrun.s16     d0,  q1,  #5            @ columns 0-7
        vadd.i16        q1,  q1,  q2            @ step 8 columns right
        vqshrun.s16     d1,  q1,  #5            @ columns 8-15
        vadd.i16        q1,  q1,  q3            @ back 8 columns, down one row
        vst1.8          {q0},     [r0,:128], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
168
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
169 .section .rodata
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
170 .align 4
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
171 p16weight:
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
172 .short 1,2,3,4,5,6,7,8
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
173
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
174 .text
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
175
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred8x8_hor_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ For each of eight rows, replicates the byte at (row start - 1) across the
@ 8 bytes of that row.  Uses r2, r3, d0; r0 is advanced by 8 rows.
function ff_pred8x8_hor_neon, export=1
        sub             r2,  r0,  #1            @ r2 -> byte left of row 0
        mov             r3,  #8                 @ row counter
1:      vld1.8          {d0[]},   [r2], r1      @ broadcast left byte to d0
        vst1.8          {d0},     [r0,:64], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
185
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred8x8_vert_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ Copies the 8-byte row at r0 - r1 into each of the eight rows below it.
@ Uses r3 and d0; r0 is advanced by 8 rows.
function ff_pred8x8_vert_neon, export=1
        sub             r0,  r0,  r1            @ step back to the row above
        vld1.8          {d0},     [r0,:64], r1  @ load it, r0 returns to row 0
        mov             r3,  #4                 @ 4 iterations x 2 rows
1:      vst1.8          {d0},     [r0,:64], r1
        vst1.8          {d0},     [r0,:64], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
196
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred8x8_plane_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ Plane prediction for an 8x8 block.  With top[x] the bytes of the row at
@ r0 - r1 and left[y] the bytes of the column at r0 - 1 (index -1 is the
@ shared corner byte), the code computes
@       H = sum(i = 1..4) i * (top[3 + i]  - top[3 - i])
@       V = sum(i = 1..4) i * (left[3 + i] - left[3 - i])
@       b = (17 * H + 16) >> 5,   c = (17 * V + 16) >> 5
@       a = 16 * (top[7] + left[7] + 1) - 3 * (b + c)
@ and writes sat_u8((a + b * x + c * y) >> 5) for x, y in 0..7.
@ Uses r2, r3, q0-q3, q8; r0 is advanced by 8 rows.
function ff_pred8x8_plane_neon, export=1
        sub             r3,  r0,  r1            @ r3 -> row above the block
        add             r2,  r3,  #4            @ r2 -> top[4]
        sub             r3,  r3,  #1            @ r3 -> top[-1] (corner)
        vld1.32         {d0[0]},  [r3]          @ d0 low  = top[-1..2]
        vld1.32         {d2[0]},  [r2,:32], r1  @ d2 low  = top[4..7]
        ldcol.8         d0,  r3,  r1,  4,  hi=1 @ d0 high = left[-1..2]
        add             r3,  r3,  r1            @ skip left[3]
        ldcol.8         d3,  r3,  r1,  4        @ d3 low  = left[4..7]
        vaddl.u8        q8,  d2,  d3            @ d16 = top[4+i] + left[4+i]
        vrev32.8        d0,  d0                 @ top[2..-1], left[2..-1]
        vtrn.32         d2,  d3                 @ d2 = top[4..7], left[4..7]
        vsubl.u8        q2,  d2,  d0            @ d4 = top diffs, d5 = left diffs
        movrel          r3,  p16weight
        vld1.16         {q0},     [r3,:128]     @ q0 = weights 1..8
        vmul.s16        d4,  d4,  d0            @ weights 1..4
        vmul.s16        d5,  d5,  d0
        vpadd.i16       d4,  d4,  d5
        vpaddl.s16      d4,  d4                 @ d4 = [H, V] in s32
        vshl.i32        d5,  d4,  #4            @ 16 * [H, V]
        vadd.s32        d4,  d4,  d5            @ 17 * [H, V]
        vrshrn.s32      d4,  q2,  #5            @ d4[0] = b, d4[1] = c
        mov             r3,  #0
        vtrn.16         d4,  d5                 @ d4[0] = b, d5[0] = c
        vadd.i16        d2,  d4,  d5            @ d2[0] = b + c
        vshl.i16        d3,  d2,  #2            @ 4 * (b + c)
        vrev64.16       d16, d16                @ d16[0] = top[7] + left[7]
        vsub.i16        d3,  d3,  d2            @ 3 * (b + c)
        vadd.i16        d16, d16, d0            @ + 1 (d0[0] is weight 1)
        vshl.i16        d2,  d16, #4
        vsub.i16        d2,  d2,  d3            @ d2[0] = a
        vshl.i16        d3,  d4,  #3            @ d3[0] = 8 * b
        vext.16         q0,  q0,  q0,  #7       @ q0 = [8, 1, 2, ..., 7]
        vsub.i16        d6,  d5,  d3            @ d6[0] = c - 8 * b
        vmov.16         d0[0],    r3            @ q0 = [0, 1, 2, ..., 7]
        vmul.i16        q0,  q0,  d4[0]         @ q0 = b * [0..7]
        vdup.16         q1,  d2[0]              @ q1 = a
        vdup.16         q2,  d4[0]              @ q2 = b
        vdup.16         q3,  d6[0]              @ q3 = c - 8 * b
        vshl.i16        q2,  q2,  #3            @ q2 = 8 * b
        vadd.i16        q1,  q1,  q0            @ q1 = a + b * x, x = 0..7
        vadd.i16        q3,  q3,  q2            @ q3 = c, the per-row step
        mov             r3,  #8                 @ row counter
1:
        vqshrun.s16     d0,  q1,  #5
        vadd.i16        q1,  q1,  q3            @ down one row
        vst1.8          {d0},     [r0,:64], r1
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
248
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred8x8_128_dc_neon
@ Fills q0 (d0 and d1) with the constant byte 128 and jumps to the shared
@ 8x8 store loop, which writes d0 to rows 0-3 and d1 to rows 4-7.
function ff_pred8x8_128_dc_neon, export=1
        vmov.i8         q0,  #128
        b               .L_pred8x8_dc_end
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
253
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred8x8_top_dc_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ Reads the 8-byte row at r0 - r1 and forms two rounded averages: one of
@ bytes 0-3 and one of bytes 4-7.  Every output row then holds the first
@ average in columns 0-3 and the second in columns 4-7.  Jumps to the
@ shared 8x8 store loop.  Uses r2 as scratch.
function ff_pred8x8_top_dc_neon, export=1
        sub             r2,  r0,  r1            @ r2 -> row above the block
        vld1.8          {d0},     [r2,:64]
        vpaddl.u8       d0,  d0                 @ 4 x u16 pair sums
        vpadd.u16       d0,  d0,  d0            @ [sum 0-3, sum 4-7, ...]
        vrshrn.u16      d0,  q0,  #2            @ (sum + 2) >> 2
        vdup.8          d1,  d0[1]              @ right-half average
        vdup.8          d0,  d0[0]              @ left-half average
        vtrn.32         d0,  d1                 @ d0 = d1 = [left x4, right x4]
        b               .L_pred8x8_dc_end
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
265
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred8x8_left_dc_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ Reads the eight bytes of the column at r0 - 1 and forms two rounded
@ averages: one of rows 0-3 and one of rows 4-7.  Rows 0-3 of the block are
@ filled with the first average and rows 4-7 with the second.  Jumps to the
@ shared 8x8 store loop.  Uses r2 as scratch.
function ff_pred8x8_left_dc_neon, export=1
        sub             r2,  r0,  #1            @ r2 -> byte left of row 0
        ldcol.8         d0,  r2,  r1
        vpaddl.u8       d0,  d0                 @ 4 x u16 pair sums
        vpadd.u16       d0,  d0,  d0            @ [sum 0-3, sum 4-7, ...]
        vrshrn.u16      d0,  q0,  #2            @ (sum + 2) >> 2
        vdup.8          d1,  d0[1]              @ rows 4-7 value
        vdup.8          d0,  d0[0]              @ rows 0-3 value
        b               .L_pred8x8_dc_end
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
276
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
@ ff_pred8x8_dc_neon
@ In:  r0 = address of the first row to write, r1 = row step in bytes.
@ DC prediction computed per 4x4 quadrant.  With T0/T1 the sums of top bytes
@ 0-3 / 4-7 (row at r0 - r1) and L0/L1 the sums of left bytes 0-3 / 4-7
@ (column at r0 - 1):
@       top-left     = (T0 + L0 + 4) >> 3
@       top-right    = (T1 + 2) >> 2
@       bottom-left  = (L1 + 2) >> 2
@       bottom-right = (T1 + L1 + 4) >> 3
@ Uses r2, r3, q0-q2; r0 is advanced by 4 rows.
@
@ The label .L_pred8x8_dc_end is the shared store tail: the other 8x8 DC
@ variants branch here with d0 = row pattern for rows 0-3 and d1 = row
@ pattern for rows 4-7.
function ff_pred8x8_dc_neon, export=1
        sub             r2,  r0,  r1            @ r2 -> row above the block
        vld1.8          {d0},     [r2,:64]      @ d0 = 8 top bytes
        sub             r2,  r0,  #1            @ r2 -> byte left of row 0
        ldcol.8         d1,  r2,  r1            @ d1 = 8 left bytes
        vtrn.32         d0,  d1                 @ d0 = top0-3,left0-3; d1 = top4-7,left4-7
        vpaddl.u8       q0,  q0
        vpadd.u16       d0,  d0,  d1            @ d0 = [T0, L0, T1, L1]
        vpadd.u16       d1,  d0,  d0            @ d1 = [T0+L0, T1+L1, ...]
        vrshrn.u16      d2,  q0,  #3            @ 8-sample averages in d2[4], d2[5]
        vrshrn.u16      d3,  q0,  #2            @ 4-sample averages in d3[0..3]
        vdup.8          d0,  d2[4]              @ top-left
        vdup.8          d1,  d3[3]              @ bottom-left
        vdup.8          d4,  d3[2]              @ top-right
        vdup.8          d5,  d2[5]              @ bottom-right
        vtrn.32         q0,  q2                 @ d0 = [TL, TR], d1 = [BL, BR]
.L_pred8x8_dc_end:
        mov             r3,  #4                 @ 4 iterations, 2 rows each
        add             r2,  r0,  r1,  lsl #2   @ r2 -> row 4
6:      vst1.8          {d0},     [r0,:64], r1  @ rows 0-3
        vst1.8          {d1},     [r2,:64], r1  @ rows 4-7
        subs            r3,  r3,  #1
        bne             6b
        bx              lr
.endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
302
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_l0t_dc_neon: build two 8-byte row patterns (d0, d1) from
 * neighbouring bytes and hand them to the shared store loop at
 * .L_pred8x8_dc_end, which writes d0 to block rows 0-3 and d1 to rows 4-7.
 *
 * In:   r0 = address of the first block byte, r1 = byte step between rows
 *       (presumably the C arguments (uint8_t *src, int stride) -- TODO
 *       confirm against the caller).
 * Uses: r2, d0-d5 here; r3 and the flags in the shared store loop.
 *
 * Inputs read: the 8 bytes at r0 - r1 (t0..t7) and 4 bytes gathered by the
 * ldcol macro starting at r0 - 1 (l0..l3).
 * NOTE(review): ldcol is defined outside this excerpt; the lane layout
 * described below assumes it places one byte per row step into lanes 0-3
 * of its destination -- verify against the macro.
 * NOTE(review): the "l0t" suffix presumably names the neighbours used
 * (upper-left column, no lower-left column, top row) -- confirm.
 *
 * With T0 = t0+..+t3, T1 = t4+..+t7, L0 = l0+..+l3 the result is:
 *   rows 0-3: bytes 0-3 = (T0 + L0 + 4) >> 3, bytes 4-7 = (T1 + 2) >> 2
 *   rows 4-7: bytes 0-3 = (T0 + 2) >> 2,      bytes 4-7 = (T1 + 2) >> 2
 */
303 function ff_pred8x8_l0t_dc_neon, export=1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 = r0 - r1: one row step before the block start */
304 sub r2, r0, r1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = t0..t7, the 8 bytes at [r2] (64-bit alignment hint) */
305 vld1.8 {d0}, [r2,:64]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 = r0 - 1: the byte just before the first block byte */
306 sub r2, r0, #1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Gather 4 bytes via ldcol into d1 (presumably lanes 0-3 = l0..l3, stepping by r1 -- verify) */
307 ldcol.8 d1, r2, r1, 4
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Exchange d0[1] and d1[0] (32-bit lanes): d0 = {t0..t3, l0..l3}, d1 = {t4..t7, previous upper half of d1 (unused)} */
308 vtrn.32 d0, d1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Add adjacent bytes into u16: d0 = {t0+t1, t2+t3, l0+l1, l2+l3}, d1 = {t4+t5, t6+t7, -, -} */
309 vpaddl.u8 q0, q0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Add adjacent u16: d0 = {T0, L0, T1, -} */
310 vpadd.u16 d0, d0, d1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 = {T0+L0, -, T0+L0, -} */
311 vpadd.u16 d1, d0, d0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d2 = q0 narrowed to bytes as (x + 4) >> 3; byte 4 = (T0 + L0 + 4) >> 3 */
312 vrshrn.u16 d2, q0, #3
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d3 = q0 narrowed to bytes as (x + 2) >> 2; byte 0 = (T0 + 2) >> 2, byte 2 = (T1 + 2) >> 2 */
313 vrshrn.u16 d3, q0, #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = (T0 + L0 + 4) >> 3 in every byte */
314 vdup.8 d0, d2[4]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 = (T0 + 2) >> 2 in every byte */
315 vdup.8 d1, d3[0]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* q2 (d4 and d5) = (T1 + 2) >> 2 in every byte */
316 vdup.8 q2, d3[2]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Merge 32-bit lanes: d0 = {d0 low half, d4 low half}, d1 = {d1 low half, d5 low half}, giving the row patterns listed in the header */
317 vtrn.32 q0, q2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Continue in the shared store loop (d0 -> rows 0-3, d1 -> rows 4-7), which returns with bx lr */
318 b .L_pred8x8_dc_end
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
319 .endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
320
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_l00_dc_neon: build two 8-byte row patterns (d0, d1) and hand
 * them to the shared store loop at .L_pred8x8_dc_end, which writes d0 to
 * block rows 0-3 and d1 to rows 4-7.
 *
 * In:   r0 = address of the first block byte, r1 = byte step between rows
 *       (presumably the C arguments (uint8_t *src, int stride) -- TODO
 *       confirm against the caller).
 * Uses: r2, d0, d1 here; r3 and the flags in the shared store loop.
 *
 * Inputs read: 4 bytes gathered by the ldcol macro starting at r0 - 1
 * (l0..l3).
 * NOTE(review): ldcol is defined outside this excerpt; the lane layout
 * described below assumes it places one byte per row step into lanes 0-3
 * of its destination -- verify against the macro.
 * NOTE(review): the "l00" suffix presumably means only the upper-left
 * column is used -- confirm.
 *
 * With L0 = l0+..+l3 the result is:
 *   rows 0-3: every byte = (L0 + 2) >> 2
 *   rows 4-7: every byte = 128
 */
321 function ff_pred8x8_l00_dc_neon, export=1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 = r0 - 1: the byte just before the first block byte */
322 sub r2, r0, #1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Gather 4 bytes via ldcol into d0 (presumably lanes 0-3 = l0..l3, stepping by r1 -- verify) */
323 ldcol.8 d0, r2, r1, 4
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Add adjacent bytes into u16: d0 = {l0+l1, l2+l3, -, -} */
324 vpaddl.u8 d0, d0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Add adjacent u16: d0 = {L0, -, L0, -} */
325 vpadd.u16 d0, d0, d0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = q0 narrowed to bytes as (x + 2) >> 2; only byte 0 = (L0 + 2) >> 2 is used (d1 is not set by this function at this point) */
326 vrshrn.u16 d0, q0, #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 = constant 128 in every byte (pattern for rows 4-7) */
327 vmov.i8 d1, #128
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = (L0 + 2) >> 2 in every byte (pattern for rows 0-3) */
328 vdup.8 d0, d0[0]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Continue in the shared store loop (d0 -> rows 0-3, d1 -> rows 4-7), which returns with bx lr */
329 b .L_pred8x8_dc_end
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
330 .endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
331
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_0lt_dc_neon: build two 8-byte row patterns (d0, d1) from
 * neighbouring bytes and hand them to the shared store loop at
 * .L_pred8x8_dc_end, which writes d0 to block rows 0-3 and d1 to rows 4-7.
 *
 * In:   r0 = address of the first block byte, r1 = byte step between rows
 *       (presumably the C arguments (uint8_t *src, int stride) -- TODO
 *       confirm against the caller).
 * Uses: r2, d0-d5 here; r3 and the flags in the shared store loop.
 *
 * Inputs read: the 8 bytes at r0 - r1 (t0..t7) and 4 bytes gathered by the
 * ldcol macro starting at r0 + 4*r1 - 1 (l4..l7).
 * NOTE(review): ldcol is defined outside this excerpt; the lane layout
 * described below assumes that hi=1 makes it place one byte per row step
 * into lanes 4-7 of its destination -- verify against the macro.
 * NOTE(review): the "0lt" suffix presumably names the neighbours used
 * (no upper-left column, lower-left column, top row) -- confirm.
 *
 * With T0 = t0+..+t3, T1 = t4+..+t7, L1 = l4+..+l7 the result is:
 *   rows 0-3: bytes 0-3 = (T0 + 2) >> 2, bytes 4-7 = (T1 + 2) >> 2
 *   rows 4-7: bytes 0-3 = (L1 + 2) >> 2, bytes 4-7 = (T1 + L1 + 4) >> 3
 */
332 function ff_pred8x8_0lt_dc_neon, export=1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 = r0 - r1: one row step before the block start */
333 sub r2, r0, r1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = t0..t7, the 8 bytes at [r2] (64-bit alignment hint) */
334 vld1.8 {d0}, [r2,:64]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 = r0 + 4*r1: start of block row 4 */
335 add r2, r0, r1, lsl #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 -= 1: the byte just before the first byte of row 4 */
336 sub r2, r2, #1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Gather 4 bytes via ldcol into d1 (presumably lanes 4-7 = l4..l7 because of hi=1, stepping by r1 -- verify) */
337 ldcol.8 d1, r2, r1, 4, hi=1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Exchange d0[1] and d1[0] (32-bit lanes): d0 = {t0..t3, previous lower half of d1 (unused)}, d1 = {t4..t7, l4..l7} */
338 vtrn.32 d0, d1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Add adjacent bytes into u16: d0 = {t0+t1, t2+t3, -, -}, d1 = {t4+t5, t6+t7, l4+l5, l6+l7} */
339 vpaddl.u8 q0, q0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Add adjacent u16: d0 = {T0, -, T1, L1} */
340 vpadd.u16 d0, d0, d1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 = {-, T1+L1, -, T1+L1} */
341 vpadd.u16 d1, d0, d0
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d3 = q0 narrowed to bytes as (x + 2) >> 2; byte 0 = (T0 + 2) >> 2, byte 2 = (T1 + 2) >> 2, byte 3 = (L1 + 2) >> 2 */
342 vrshrn.u16 d3, q0, #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d2 = q0 narrowed to bytes as (x + 4) >> 3; byte 5 = (T1 + L1 + 4) >> 3 */
343 vrshrn.u16 d2, q0, #3
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = (T0 + 2) >> 2 in every byte */
344 vdup.8 d0, d3[0]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 = (L1 + 2) >> 2 in every byte */
345 vdup.8 d1, d3[3]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d4 = (T1 + 2) >> 2 in every byte */
346 vdup.8 d4, d3[2]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d5 = (T1 + L1 + 4) >> 3 in every byte */
347 vdup.8 d5, d2[5]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Merge 32-bit lanes: d0 = {d0 low half, d4 low half}, d1 = {d1 low half, d5 low half}, giving the row patterns listed in the header */
348 vtrn.32 q0, q2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Continue in the shared store loop (d0 -> rows 0-3, d1 -> rows 4-7), which returns with bx lr */
349 b .L_pred8x8_dc_end
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
350 .endfunc
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
351
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/*
 * ff_pred8x8_0l0_dc_neon: build two 8-byte row patterns (d0, d1) and hand
 * them to the shared store loop at .L_pred8x8_dc_end, which writes d0 to
 * block rows 0-3 and d1 to rows 4-7.
 *
 * In:   r0 = address of the first block byte, r1 = byte step between rows
 *       (presumably the C arguments (uint8_t *src, int stride) -- TODO
 *       confirm against the caller).
 * Uses: r2, d0-d2 (and reads d3 as the upper half of q1) here; r3 and the
 *       flags in the shared store loop.
 *
 * Inputs read: 4 bytes gathered by the ldcol macro starting at
 * r0 + 4*r1 - 1 (l4..l7).
 * NOTE(review): ldcol is defined outside this excerpt; the lane layout
 * described below assumes it places one byte per row step into lanes 0-3
 * of its destination -- verify against the macro.
 * NOTE(review): the "0l0" suffix presumably means only the lower-left
 * column is used -- confirm.
 *
 * With L1 = l4+..+l7 the result is:
 *   rows 0-3: every byte = 128
 *   rows 4-7: every byte = (L1 + 2) >> 2
 */
352 function ff_pred8x8_0l0_dc_neon, export=1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 = r0 + 4*r1: start of block row 4 */
353 add r2, r0, r1, lsl #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* r2 -= 1: the byte just before the first byte of row 4 */
354 sub r2, r2, #1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Gather 4 bytes via ldcol into d1 (presumably lanes 0-3 = l4..l7, stepping by r1 -- verify) */
355 ldcol.8 d1, r2, r1, 4
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Add adjacent bytes into u16: d2 = {l4+l5, l6+l7, -, -} */
356 vpaddl.u8 d2, d1
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Add adjacent u16: d2 = {L1, -, L1, -} */
357 vpadd.u16 d2, d2, d2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 = q1 (d2, d3) narrowed to bytes as (x + 2) >> 2; only byte 0 = (L1 + 2) >> 2 is used (d3 is not set by this function) */
358 vrshrn.u16 d1, q1, #2
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d0 = constant 128 in every byte (pattern for rows 0-3) */
359 vmov.i8 d0, #128
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* d1 = (L1 + 2) >> 2 in every byte (pattern for rows 4-7) */
360 vdup.8 d1, d1[0]
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
/* Continue in the shared store loop (d0 -> rows 0-3, d1 -> rows 4-7), which returns with bx lr */
361 b .L_pred8x8_dc_end
f52d07b169b4 ARM: NEON optimised H264 16x16, 8x8 pred
mru
parents:
diff changeset
362 .endfunc