annotate armv4l/h264dsp_neon.S @ 8336:c8401acb05d1 libavcodec

ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
author mru
date Mon, 15 Dec 2008 22:12:41 +0000
parents
children d43b7f4c5c1c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8336
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
1 /*
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
3 *
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
4 * This file is part of FFmpeg.
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
5 *
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
10 *
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
14 * Lesser General Public License for more details.
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
15 *
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
19 */
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
20
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
21 #include "asm.S"
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
22
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
23 .fpu neon
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
24
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
25 /* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
26 .macro h264_chroma_mc8 avg=0
/*
 * Bilinear chroma interpolation for blocks 8 pixels wide, producing two
 * output rows per loop iteration.
 *
 * Register roles as used by the code below:
 *   r0 = dst (store pointer)    r1 = src    r2 = stride
 *   r3 = h, the row counter, decremented by 2 per iteration
 *   r4 = x, r5 = y: the two words found at [sp, #20] after the 20-byte
 *        push, i.e. at the stack pointer value on entry
 *   lr = copy of dst used to read the old contents (avg=1 only)
 *
 * With avg=1 every result is combined with the bytes already at dst
 * using vrhadd.u8 (rounding halving add) before it is stored.
 *
 * Stores through r0 and the avg loads through lr carry the :64 alignment
 * qualifier, so dst has to be 8-byte aligned.
 *
 * r4-r7 and lr are saved and restored.  ip, d0-d7, q8, q9 (and q10 when
 * avg=1) are used as scratch and are not preserved.
 *
 * NOTE(review): rows are consumed in pairs and every loop ends when
 * "r3 - 2" is no longer positive, so h is presumably always even and
 * greater than zero - confirm against the callers.
 */
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
27 push {r4-r7, lr}
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
28 ldrd r4, [sp, #20]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
29 .if \avg
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
30 mov lr, r0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
31 .endif
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
32 pld [r1]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
33 pld [r1, r2]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
34
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * Filter weights:
 *   r7 = x*y                                  (D)
 *   r6 = 8*y - x*y             = (8-x)*y      (C)
 *   ip = 8*x - x*y             = x*(8-y)      (B)
 *   r4 = x*y - 8*x - 8*y + 64  = (8-x)*(8-y)  (A)
 * Only muls updates the flags; the rsb/sub/add that follow have no S
 * suffix and leave them alone, so the beq below tests x*y == 0.
 */
35 muls r7, r4, r5
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
36 rsb r6, r7, r5, lsl #3
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
37 rsb ip, r7, r4, lsl #3
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
38 sub r4, r7, r4, lsl #3
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
39 sub r4, r4, r5, lsl #3
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
40 add r4, r4, #64
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
41
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
42 beq 2f
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
43
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * General case (x*y != 0): four-tap filter.  r1 walks the even source
 * rows and r5 the odd ones, both stepping by r4 = 2*stride.  d0..d3 hold
 * A, B, C, D replicated into every byte.  Sixteen bytes are loaded per
 * row; vext #1 then turns the second register of each pair into the same
 * row starting one byte later.
 */
44 add r5, r1, r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
45
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
46 vdup.8 d0, r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
47 lsl r4, r2, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
48 vdup.8 d1, ip
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
49 vld1.64 {d4, d5}, [r1], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
50 vdup.8 d2, r6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
51 vld1.64 {d6, d7}, [r5], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
52 vdup.8 d3, r7
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
53
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
54 vext.8 d5, d4, d5, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
55 vext.8 d7, d6, d7, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
56
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * On entry to each pass d4/d5 hold row n and d6/d7 row n+1.
 *   q8 = A*row[n]   + B*row[n][+1]   + C*row[n+1] + D*row[n+1][+1]
 *   q9 = A*row[n+1] + B*row[n+1][+1] + C*row[n+2] + D*row[n+2][+1]
 * d4/d5 are reloaded with row n+2 part-way through.  vrshrn #6 narrows
 * each 16-bit sum to a byte as (sum + 32) >> 6.  The {d6, d7} load near
 * the end fetches the row for the next pass, so on the final pass it
 * reads a source row that is never used.
 */
57 1: pld [r5]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
58 vmull.u8 q8, d4, d0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
59 vmlal.u8 q8, d5, d1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
60 vld1.64 {d4, d5}, [r1], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
61 vmlal.u8 q8, d6, d2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
62 vext.8 d5, d4, d5, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
63 vmlal.u8 q8, d7, d3
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
64 vmull.u8 q9, d6, d0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
65 subs r3, r3, #2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
66 vmlal.u8 q9, d7, d1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
67 vmlal.u8 q9, d4, d2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
68 vmlal.u8 q9, d5, d3
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
69 vrshrn.u16 d16, q8, #6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
70 vld1.64 {d6, d7}, [r5], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
71 pld [r1]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
72 vrshrn.u16 d17, q9, #6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
73 .if \avg
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
74 vld1.64 {d20}, [lr,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
75 vld1.64 {d21}, [lr,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
76 vrhadd.u8 q8, q8, q10
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
77 .endif
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
78 vext.8 d7, d6, d7, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
79 vst1.64 {d16}, [r0,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
80 vst1.64 {d17}, [r0,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
81 bgt 1b
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
82
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
83 pop {r4-r7, pc}
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
84
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * x*y == 0: x == 0 makes B (ip) zero and y == 0 makes C (r6) zero, so a
 * two-tap filter with weights A (d0) and B + C (d1) is sufficient.  tst
 * sets Z when r6 == 0; the add and the vdups do not touch the flags, so
 * beq 4f picks the horizontal variant in that case.
 */
85 2: tst r6, r6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
86 add ip, ip, r6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
87 vdup.8 d0, r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
88 vdup.8 d1, ip
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
89
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
90 beq 4f
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
91
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * r6 != 0 (so y != 0 and therefore x == 0): vertical two-tap filter,
 * out = d0*row[n] + d1*row[n+1].  Only 8 bytes per row are loaded; r1
 * and r5 again walk the even and odd rows in steps of 2*stride.  The d6
 * load inside the loop fetches the row for the next pass.
 */
92 add r5, r1, r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
93 lsl r4, r2, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
94 vld1.64 {d4}, [r1], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
95 vld1.64 {d6}, [r5], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
96
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
97 3: pld [r5]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
98 vmull.u8 q8, d4, d0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
99 vmlal.u8 q8, d6, d1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
100 vld1.64 {d4}, [r1], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
101 vmull.u8 q9, d6, d0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
102 vmlal.u8 q9, d4, d1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
103 vld1.64 {d6}, [r5], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
104 vrshrn.u16 d16, q8, #6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
105 vrshrn.u16 d17, q9, #6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
106 .if \avg
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
107 vld1.64 {d20}, [lr,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
108 vld1.64 {d21}, [lr,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
109 vrhadd.u8 q8, q8, q10
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
110 .endif
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
111 subs r3, r3, #2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
112 pld [r1]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
113 vst1.64 {d16}, [r0,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
114 vst1.64 {d17}, [r0,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
115 bgt 3b
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
116
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
117 pop {r4-r7, pc}
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
118
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * r6 == 0: horizontal two-tap filter, out[i] = d0*src[i] + d1*src[i+1].
 * This path also serves x == y == 0, where d1 = 0 and d0 = 64, so the
 * multiply followed by the rounding shift by 6 gives back the source
 * byte.  r1 alone steps through the rows by stride, and both loads
 * inside the loop fetch rows for the next pass (unused on the last one).
 */
119 4: vld1.64 {d4, d5}, [r1], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
120 vld1.64 {d6, d7}, [r1], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
121 vext.8 d5, d4, d5, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
122 vext.8 d7, d6, d7, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
123
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
124 5: pld [r1]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
125 subs r3, r3, #2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
126 vmull.u8 q8, d4, d0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
127 vmlal.u8 q8, d5, d1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
128 vld1.64 {d4, d5}, [r1], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
129 vmull.u8 q9, d6, d0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
130 vmlal.u8 q9, d7, d1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
131 pld [r1]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
132 vext.8 d5, d4, d5, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
133 vrshrn.u16 d16, q8, #6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
134 vrshrn.u16 d17, q9, #6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
135 .if \avg
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
136 vld1.64 {d20}, [lr,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
137 vld1.64 {d21}, [lr,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
138 vrhadd.u8 q8, q8, q10
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
139 .endif
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
140 vld1.64 {d6, d7}, [r1], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
141 vext.8 d7, d6, d7, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
142 vst1.64 {d16}, [r0,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
143 vst1.64 {d17}, [r0,:64], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
144 bgt 5b
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
145
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
146 pop {r4-r7, pc}
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
147 .endm
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
148
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
149 /* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
150 .macro h264_chroma_mc4 avg=0
/*
 * Bilinear chroma interpolation for blocks 4 pixels wide, producing two
 * output rows per loop iteration.
 *
 * Register roles as used by the code below:
 *   r0 = dst (store pointer)    r1 = src    r2 = stride
 *   r3 = h, the row counter, decremented by 2 per iteration
 *   r4 = x, r5 = y: the two words found at [sp, #20] after the 20-byte
 *        push, i.e. at the stack pointer value on entry
 *   lr = copy of dst used to read the old contents (avg=1 only)
 *
 * Because a row is only 4 bytes, two 4-byte groups are packed into each
 * 64-bit register with vtrn.32 so that one multiply covers two filter
 * taps; vadd.i16 then adds the two halves of the product together.
 *
 * With avg=1 every result is combined with the bytes already at dst
 * using vrhadd.u8 (rounding halving add) before it is stored.
 *
 * Stores through r0 and the avg loads through lr carry the :32 alignment
 * qualifier, so dst has to be 4-byte aligned.
 *
 * r4-r7 and lr are saved and restored.  ip, d0-d7, q8, q9 (and d20 when
 * avg=1) are used as scratch and are not preserved.
 *
 * NOTE(review): rows are consumed in pairs and every loop ends when
 * "r3 - 2" is no longer positive, so h is presumably always even and
 * greater than zero - confirm against the callers.
 */
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
151 push {r4-r7, lr}
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
152 ldrd r4, [sp, #20]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
153 .if \avg
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
154 mov lr, r0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
155 .endif
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
156 pld [r1]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
157 pld [r1, r2]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
158
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * Filter weights:
 *   r7 = x*y                                  (D)
 *   r6 = 8*y - x*y             = (8-x)*y      (C)
 *   ip = 8*x - x*y             = x*(8-y)      (B)
 *   r4 = x*y - 8*x - 8*y + 64  = (8-x)*(8-y)  (A)
 * Only muls updates the flags; the rsb/sub/add that follow have no S
 * suffix and leave them alone, so the beq below tests x*y == 0.
 */
159 muls r7, r4, r5
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
160 rsb r6, r7, r5, lsl #3
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
161 rsb ip, r7, r4, lsl #3
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
162 sub r4, r7, r4, lsl #3
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
163 sub r4, r4, r5, lsl #3
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
164 add r4, r4, #64
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
165
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
166 beq 2f
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
167
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * General case (x*y != 0): four-tap filter.  r1 walks the even source
 * rows and r5 the odd ones, both stepping by r4 = 2*stride; 8 bytes are
 * loaded per row.  After vext #1 and vtrn.32 the low word of d4 (d6) is
 * src[0..3] and the high word is src[1..4] of the row.  d5 and d7 are
 * never loaded; the one stale byte vext pulls in from them ends up in
 * the word that vtrn.32 leaves in d5/d7, so it never reaches d4/d6.
 * The vtrn.32 on the weights gives d0 = {A x4, B x4}, d2 = {C x4, D x4}.
 */
168 add r5, r1, r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
169
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
170 vdup.8 d0, r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
171 lsl r4, r2, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
172 vdup.8 d1, ip
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
173 vld1.64 {d4}, [r1], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
174 vdup.8 d2, r6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
175 vld1.64 {d6}, [r5], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
176 vdup.8 d3, r7
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
177
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
178 vext.8 d5, d4, d5, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
179 vext.8 d7, d6, d7, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
180 vtrn.32 d4, d5
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
181 vtrn.32 d6, d7
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
182
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
183 vtrn.32 d0, d1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
184 vtrn.32 d2, d3
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
185
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * On entry to each pass d4 holds packed row n and d6 packed row n+1.
 *   q8 = d4*d0 + d6*d2: low half  = A*row[n]     + C*row[n+1],
 *                       high half = B*row[n][+1] + D*row[n+1][+1]
 *   q9 is the same for rows n+1 and n+2 (d4 is reloaded in between).
 * The two vadd.i16 fold each product into one 4-element sum, leaving
 * output row n in d16 and row n+1 in d17, so a single vrshrn #6,
 * (sum + 32) >> 6, narrows both rows into the two words of d16.
 * The d6 load fetches the row for the next pass and is unused on the
 * final one.
 */
186 1: pld [r5]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
187 vmull.u8 q8, d4, d0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
188 vmlal.u8 q8, d6, d2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
189 vld1.64 {d4}, [r1], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
190 vext.8 d5, d4, d5, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
191 vtrn.32 d4, d5
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
192 vmull.u8 q9, d6, d0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
193 vmlal.u8 q9, d4, d2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
194 vld1.64 {d6}, [r5], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
195 vadd.i16 d16, d16, d17
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
196 vadd.i16 d17, d18, d19
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
197 vrshrn.u16 d16, q8, #6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
198 subs r3, r3, #2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
199 pld [r1]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
200 .if \avg
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
201 vld1.32 {d20[0]}, [lr,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
202 vld1.32 {d20[1]}, [lr,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
203 vrhadd.u8 d16, d16, d20
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
204 .endif
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
205 vext.8 d7, d6, d7, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
206 vtrn.32 d6, d7
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
207 vst1.32 {d16[0]}, [r0,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
208 vst1.32 {d16[1]}, [r0,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
209 bgt 1b
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
210
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
211 pop {r4-r7, pc}
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
212
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * x*y == 0: x == 0 makes B (ip) zero and y == 0 makes C (r6) zero, so a
 * two-tap filter with weights A and W = B + C is sufficient.  After the
 * vtrn.32 both d0 and d1 are {A x4, W x4}.  tst sets Z when r6 == 0;
 * the add and the NEON instructions do not touch the flags, so beq 4f
 * picks the horizontal variant in that case.
 */
213 2: tst r6, r6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
214 add ip, ip, r6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
215 vdup.8 d0, r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
216 vdup.8 d1, ip
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
217 vtrn.32 d0, d1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
218
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
219 beq 4f
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
220
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * r6 != 0 (so y != 0 and therefore x == 0): vertical two-tap filter.
 * vext.32 #1 swaps the words so that d1 = {W x4, A x4} while d0 stays
 * {A x4, W x4}.  d4 carries two consecutive rows, 4 bytes each: lane 0
 * is filled from r1 (even rows) and lane 1 from r5 (odd rows).
 *   q8 = d4*d0 = {A*row[n],   W*row[n+1]}
 *   q9 = d4*d1 = {W*row[n+2], A*row[n+1]}   (lane 0 reloaded first)
 * The vadd.i16 pair adds the halves to give output rows n and n+1.
 * The lane 1 load inside the loop fetches the row for the next pass.
 */
221 vext.32 d1, d0, d1, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
222 add r5, r1, r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
223 lsl r4, r2, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
224 vld1.32 {d4[0]}, [r1], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
225 vld1.32 {d4[1]}, [r5], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
226
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
227 3: pld [r5]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
228 vmull.u8 q8, d4, d0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
229 vld1.32 {d4[0]}, [r1], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
230 vmull.u8 q9, d4, d1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
231 vld1.32 {d4[1]}, [r5], r4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
232 vadd.i16 d16, d16, d17
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
233 vadd.i16 d17, d18, d19
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
234 vrshrn.u16 d16, q8, #6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
235 .if \avg
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
236 vld1.32 {d20[0]}, [lr,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
237 vld1.32 {d20[1]}, [lr,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
238 vrhadd.u8 d16, d16, d20
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
239 .endif
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
240 subs r3, r3, #2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
241 pld [r1]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
242 vst1.32 {d16[0]}, [r0,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
243 vst1.32 {d16[1]}, [r0,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
244 bgt 3b
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
245
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
246 pop {r4-r7, pc}
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
247
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
/*
 * r6 == 0: horizontal two-tap filter.  Each row is packed as
 * {src[0..3], src[1..4]} exactly as in the four-tap path and multiplied
 * by d0 = {A x4, W x4}; adding the two product halves gives
 * A*src[i] + W*src[i+1].  This path also serves x == y == 0, where
 * W = 0 and A = 64, so the rounding shift by 6 gives back the source
 * byte.  r1 alone steps through the rows by stride, and both loads
 * inside the loop fetch rows for the next pass (unused on the last one).
 */
248 4: vld1.64 {d4}, [r1], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
249 vld1.64 {d6}, [r1], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
250 vext.8 d5, d4, d5, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
251 vext.8 d7, d6, d7, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
252 vtrn.32 d4, d5
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
253 vtrn.32 d6, d7
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
254
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
255 5: vmull.u8 q8, d4, d0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
256 vmull.u8 q9, d6, d0
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
257 subs r3, r3, #2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
258 vld1.64 {d4}, [r1], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
259 vext.8 d5, d4, d5, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
260 vtrn.32 d4, d5
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
261 vadd.i16 d16, d16, d17
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
262 vadd.i16 d17, d18, d19
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
263 pld [r1]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
264 vrshrn.u16 d16, q8, #6
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
265 .if \avg
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
266 vld1.32 {d20[0]}, [lr,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
267 vld1.32 {d20[1]}, [lr,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
268 vrhadd.u8 d16, d16, d20
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
269 .endif
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
270 vld1.64 {d6}, [r1], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
271 vext.8 d7, d6, d7, #1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
272 vtrn.32 d6, d7
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
273 pld [r1]
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
274 vst1.32 {d16[0]}, [r0,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
275 vst1.32 {d16[1]}, [r0,:32], r2
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
276 bgt 5b
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
277
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
278 pop {r4-r7, pc}
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
279 .endm
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
280
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
281 .text
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
282 .align
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
283
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
284 function ff_put_h264_chroma_mc8_neon, export=1
/*
 * 8-pixel-wide "put" variant: the body is the h264_chroma_mc8 macro with
 * its default avg=0, so results are stored without being averaged with
 * the existing dst contents.  The function macro (invoked here with
 * export=1) is not defined in this file; presumably it comes from asm.S.
 */
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
285 h264_chroma_mc8
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
286 .endfunc
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
287
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
288 function ff_avg_h264_chroma_mc8_neon, export=1
/*
 * 8-pixel-wide "avg" variant: the body is the h264_chroma_mc8 macro with
 * avg=1, which enables the blocks that load the existing dst rows and
 * merge them into the result with vrhadd.u8 before storing.  The
 * function macro (invoked here with export=1) is not defined in this
 * file; presumably it comes from asm.S.
 */
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
289 h264_chroma_mc8 avg=1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
290 .endfunc
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
291
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
292 function ff_put_h264_chroma_mc4_neon, export=1
/*
 * 4-pixel-wide "put" variant: the body is the h264_chroma_mc4 macro with
 * its default avg=0, so results are stored without being averaged with
 * the existing dst contents.  The function macro (invoked here with
 * export=1) is not defined in this file; presumably it comes from asm.S.
 */
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
293 h264_chroma_mc4
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
294 .endfunc
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
295
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
296 function ff_avg_h264_chroma_mc4_neon, export=1
/*
 * 4-pixel-wide "avg" variant: the body is the h264_chroma_mc4 macro with
 * avg=1, which enables the blocks that load the existing dst rows and
 * merge them into the result with vrhadd.u8 before storing.  The
 * function macro (invoked here with export=1) is not defined in this
 * file; presumably it comes from asm.S.
 */
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
297 h264_chroma_mc4 avg=1
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents:
diff changeset
298 .endfunc