annotate arm/int_neon.S @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 361a5fcb4393
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11243
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
1 /*
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
2 * ARM NEON optimised integer operations
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
3 * Copyright (c) 2009 Kostya Shishkov
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
4 *
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
5 * This file is part of FFmpeg.
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
6 *
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
11 *
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
15 * Lesser General Public License for more details.
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
16 *
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
20 */
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
21
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
22 #include "asm.S"
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
23
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
24 preserve8
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
25 .fpu neon
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
26 .text
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
27
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
@ Dot product of two int16 vectors, with an optional right shift applied to
@ every 32-bit product before it is accumulated.
@
@ In:   r0 = first int16 vector (loaded without an alignment hint)
@       r1 = second int16 vector (loaded with a 128-bit alignment hint)
@       r2 = element count; 16 elements are consumed per iteration and the
@            loops exit only when the count reaches exactly zero, so it must
@            be a non-zero multiple of 16
@       r3 = right-shift amount for each product (0 selects the VMLAL loop)
@ Out:  r0 = low 32 bits of the sum of all (shifted) products
@ Uses: q0-q3, q8-q15, flags. The callee-saved d8-d15 are left untouched.
function ff_scalarproduct_int16_neon, export=1
        vmov.i16        q0,  #0                 @ q0-q3: four independent
        vmov.i16        q1,  #0                 @ 4 x 32-bit accumulators
        vmov.i16        q2,  #0
        vmov.i16        q3,  #0
        negs            r3,  r3                 @ VSHL by -shift == shift right
        beq             2f                      @ no shift: multiply-accumulate

        @ The shift count must survive the whole loop, so it lives in q15,
        @ which nothing in the loop writes.  The fourth product goes to q8,
        @ whose inputs (d16/d17) have already been consumed by then.
        vdup.s32        q15, r3
1:      vld1.16         {d16-d17}, [r0]!
        vld1.16         {d20-d21}, [r1,:128]!
        vmull.s16       q12, d16, d20
        vld1.16         {d18-d19}, [r0]!
        vmull.s16       q13, d17, d21
        vld1.16         {d22-d23}, [r1,:128]!
        vmull.s16       q14, d18, d22
        vmull.s16       q8,  d19, d23
        vshl.s32        q12, q12, q15
        vshl.s32        q13, q13, q15
        vadd.s32        q0,  q0,  q12
        vshl.s32        q14, q14, q15
        vadd.s32        q1,  q1,  q13
        vshl.s32        q8,  q8,  q15
        vadd.s32        q2,  q2,  q14
        vadd.s32        q3,  q3,  q8
        subs            r2,  r2,  #16
        bne             1b
        b               3f

        @ shift == 0: accumulate the widened products directly.
2:      vld1.16         {d16-d17}, [r0]!
        vld1.16         {d20-d21}, [r1,:128]!
        vmlal.s16       q0,  d16, d20
        vld1.16         {d18-d19}, [r0]!
        vmlal.s16       q1,  d17, d21
        vld1.16         {d22-d23}, [r1,:128]!
        vmlal.s16       q2,  d18, d22
        vmlal.s16       q3,  d19, d23
        subs            r2,  r2,  #16
        bne             2b

        @ Horizontal reduction of the 16 partial sums in q0-q3.  Only
        @ caller-saved scratch (d16-d19) is used for the intermediates.
3:      vpadd.s32       d16, d0,  d1
        vpadd.s32       d17, d2,  d3
        vpadd.s32       d18, d4,  d5
        vpadd.s32       d19, d6,  d7
        vpadd.s32       d0,  d16, d17
        vpadd.s32       d1,  d18, d19
        vpadd.s32       d2,  d0,  d1
        vpaddl.s32      d3,  d2
        vmov.32         r0,  d3[0]              @ return the low word of the sum
        bx              lr
endfunc
11243
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
79
e71b0be9ac79 ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff changeset
@ scalarproduct_and_madd_int16(/*aligned*/v0,v1,v2,order,mul)
@
@ Returns the dot product of v0 and v1 (using the values of v0 as they were
@ on entry) and, in the same pass, updates v0[i] += mul * v2[i] with 16-bit
@ wrap-around arithmetic.
@
@ In:   r0   = v0, read and written back with a 128-bit alignment hint
@       r1   = v1 (loaded without an alignment hint)
@       r2   = v2 (loaded without an alignment hint)
@       r3   = order; 16 elements are consumed per iteration and the loop
@              exits only when it reaches exactly zero, so it must be a
@              non-zero multiple of 16
@       [sp] = mul; only the 16-bit value stored at [sp] is read
@              NOTE(review): for a 32-bit stack slot that is the low half
@              only on little-endian -- confirm big-endian is not a target
@ Out:  r0   = low 32 bits of the sum of v0[i] * v1[i]
@ Uses: r12, q0-q3, q8-q14, flags. The callee-saved d8-d15 are left untouched.
function ff_scalarproduct_and_madd_int16_neon, export=1
        vld1.16         {d28[],d29[]}, [sp]     @ q14 = mul in all eight lanes
        vmov.i16        q0,  #0                 @ q0-q3: four independent
        vmov.i16        q1,  #0                 @ 4 x 32-bit accumulators
        vmov.i16        q2,  #0
        vmov.i16        q3,  #0
        mov             r12, r0                 @ r12 = write pointer into v0

1:      vld1.16         {d16-d17}, [r0,:128]!   @ q8  = v0[0..7]
        vld1.16         {d18-d19}, [r1]!        @ q9  = v1[0..7]
        vld1.16         {d20-d21}, [r2]!        @ q10 = v2[0..7]
        vld1.16         {d22-d23}, [r0,:128]!   @ q11 = v0[8..15]
        vld1.16         {d24-d25}, [r1]!        @ q12 = v1[8..15]
        vld1.16         {d26-d27}, [r2]!        @ q13 = v2[8..15]
        vmul.s16        q10, q10, q14           @ mul * v2
        vmul.s16        q13, q13, q14
        vmlal.s16       q0,  d16, d18           @ accumulate v0 * v1
        vmlal.s16       q1,  d17, d19
        vadd.s16        q10, q8,  q10           @ v0 + mul * v2
        vadd.s16        q13, q11, q13
        vmlal.s16       q2,  d22, d24
        vmlal.s16       q3,  d23, d25
        vst1.16         {q10}, [r12,:128]!
        subs            r3,  r3,  #16
        vst1.16         {q13}, [r12,:128]!      @ NEON store leaves flags intact
        bne             1b

        @ Horizontal reduction of the 16 partial sums in q0-q3.  Only
        @ caller-saved scratch (d16-d19) is used for the intermediates.
        vpadd.s32       d16, d0,  d1
        vpadd.s32       d17, d2,  d3
        vpadd.s32       d18, d4,  d5
        vpadd.s32       d19, d6,  d7
        vpadd.s32       d0,  d16, d17
        vpadd.s32       d1,  d18, d19
        vpadd.s32       d2,  d0,  d1
        vpaddl.s32      d3,  d2
        vmov.32         r0,  d3[0]              @ return the low word of the sum
        bx              lr
endfunc