Mercurial > libavcodec.hg
annotate arm/int_neon.S @ 11557:53822d92c3f7 libavcodec
Make sure the EC code does not attempt to use inter-based concealment if there
is no reference frame available. (This can happen because the EC code will attempt
to use reference frames even for I/IDR frames.)
author | michael |
---|---|
date | Tue, 30 Mar 2010 20:46:46 +0000 |
parents | 361a5fcb4393 |
children |
rev | line source |
---|---|
11243
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
1 /* |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
2 * ARM NEON optimised integer operations |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
3 * Copyright (c) 2009 Kostya Shishkov |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
4 * |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
5 * This file is part of FFmpeg. |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
6 * |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
11 * |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
15 * Lesser General Public License for more details. |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
16 * |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
20 */ |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
21 |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
22 #include "asm.S" |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
23 |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
24 preserve8 |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
25 .fpu neon |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
26 .text |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
27 |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
@ ff_scalarproduct_int16_neon
@ NOTE(review): the C prototype is not visible in this file; the argument
@ roles below are read off the code and should be confirmed against the caller.
@
@ Computes sum((a[i] * b[i]) >> shift) over 16-bit signed elements and
@ returns the low 32 bits of the total.
@
@ In:   r0 = a, int16 vector (loaded without an alignment hint)
@       r1 = b, int16 vector (loaded with a :128 hint, so 16-byte aligned)
@       r2 = element count; 16 elements are consumed per iteration and the
@            loop only exits when the count reaches exactly zero, so it has
@            to be a non-zero multiple of 16
@       r3 = right shift applied to each 32-bit product before it is summed
@ Out:  r0 = result
@ Clobb: r0-r3, q0-q3, q8-q15, flags
@        (d8-d15 are callee-saved under the AAPCS and are deliberately
@        left untouched)
function ff_scalarproduct_int16_neon, export=1
        vmov.i16        q0,  #0                 @ q0-q3: four independent
        vmov.i16        q1,  #0                 @ 4x32-bit accumulators
        vmov.i16        q2,  #0
        vmov.i16        q3,  #0
        negs            r3,  r3                 @ vshl shifts right for a negative count
        beq             2f                      @ shift == 0: use the multiply-accumulate path

        @ Shift count lives in q15 for the whole loop.  It must not share a
        @ register with any of the products, otherwise the first vmull
        @ destroys it before the shifts read it.
        vdup.32         q15, r3
1:      vld1.16         {d16-d17}, [r0]!
        vld1.16         {d20-d21}, [r1,:128]!
        vmull.s16       q12, d16, d20
        vld1.16         {d18-d19}, [r0]!
        vmull.s16       q13, d17, d21
        vld1.16         {d22-d23}, [r1,:128]!
        vmull.s16       q14, d18, d22
        vmull.s16       q8,  d19, d23           @ d16/d17 are dead here, reuse q8
        vshl.s32        q12, q12, q15
        vshl.s32        q13, q13, q15
        vadd.s32        q0,  q0,  q12
        vshl.s32        q14, q14, q15
        vadd.s32        q1,  q1,  q13
        vshl.s32        q8,  q8,  q15
        vadd.s32        q2,  q2,  q14
        vadd.s32        q3,  q3,  q8
        subs            r2,  r2,  #16
        bne             1b
        b               3f

        @ shift == 0: accumulate the widened products directly.
2:      vld1.16         {d16-d17}, [r0]!
        vld1.16         {d20-d21}, [r1,:128]!
        vmlal.s16       q0,  d16, d20
        vld1.16         {d18-d19}, [r0]!
        vmlal.s16       q1,  d17, d21
        vld1.16         {d22-d23}, [r1,:128]!
        vmlal.s16       q2,  d18, d22
        vmlal.s16       q3,  d19, d23
        subs            r2,  r2,  #16
        bne             2b

        @ Horizontal reduction of the 16 partial sums in q0-q3.  Only
        @ caller-saved scratch (d16-d19) is used as temporaries.
3:      vpadd.s32       d16, d0,  d1
        vpadd.s32       d17, d2,  d3
        vpadd.s32       d18, d4,  d5
        vpadd.s32       d19, d6,  d7
        vpadd.s32       d0,  d16, d17
        vpadd.s32       d1,  d18, d19
        vpadd.s32       d2,  d0,  d1
        vpaddl.s32      d3,  d2
        vmov.32         r0,  d3[0]              @ return the low 32 bits of the total
        bx              lr
endfunc
11243
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
79 |
e71b0be9ac79
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16
mru
parents:
diff
changeset
|
@ scalarproduct_and_madd_int16(/*aligned*/v0,v1,v2,order,mul)
@
@ In one pass over the vectors:
@   - accumulates sum(v0[i] * v1[i]) using the values of v0 as loaded,
@     that is before they are updated, and
@   - rewrites v0[i] = v0[i] + v2[i] * mul with 16-bit wrapping arithmetic.
@
@ In:   r0   = v0, read and written with a :128 hint (16-byte aligned)
@       r1   = v1, read only, no alignment hint
@       r2   = v2, read only, no alignment hint
@       r3   = order; 16 elements are consumed per iteration and the loop
@              only exits when the count reaches exactly zero, so it has to
@              be a non-zero multiple of 16
@       [sp] = mul; only the halfword at [sp] is read (the low 16 bits of
@              the stacked argument on a little-endian target)
@ Out:  r0   = low 32 bits of the scalar product
@ Clobb: r0-r3, r12, q0-q3, q8-q14, flags
@        (d8-d15 are callee-saved under the AAPCS and are deliberately
@        left untouched)
function ff_scalarproduct_and_madd_int16_neon, export=1
        vld1.16         {d28[],d29[]}, [sp]     @ q14 = mul broadcast to all 8 lanes
        vmov.i16        q0,  #0                 @ q0-q3: four independent
        vmov.i16        q1,  #0                 @ 4x32-bit accumulators
        vmov.i16        q2,  #0
        vmov.i16        q3,  #0
        mov             r12, r0                 @ r12 = store pointer into v0, r0 = load pointer

1:      vld1.16         {d16-d17}, [r0,:128]!   @ q8  = v0[0..7]
        vld1.16         {d18-d19}, [r1]!        @ q9  = v1[0..7]
        vld1.16         {d20-d21}, [r2]!        @ q10 = v2[0..7]
        vld1.16         {d22-d23}, [r0,:128]!   @ q11 = v0[8..15]
        vld1.16         {d24-d25}, [r1]!        @ q12 = v1[8..15]
        vld1.16         {d26-d27}, [r2]!        @ q13 = v2[8..15]
        vmul.s16        q10, q10, q14           @ v2 * mul
        vmul.s16        q13, q13, q14
        vmlal.s16       q0,  d16, d18           @ accumulate v0 * v1
        vmlal.s16       q1,  d17, d19
        vadd.s16        q10, q8,  q10           @ v0 + v2 * mul
        vadd.s16        q13, q11, q13
        vmlal.s16       q2,  d22, d24
        vmlal.s16       q3,  d23, d25
        vst1.16         {q10}, [r12,:128]!
        subs            r3,  r3,  #16
        vst1.16         {q13}, [r12,:128]!      @ NEON store leaves the flags from subs intact
        bne             1b

        @ Horizontal reduction of the 16 partial sums in q0-q3.  Only
        @ caller-saved scratch (d16-d19) is used as temporaries.
        vpadd.s32       d16, d0,  d1
        vpadd.s32       d17, d2,  d3
        vpadd.s32       d18, d4,  d5
        vpadd.s32       d19, d6,  d7
        vpadd.s32       d0,  d16, d17
        vpadd.s32       d1,  d18, d19
        vpadd.s32       d2,  d0,  d1
        vpaddl.s32      d3,  d2
        vmov.32         r0,  d3[0]              @ return the low 32 bits of the total
        bx              lr
endfunc