annotate arm/synth_filter_neon.S @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 85f6fd5dd599
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11593
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
1 /*
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
2 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
3 *
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
4 * This file is part of FFmpeg.
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
5 *
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
10 *
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
14 * Lesser General Public License for more details.
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
15 *
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
19 */
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
20
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
21 #include "asm.S"
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
22
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
23 preserve8
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
24
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
25 function ff_synth_filter_float_neon, export=1 @ NEON float synth filter: r0 is handed on to the imdct call, r1 = synth_buf base, r2 = pointer to synth_buf_offset, r3 = synth_buf2; window, out, in, scale, bias are read from the stack
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
26 push {r3-r11,lr} @ 10 words saved; r3 is popped again right after the call below
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
27
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
28 ldr r4, [r2] @ synth_buf_offset
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
29 add r1, r1, r4, lsl #2 @ synth_buf
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
30 sub r12, r4, #32 @ r12 = offset - 32
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
31 bfc r12, #9, #23 @ clear bits 9..31, i.e. r12 = (offset - 32) & 511
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
32 bic r4, r4, #63 @ r4 = old offset rounded down to a multiple of 64; compared with the loop 2 counter
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
33 str r12, [r2] @ write the updated synth_buf_offset back
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
34
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
35 ldr r2, [sp, #12*4] @ in
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
36 mov r9, r1 @ synth_buf (copy that is still used after the call)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
37
11610
475eab960878 ARM: fix NEON synth_filter_float with hardfp calls
mru
parents: 11593
diff changeset
38 VFP vpush {d0} @ keep d0 across the call; NOTE(review): VFP prefix presumably selects hardfp builds, confirm in asm.S
11808
85f6fd5dd599 arm neon: Add missing mangle to external symbol
conrad
parents: 11610
diff changeset
39 bl X(ff_imdct_half_neon) @ called with r0 unchanged from entry, r1 = synth_buf, r2 = in; presumably fills synth_buf, confirm against callee
11610
475eab960878 ARM: fix NEON synth_filter_float with hardfp calls
mru
parents: 11593
diff changeset
40 VFP vpop {d0} @ matches the vpush above
11593
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
41 pop {r3} @ restore r3 saved by the initial push; stack offsets below are one word smaller
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
42
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
43 ldr r5, [sp, #9*4] @ window
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
44 ldr r2, [sp, #10*4] @ out
11610
475eab960878 ARM: fix NEON synth_filter_float with hardfp calls
mru
parents: 11593
diff changeset
45 NOVFP vldr d0, [sp, #12*4] @ scale, bias
11593
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
46 add r8, r9, #12*4 @ r8 = synth_buf + 12 floats; data read through r8 is lane-reversed in loop 2
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
47
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
48 mov lr, #64*4 @ post-increment stride used by every load in loop 2: 64 floats
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
49 mov r1, #4 @ outer loop counter: 4 passes
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
50 1: @ outer loop: each pass handles one group of 4 floats
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
51 add r10, r9, #16*4 @ synth_buf
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
52 add r11, r8, #16*4 @ r11 = r8 + 16 floats
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
53 add r0, r5, #16*4 @ window
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
54 add r6, r5, #32*4 @ r6 = window + 32 floats
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
55 add r7, r5, #48*4 @ r7 = window + 48 floats
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
56
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
57 vld1.32 {q10}, [r3,:128] @ a
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
58 add r3, r3, #16*4 @ step r3 forward 16 floats for the b load
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
59 vld1.32 {q1}, [r3,:128] @ b
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
60 vmov.f32 q2, #0.0 @ c
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
61 vmov.f32 q3, #0.0 @ d
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
62
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
63 mov r12, #512 @ loop 2 counter, reduced by 64 per pass (8 passes)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
64 2: @ inner loop: accumulate into a (q10), b (q1), c (q2), d (q3)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
65 vld1.32 {q9}, [r8, :128], lr @ q9 = 4 floats at r8, then r8 += lr
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
66 vrev64.32 q9, q9 @ swap the two floats inside each d register
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
67 vld1.32 {q8}, [r5, :128], lr @ q8 = 4 window floats at r5
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
68 vmls.f32 d20, d16, d19 @ a.lo -= q8.lo * q9.hi; crossing the halves completes the 4-float reversal
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
69 vld1.32 {q11}, [r0, :128], lr @ q11 = 4 window floats at r0 (window + 16)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
70 vmls.f32 d21, d17, d18 @ a.hi -= q8.hi * q9.lo
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
71 vld1.32 {q12}, [r9, :128], lr @ q12 = 4 floats at r9 (synth_buf)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
72 vmla.f32 d2, d22, d24 @ b.lo += q11.lo * q12.lo
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
73 vld1.32 {q8}, [r6, :128], lr @ q8 = 4 window floats at r6 (window + 32)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
74 vmla.f32 d3, d23, d25 @ b.hi += q11.hi * q12.hi
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
75 vld1.32 {q9}, [r10,:128], lr @ q9 = 4 floats at r10 (16 floats past r9)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
76 vmla.f32 d4, d16, d18 @ c.lo += q8.lo * q9.lo
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
77 vld1.32 {q12}, [r11,:128], lr @ q12 = 4 floats at r11 (16 floats past r8)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
78 vmla.f32 d5, d17, d19 @ c.hi += q8.hi * q9.hi
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
79 vrev64.32 q12, q12 @ swap the two floats inside each d register
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
80 vld1.32 {q11}, [r7, :128], lr @ q11 = 4 window floats at r7 (window + 48)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
81 vmla.f32 d6, d22, d25 @ d.lo += q11.lo * q12.hi (reversed order, as for a)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
82 vmla.f32 d7, d23, d24 @ d.hi += q11.hi * q12.lo
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
83 subs r12, r12, #64 @ one stride step consumed; sets Z when all 8 are done
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
84 beq 3f @ counter hit zero: leave loop 2
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
85 cmp r12, r4 @ has the counter reached the 64-aligned old offset?
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
86 bne 2b @ no: keep going without wrapping
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
87 sub r8, r8, #512*4 @ yes: wrap all four synth_buf pointers back by 512 floats
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
88 sub r9, r9, #512*4
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
89 sub r10, r10, #512*4
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
90 sub r11, r11, #512*4
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
91 b 2b
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
92 3: @ loop 2 finished: scale and store this group
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
93 vdup.32 q8, d0[1] @ q8 = d0[1] in every lane (bias, going by the scale, bias load order)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
94 vdup.32 q9, d0[1] @ q9 = same broadcast value
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
95 vmla.f32 q8, q10, d0[0] @ q8 = d0[1] + a * d0[0]
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
96 vmla.f32 q9, q1, d0[0] @ q9 = d0[1] + b * d0[0]
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
97 vst1.32 {q3}, [r3,:128] @ store d over the slot b was loaded from
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
98 sub r3, r3, #16*4 @ back to the slot a was loaded from
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
99 vst1.32 {q2}, [r3,:128] @ store c over the slot a was loaded from
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
100 vst1.32 {q8}, [r2,:128] @ write scaled a to out
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
101 add r2, r2, #16*4 @ advance out by 16 floats
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
102 vst1.32 {q9}, [r2,:128] @ write scaled b to out + 16 floats
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
103
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
104 subs r1, r1, #1 @ one outer pass done
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
105 popeq {r4-r11,pc} @ after the 4th pass: restore r4-r11 and return (r3 was popped earlier)
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
106
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
107 cmp r4, #0 @ with r4 == 0 loop 2 never takes its wrap path
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
108 subeq r8, r8, #512*4 @ so undo the 8 * 64 float advance of r8 here
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
109 subeq r9, r9, #512*4 @ and of r9
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
110 sub r5, r5, #512*4 @ rewind window by the 8 * 64 floats loop 2 advanced it
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
111 sub r2, r2, #12*4 @ out
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
112 add r3, r3, #4*4 @ synth_buf2
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
113 add r5, r5, #4*4 @ window
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
114 add r9, r9, #4*4 @ synth_buf
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
115 sub r8, r8, #4*4 @ synth_buf
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
116 b 1b @ next group of 4 floats
b7fa70eabb1f ARM: NEON optimised synth_filter_float
mru
parents:
diff changeset
117 endfunc