annotate arm/mpegvideo_neon.S @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents fda5ab5d31bb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11797
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
1 /*
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
2 * Copyright (c) 2010 Mans Rullgard
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
3 *
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
4 * This file is part of FFmpeg.
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
5 *
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
10 *
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
14 * Lesser General Public License for more details.
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
15 *
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
19 */
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
20
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
21 #include "asm.S"
11813
fda5ab5d31bb ARM: fail build if hardcoded struct offsets are wrong
mru
parents: 11797
diff changeset
22 #include "asm-offsets.h"
11797
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
23
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
@ ff_dct_unquantize_h263_inter_neon
@ Prepares the register arguments consumed by ff_dct_unquantize_h263_neon and
@ then runs on into it (there is no return instruction in this function).
@ In (as used below):
@   r0 = base address to which BLOCK_LAST_INDEX and INTER_SCANTAB_RASTER_END
@        are applied as byte offsets
@   r1 = not touched here; passed through unchanged
@   r2 = index into the array of 32-bit words at r0 + BLOCK_LAST_INDEX
@   r3 = value from which qmul (r3 * 2) and qadd ((r3 - 1) | 1) are derived
@ Out (on reaching endfunc):
@   r0 = r3 * 2, r2 = (r3 - 1) | 1, r3 = looked-up table byte + 1, r12 clobbered
@ NOTE(review): presumably the C prototype is (context, block, n, qscale) under
@ the ARM procedure call standard - confirm against the caller.
24 function ff_dct_unquantize_h263_inter_neon, export=1
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
25 add r0, r0, #0x2200 @ first step of a two-step offset; presumably split so each part fits an ARM immediate - TODO confirm
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
26 add r12, r0, #BLOCK_LAST_INDEX-0x2200 @ r12 = original r0 + BLOCK_LAST_INDEX
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
27 ldr r12, [r12, r2, lsl #2] @ r12 = 32-bit word number r2 of that array
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
28 add r0, r0, #INTER_SCANTAB_RASTER_END-0x2200 @ r0 = original r0 + INTER_SCANTAB_RASTER_END
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
29 ldrb r12, [r0, r12] @ r12 = byte of that table selected by the word just loaded
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
30 sub r2, r3, #1 @ start of qadd: r3 - 1
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
31 lsl r0, r3, #1 @ qmul = r3 * 2
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
32 orr r2, r2, #1 @ qadd = (r3 - 1) | 1
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
33 add r3, r12, #1 @ element count = table byte + 1
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
34 endfunc @ no return before this point: control continues into the code placed next (ff_dct_unquantize_h263_neon in this listing); assumes function/endfunc emit nothing executable in between - confirm in asm.S
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
35
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
@ ff_dct_unquantize_h263_neon
@ In-place transform of signed 16-bit elements: every non-zero element x is
@ replaced by x * qmul + qadd when x > 0, or x * qmul - qadd when x < 0;
@ elements equal to zero are left as zero.
@ In:
@   r0 = qmul (low 16 bits are used)
@   r1 = pointer to the elements; the loads carry 128-bit / 64-bit alignment hints
@   r2 = qadd (low 16 bits are used)
@   r3 = element count
@ Clobbers: r0, r1, r3, flags, q0-q3, q8-q11, q13-q15.
@ Elements are handled 16 at a time in the main loop and 4 at a time in the
@ tail, so elements beyond the requested count are also read and rewritten.
@ NOTE(review): if 5..8 elements remain after a main-loop pass, only the 4-lane
@ tail runs; presumably callers never produce such counts - confirm.
36 function ff_dct_unquantize_h263_neon, export=1
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
37 vdup.16 q15, r0 @ qmul
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
38 vdup.16 q14, r2 @ qadd
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
39 vneg.s16 q13, q14 @ q13 = -qadd in every lane
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
40 cmp r3, #4
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
41 mov r0, r1 @ r0 = read pointer, r1 = write pointer; both walk the same buffer
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
42 ble 2f @ count <= 4: skip the 16-element loop, do the 4-element tail only
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
43 1:
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
44 vld1.16 {q0}, [r0,:128]! @ first 8 elements of this pass
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
45 vclt.s16 q3, q0, #0 @ q3 = all-ones mask in lanes where the element is negative
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
46 vld1.16 {q8}, [r0,:128]! @ second 8 elements of this pass
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
47 vceq.s16 q1, q0, #0 @ q1 = all-ones mask in lanes where the element is zero
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
48 vmul.s16 q2, q0, q15 @ q2 = element * qmul
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
49 vclt.s16 q11, q8, #0 @ negative mask for the second half
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
50 vmul.s16 q10, q8, q15 @ q10 = element * qmul for the second half
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
51 vbsl q3, q13, q14 @ q3 = -qadd where negative, +qadd elsewhere
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
52 vbsl q11, q13, q14 @ same signed qadd selection for the second half
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
53 vadd.s16 q2, q2, q3 @ q2 = element * qmul +/- qadd
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
54 vceq.s16 q9, q8, #0 @ zero mask for the second half
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
55 vadd.s16 q10, q10, q11
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
56 vbif q0, q2, q1 @ take the computed value only where the zero mask is clear; zero lanes keep 0
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
57 vbif q8, q10, q9
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
58 subs r3, r3, #16 @ 16 elements done; flags set here are consumed by bxle below
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
59 vst1.16 {q0}, [r1,:128]!
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
60 vst1.16 {q8}, [r1,:128]!
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
61 bxle lr @ return once nothing remains (r3 <= 0)
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
62 cmp r3, #8
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
63 bgt 1b @ more than 8 remaining: another 16-element pass
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
64 2:
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
65 vld1.16 {d0}, [r0,:64] @ tail: exactly 4 elements, no pointer write-back
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
66 vclt.s16 d3, d0, #0 @ negative mask
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
67 vceq.s16 d1, d0, #0 @ zero mask
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
68 vmul.s16 d2, d0, d30 @ d30 = low half of q15 (qmul)
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
69 vbsl d3, d26, d28 @ d26 = low half of q13 (-qadd), d28 = low half of q14 (+qadd)
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
70 vadd.s16 d2, d2, d3
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
71 vbif d0, d2, d1 @ keep zero lanes as zero
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
72 vst1.16 {d0}, [r1,:64]
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
73 bx lr
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
74 endfunc
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
75
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
@ ff_dct_unquantize_h263_intra_neon
@ Wrapper around ff_dct_unquantize_h263_neon that treats the first 16-bit
@ element at r1 separately: its value is computed in r4 before the call and
@ written back over the first element afterwards.
@ In (as used below):
@   r0 = base address to which AC_PRED, H263_AIC, Y_DC_SCALE, BLOCK_LAST_INDEX
@        and INTER_SCANTAB_RASTER_END are applied as byte offsets
@   r1 = pointer to the 16-bit elements
@   r2 = index into the word array at r0 + BLOCK_LAST_INDEX; also selects
@        which scale word is loaded (r2 >= 4 picks the word 4 bytes further on)
@   r3 = value from which qmul (r3 * 2) and qadd ((r3 - 1) | 1) are derived
@ r4-r6 and lr are saved on the stack and restored on return; r12 is clobbered.
@ NOTE(review): presumably the C prototype is (context, block, n, qscale) and
@ the word after Y_DC_SCALE is the chroma DC scale - confirm against the
@ structure layout.
76 function ff_dct_unquantize_h263_intra_neon, export=1
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
77 push {r4-r6,lr}
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
78 ldr r6, [r0, #AC_PRED] @ r6 = word at r0 + AC_PRED
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
79 add r5, r0, #0x2700 @ partial offset, completed by the H263_AIC load at label 1
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
80 cmp r6, #0
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
81 movne r12, #63 @ AC_PRED word non-zero: use 63, giving a count of 64 below
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
82 bne 1f @ ...and skip the table lookup
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
83 add lr, r0, #0x2200 @ lr is free as scratch: the return address was pushed above
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
84 add r12, lr, #BLOCK_LAST_INDEX-0x2200 @ r12 = r0 + BLOCK_LAST_INDEX
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
85 add lr, lr, #INTER_SCANTAB_RASTER_END-0x2200 @ lr = r0 + INTER_SCANTAB_RASTER_END
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
86 ldr r12, [r12, r2, lsl #2] @ r12 = 32-bit word number r2 of that array
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
87 ldrb r12, [lr, r12] @ r12 = table byte selected by that word
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
88 1: ldr r5, [r5, #H263_AIC-0x2700] @ r5 = word at r0 + H263_AIC
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
89 ldrsh r4, [r1] @ r4 = first element, sign-extended
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
90 cmp r5, #0
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
91 mov r5, r1 @ keep the start pointer (the callee advances r1); mov leaves the cmp flags intact
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
92 movne r2, #0 @ H263_AIC word non-zero: qadd = 0
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
93 bne 2f @ ...and leave r4 (the first element) unscaled
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
94 cmp r2, #4
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
95 addge r0, r0, #4 @ r2 >= 4: bias the base so the load below reads the word after Y_DC_SCALE
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
96 sub r2, r3, #1 @ start of qadd: r3 - 1
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
97 ldr r6, [r0, #Y_DC_SCALE] @ r6 = selected scale word
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
98 orr r2, r2, #1 @ qadd = (r3 - 1) | 1
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
99 smulbb r4, r4, r6 @ r4 = first element * scale (signed product of the low halfwords)
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
100 2: lsl r0, r3, #1 @ qmul = r3 * 2
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
101 add r3, r12, #1 @ element count = r12 + 1
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
102 bl ff_dct_unquantize_h263_neon @ processes the buffer, first element included
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
103 vmov.16 d0[0], r4 @ low 16 bits of r4 into lane 0 of d0
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
104 vst1.16 {d0[0]}, [r5] @ overwrite the first element with the value prepared in r4
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
105 pop {r4-r6,pc} @ restore r4-r6 and return by loading the saved lr into pc
77243f47f39e ARM: NEON optimised dct_unquantize_h263_{intra,inter}
mru
parents:
diff changeset
106 endfunc