Mercurial > libavcodec.hg
annotate alpha/motion_est_mvi_asm.S @ 12483:0159a19bfff7 libavcodec
aacdec: Rework channel mapping compatibility hacks.
For a PCE based configuration map the channels solely based on tags.
For an indexed configuration map the channels solely based on position.
This works with all known exotic samples including al17, elem_id0, bad_concat,
and lfe_is_sce.
author | alexc |
---|---|
date | Fri, 10 Sep 2010 18:01:48 +0000 |
parents | 58c2da0a371b |
children |
rev | line source |
---|---|
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
1 /* |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
2 * Alpha optimized DSP utils |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org> |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
3130 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
3130 | 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 * Lesser General Public License for more details. | |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
16 * |
3130 | 17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3130 | 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
20 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
21 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
22 #include "regdef.h" |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
23 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
24 /* Some nicer register names. */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
25 #define ta t10 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
26 #define tb t11 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
27 #define tc t12 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
28 #define td AT |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
29 /* Danger: these overlap with the argument list (tf = a4 = h, tg = a3 = line_size) and the return value (th = v0) */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
30 #define te a5 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
31 #define tf a4 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
32 #define tg a3 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
33 #define th v0 |
2967 | 34 |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
35 .set noat |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
36 .set noreorder |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
37 .arch pca56 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
38 .text |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
39 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
40 /***************************************************************************** |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
41 * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
42 * |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
43 * This code is written with a pca56 in mind. For ev6, one should |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
44 * really take the increased latency of 3 cycles for MVI instructions |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
45 * into account. |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
46 * |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
47 * It is important to keep the loading and first use of a register as |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
48 * far apart as possible, because if a register is accessed before it |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
49 * has been fetched from memory, the CPU will stall. |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
50 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
51 .align 4 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
52 .globl pix_abs16x16_mvi_asm |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
53 .ent pix_abs16x16_mvi_asm |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
54 pix_abs16x16_mvi_asm: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
55 .frame sp, 0, ra, 0 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
56 .prologue 0 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
57 |
8625 | 58 and a2, 7, t0 |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
59 clr v0 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
60 beq t0, $aligned |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
61 .align 4 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
62 $unaligned: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
63 /* Registers: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
64 line 0: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
65 t0: left_u -> left lo -> left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
66 t1: mid |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
67 t2: right_u -> right hi -> right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
68 t3: ref left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
69 t4: ref right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
70 line 1: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
71 t5: left_u -> left lo -> left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
72 t6: mid |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
73 t7: right_u -> right hi -> right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
74 t8: ref left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
75 t9: ref right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
76 temp: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
77 ta: left hi |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
78 tb: right lo |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
79 tc: error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
80 td: error right */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
81 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
82 /* load line 0 */ |
8625 | 83 ldq_u t0, 0(a2) # left_u |
84 ldq_u t1, 8(a2) # mid | |
85 ldq_u t2, 16(a2) # right_u | |
86 ldq t3, 0(a1) # ref left | |
87 ldq t4, 8(a1) # ref right | |
88 addq a1, a3, a1 # pix1 | |
89 addq a2, a3, a2 # pix2 | |
2967 | 90 /* load line 1 */ |
8625 | 91 ldq_u t5, 0(a2) # left_u |
92 ldq_u t6, 8(a2) # mid | |
93 ldq_u t7, 16(a2) # right_u | |
94 ldq t8, 0(a1) # ref left | |
95 ldq t9, 8(a1) # ref right | |
96 addq a1, a3, a1 # pix1 | |
97 addq a2, a3, a2 # pix2 | |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
98 /* calc line 0 */ |
8625 | 99 extql t0, a2, t0 # left lo |
100 extqh t1, a2, ta # left hi | |
101 extql t1, a2, tb # right lo | |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
102 or t0, ta, t0 # left |
8625 | 103 extqh t2, a2, t2 # right hi |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
104 perr t3, t0, tc # error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
105 or t2, tb, t2 # right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
106 perr t4, t2, td # error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
107 addq v0, tc, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
108 addq v0, td, v0 # add error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
109 /* calc line 1 */ |
8625 | 110 extql t5, a2, t5 # left lo |
111 extqh t6, a2, ta # left hi | |
112 extql t6, a2, tb # right lo | |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
113 or t5, ta, t5 # left |
8625 | 114 extqh t7, a2, t7 # right hi |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
115 perr t8, t5, tc # error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
116 or t7, tb, t7 # right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
117 perr t9, t7, td # error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
118 addq v0, tc, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
119 addq v0, td, v0 # add error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
120 /* loop */ |
8625 | 121 subq a4, 2, a4 # h -= 2 |
122 bne a4, $unaligned | |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
123 ret |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
124 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
125 .align 4 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
126 $aligned: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
127 /* load line 0 */ |
8625 | 128 ldq t0, 0(a2) # left |
129 ldq t1, 8(a2) # right | |
130 addq a2, a3, a2 # pix2 | |
131 ldq t2, 0(a1) # ref left | |
132 ldq t3, 8(a1) # ref right | |
133 addq a1, a3, a1 # pix1 | |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
134 /* load line 1 */ |
8625 | 135 ldq t4, 0(a2) # left |
136 ldq t5, 8(a2) # right | |
137 addq a2, a3, a2 # pix2 | |
138 ldq t6, 0(a1) # ref left | |
139 ldq t7, 8(a1) # ref right | |
140 addq a1, a3, a1 # pix1 | |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
141 /* load line 2 */ |
8625 | 142 ldq t8, 0(a2) # left |
143 ldq t9, 8(a2) # right | |
144 addq a2, a3, a2 # pix2 | |
145 ldq ta, 0(a1) # ref left | |
146 ldq tb, 8(a1) # ref right | |
147 addq a1, a3, a1 # pix1 | |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
148 /* load line 3 */ |
8625 | 149 ldq tc, 0(a2) # left |
150 ldq td, 8(a2) # right | |
151 addq a2, a3, a2 # pix2 | |
152 ldq te, 0(a1) # ref left | |
153 ldq a0, 8(a1) # ref right | |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
154 /* calc line 0 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
155 perr t0, t2, t0 # error left |
8625 | 156 addq a1, a3, a1 # pix1 |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
157 perr t1, t3, t1 # error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
158 addq v0, t0, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
159 /* calc line 1 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
160 perr t4, t6, t0 # error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
161 addq v0, t1, v0 # add error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
162 perr t5, t7, t1 # error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
163 addq v0, t0, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
164 /* calc line 2 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
165 perr t8, ta, t0 # error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
166 addq v0, t1, v0 # add error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
167 perr t9, tb, t1 # error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
168 addq v0, t0, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
169 /* calc line 3 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
170 perr tc, te, t0 # error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
171 addq v0, t1, v0 # add error right |
8625 | 172 perr td, a0, t1 # error right |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
173 addq v0, t0, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
174 addq v0, t1, v0 # add error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
175 /* loop */ |
8625 | 176 subq a4, 4, a4 # h -= 4 |
177 bne a4, $aligned | |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
178 ret |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
179 .end pix_abs16x16_mvi_asm |