annotate alpha/motion_est_mvi_asm.S @ 905:2b93dc762f9a libavcodec

fixing illegal 3. esc bug (the mpeg4 std only requires encoders to use unescaped symbols but not esc1 or esc2 if they are shorter than esc3, andjust beause its logical to use the shortest possible vlc doesnt mean encoders do that)
author michaelni
date Wed, 04 Dec 2002 11:47:24 +0000
parents 107a56aa74f5
children 503496800167
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
1 /*
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
2 * Alpha optimized DSP utils
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
4 *
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
5 * This program is free software; you can redistribute it and/or modify
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
6 * it under the terms of the GNU General Public License as published by
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
7 * the Free Software Foundation; either version 2 of the License, or
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
8 * (at your option) any later version.
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
9 *
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
10 * This program is distributed in the hope that it will be useful,
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
13 * GNU General Public License for more details.
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
14 *
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
15 * You should have received a copy of the GNU General Public License
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
16 * along with this program; if not, write to the Free Software
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
18 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
19
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
20 #include "regdef.h"
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
21 #ifdef HAVE_AV_CONFIG_H
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
22 #include "config.h"
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
23 #endif
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
24
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
25 /* Some nicer register names. */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
26 #define ta t10
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
27 #define tb t11
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
28 #define tc t12
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
29 #define td AT
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
30 /* Danger: these overlap with the argument list and the return value */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
31 #define te a5
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
32 #define tf a4
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
33 #define tg a3
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
34 #define th v0
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
35
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
36 .set noat
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
37 .set noreorder
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
38 .arch pca56
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
39 .text
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
40
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
41 /*****************************************************************************
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
42 * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
43 *
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
44 * This code is written with a pca56 in mind. For ev6, one should
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
45 * really take the increased latency of 3 cycles for MVI instructions
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
46 * into account.
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
47 *
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
48 * It is important to keep the loading and first use of a register as
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
49 * far apart as possible, because if a register is accessed before it
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
50 * has been fetched from memory, the CPU will stall.
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
51 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
52 .align 4
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
53 .globl pix_abs16x16_mvi_asm
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
54 .ent pix_abs16x16_mvi_asm
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
55 pix_abs16x16_mvi_asm:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
56 .frame sp, 0, ra, 0
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
57 .prologue 0
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
58
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
59 #ifdef HAVE_GPROF
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
60 lda AT, _mcount
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
61 jsr AT, (AT), _mcount
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
62 #endif
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
63
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
64 and a1, 7, t0
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
65 clr v0
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
66 lda a3, 16
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
67 beq t0, $aligned
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
68 .align 4
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
69 $unaligned:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
70 /* Registers:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
71 line 0:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
72 t0: left_u -> left lo -> left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
73 t1: mid
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
74 t2: right_u -> right hi -> right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
75 t3: ref left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
76 t4: ref right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
77 line 1:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
78 t5: left_u -> left lo -> left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
79 t6: mid
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
80 t7: right_u -> right hi -> right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
81 t8: ref left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
82 t9: ref right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
83 temp:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
84 ta: left hi
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
85 tb: right lo
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
86 tc: error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
87 td: error right */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
88
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
89 /* load line 0 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
90 ldq_u t0, 0(a1) # left_u
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
91 ldq_u t1, 8(a1) # mid
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
92 ldq_u t2, 16(a1) # right_u
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
93 ldq t3, 0(a0) # ref left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
94 ldq t4, 8(a0) # ref right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
95 addq a0, a2, a0 # pix1
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
96 addq a1, a2, a1 # pix2
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
97 /* load line 1 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
98 ldq_u t5, 0(a1) # left_u
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
99 ldq_u t6, 8(a1) # mid
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
100 ldq_u t7, 16(a1) # right_u
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
101 ldq t8, 0(a0) # ref left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
102 ldq t9, 8(a0) # ref right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
103 addq a0, a2, a0 # pix1
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
104 addq a1, a2, a1 # pix2
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
105 /* calc line 0 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
106 extql t0, a1, t0 # left lo
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
107 extqh t1, a1, ta # left hi
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
108 extql t1, a1, tb # right lo
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
109 or t0, ta, t0 # left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
110 extqh t2, a1, t2 # right hi
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
111 perr t3, t0, tc # error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
112 or t2, tb, t2 # right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
113 perr t4, t2, td # error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
114 addq v0, tc, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
115 addq v0, td, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
116 /* calc line 1 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
117 extql t5, a1, t5 # left lo
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
118 extqh t6, a1, ta # left hi
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
119 extql t6, a1, tb # right lo
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
120 or t5, ta, t5 # left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
121 extqh t7, a1, t7 # right hi
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
122 perr t8, t5, tc # error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
123 or t7, tb, t7 # right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
124 perr t9, t7, td # error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
125 addq v0, tc, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
126 addq v0, td, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
127 /* loop */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
128 subq a3, 2, a3 # h -= 2
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
129 bne a3, $unaligned
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
130 ret
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
131
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
132 .align 4
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
133 $aligned:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
134 /* load line 0 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
135 ldq t0, 0(a1) # left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
136 ldq t1, 8(a1) # right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
137 addq a1, a2, a1 # pix2
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
138 ldq t2, 0(a0) # ref left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
139 ldq t3, 8(a0) # ref right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
140 addq a0, a2, a0 # pix1
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
141 /* load line 1 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
142 ldq t4, 0(a1) # left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
143 ldq t5, 8(a1) # right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
144 addq a1, a2, a1 # pix2
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
145 ldq t6, 0(a0) # ref left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
146 ldq t7, 8(a0) # ref right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
147 addq a0, a2, a0 # pix1
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
148 /* load line 2 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
149 ldq t8, 0(a1) # left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
150 ldq t9, 8(a1) # right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
151 addq a1, a2, a1 # pix2
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
152 ldq ta, 0(a0) # ref left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
153 ldq tb, 8(a0) # ref right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
154 addq a0, a2, a0 # pix1
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
155 /* load line 3 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
156 ldq tc, 0(a1) # left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
157 ldq td, 8(a1) # right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
158 addq a1, a2, a1 # pix2
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
159 ldq te, 0(a0) # ref left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
160 ldq tf, 8(a0) # ref right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
161 /* calc line 0 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
162 perr t0, t2, t0 # error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
163 addq a0, a2, a0 # pix1
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
164 perr t1, t3, t1 # error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
165 addq v0, t0, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
166 /* calc line 1 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
167 perr t4, t6, t0 # error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
168 addq v0, t1, v0 # add error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
169 perr t5, t7, t1 # error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
170 addq v0, t0, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
171 /* calc line 2 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
172 perr t8, ta, t0 # error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
173 addq v0, t1, v0 # add error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
174 perr t9, tb, t1 # error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
175 addq v0, t0, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
176 /* calc line 3 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
177 perr tc, te, t0 # error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
178 addq v0, t1, v0 # add error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
179 perr td, tf, t1 # error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
180 addq v0, t0, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
181 addq v0, t1, v0 # add error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
182 /* loop */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
183 subq a3, 4, a3 # h -= 4
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
184 bne a3, $aligned
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
185 ret
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
186 .end pix_abs16x16_mvi_asm