annotate alpha/motion_est_mvi_asm.S @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 58c2da0a371b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
1 /*
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
2 * Alpha optimized DSP utils
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3130
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3130
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3130
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
3130
9361d38a3af0 Change license header to LGPL for consistency.
mellum
parents: 2967
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
9361d38a3af0 Change license header to LGPL for consistency.
mellum
parents: 2967
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3130
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3130
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
3130
9361d38a3af0 Change license header to LGPL for consistency.
mellum
parents: 2967
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9361d38a3af0 Change license header to LGPL for consistency.
mellum
parents: 2967
diff changeset
15 * Lesser General Public License for more details.
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
16 *
3130
9361d38a3af0 Change license header to LGPL for consistency.
mellum
parents: 2967
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3130
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3130
9361d38a3af0 Change license header to LGPL for consistency.
mellum
parents: 2967
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
20 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
21
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
22 #include "regdef.h"
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
23
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
24 /* Some nicer register names. */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
25 #define ta t10
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
26 #define tb t11
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
27 #define tc t12
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
28 #define td AT
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
29 /* Danger: these overlap with the argument list and the return value */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
30 #define te a5
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
31 #define tf a4
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
32 #define tg a3
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
33 #define th v0
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2122
diff changeset
34
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
35 .set noat
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
36 .set noreorder
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
37 .arch pca56
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
38 .text
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
39
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
40 /*****************************************************************************
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
41 * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
42 *
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
43 * This code is written with a pca56 in mind. For ev6, one should
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
44 * really take the increased latency of 3 cycles for MVI instructions
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
45 * into account.
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
46 *
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
47 * It is important to keep the loading and first use of a register as
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
48 * far apart as possible, because if a register is accessed before it
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
49 * has been fetched from memory, the CPU will stall.
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
50 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
51 .align 4
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
52 .globl pix_abs16x16_mvi_asm
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
53 .ent pix_abs16x16_mvi_asm
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
54 pix_abs16x16_mvi_asm:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
55 .frame sp, 0, ra, 0
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
56 .prologue 0
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
57
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
58 and a2, 7, t0
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
59 clr v0
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
60 beq t0, $aligned
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
61 .align 4
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
62 $unaligned:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
63 /* Registers:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
64 line 0:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
65 t0: left_u -> left lo -> left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
66 t1: mid
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
67 t2: right_u -> right hi -> right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
68 t3: ref left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
69 t4: ref right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
70 line 1:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
71 t5: left_u -> left lo -> left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
72 t6: mid
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
73 t7: right_u -> right hi -> right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
74 t8: ref left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
75 t9: ref right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
76 temp:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
77 ta: left hi
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
78 tb: right lo
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
79 tc: error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
80 td: error right */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
81
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
82 /* load line 0 */
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
83 ldq_u t0, 0(a2) # left_u
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
84 ldq_u t1, 8(a2) # mid
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
85 ldq_u t2, 16(a2) # right_u
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
86 ldq t3, 0(a1) # ref left
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
87 ldq t4, 8(a1) # ref right
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
88 addq a1, a3, a1 # pix1
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
89 addq a2, a3, a2 # pix2
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2122
diff changeset
90 /* load line 1 */
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
91 ldq_u t5, 0(a2) # left_u
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
92 ldq_u t6, 8(a2) # mid
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
93 ldq_u t7, 16(a2) # right_u
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
94 ldq t8, 0(a1) # ref left
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
95 ldq t9, 8(a1) # ref right
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
96 addq a1, a3, a1 # pix1
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
97 addq a2, a3, a2 # pix2
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
98 /* calc line 0 */
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
99 extql t0, a2, t0 # left lo
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
100 extqh t1, a2, ta # left hi
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
101 extql t1, a2, tb # right lo
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
102 or t0, ta, t0 # left
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
103 extqh t2, a2, t2 # right hi
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
104 perr t3, t0, tc # error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
105 or t2, tb, t2 # right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
106 perr t4, t2, td # error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
107 addq v0, tc, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
108 addq v0, td, v0 # add error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
109 /* calc line 1 */
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
110 extql t5, a2, t5 # left lo
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
111 extqh t6, a2, ta # left hi
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
112 extql t6, a2, tb # right lo
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
113 or t5, ta, t5 # left
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
114 extqh t7, a2, t7 # right hi
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
115 perr t8, t5, tc # error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
116 or t7, tb, t7 # right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
117 perr t9, t7, td # error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
118 addq v0, tc, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
119 addq v0, td, v0 # add error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
120 /* loop */
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
121 subq a4, 2, a4 # h -= 2
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
122 bne a4, $unaligned
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
123 ret
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
124
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
125 .align 4
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
126 $aligned:
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
127 /* load line 0 */
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
128 ldq t0, 0(a2) # left
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
129 ldq t1, 8(a2) # right
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
130 addq a2, a3, a2 # pix2
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
131 ldq t2, 0(a1) # ref left
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
132 ldq t3, 8(a1) # ref right
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
133 addq a1, a3, a1 # pix1
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
134 /* load line 1 */
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
135 ldq t4, 0(a2) # left
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
136 ldq t5, 8(a2) # right
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
137 addq a2, a3, a2 # pix2
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
138 ldq t6, 0(a1) # ref left
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
139 ldq t7, 8(a1) # ref right
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
140 addq a1, a3, a1 # pix1
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
141 /* load line 2 */
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
142 ldq t8, 0(a2) # left
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
143 ldq t9, 8(a2) # right
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
144 addq a2, a3, a2 # pix2
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
145 ldq ta, 0(a1) # ref left
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
146 ldq tb, 8(a1) # ref right
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
147 addq a1, a3, a1 # pix1
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
148 /* load line 3 */
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
149 ldq tc, 0(a2) # left
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
150 ldq td, 8(a2) # right
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
151 addq a2, a3, a2 # pix2
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
152 ldq te, 0(a1) # ref left
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
153 ldq a0, 8(a1) # ref right
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
154 /* calc line 0 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
155 perr t0, t2, t0 # error left
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
156 addq a1, a3, a1 # pix1
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
157 perr t1, t3, t1 # error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
158 addq v0, t0, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
159 /* calc line 1 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
160 perr t4, t6, t0 # error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
161 addq v0, t1, v0 # add error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
162 perr t5, t7, t1 # error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
163 addq v0, t0, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
164 /* calc line 2 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
165 perr t8, ta, t0 # error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
166 addq v0, t1, v0 # add error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
167 perr t9, tb, t1 # error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
168 addq v0, t0, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
169 /* calc line 3 */
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
170 perr tc, te, t0 # error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
171 addq v0, t1, v0 # add error right
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
172 perr td, a0, t1 # error right
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
173 addq v0, t0, v0 # add error left
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
174 addq v0, t1, v0 # add error right
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
175 /* loop */
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
176 subq a4, 4, a4 # h -= 4
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8590
diff changeset
177 bne a4, $aligned
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
178 ret
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff changeset
179 .end pix_abs16x16_mvi_asm