Mercurial > libavcodec.hg
annotate alpha/motion_est_mvi_asm.S @ 905:2b93dc762f9a libavcodec
fixing illegal 3. esc bug (the mpeg4 std only requires encoders to use unescaped symbols but not esc1 or esc2 if they are shorter than esc3, and just because it's logical to use the shortest possible vlc doesn't mean encoders do that)
author | michaelni |
---|---|
date | Wed, 04 Dec 2002 11:47:24 +0000 |
parents | 107a56aa74f5 |
children | 503496800167 |
rev | line source |
---|---|
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
1 /* |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
2 * Alpha optimized DSP utils |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org> |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
4 * |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
5 * This program is free software; you can redistribute it and/or modify |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
6 * it under the terms of the GNU General Public License as published by |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
7 * the Free Software Foundation; either version 2 of the License, or |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
8 * (at your option) any later version. |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
9 * |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
10 * This program is distributed in the hope that it will be useful, |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
13 * GNU General Public License for more details. |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
14 * |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
15 * You should have received a copy of the GNU General Public License |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
16 * along with this program; if not, write to the Free Software |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
18 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
19 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
20 #include "regdef.h" |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
21 #ifdef HAVE_AV_CONFIG_H |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
22 #include "config.h" |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
23 #endif |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
24 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
25 /* Some nicer register names. */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
26 #define ta t10 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
27 #define tb t11 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
28 #define tc t12 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
29 #define td AT |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
30 /* Danger: these overlap with the argument list and the return value */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
31 #define te a5 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
32 #define tf a4 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
33 #define tg a3 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
34 #define th v0 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
35 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
36 .set noat |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
37 .set noreorder |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
38 .arch pca56 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
39 .text |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
40 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
41 /***************************************************************************** |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
42 * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size) |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
43 * |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
44 * This code is written with a pca56 in mind. For ev6, one should |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
45 * really take the increased latency of 3 cycles for MVI instructions |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
46 * into account. |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
47 * |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
48 * It is important to keep the loading and first use of a register as |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
49 * far apart as possible, because if a register is accessed before it |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
50 * has been fetched from memory, the CPU will stall. |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
51 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
52 .align 4 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
53 .globl pix_abs16x16_mvi_asm |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
54 .ent pix_abs16x16_mvi_asm |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
55 pix_abs16x16_mvi_asm: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
56 .frame sp, 0, ra, 0 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
57 .prologue 0 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
58 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
59 #ifdef HAVE_GPROF |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
60 lda AT, _mcount |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
61 jsr AT, (AT), _mcount |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
62 #endif |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
63 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
64 and a1, 7, t0 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
65 clr v0 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
66 lda a3, 16 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
67 beq t0, $aligned |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
68 .align 4 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
69 $unaligned: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
70 /* Registers: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
71 line 0: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
72 t0: left_u -> left lo -> left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
73 t1: mid |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
74 t2: right_u -> right hi -> right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
75 t3: ref left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
76 t4: ref right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
77 line 1: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
78 t5: left_u -> left lo -> left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
79 t6: mid |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
80 t7: right_u -> right hi -> right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
81 t8: ref left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
82 t9: ref right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
83 temp: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
84 ta: left hi |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
85 tb: right lo |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
86 tc: error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
87 td: error right */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
88 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
89 /* load line 0 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
90 ldq_u t0, 0(a1) # left_u |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
91 ldq_u t1, 8(a1) # mid |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
92 ldq_u t2, 16(a1) # right_u |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
93 ldq t3, 0(a0) # ref left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
94 ldq t4, 8(a0) # ref right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
95 addq a0, a2, a0 # pix1 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
96 addq a1, a2, a1 # pix2 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
97 /* load line 1 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
98 ldq_u t5, 0(a1) # left_u |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
99 ldq_u t6, 8(a1) # mid |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
100 ldq_u t7, 16(a1) # right_u |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
101 ldq t8, 0(a0) # ref left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
102 ldq t9, 8(a0) # ref right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
103 addq a0, a2, a0 # pix1 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
104 addq a1, a2, a1 # pix2 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
105 /* calc line 0 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
106 extql t0, a1, t0 # left lo |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
107 extqh t1, a1, ta # left hi |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
108 extql t1, a1, tb # right lo |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
109 or t0, ta, t0 # left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
110 extqh t2, a1, t2 # right hi |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
111 perr t3, t0, tc # error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
112 or t2, tb, t2 # right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
113 perr t4, t2, td # error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
114 addq v0, tc, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
115 addq v0, td, v0 # add error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
116 /* calc line 1 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
117 extql t5, a1, t5 # left lo |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
118 extqh t6, a1, ta # left hi |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
119 extql t6, a1, tb # right lo |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
120 or t5, ta, t5 # left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
121 extqh t7, a1, t7 # right hi |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
122 perr t8, t5, tc # error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
123 or t7, tb, t7 # right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
124 perr t9, t7, td # error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
125 addq v0, tc, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
126 addq v0, td, v0 # add error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
127 /* loop */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
128 subq a3, 2, a3 # h -= 2 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
129 bne a3, $unaligned |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
130 ret |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
131 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
132 .align 4 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
133 $aligned: |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
134 /* load line 0 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
135 ldq t0, 0(a1) # left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
136 ldq t1, 8(a1) # right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
137 addq a1, a2, a1 # pix2 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
138 ldq t2, 0(a0) # ref left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
139 ldq t3, 8(a0) # ref right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
140 addq a0, a2, a0 # pix1 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
141 /* load line 1 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
142 ldq t4, 0(a1) # left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
143 ldq t5, 8(a1) # right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
144 addq a1, a2, a1 # pix2 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
145 ldq t6, 0(a0) # ref left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
146 ldq t7, 8(a0) # ref right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
147 addq a0, a2, a0 # pix1 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
148 /* load line 2 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
149 ldq t8, 0(a1) # left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
150 ldq t9, 8(a1) # right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
151 addq a1, a2, a1 # pix2 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
152 ldq ta, 0(a0) # ref left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
153 ldq tb, 8(a0) # ref right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
154 addq a0, a2, a0 # pix1 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
155 /* load line 3 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
156 ldq tc, 0(a1) # left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
157 ldq td, 8(a1) # right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
158 addq a1, a2, a1 # pix2 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
159 ldq te, 0(a0) # ref left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
160 ldq tf, 8(a0) # ref right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
161 /* calc line 0 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
162 perr t0, t2, t0 # error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
163 addq a0, a2, a0 # pix1 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
164 perr t1, t3, t1 # error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
165 addq v0, t0, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
166 /* calc line 1 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
167 perr t4, t6, t0 # error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
168 addq v0, t1, v0 # add error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
169 perr t5, t7, t1 # error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
170 addq v0, t0, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
171 /* calc line 2 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
172 perr t8, ta, t0 # error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
173 addq v0, t1, v0 # add error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
174 perr t9, tb, t1 # error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
175 addq v0, t0, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
176 /* calc line 3 */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
177 perr tc, te, t0 # error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
178 addq v0, t1, v0 # add error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
179 perr td, tf, t1 # error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
180 addq v0, t0, v0 # add error left |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
181 addq v0, t1, v0 # add error right |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
182 /* loop */ |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
183 subq a3, 4, a3 # h -= 4 |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
184 bne a3, $aligned |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
185 ret |
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
diff
changeset
|
186 .end pix_abs16x16_mvi_asm |