Mercurial > libavcodec.hg
annotate alpha/dsputil_alpha_asm.S @ 12266:48d6738904a9 libavcodec
Fix SPLATB_REG mess. Used to be an if/elseif/elseif/elseif spaghetti, so this
splits it into small optimization-specific macros which are selected for each
DSP function. The advantage of this approach is that the sse4 functions now
use the ssse3 codepath also without needing an explicit sse4 codepath.
author | rbultje |
---|---|
date | Sat, 24 Jul 2010 19:33:05 +0000 |
parents | 58c2da0a371b |
children |
rev | line source |
---|---|
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
1 /* |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
2 * Alpha optimized DSP utils |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org> |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
3130 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
3130 | 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 * Lesser General Public License for more details. | |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
16 * |
3130 | 17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3130
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3130 | 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
20 */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
21 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
22 /* |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
23 * These functions are scheduled for pca56. They should work |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
24 * reasonably on ev6, though. |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
25 */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
26 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
27 #include "regdef.h" |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
28 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
/*
 * Additional temporary-register aliases, continuing the t0..t9 names
 * that the routines in this file use for scratch values.
 */
#define ta      t10
#define tb      t11
#define tc      t12
#define td      AT
/*
 * Caution: the four aliases below share registers with the argument
 * list (a3..a5) and with the return value (v0).  Use them only in
 * routines that do not need those registers for anything else.
 */
#define te      a5
#define tf      a4
#define tg      a3
#define th      v0

        .set noat               /* AT is aliased as td above, so the assembler must not use it on its own */
        .set noreorder          /* keep the instruction order exactly as written */
        .arch pca56             /* presumably required for the MVI instructions (maxsw4, minsw4, pkwb, unpkbw) */
        .text
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
44 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
/************************************************************************
 * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
 *                         int line_size, int h)
 *
 * Copies h rows of 8 bytes each from pixels to block.  Both pointers
 * advance by line_size after every row.
 *
 * Register use:
 *   a0 = block      destination, written with aligned quadword stores
 *   a1 = pixels     source, any alignment (tested once on entry)
 *   a2 = line_size  row stride applied to both pointers
 *   a3 = h          number of rows still to copy
 *
 * Requirements that follow from the code:
 *   - Every loop pass copies four rows and the loop exits only when the
 *     counter is exactly zero, so h must be a positive multiple of 4.
 *   - The source alignment is tested only on entry, and the unaligned
 *     path takes its byte-shift amount from a1 after a1 has already
 *     been advanced by line_size, so line_size must be a multiple of 8.
 *
 * Scratch registers: t0-t9 and ta.  No stack frame is set up.
 */
        .align 6
        .globl put_pixels_axp_asm
        .ent put_pixels_axp_asm
put_pixels_axp_asm:
        .frame sp, 0, ra
        .prologue 0

        and     a1, 7, t0               # t0 = byte offset of pixels inside its quadword
        beq     t0, $put_aligned        # offset 0: use the plain-load loop

        /*
         * Unaligned source.  For each row, load the two aligned quadwords
         * that straddle the 8 source bytes, shift both into place with
         * extql/extqh and OR the halves together.
         */
        .align 4
$put_unaligned:
        ldq_u   t0, 0(a1)               # row 0, lower quadword
        ldq_u   t1, 8(a1)               # row 0, upper quadword
        addq    a1, a2, a1              # pixels += line_size
        nop                             # filler, presumably for issue-slot pairing

        ldq_u   t2, 0(a1)               # row 1, lower quadword
        ldq_u   t3, 8(a1)               # row 1, upper quadword
        addq    a1, a2, a1              # pixels += line_size
        nop

        ldq_u   t4, 0(a1)               # row 2, lower quadword
        ldq_u   t5, 8(a1)               # row 2, upper quadword
        addq    a1, a2, a1              # pixels += line_size
        nop

        ldq_u   t6, 0(a1)               # row 3, lower quadword
        ldq_u   t7, 8(a1)               # row 3, upper quadword
        extql   t0, a1, t0              # row 0: low part shifted down
        addq    a1, a2, a1              # pixels += line_size (now 4 rows ahead)

        extqh   t1, a1, t1              # row 0: high part shifted up
        addq    a0, a2, t8              # t8 = address of destination row 1
        extql   t2, a1, t2              # row 1: low part
        addq    t8, a2, t9              # t9 = address of destination row 2

        extqh   t3, a1, t3              # row 1: high part
        addq    t9, a2, ta              # destination row 3 address
        extql   t4, a1, t4              # row 2: low part
        or      t0, t1, t0              # row 0 assembled

        extqh   t5, a1, t5              # row 2: high part
        or      t2, t3, t2              # row 1 assembled
        extql   t6, a1, t6              # row 3: low part
        or      t4, t5, t4              # row 2 assembled

        extqh   t7, a1, t7              # row 3: high part
        or      t6, t7, t6              # row 3 assembled
        stq     t0, 0(a0)               # store row 0
        stq     t2, 0(t8)               # store row 1

        stq     t4, 0(t9)               # store row 2
        subq    a3, 4, a3               # four rows done
        stq     t6, 0(ta)               # store row 3
        addq    ta, a2, a0              # block now points at the next group of rows

        bne     a3, $put_unaligned      # repeat until the row counter hits zero
        ret

        /*
         * Aligned source: four plain quadword loads followed by four
         * quadword stores per pass.
         */
        .align 4
$put_aligned:
        ldq     t0, 0(a1)               # row 0
        addq    a1, a2, a1
        ldq     t1, 0(a1)               # row 1
        addq    a1, a2, a1

        ldq     t2, 0(a1)               # row 2
        addq    a1, a2, a1
        ldq     t3, 0(a1)               # row 3

        addq    a0, a2, t4              # t4 = address of destination row 1
        addq    a1, a2, a1              # pixels now 4 rows ahead
        addq    t4, a2, t5              # t5 = address of destination row 2
        subq    a3, 4, a3               # four rows done

        stq     t0, 0(a0)               # store row 0
        addq    t5, a2, t6              # t6 = address of destination row 3
        stq     t1, 0(t4)               # store row 1
        addq    t6, a2, a0              # block now points at the next group of rows

        stq     t2, 0(t5)               # store row 2
        stq     t3, 0(t6)               # store row 3

        bne     a3, $put_aligned        # repeat until the row counter hits zero
        ret
        .end put_pixels_axp_asm
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
510
diff
changeset
|
136 |
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
510
diff
changeset
|
/************************************************************************
 * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
 *                                 int line_size)
 *
 * Converts an 8x8 block of signed 16-bit coefficients to bytes, clamping
 * each value to 0..255, and stores the result as eight rows of 8 bytes.
 *
 * Register use:
 *   a0 = block      read 32 bytes (two rows of eight coefficients) per pass
 *   a1 = pixels     each row is written as two 32-bit stores (stl)
 *   a2 = line_size  stride between destination rows
 *   t8 = 0x00ff00ff00ff00ff, the per-lane upper clamp value
 *   t9 = rows left (starts at 8, drops by 2 per pass)
 *   ta = address of the second destination row of the current pass
 *
 * Scratch registers: t0-t3, t8, t9 and ta.  No stack frame is set up.
 */
        .align 6
        .globl put_pixels_clamped_mvi_asm
        .ent put_pixels_clamped_mvi_asm
put_pixels_clamped_mvi_asm:
        .frame sp, 0, ra
        .prologue 0

        lda     t8, -1                  # all ones
        lda     t9, 8                   # row counter
        zap     t8, 0xaa, t8            # clear odd bytes: 0x00ff00ff00ff00ff

        .align 4
$put_clamp_rows:
        ldq     t0, 0(a0)               # row A, coefficients 0-3
        ldq     t1, 8(a0)               # row A, coefficients 4-7
        ldq     t2, 16(a0)              # row B, coefficients 0-3
        ldq     t3, 24(a0)              # row B, coefficients 4-7

        maxsw4  t0, zero, t0            # lower clamp: max(x, 0) per lane
        subq    t9, 2, t9               # two rows handled per pass
        maxsw4  t1, zero, t1
        lda     a0, 32(a0)              # block += 16 coefficients

        maxsw4  t2, zero, t2
        addq    a1, a2, ta              # address of row B in pixels
        maxsw4  t3, zero, t3
        minsw4  t0, t8, t0              # upper clamp: min(x, 255) per lane

        minsw4  t1, t8, t1
        minsw4  t2, t8, t2
        minsw4  t3, t8, t3
        pkwb    t0, t0                  # pack four words into four bytes

        pkwb    t1, t1
        pkwb    t2, t2
        pkwb    t3, t3
        stl     t0, 0(a1)               # row A, bytes 0-3

        stl     t1, 4(a1)               # row A, bytes 4-7
        addq    ta, a2, a1              # pixels advances past row B
        stl     t2, 0(ta)               # row B, bytes 0-3
        stl     t3, 4(ta)               # row B, bytes 4-7

        bne     t9, $put_clamp_rows     # loop until all 8 rows are written
        ret
        .end put_pixels_clamped_mvi_asm
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
186 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
diff
changeset
|
/************************************************************************
 * void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
 *                                 int line_size)
 *
 * Adds an 8x8 block of signed 16-bit coefficients to the bytes already
 * in pixels, clamps each sum to 0..255 and writes it back in place.
 *
 * Each pass of the loop handles two rows, split into four "quarters" of
 * four pixels each (q0, q1 = first row; q2, q3 = second row).  The
 * quarters are interleaved; every instruction is tagged qN.M where M is
 * the step within quarter N:
 *   .0  unpkbw  expand the four pixel bytes to four 16-bit lanes
 *   .1  and     save the sign bit of each coefficient lane
 *   .2  bic     clear that sign bit in the coefficients
 *   .3  addq    add the pixels (one 64-bit add for all four lanes)
 *   .4  xor     fold the saved sign bits back in
 *   .5  maxsw4  clamp below at 0
 *   .6  minsw4  clamp above at 255
 *   .7  pkwb    pack the four lanes back into four bytes
 *   .8  stl     store the four bytes
 * Clearing bit 15 of every lane before the add keeps each lane sum
 * (at most 0x7fff + 0xff) from carrying into the neighbouring lane.
 *
 * Register use:
 *   a0 = block, a1 = pixels, a2 = line_size
 *   a3 (tg) = 0x8000800080008000, per-lane sign mask
 *   a4 (tf) = 0x00ff00ff00ff00ff, per-lane upper clamp
 *   a5 (te) = address of the second row of the current pass
 *   v0 (th) = rows left (starts at 8, drops by 2 per pass)
 *
 * Scratch registers: t0-t9, ta, tb, a3-a5 and v0.  No stack frame.
 */
        .align 6
        .globl add_pixels_clamped_mvi_asm
        .ent add_pixels_clamped_mvi_asm
add_pixels_clamped_mvi_asm:
        .frame sp, 0, ra
        .prologue 0

        lda     t1, -1                  # all ones
        lda     th, 8                   # row counter
        zap     t1, 0x33, tg            # 0xffff0000ffff0000
        nop

        srl     tg, 1, t0               # 0x7fff80007fff8000
        xor     tg, t0, tg              # 0x8000800080008000 (sign mask)
        zap     t1, 0xaa, tf            # 0x00ff00ff00ff00ff (upper clamp)

        .align 4
$add_clamp_rows:
        ldl     t1, 0(a1)               # q0 pixels (touch the cache line early)
        ldl     t4, 4(a1)               # q1 pixels
        addq    a1, a2, te              # address of the second row
        ldq     t0, 0(a0)               # q0 coefficients

        ldl     t7, 0(te)               # q2 pixels (touch the cache line early)
        ldl     ta, 4(te)               # q3 pixels
        ldq     t3, 8(a0)               # q1 coefficients
        ldq     t6, 16(a0)              # q2 coefficients

        ldq     t9, 24(a0)              # q3 coefficients
        unpkbw  t1, t1                  # q0.0
        and     t0, tg, t2              # q0.1
        unpkbw  t4, t4                  # q1.0

        bic     t0, tg, t0              # q0.2
        unpkbw  t7, t7                  # q2.0
        and     t3, tg, t5              # q1.1
        addq    t0, t1, t0              # q0.3

        xor     t0, t2, t0              # q0.4
        unpkbw  ta, ta                  # q3.0
        and     t6, tg, t8              # q2.1
        maxsw4  t0, zero, t0            # q0.5

        bic     t3, tg, t3              # q1.2
        bic     t6, tg, t6              # q2.2
        minsw4  t0, tf, t0              # q0.6
        addq    t3, t4, t3              # q1.3

        pkwb    t0, t0                  # q0.7
        xor     t3, t5, t3              # q1.4
        maxsw4  t3, zero, t3            # q1.5
        addq    t6, t7, t6              # q2.3

        xor     t6, t8, t6              # q2.4
        and     t9, tg, tb              # q3.1
        minsw4  t3, tf, t3              # q1.6
        bic     t9, tg, t9              # q3.2

        maxsw4  t6, zero, t6            # q2.5
        addq    t9, ta, t9              # q3.3
        stl     t0, 0(a1)               # q0.8
        minsw4  t6, tf, t6              # q2.6

        xor     t9, tb, t9              # q3.4
        maxsw4  t9, zero, t9            # q3.5
        lda     a0, 32(a0)              # block += 16 coefficients
        pkwb    t3, t3                  # q1.7

        minsw4  t9, tf, t9              # q3.6
        subq    th, 2, th               # two rows handled per pass
        pkwb    t6, t6                  # q2.7
        pkwb    t9, t9                  # q3.7

        stl     t3, 4(a1)               # q1.8
        addq    te, a2, a1              # pixels advances past the second row
        stl     t6, 0(te)               # q2.8
        stl     t9, 4(te)               # q3.8

        bne     th, $add_clamp_rows     # loop until all 8 rows are done
        ret
        .end add_pixels_clamped_mvi_asm