Mercurial > libavcodec.hg
annotate alpha/dsputil_alpha.c @ 513:fb670ca9f8eb libavcodec
Use updated motion compensation routines.
author | mellum |
---|---|
date | Wed, 03 Jul 2002 01:09:44 +0000 |
parents | fa4425cf6b31 |
children | 70113647b50d |
rev | line source |
---|---|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
1 /* |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
2 * Alpha optimized DSP utils |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org> |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
4 * |
429 | 5 * This library is free software; you can redistribute it and/or |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
9 * |
429 | 10 * This library is distributed in the hope that it will be useful, |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 * Lesser General Public License for more details. | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
14 * |
429 | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
18 */ |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
19 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
20 #include "asm.h" |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
21 #include "../dsputil.h" |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
22 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
23 void simple_idct_axp(DCTELEM *block); |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
24 |
511
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
509
diff
changeset
|
25 void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, |
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
509
diff
changeset
|
26 int line_size, int h); |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
27 void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
28 int line_size); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
29 void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
30 int line_size); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
31 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
32 #if 0 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
33 /* These functions were the base for the optimized assembler routines, |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
34 and remain here for documentation purposes. */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
35 static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
36 int line_size) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
37 { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
38 int i = 8; |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
39 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ |
505
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
40 |
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
41 ASM_ACCEPT_MVI; |
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
42 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
43 do { |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
44 uint64_t shorts0, shorts1; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
45 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
46 shorts0 = ldq(block); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
47 shorts0 = maxsw4(shorts0, 0); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
48 shorts0 = minsw4(shorts0, clampmask); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
49 stl(pkwb(shorts0), pixels); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
50 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
51 shorts1 = ldq(block + 4); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
52 shorts1 = maxsw4(shorts1, 0); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
53 shorts1 = minsw4(shorts1, clampmask); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
54 stl(pkwb(shorts1), pixels + 4); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
55 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
56 pixels += line_size; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
57 block += 8; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
58 } while (--i); |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
59 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
60 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
61 void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
62 int line_size) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
63 { |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
64 int h = 8; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
65 /* Keep this function a leaf function by generating the constants |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
66 manually (mainly for the hack value ;-). */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
67 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
68 uint64_t signmask = zap(-1, 0x33); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
69 signmask ^= signmask >> 1; /* 0x8000800080008000 */ |
505
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
70 |
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
71 ASM_ACCEPT_MVI; |
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
72 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
73 do { |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
74 uint64_t shorts0, pix0, signs0; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
75 uint64_t shorts1, pix1, signs1; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
76 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
77 shorts0 = ldq(block); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
78 shorts1 = ldq(block + 4); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
79 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
80 pix0 = unpkbw(ldl(pixels)); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
81 /* Signed subword add (MMX paddw). */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
82 signs0 = shorts0 & signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
83 shorts0 &= ~signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
84 shorts0 += pix0; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
85 shorts0 ^= signs0; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
86 /* Clamp. */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
87 shorts0 = maxsw4(shorts0, 0); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
88 shorts0 = minsw4(shorts0, clampmask); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
89 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
90 /* Next 4. */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
91 pix1 = unpkbw(ldl(pixels + 4)); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
92 signs1 = shorts1 & signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
93 shorts1 &= ~signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
94 shorts1 += pix1; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
95 shorts1 ^= signs1; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
96 shorts1 = maxsw4(shorts1, 0); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
97 shorts1 = minsw4(shorts1, clampmask); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
98 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
99 stl(pkwb(shorts0), pixels); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
100 stl(pkwb(shorts1), pixels + 4); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
101 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
102 pixels += line_size; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
103 block += 8; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
104 } while (--h); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
105 } |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
106 #endif |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
107 |
/* Average of a and b taken byte by byte and rounded down, using the
   identity (a + b) / 2 == (a & b) + ((a ^ b) >> 1).  The xor is masked
   with BYTE_VEC(0xfe) before shifting so that the low bit of a byte
   cannot move into the byte below it (BYTE_VEC is defined outside this
   file; this assumes it replicates its argument into every byte).  */
static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    const uint64_t half_diff = ((a ^ b) & BYTE_VEC(0xfe)) >> 1;

    return (a & b) + half_diff;
}
112 | |
/* Average of a and b taken byte by byte and rounded up, using the
   identity (a + b + 1) / 2 == (a | b) - ((a ^ b) >> 1).  The xor is
   masked with BYTE_VEC(0xfe) before shifting so that the low bit of a
   byte cannot move into the byte below it (BYTE_VEC is defined outside
   this file; this assumes it replicates its argument into every
   byte).  */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    const uint64_t half_diff = ((a ^ b) & BYTE_VEC(0xfe)) >> 1;

    return (a | b) - half_diff;
}
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
117 |
/* Byte-wise (l1 + l2 + l3 + l4 + 2) >> 2.  The upper six bits and the
   lower two bits of every byte are summed separately so that no
   partial sum can overflow its byte (this assumes BYTE_VEC, defined
   outside this file, replicates its argument into every byte).  */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    const uint64_t low2 = BYTE_VEC(0x03);
    uint64_t high_sum, low_sum;

    /* Quarter of each input with its two low bits dropped.  */
    high_sum = ((l1 & ~low2) >> 2) + ((l2 & ~low2) >> 2)
             + ((l3 & ~low2) >> 2) + ((l4 & ~low2) >> 2);

    /* Sum of the dropped low bits plus the rounding constant 2.  */
    low_sum  = (l1 & low2) + (l2 & low2) + (l3 & low2) + (l4 & low2)
             + BYTE_VEC(0x02);

    return high_sum + ((low_sum >> 2) & low2);
}
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
131 |
/* Byte-wise (l1 + l2 + l3 + l4 + 1) >> 2: the same scheme as the
   rounding four-way average, but with a rounding constant of 1 instead
   of 2.  The upper six bits and the lower two bits of every byte are
   summed separately so that no partial sum can overflow its byte (this
   assumes BYTE_VEC, defined outside this file, replicates its argument
   into every byte).  */
static inline uint64_t avg4_no_rnd(uint64_t l1, uint64_t l2,
                                   uint64_t l3, uint64_t l4)
{
    const uint64_t low2 = BYTE_VEC(0x03);
    uint64_t high_sum, low_sum;

    /* Quarter of each input with its two low bits dropped.  */
    high_sum = ((l1 & ~low2) >> 2) + ((l2 & ~low2) >> 2)
             + ((l3 & ~low2) >> 2) + ((l4 & ~low2) >> 2);

    /* Sum of the dropped low bits plus the rounding constant 1.  */
    low_sum  = (l1 & low2) + (l2 & low2) + (l3 & low2) + (l4 & low2)
             + BYTE_VEC(0x01);

    return high_sum + ((low_sum >> 2) & low2);
}
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
146 |
/* Loop body for the plain variant: for each of h rows, fetch one
   quadword from pixels with LOAD and hand it to STORE together with
   block.  Expects block, pixels, line_size and h to be in scope at the
   point of expansion; h must be at least 1 because the count is tested
   after the first row.  */
#define OP(LOAD, STORE, INCR)                   \
    do {                                        \
        STORE(LOAD(pixels), block);             \
        pixels += line_size;                    \
        block  += INCR;                         \
    } while (--h)
153 | |
/* Loop body for the x2 variant: every row is combined, via AVG2, with
   the same row moved on by one byte.  The moved copy is formed by
   shifting the loaded quadword right by eight bits and placing
   pixels[8] in the top byte.  Expects block, pixels, line_size and h
   to be in scope; h must be at least 1.  */
#define OP_X2(LOAD, STORE, INCR)                                    \
    do {                                                            \
        uint64_t here, next;                                        \
                                                                    \
        here = LOAD(pixels);                                        \
        next = (here >> 8) | ((uint64_t) pixels[8] << 56);          \
        STORE(AVG2(here, next), block);                             \
        pixels += line_size;                                        \
        block  += INCR;                                             \
    } while (--h)
164 | |
/* Loop body for the y2 variant: every output row is the AVG2 of a
   source row and the row line_size bytes after it.  The second row of
   each pair is kept and reused as the first row of the next pair, so
   h + 1 source rows are loaded in total.  Expects block, pixels,
   line_size and h to be in scope; h must be at least 1.  */
#define OP_Y2(LOAD, STORE, INCR)                    \
    do {                                            \
        uint64_t upper = LOAD(pixels);              \
        do {                                        \
            uint64_t lower;                         \
                                                    \
            pixels += line_size;                    \
            lower = LOAD(pixels);                   \
            STORE(AVG2(upper, lower), block);       \
            block += INCR;                          \
            upper = lower;                          \
        } while (--h);                              \
    } while (0)
178 | |
/* Loop body for the xy2 variant: every output row is the AVG4 of a
   source row, that row moved on by one byte, and the same two
   quadwords taken from the row line_size bytes later.  The later pair
   is carried over to the next iteration, so h + 1 source rows are
   loaded in total.  Expects block, pixels, line_size and h to be in
   scope; h must be at least 1.  */
#define OP_XY2(LOAD, STORE, INCR)                                           \
    do {                                                                    \
        uint64_t top       = LOAD(pixels);                                  \
        uint64_t top_next  = (top >> 8) | ((uint64_t) pixels[8] << 56);     \
                                                                            \
        do {                                                                \
            uint64_t bot, bot_next;                                         \
                                                                            \
            pixels += line_size;                                            \
            bot      = LOAD(pixels);                                        \
            bot_next = (bot >> 8) | ((uint64_t) pixels[8] << 56);           \
                                                                            \
            STORE(AVG4(top, top_next, bot, bot_next), block);               \
                                                                            \
            block   += INCR;                                                \
            top      = bot;                                                 \
            top_next = bot_next;                                            \
        } while (--h);                                                      \
    } while (0)
198 | |
/* Defines the static function OPNAME_pixelsSUFF_axp(block, pixels,
   line_size, h).  Its body is the loop OPKIND, instantiated with ldq
   when pixels is a multiple of 8 and with uldq otherwise.  */
#define MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR)               \
static void OPNAME ## _pixels ## SUFF ## _axp(BTYPE *block,             \
                                              const uint8_t *pixels,    \
                                              int line_size, int h)     \
{                                                                       \
    if (((size_t) pixels & 0x7) == 0) {                                 \
        OPKIND(ldq, STORE, INCR);                                       \
    } else {                                                            \
        OPKIND(uldq, STORE, INCR);                                      \
    }                                                                   \
}
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
210 |
513 | 211 #define PIXOP(BTYPE, OPNAME, STORE, INCR) \ |
212 MAKE_OP(BTYPE, OPNAME, , OP, STORE, INCR); \ | |
213 MAKE_OP(BTYPE, OPNAME, _x2, OP_X2, STORE, INCR); \ | |
214 MAKE_OP(BTYPE, OPNAME, _y2, OP_Y2, STORE, INCR); \ | |
215 MAKE_OP(BTYPE, OPNAME, _xy2, OP_XY2, STORE, INCR); | |
216 | |
217 /* Rounding primitives. */ | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
218 #define AVG2 avg2 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
219 #define AVG4 avg4 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
220 #define STORE(l, b) stq(l, b) |
513 | 221 PIXOP(uint8_t, put, STORE, line_size); |
222 | |
223 #undef STORE | |
224 #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | |
225 PIXOP(uint8_t, avg, STORE, line_size); | |
226 | |
227 /* Not rounding primitives. */ | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
228 #undef AVG2 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
229 #undef AVG4 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
230 #undef STORE |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
231 #define AVG2 avg2_no_rnd |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
232 #define AVG4 avg4_no_rnd |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
233 #define STORE(l, b) stq(l, b) |
513 | 234 PIXOP(uint8_t, put_no_rnd, STORE, line_size); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
235 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
236 #undef STORE |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
237 #define STORE(l, b) stq(AVG2(l, ldq(b)), b); |
513 | 238 PIXOP(uint8_t, avg_no_rnd, STORE, line_size); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
239 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
240 void dsputil_init_alpha(void) |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
241 { |
511
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
509
diff
changeset
|
242 put_pixels_tab[0] = put_pixels_axp_asm; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
243 put_pixels_tab[1] = put_pixels_x2_axp; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
244 put_pixels_tab[2] = put_pixels_y2_axp; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
245 put_pixels_tab[3] = put_pixels_xy2_axp; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
246 |
511
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
509
diff
changeset
|
247 put_no_rnd_pixels_tab[0] = put_pixels_axp_asm; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
248 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_axp; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
249 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
250 put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
251 |
513 | 252 avg_pixels_tab[0] = avg_pixels_axp; |
253 avg_pixels_tab[1] = avg_pixels_x2_axp; | |
254 avg_pixels_tab[2] = avg_pixels_y2_axp; | |
255 avg_pixels_tab[3] = avg_pixels_xy2_axp; | |
256 | |
257 avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_axp; | |
258 avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_axp; | |
259 avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_axp; | |
260 avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_axp; | |
261 | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
262 /* amask clears all bits that correspond to present features. */ |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
263 if (amask(AMASK_MVI) == 0) { |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
264 put_pixels_clamped = put_pixels_clamped_mvi_asm; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
265 add_pixels_clamped = add_pixels_clamped_mvi_asm; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
266 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
267 } |