Mercurial > libavcodec.hg
annotate alpha/dsputil_alpha.c @ 905:2b93dc762f9a libavcodec
fixing illegal 3. esc bug (the mpeg4 std only requires encoders to use unescaped symbols but not esc1 or esc2 if they are shorter than esc3, and just because it's logical to use the shortest possible vlc doesn't mean encoders do that)
author | michaelni |
---|---|
date | Wed, 04 Dec 2002 11:47:24 +0000 |
parents | 3dbbdc2f8bd3 |
children | be3ffaaf5a6d |
rev | line source |
---|---|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
1 /* |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
2 * Alpha optimized DSP utils |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org> |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
4 * |
429 | 5 * This library is free software; you can redistribute it and/or |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
9 * |
429 | 10 * This library is distributed in the hope that it will be useful, |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 * Lesser General Public License for more details. | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
14 * |
429 | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
18 */ |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
19 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
20 #include "asm.h" |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
21 #include "../dsputil.h" |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
22 |
511
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
509
diff
changeset
|
23 void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, |
897 | 24 int line_size, int h); |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
25 void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, |
897 | 26 int line_size); |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
27 void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, |
897 | 28 int line_size); |
29 void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, | |
30 int line_size); | |
31 void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, | |
32 int line_size); | |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
33 |
586 | 34 void get_pixels_mvi(DCTELEM *restrict block, |
35 const uint8_t *restrict pixels, int line_size); | |
36 void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, | |
37 int stride); | |
38 int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); | |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
670
diff
changeset
|
39 int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size); |
586 | 40 int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); |
41 int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); | |
42 int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); | |
43 | |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
44 #if 0 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
45 /* These functions were the base for the optimized assembler routines, |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
46 and remain here for documentation purposes. */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
47 static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
48 int line_size) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
49 { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
50 int i = 8; |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
51 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ |
505
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
52 |
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
53 ASM_ACCEPT_MVI; |
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
54 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
55 do { |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
56 uint64_t shorts0, shorts1; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
57 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
58 shorts0 = ldq(block); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
59 shorts0 = maxsw4(shorts0, 0); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
60 shorts0 = minsw4(shorts0, clampmask); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
61 stl(pkwb(shorts0), pixels); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
62 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
63 shorts1 = ldq(block + 4); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
64 shorts1 = maxsw4(shorts1, 0); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
65 shorts1 = minsw4(shorts1, clampmask); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
66 stl(pkwb(shorts1), pixels + 4); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
67 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
68 pixels += line_size; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
69 block += 8; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
70 } while (--i); |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
71 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
72 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
73 void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
74 int line_size) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
75 { |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
76 int h = 8; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
77 /* Keep this function a leaf function by generating the constants |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
78 manually (mainly for the hack value ;-). */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
79 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
80 uint64_t signmask = zap(-1, 0x33); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
81 signmask ^= signmask >> 1; /* 0x8000800080008000 */ |
505
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
82 |
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
83 ASM_ACCEPT_MVI; |
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
84 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
85 do { |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
86 uint64_t shorts0, pix0, signs0; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
87 uint64_t shorts1, pix1, signs1; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
88 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
89 shorts0 = ldq(block); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
90 shorts1 = ldq(block + 4); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
91 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
92 pix0 = unpkbw(ldl(pixels)); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
93 /* Signed subword add (MMX paddw). */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
94 signs0 = shorts0 & signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
95 shorts0 &= ~signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
96 shorts0 += pix0; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
97 shorts0 ^= signs0; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
98 /* Clamp. */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
99 shorts0 = maxsw4(shorts0, 0); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
100 shorts0 = minsw4(shorts0, clampmask); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
101 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
102 /* Next 4. */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
103 pix1 = unpkbw(ldl(pixels + 4)); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
104 signs1 = shorts1 & signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
105 shorts1 &= ~signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
106 shorts1 += pix1; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
107 shorts1 ^= signs1; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
108 shorts1 = maxsw4(shorts1, 0); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
109 shorts1 = minsw4(shorts1, clampmask); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
110 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
111 stl(pkwb(shorts0), pixels); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
112 stl(pkwb(shorts1), pixels + 4); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
113 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
114 pixels += line_size; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
115 block += 8; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
116 } while (--h); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
117 } |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
118 #endif |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
119 |
518 | 120 static void clear_blocks_axp(DCTELEM *blocks) { |
121 uint64_t *p = (uint64_t *) blocks; | |
122 int n = sizeof(DCTELEM) * 6 * 64; | |
123 | |
124 do { | |
125 p[0] = 0; | |
126 p[1] = 0; | |
127 p[2] = 0; | |
128 p[3] = 0; | |
129 p[4] = 0; | |
130 p[5] = 0; | |
131 p[6] = 0; | |
132 p[7] = 0; | |
133 p += 8; | |
134 n -= 8 * 8; | |
135 } while (n); | |
136 } | |
137 | |
513 | 138 static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
139 { |
513 | 140 return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1); |
141 } | |
142 | |
143 static inline uint64_t avg2(uint64_t a, uint64_t b) | |
144 { | |
145 return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
146 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
147 |
546 | 148 #if 0 |
149 /* The XY2 routines basically utilize this scheme, but reuse parts in | |
150 each iteration. */ | |
513 | 151 static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
152 { |
513 | 153 uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) |
897 | 154 + ((l2 & ~BYTE_VEC(0x03)) >> 2) |
155 + ((l3 & ~BYTE_VEC(0x03)) >> 2) | |
156 + ((l4 & ~BYTE_VEC(0x03)) >> 2); | |
513 | 157 uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) |
897 | 158 + (l2 & BYTE_VEC(0x03)) |
159 + (l3 & BYTE_VEC(0x03)) | |
160 + (l4 & BYTE_VEC(0x03)) | |
161 + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); | |
513 | 162 return r1 + r2; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
163 } |
546 | 164 #endif |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
165 |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
166 #define OP(LOAD, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
167 do { \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
168 STORE(LOAD(pixels), block); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
169 pixels += line_size; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
170 block += line_size; \ |
513 | 171 } while (--h) |
172 | |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
173 #define OP_X2(LOAD, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
174 do { \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
175 uint64_t pix1, pix2; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
176 \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
177 pix1 = LOAD(pixels); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
178 pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
179 STORE(AVG2(pix1, pix2), block); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
180 pixels += line_size; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
181 block += line_size; \ |
513 | 182 } while (--h) |
183 | |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
184 #define OP_Y2(LOAD, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
185 do { \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
186 uint64_t pix = LOAD(pixels); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
187 do { \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
188 uint64_t next_pix; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
189 \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
190 pixels += line_size; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
191 next_pix = LOAD(pixels); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
192 STORE(AVG2(pix, next_pix), block); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
193 block += line_size; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
194 pix = next_pix; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
195 } while (--h); \ |
513 | 196 } while (0) |
197 | |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
198 #define OP_XY2(LOAD, STORE) \ |
546 | 199 do { \ |
200 uint64_t pix1 = LOAD(pixels); \ | |
201 uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |
202 uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \ | |
203 + (pix2 & BYTE_VEC(0x03)); \ | |
204 uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \ | |
205 + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \ | |
206 \ | |
207 do { \ | |
208 uint64_t npix1, npix2; \ | |
209 uint64_t npix_l, npix_h; \ | |
210 uint64_t avg; \ | |
211 \ | |
212 pixels += line_size; \ | |
213 npix1 = LOAD(pixels); \ | |
214 npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |
215 npix_l = (npix1 & BYTE_VEC(0x03)) \ | |
216 + (npix2 & BYTE_VEC(0x03)); \ | |
217 npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \ | |
218 + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \ | |
219 avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \ | |
220 + pix_h + npix_h; \ | |
221 STORE(avg, block); \ | |
222 \ | |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
223 block += line_size; \ |
546 | 224 pix_l = npix_l; \ |
225 pix_h = npix_h; \ | |
226 } while (--h); \ | |
513 | 227 } while (0) |
228 | |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
229 #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
230 static void OPNAME ## _pixels ## SUFF ## _axp \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
231 (uint8_t *restrict block, const uint8_t *restrict pixels, \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
232 int line_size, int h) \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
233 { \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
234 if ((size_t) pixels & 0x7) { \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
235 OPKIND(uldq, STORE); \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
236 } else { \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
237 OPKIND(ldq, STORE); \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
238 } \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
239 } \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
240 \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
241 static void OPNAME ## _pixels16 ## SUFF ## _axp \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
242 (uint8_t *restrict block, const uint8_t *restrict pixels, \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
243 int line_size, int h) \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
244 { \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
245 OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
246 OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \ |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
247 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
248 |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
249 #define PIXOP(OPNAME, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
250 MAKE_OP(OPNAME, , OP, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
251 MAKE_OP(OPNAME, _x2, OP_X2, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
252 MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
253 MAKE_OP(OPNAME, _xy2, OP_XY2, STORE) |
513 | 254 |
255 /* Rounding primitives. */ | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
256 #define AVG2 avg2 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
257 #define AVG4 avg4 |
546 | 258 #define AVG4_ROUNDER BYTE_VEC(0x02) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
259 #define STORE(l, b) stq(l, b) |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
260 PIXOP(put, STORE); |
513 | 261 |
262 #undef STORE | |
263 #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
264 PIXOP(avg, STORE); |
513 | 265 |
266 /* Not rounding primitives. */ | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
267 #undef AVG2 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
268 #undef AVG4 |
546 | 269 #undef AVG4_ROUNDER |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
270 #undef STORE |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
271 #define AVG2 avg2_no_rnd |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
272 #define AVG4 avg4_no_rnd |
546 | 273 #define AVG4_ROUNDER BYTE_VEC(0x01) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
274 #define STORE(l, b) stq(l, b) |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
275 PIXOP(put_no_rnd, STORE); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
276 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
277 #undef STORE |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
278 #define STORE(l, b) stq(AVG2(l, ldq(b)), b); |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
279 PIXOP(avg_no_rnd, STORE); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
280 |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
281 void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
282 int line_size, int h) |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
283 { |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
284 put_pixels_axp_asm(block, pixels, line_size, h); |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
285 put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
286 } |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
287 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
288 void dsputil_init_alpha(DSPContext* c, unsigned mask) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
289 { |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
290 c->put_pixels_tab[0][0] = put_pixels16_axp_asm; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
291 c->put_pixels_tab[0][1] = put_pixels16_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
292 c->put_pixels_tab[0][2] = put_pixels16_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
293 c->put_pixels_tab[0][3] = put_pixels16_xy2_axp; |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
294 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
295 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
296 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
297 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
298 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp; |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
299 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
300 c->avg_pixels_tab[0][0] = avg_pixels16_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
301 c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
302 c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
303 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp; |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
304 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
305 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
306 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
307 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
308 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_axp; |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
309 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
310 c->put_pixels_tab[1][0] = put_pixels_axp_asm; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
311 c->put_pixels_tab[1][1] = put_pixels_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
312 c->put_pixels_tab[1][2] = put_pixels_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
313 c->put_pixels_tab[1][3] = put_pixels_xy2_axp; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
314 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
315 c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
316 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
317 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
318 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
319 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
320 c->avg_pixels_tab[1][0] = avg_pixels_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
321 c->avg_pixels_tab[1][1] = avg_pixels_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
322 c->avg_pixels_tab[1][2] = avg_pixels_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
323 c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp; |
513 | 324 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
325 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
326 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
327 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
328 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp; |
513 | 329 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
330 c->clear_blocks = clear_blocks_axp; |
518 | 331 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
332 /* amask clears all bits that correspond to present features. */ |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
333 if (amask(AMASK_MVI) == 0) { |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
334 c->put_pixels_clamped = put_pixels_clamped_mvi_asm; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
335 c->add_pixels_clamped = add_pixels_clamped_mvi_asm; |
586 | 336 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
337 c->get_pixels = get_pixels_mvi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
338 c->diff_pixels = diff_pixels_mvi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
339 c->pix_abs8x8 = pix_abs8x8_mvi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
340 c->pix_abs16x16 = pix_abs16x16_mvi_asm; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
341 c->pix_abs16x16_x2 = pix_abs16x16_x2_mvi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
342 c->pix_abs16x16_y2 = pix_abs16x16_y2_mvi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
343 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
344 } |
897 | 345 |
346 put_pixels_clamped_axp_p = c->put_pixels_clamped; | |
347 add_pixels_clamped_axp_p = c->add_pixels_clamped; | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
348 } |