annotate alpha/dsputil_alpha.c @ 905:2b93dc762f9a libavcodec

fixing illegal 3. esc bug (the mpeg4 std only requires encoders to use unescaped symbols but not esc1 or esc2 if they are shorter than esc3, and just because it's logical to use the shortest possible vlc doesn't mean encoders do that)
author michaelni
date Wed, 04 Dec 2002 11:47:24 +0000
parents 3dbbdc2f8bd3
children be3ffaaf5a6d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
1 /*
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
2 * Alpha optimized DSP utils
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
4 *
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
5 * This library is free software; you can redistribute it and/or
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
6 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
7 * License as published by the Free Software Foundation; either
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
8 * version 2 of the License, or (at your option) any later version.
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
9 *
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
10 * This library is distributed in the hope that it will be useful,
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
13 * Lesser General Public License for more details.
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
14 *
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
15 * You should have received a copy of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
16 * License along with this library; if not, write to the Free Software
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
18 */
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
19
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
20 #include "asm.h"
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
21 #include "../dsputil.h"
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
22
511
fa4425cf6b31 Assembly version of put_pixels. This is currently the function that
mellum
parents: 509
diff changeset
23 void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
24 int line_size, int h);
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
25 void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
26 int line_size);
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
27 void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
28 int line_size);
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
29 void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
30 int line_size);
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
31 void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
32 int line_size);
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
33
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
34 void get_pixels_mvi(DCTELEM *restrict block,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
35 const uint8_t *restrict pixels, int line_size);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
36 void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
37 int stride);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
38 int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents: 670
diff changeset
39 int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size);
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
40 int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
41 int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
42 int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
43
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
44 #if 0
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
45 /* These functions were the base for the optimized assembler routines,
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
46 and remain here for documentation purposes. */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
47 static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
48 int line_size)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
49 {
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
50 int i = 8;
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
51 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
505
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
52
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
53 ASM_ACCEPT_MVI;
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
54
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
55 do {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
56 uint64_t shorts0, shorts1;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
57
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
58 shorts0 = ldq(block);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
59 shorts0 = maxsw4(shorts0, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
60 shorts0 = minsw4(shorts0, clampmask);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
61 stl(pkwb(shorts0), pixels);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
62
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
63 shorts1 = ldq(block + 4);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
64 shorts1 = maxsw4(shorts1, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
65 shorts1 = minsw4(shorts1, clampmask);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
66 stl(pkwb(shorts1), pixels + 4);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
67
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
68 pixels += line_size;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
69 block += 8;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
70 } while (--i);
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
71 }
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
72
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
73 void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
74 int line_size)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
75 {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
76 int h = 8;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
77 /* Keep this function a leaf function by generating the constants
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
78 manually (mainly for the hack value ;-). */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
79 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
80 uint64_t signmask = zap(-1, 0x33);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
81 signmask ^= signmask >> 1; /* 0x8000800080008000 */
505
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
82
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
83 ASM_ACCEPT_MVI;
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
84
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
85 do {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
86 uint64_t shorts0, pix0, signs0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
87 uint64_t shorts1, pix1, signs1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
88
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
89 shorts0 = ldq(block);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
90 shorts1 = ldq(block + 4);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
91
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
92 pix0 = unpkbw(ldl(pixels));
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
93 /* Signed subword add (MMX paddw). */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
94 signs0 = shorts0 & signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
95 shorts0 &= ~signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
96 shorts0 += pix0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
97 shorts0 ^= signs0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
98 /* Clamp. */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
99 shorts0 = maxsw4(shorts0, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
100 shorts0 = minsw4(shorts0, clampmask);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
101
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
102 /* Next 4. */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
103 pix1 = unpkbw(ldl(pixels + 4));
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
104 signs1 = shorts1 & signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
105 shorts1 &= ~signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
106 shorts1 += pix1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
107 shorts1 ^= signs1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
108 shorts1 = maxsw4(shorts1, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
109 shorts1 = minsw4(shorts1, clampmask);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
110
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
111 stl(pkwb(shorts0), pixels);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
112 stl(pkwb(shorts1), pixels + 4);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
113
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
114 pixels += line_size;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
115 block += 8;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
116 } while (--h);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
117 }
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
118 #endif
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
119
518
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
/* Zero sizeof(DCTELEM) * 6 * 64 bytes starting at blocks, i.e. 6 * 64
   DCTELEM entries.  The loop is unrolled to eight 64-bit stores (64 bytes)
   per iteration; the byte count is a multiple of 64 for any DCTELEM size,
   so n reaches exactly 0 and the loop terminates.
   NOTE(review): blocks is written through a uint64_t pointer, so it
   presumably has to be 8-byte aligned -- confirm against the callers. */
120 static void clear_blocks_axp(DCTELEM *blocks) {
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
121 uint64_t *p = (uint64_t *) blocks;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
122 int n = sizeof(DCTELEM) * 6 * 64;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
123
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
124 do {
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
125 p[0] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
126 p[1] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
127 p[2] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
128 p[3] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
129 p[4] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
130 p[5] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
131 p[6] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
132 p[7] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
133 p += 8;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
134 n -= 8 * 8;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
135 } while (n);
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
136 }
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
137
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
/* Average of a and b taken independently in each byte lane, rounding down.
   Uses a + b == 2 * (a & b) + (a ^ b), so the halved sum is
   (a & b) + ((a ^ b) >> 1).  The mask clears the low bit of every byte
   before the shift so it cannot leak into the neighbouring byte.
   Assumes BYTE_VEC(x) (defined outside this view) replicates x into every
   byte of a 64-bit word -- TODO confirm. */
138 static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
139 {
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
140 return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
141 }
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
142
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
/* Average of a and b taken independently in each byte lane, rounding up.
   Uses a + b == 2 * (a | b) - (a ^ b), so the rounded-up half is
   (a | b) - ((a ^ b) >> 1).  The mask clears the low bit of every byte
   before the shift so it cannot leak into the neighbouring byte.
   Assumes BYTE_VEC(x) (defined outside this view) replicates x into every
   byte of a 64-bit word -- TODO confirm. */
143 static inline uint64_t avg2(uint64_t a, uint64_t b)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
144 {
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
145 return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
146 }
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
147
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
148 #if 0
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
149 /* The XY2 routines basically utilize this scheme, but reuse parts in
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
150 each iteration. */
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
151 static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
152 {
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
153 uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
154 + ((l2 & ~BYTE_VEC(0x03)) >> 2)
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
155 + ((l3 & ~BYTE_VEC(0x03)) >> 2)
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
156 + ((l4 & ~BYTE_VEC(0x03)) >> 2);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
157 uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
158 + (l2 & BYTE_VEC(0x03))
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
159 + (l3 & BYTE_VEC(0x03))
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
160 + (l4 & BYTE_VEC(0x03))
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
161 + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
162 return r1 + r2;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
163 }
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
164 #endif
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
165
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
/* Row kernel without interpolation: for each of h rows, LOAD one word from
   pixels and hand it to STORE together with block, then advance both
   pointers by line_size.  Expands in a scope that provides pixels, block,
   line_size and h; h is decremented in place and must be non-zero on entry
   because the loop tests --h only after the first row. */
166 #define OP(LOAD, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
167 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
168 STORE(LOAD(pixels), block); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
169 pixels += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
170 block += line_size; \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
171 } while (--h)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
172
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
/* Horizontal two-tap kernel: for each of h rows, pix1 is the word LOADed
   at pixels and pix2 is that word shifted right by one byte with pixels[8]
   inserted as the top byte; AVG2(pix1, pix2) is passed to STORE.
   Note that pixels[8] is read, i.e. one byte beyond the 8 bytes covered by
   pix1.  AVG2 is not defined in this view; it is presumably bound to avg2
   or avg2_no_rnd before expansion -- TODO confirm.
   Expands in a scope that provides pixels, block, line_size and h; h must
   be non-zero on entry. */
173 #define OP_X2(LOAD, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
174 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
175 uint64_t pix1, pix2; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
176 \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
177 pix1 = LOAD(pixels); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
178 pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
179 STORE(AVG2(pix1, pix2), block); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
180 pixels += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
181 block += line_size; \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
182 } while (--h)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
183
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
/* Vertical two-tap kernel: each output row is AVG2 of the current source
   row and the row line_size bytes below it.  The previously loaded row is
   carried over in pix, so every source row is LOADed once; h + 1 source
   rows are read in total to produce h output rows.
   AVG2 is not defined in this view -- presumably bound to avg2 or
   avg2_no_rnd before expansion, TODO confirm.
   Expands in a scope that provides pixels, block, line_size and h; h must
   be non-zero on entry. */
184 #define OP_Y2(LOAD, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
185 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
186 uint64_t pix = LOAD(pixels); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
187 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
188 uint64_t next_pix; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
189 \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
190 pixels += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
191 next_pix = LOAD(pixels); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
192 STORE(AVG2(pix, next_pix), block); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
193 block += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
194 pix = next_pix; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
195 } while (--h); \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
196 } while (0)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
197
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
/* Four-tap kernel combining a row, its one-byte-shifted copy, and the same
   pair from the row below.  To keep the per-byte sums from carrying into
   neighbouring bytes, each byte is split into its low two bits (*_l) and
   its upper six bits already shifted right by two (*_h).  The low parts of
   both rows plus AVG4_ROUNDER are added, shifted right by two and masked,
   then added to the two high parts.  The row's partial sums are carried
   over as pix_l / pix_h so each source row is LOADed and split only once;
   h + 1 source rows are read in total, and pixels[8] is read on each row
   (one byte beyond the loaded word).
   AVG4_ROUNDER and BYTE_VEC are not defined in this view; BYTE_VEC(x) is
   assumed to replicate x into every byte of a 64-bit word -- TODO confirm.
   Expands in a scope that provides pixels, block, line_size and h; h must
   be non-zero on entry. */
198 #define OP_XY2(LOAD, STORE) \
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
199 do { \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
200 uint64_t pix1 = LOAD(pixels); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
201 uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
202 uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
203 + (pix2 & BYTE_VEC(0x03)); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
204 uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
205 + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
206 \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
207 do { \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
208 uint64_t npix1, npix2; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
209 uint64_t npix_l, npix_h; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
210 uint64_t avg; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
211 \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
212 pixels += line_size; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
213 npix1 = LOAD(pixels); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
214 npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
215 npix_l = (npix1 & BYTE_VEC(0x03)) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
216 + (npix2 & BYTE_VEC(0x03)); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
217 npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
218 + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
219 avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
220 + pix_h + npix_h; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
221 STORE(avg, block); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
222 \
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
223 block += line_size; \
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
224 pix_l = npix_l; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
225 pix_h = npix_h; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
226 } while (--h); \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
227 } while (0)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
228
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
/* Generate two static functions from one row kernel:
     OPNAME_pixelsSUFF_axp    runs OPKIND over one 8-byte-wide column,
                              choosing uldq as the loader when pixels is
                              not 8-byte aligned and ldq otherwise;
     OPNAME_pixels16SUFF_axp  covers 16 bytes per row by calling the
                              function above twice, at byte offsets 0
                              and 8 of both block and pixels.
   OPKIND is one of the OP* kernels and STORE is forwarded to it unchanged.
   h is passed by value to each call, so the kernel's in-place decrement in
   the first call does not affect the second.
   uldq / ldq are not defined in this view; presumably the unaligned and
   aligned 64-bit loads -- TODO confirm. */
229 #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
230 static void OPNAME ## _pixels ## SUFF ## _axp \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
231 (uint8_t *restrict block, const uint8_t *restrict pixels, \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
232 int line_size, int h) \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
233 { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
234 if ((size_t) pixels & 0x7) { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
235 OPKIND(uldq, STORE); \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
236 } else { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
237 OPKIND(ldq, STORE); \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
238 } \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
239 } \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
240 \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
241 static void OPNAME ## _pixels16 ## SUFF ## _axp \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
242 (uint8_t *restrict block, const uint8_t *restrict pixels, \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
243 int line_size, int h) \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
244 { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
245 OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
246 OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
247 }
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
248
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
249 #define PIXOP(OPNAME, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
250 MAKE_OP(OPNAME, , OP, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
251 MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
252 MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
253 MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
254
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
255 /* Rounding primitives. */
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
256 #define AVG2 avg2
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
257 #define AVG4 avg4
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
258 #define AVG4_ROUNDER BYTE_VEC(0x02)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
259 #define STORE(l, b) stq(l, b)
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
260 PIXOP(put, STORE);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
261
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
262 #undef STORE
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
263 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
264 PIXOP(avg, STORE);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
265
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
266 /* Not rounding primitives. */
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
267 #undef AVG2
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
268 #undef AVG4
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
269 #undef AVG4_ROUNDER
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
270 #undef STORE
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
271 #define AVG2 avg2_no_rnd
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
272 #define AVG4 avg4_no_rnd
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
273 #define AVG4_ROUNDER BYTE_VEC(0x01)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
274 #define STORE(l, b) stq(l, b)
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
275 PIXOP(put_no_rnd, STORE);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
276
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
277 #undef STORE
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
278 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
279 PIXOP(avg_no_rnd, STORE);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
280
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
281 void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
282 int line_size, int h)
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
283 {
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
284 put_pixels_axp_asm(block, pixels, line_size, h);
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
285 put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
286 }
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
287
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
288 void dsputil_init_alpha(DSPContext* c, unsigned mask)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
289 {
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
290 c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
291 c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
292 c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
293 c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
294
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
295 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
296 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
297 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
298 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
299
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
300 c->avg_pixels_tab[0][0] = avg_pixels16_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
301 c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
302 c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
303 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
304
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
305 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
306 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
307 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
308 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
309
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
310 c->put_pixels_tab[1][0] = put_pixels_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
311 c->put_pixels_tab[1][1] = put_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
312 c->put_pixels_tab[1][2] = put_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
313 c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
314
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
315 c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
316 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
317 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
318 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
319
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
320 c->avg_pixels_tab[1][0] = avg_pixels_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
321 c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
322 c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
323 c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
324
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
325 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
326 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
327 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
328 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp;
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
329
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
330 c->clear_blocks = clear_blocks_axp;
518
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
331
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
332 /* amask clears all bits that correspond to present features. */
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
333 if (amask(AMASK_MVI) == 0) {
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
334 c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
335 c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
336
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
337 c->get_pixels = get_pixels_mvi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
338 c->diff_pixels = diff_pixels_mvi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
339 c->pix_abs8x8 = pix_abs8x8_mvi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
340 c->pix_abs16x16 = pix_abs16x16_mvi_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
341 c->pix_abs16x16_x2 = pix_abs16x16_x2_mvi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
342 c->pix_abs16x16_y2 = pix_abs16x16_y2_mvi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
343 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
344 }
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
345
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
346 put_pixels_clamped_axp_p = c->put_pixels_clamped;
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
347 add_pixels_clamped_axp_p = c->add_pixels_clamped;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
348 }