annotate alpha/dsputil_alpha.c @ 1708:dea5b2946999 libavcodec

interlaced motion estimation; interlaced mpeg2 encoding; P & B frames; rate distorted interlaced mb decision; alternate scantable support; 4mv encoding fixes (that's also why the regression tests change); passing height to most dsp functions; interlaced mpeg4 encoding (no direct mode MBs yet); various related cleanups; disabled old motion estimation algorithms (log, full, ...), they will either be fixed or removed
author michael
date Tue, 30 Dec 2003 16:07:57 +0000
parents 52254c2f9cae
children ef2149182f1c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
1 /*
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
2 * Alpha optimized DSP utils
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
4 *
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
5 * This library is free software; you can redistribute it and/or
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
6 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
7 * License as published by the Free Software Foundation; either
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
8 * version 2 of the License, or (at your option) any later version.
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
9 *
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
10 * This library is distributed in the hope that it will be useful,
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
13 * Lesser General Public License for more details.
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
14 *
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
15 * You should have received a copy of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
16 * License along with this library; if not, write to the Free Software
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
18 */
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
19
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
20 #include "asm.h"
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
21 #include "../dsputil.h"
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
22
1333
a1cc1810d58f build error on Alpha patch by (Sam Hocevar <sam at zoy dot org>)
michaelni
parents: 1324
diff changeset
23 extern void simple_idct_axp(DCTELEM *block);
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
24 extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block);
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
25 extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block);
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
26
511
fa4425cf6b31 Assembly version of put_pixels. This is currently the function that
mellum
parents: 509
diff changeset
27 void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
28 int line_size, int h);
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
29 void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
30 int line_size);
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
31 void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
32 int line_size);
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
33 void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
34 int line_size);
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
35 void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
36 int line_size);
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
37
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
38 void get_pixels_mvi(DCTELEM *restrict block,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
39 const uint8_t *restrict pixels, int line_size);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
40 void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
41 int stride);
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
42 int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents: 670
diff changeset
43 int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size);
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
44 int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
45 int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
46 int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
47
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
48 #if 0
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
49 /* These functions were the base for the optimized assembler routines,
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
50 and remain here for documentation purposes. */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
51 static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
52 int line_size)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
53 {
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
54 int i = 8;
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
55 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
505
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
56
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
57 do {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
58 uint64_t shorts0, shorts1;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
59
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
60 shorts0 = ldq(block);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
61 shorts0 = maxsw4(shorts0, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
62 shorts0 = minsw4(shorts0, clampmask);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
63 stl(pkwb(shorts0), pixels);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
64
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
65 shorts1 = ldq(block + 4);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
66 shorts1 = maxsw4(shorts1, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
67 shorts1 = minsw4(shorts1, clampmask);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
68 stl(pkwb(shorts1), pixels + 4);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
69
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
70 pixels += line_size;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
71 block += 8;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
72 } while (--i);
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
73 }
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
74
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
75 void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
76 int line_size)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
77 {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
78 int h = 8;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
79 /* Keep this function a leaf function by generating the constants
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
80 manually (mainly for the hack value ;-). */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
81 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
82 uint64_t signmask = zap(-1, 0x33);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
83 signmask ^= signmask >> 1; /* 0x8000800080008000 */
505
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
84
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
85 do {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
86 uint64_t shorts0, pix0, signs0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
87 uint64_t shorts1, pix1, signs1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
88
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
89 shorts0 = ldq(block);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
90 shorts1 = ldq(block + 4);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
91
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
92 pix0 = unpkbw(ldl(pixels));
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
93 /* Signed subword add (MMX paddw). */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
94 signs0 = shorts0 & signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
95 shorts0 &= ~signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
96 shorts0 += pix0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
97 shorts0 ^= signs0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
98 /* Clamp. */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
99 shorts0 = maxsw4(shorts0, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
100 shorts0 = minsw4(shorts0, clampmask);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
101
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
102 /* Next 4. */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
103 pix1 = unpkbw(ldl(pixels + 4));
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
104 signs1 = shorts1 & signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
105 shorts1 &= ~signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
106 shorts1 += pix1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
107 shorts1 ^= signs1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
108 shorts1 = maxsw4(shorts1, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
109 shorts1 = minsw4(shorts1, clampmask);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
110
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
111 stl(pkwb(shorts0), pixels);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
112 stl(pkwb(shorts1), pixels + 4);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
113
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
114 pixels += line_size;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
115 block += 8;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
116 } while (--h);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
117 }
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
118 #endif
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
119
518
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
120 static void clear_blocks_axp(DCTELEM *blocks) {
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
121 uint64_t *p = (uint64_t *) blocks;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
122 int n = sizeof(DCTELEM) * 6 * 64;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
123
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
124 do {
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
125 p[0] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
126 p[1] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
127 p[2] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
128 p[3] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
129 p[4] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
130 p[5] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
131 p[6] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
132 p[7] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
133 p += 8;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
134 n -= 8 * 8;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
135 } while (n);
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
136 }
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
137
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
/* Average of a and b without rounding up, computed as
   (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1).
   Presumably BYTE_VEC(x) replicates x into every byte, which makes this a
   per-byte floor((a + b) / 2) with no carry between bytes -- BYTE_VEC is not
   defined in the visible code, so confirm. */
static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    const uint64_t common    = a & b;
    const uint64_t half_diff = ((a ^ b) & BYTE_VEC(0xfe)) >> 1;

    return common + half_diff;
}
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
142
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
/* Rounded-up counterpart of the no-rounding average, computed as
   (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1).
   Presumably BYTE_VEC(x) replicates x into every byte, which makes this a
   per-byte floor((a + b + 1) / 2) with no borrow between bytes -- BYTE_VEC
   is not defined in the visible code, so confirm. */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    const uint64_t either    = a | b;
    const uint64_t half_diff = ((a ^ b) & BYTE_VEC(0xfe)) >> 1;

    return either - half_diff;
}
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
147
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
148 #if 0
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
149 /* The XY2 routines basically utilize this scheme, but reuse parts in
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
150 each iteration. */
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
/* Four-input average, split into two partial sums so that no partial sum
   overflows its byte:
     - the upper six bits of each input byte, pre-shifted right by two;
     - the lower two bits of each input byte plus BYTE_VEC(0x02), shifted
       right by two afterwards and masked back to two bits.
   Presumably BYTE_VEC(x) replicates x into every byte (not defined in the
   visible code -- confirm), in which case each result byte equals
   (l1 + l2 + l3 + l4 + 2) >> 2 of the corresponding input bytes. */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    uint64_t high_part;
    uint64_t low_part;

    high_part  = (l1 & ~BYTE_VEC(0x03)) >> 2;
    high_part += (l2 & ~BYTE_VEC(0x03)) >> 2;
    high_part += (l3 & ~BYTE_VEC(0x03)) >> 2;
    high_part += (l4 & ~BYTE_VEC(0x03)) >> 2;

    low_part  = l1 & BYTE_VEC(0x03);
    low_part += l2 & BYTE_VEC(0x03);
    low_part += l3 & BYTE_VEC(0x03);
    low_part += l4 & BYTE_VEC(0x03);
    low_part += BYTE_VEC(0x02);
    low_part  = (low_part >> 2) & BYTE_VEC(0x03);

    return high_part + low_part;
}
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
164 #endif
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
165
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
/* Plain copy kernel.  Expands to a loop statement (the caller supplies the
   trailing semicolon) that, h times, passes LOAD(pixels) to STORE together
   with block and then advances both pointers by line_size.
   Uses and modifies the variables pixels, block and h of the expansion site
   and reads line_size; h must be non-zero on entry because the body runs
   before the first test. */
#define OP(LOAD, STORE)                         \
    do {                                        \
        STORE(LOAD(pixels), block);             \
        block  += line_size;                    \
        pixels += line_size;                    \
    } while (--h)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
172
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
/* Horizontal half-pel kernel.  Expands to a loop statement (the caller
   supplies the trailing semicolon).  Per row it loads 8 bytes with LOAD,
   builds a second quadword from that value shifted right by 8 bits with
   pixels[8] placed in the top byte, and hands AVG2 of the two to STORE.
   With a little-endian LOAD the second quadword is the 8 bytes starting at
   pixels + 1 -- presumably the intent; confirm.
   AVG2 is not defined in the visible code and must be defined before the
   macro is expanded.  Uses and modifies pixels, block and h of the expansion
   site; h must be non-zero on entry. */
#define OP_X2(LOAD, STORE)                                              \
    do {                                                                \
        uint64_t left  = LOAD(pixels);                                  \
        uint64_t right = left >> 8 | ((uint64_t) pixels[8] << 56);      \
                                                                        \
        STORE(AVG2(left, right), block);                                \
        block  += line_size;                                            \
        pixels += line_size;                                            \
    } while (--h)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
183
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
/* Vertical half-pel kernel.  Expands to a single statement (the caller
   supplies the trailing semicolon).  The first row is loaded once; then, h
   times, the following row is loaded, AVG2 of the two rows is handed to
   STORE, and the newly loaded row is kept for the next iteration -- so
   h + 1 source rows are read and each is loaded only once.
   AVG2 is not defined in the visible code and must be defined before the
   macro is expanded.  Uses and modifies pixels, block and h of the expansion
   site; h must be non-zero on entry. */
#define OP_Y2(LOAD, STORE)                      \
    do {                                        \
        uint64_t upper = LOAD(pixels);          \
        do {                                    \
            uint64_t lower;                     \
                                                \
            pixels += line_size;                \
            lower = LOAD(pixels);               \
            STORE(AVG2(upper, lower), block);   \
            block += line_size;                 \
            upper = lower;                      \
        } while (--h);                          \
    } while (0)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
197
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
/* Diagonal half-pel kernel.  Expands to a single statement (the caller
   supplies the trailing semicolon).

   For every source row, two quadwords are formed: the LOADed one and a copy
   shifted right by 8 bits with pixels[8] in the top byte.  Their bytes are
   summed in two parts -- the low two bits of each byte (lo) and the upper
   six bits pre-shifted right by two (hi) -- so that neither part overflows a
   byte.  Each output row combines the parts of two consecutive source rows:
       (((lo + nlo + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) + hi + nhi
   and the parts of the newer row are carried over to the next iteration, so
   h + 1 source rows are read for h output rows.

   BYTE_VEC and AVG4_ROUNDER are not defined in the visible code and must be
   defined before the macro is expanded.  Uses and modifies pixels, block and
   h of the expansion site; h must be non-zero on entry. */
#define OP_XY2(LOAD, STORE)                                                 \
    do {                                                                    \
        uint64_t cur   = LOAD(pixels);                                      \
        uint64_t cur_r = cur >> 8 | ((uint64_t) pixels[8] << 56);           \
        uint64_t lo    = (cur & BYTE_VEC(0x03))                             \
                       + (cur_r & BYTE_VEC(0x03));                          \
        uint64_t hi    = ((cur & ~BYTE_VEC(0x03)) >> 2)                     \
                       + ((cur_r & ~BYTE_VEC(0x03)) >> 2);                  \
                                                                            \
        do {                                                                \
            uint64_t nxt, nxt_r;                                            \
            uint64_t nlo, nhi;                                              \
            uint64_t out;                                                   \
                                                                            \
            pixels += line_size;                                            \
            nxt   = LOAD(pixels);                                           \
            nxt_r = nxt >> 8 | ((uint64_t) pixels[8] << 56);                \
            nlo   = (nxt & BYTE_VEC(0x03))                                  \
                  + (nxt_r & BYTE_VEC(0x03));                               \
            nhi   = ((nxt & ~BYTE_VEC(0x03)) >> 2)                          \
                  + ((nxt_r & ~BYTE_VEC(0x03)) >> 2);                       \
            out   = (((lo + nlo + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03))     \
                  + hi + nhi;                                               \
            STORE(out, block);                                              \
                                                                            \
            block += line_size;                                             \
            lo = nlo;                                                       \
            hi = nhi;                                                       \
        } while (--h);                                                      \
    } while (0)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
228
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
/* Generates two static functions from one kernel macro:

     OPNAME_pixelsSUFF_axp    runs OPKIND once on the column starting at
                              block / pixels.  When the low three bits of
                              the pixels address are zero the kernel is
                              instantiated with ldq, otherwise with uldq
                              (presumably the aligned and unaligned quadword
                              loads -- neither is defined in the visible
                              code).
     OPNAME_pixels16SUFF_axp  calls the function above twice, for the column
                              at offset 0 and the column at offset 8, with
                              the same line_size and h.

   STORE is forwarded unchanged to OPKIND. */
#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE)                                \
static void OPNAME ## _pixels ## SUFF ## _axp                               \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         int line_size, int h)                                              \
{                                                                           \
    if (((size_t) pixels & 0x7) == 0) {                                     \
        OPKIND(ldq, STORE);                                                 \
    } else {                                                                \
        OPKIND(uldq, STORE);                                                \
    }                                                                       \
}                                                                           \
                                                                            \
static void OPNAME ## _pixels16 ## SUFF ## _axp                             \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         int line_size, int h)                                              \
{                                                                           \
    OPNAME ## _pixels ## SUFF ## _axp(block,     pixels,     line_size, h); \
    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
}
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
248
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
/*
 * PIXOP(OPNAME, STORE) instantiates MAKE_OP four times for one operation:
 * with no suffix, and with the _x2, _y2 and _xy2 suffixes, pairing each
 * with the OP, OP_X2, OP_Y2 and OP_XY2 body macro respectively.  The empty
 * second argument in the first line is intentional (no name suffix).
 */
#define PIXOP(OPNAME, STORE)                \
    MAKE_OP(OPNAME,     , OP,     STORE)    \
    MAKE_OP(OPNAME, _x2,  OP_X2,  STORE)    \
    MAKE_OP(OPNAME, _y2,  OP_Y2,  STORE)    \
    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
254
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
255 /* Rounding primitives. */
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
256 #define AVG2 avg2
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
257 #define AVG4 avg4
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
258 #define AVG4_ROUNDER BYTE_VEC(0x02)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
259 #define STORE(l, b) stq(l, b)
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
260 PIXOP(put, STORE);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
261
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
262 #undef STORE
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
263 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
264 PIXOP(avg, STORE);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
265
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
266 /* Not rounding primitives. */
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
267 #undef AVG2
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
268 #undef AVG4
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
269 #undef AVG4_ROUNDER
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
270 #undef STORE
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
271 #define AVG2 avg2_no_rnd
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
272 #define AVG4 avg4_no_rnd
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
273 #define AVG4_ROUNDER BYTE_VEC(0x01)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
274 #define STORE(l, b) stq(l, b)
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
275 PIXOP(put_no_rnd, STORE);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
276
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
277 #undef STORE
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
278 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
279 PIXOP(avg_no_rnd, STORE);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
280
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
/*
 * Apply put_pixels_axp_asm twice with the same line_size and h: once at
 * (block, pixels) and once with both pointers advanced by 8 bytes.
 */
void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
                          int line_size, int h)
{
    uint8_t *const block_hi = block + 8;
    const uint8_t *const pixels_hi = pixels + 8;

    put_pixels_axp_asm(block, pixels, line_size, h);
    put_pixels_axp_asm(block_hi, pixels_hi, line_size, h);
}
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
287
986
be3ffaaf5a6d Implement sad8x8 and sad16x16 with pix_abs.
mellum
parents: 897
diff changeset
/*
 * Adapter around pix_abs16x16_mvi_asm: accepts a leading context pointer,
 * which it ignores, and forwards the remaining arguments unchanged.
 */
static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
{
    int result;

    (void) s;   /* the asm routine takes no context argument */
    result = pix_abs16x16_mvi_asm(a, b, stride);
    return result;
}
be3ffaaf5a6d Implement sad8x8 and sad16x16 with pix_abs.
mellum
parents: 897
diff changeset
292
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
293 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
294 {
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
295 c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
296 c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
297 c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
298 c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
299
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
300 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
301 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
302 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
303 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
304
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
305 c->avg_pixels_tab[0][0] = avg_pixels16_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
306 c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
307 c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
308 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
309
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
310 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
311 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
312 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
313 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
314
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
315 c->put_pixels_tab[1][0] = put_pixels_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
316 c->put_pixels_tab[1][1] = put_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
317 c->put_pixels_tab[1][2] = put_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
318 c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
319
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
320 c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
321 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
322 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
323 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
324
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
325 c->avg_pixels_tab[1][0] = avg_pixels_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
326 c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
327 c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
328 c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
329
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
330 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
331 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
332 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
333 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp;
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
334
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
335 c->clear_blocks = clear_blocks_axp;
518
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
336
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
337 /* amask clears all bits that correspond to present features. */
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
338 if (amask(AMASK_MVI) == 0) {
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
339 c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
340 c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
341
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
342 c->get_pixels = get_pixels_mvi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
343 c->diff_pixels = diff_pixels_mvi;
986
be3ffaaf5a6d Implement sad8x8 and sad16x16 with pix_abs.
mellum
parents: 897
diff changeset
344 c->sad[0] = sad16x16_mvi;
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
345 c->sad[1] = pix_abs8x8_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
346 // c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
347 c->pix_abs[0][0] = sad16x16_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
348 c->pix_abs[1][0] = pix_abs8x8_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
349 c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
350 c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
351 c->pix_abs[0][3] = pix_abs16x16_xy2_mvi;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
352 }
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
353
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
354 put_pixels_clamped_axp_p = c->put_pixels_clamped;
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
355 add_pixels_clamped_axp_p = c->add_pixels_clamped;
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
356
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
357 c->idct_put = simple_idct_put_axp;
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
358 c->idct_add = simple_idct_add_axp;
1333
a1cc1810d58f build error on Alpha patch by (Sam Hocevar <sam at zoy dot org>)
michaelni
parents: 1324
diff changeset
359 c->idct = simple_idct_axp;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
360 }