annotate alpha/dsputil_alpha.c @ 5062:2dd00b1cc94b libavcodec

Remove mdct.o and fft.o from fft-test prerequisites list. Both objects were added to the link command, resulting in multiple definitions of symbols. Now linking works in the general case when mdct.o and fft.o are compiled into libavcodec.a.
author diego
date Tue, 22 May 2007 07:08:38 +0000
parents d5ba514e3f4a
children f7cbb7733146
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
1 /*
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
2 * Alpha optimized DSP utils
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
15 * Lesser General Public License for more details.
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
16 *
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2967
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
20 */
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
21
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
22 #include "asm.h"
5010
d5ba514e3f4a Add libavcodec to compiler include flags in order to simplify header
diego
parents: 3947
diff changeset
23 #include "dsputil.h"
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
24
1333
a1cc1810d58f build error on Alpha patch by (Sam Hocevar <sam at zoy dot org>)
michaelni
parents: 1324
diff changeset
25 extern void simple_idct_axp(DCTELEM *block);
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
26 extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block);
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
27 extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block);
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
28
511
fa4425cf6b31 Assembly version of put_pixels. This is currently the function that
mellum
parents: 509
diff changeset
29 void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
30 int line_size, int h);
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
31 void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
32 int line_size);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
33 void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
34 int line_size);
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
35 void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
36 int line_size);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
37 void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
38 int line_size);
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
39
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
40 void get_pixels_mvi(DCTELEM *restrict block,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
41 const uint8_t *restrict pixels, int line_size);
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
42 void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
43 int stride);
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
44 int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
705
107a56aa74f5 Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents: 670
diff changeset
45 int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size);
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
46 int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
47 int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
48 int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
49
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
50 #if 0
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
51 /* These functions were the base for the optimized assembler routines,
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
52 and remain here for documentation purposes. */
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
53 static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
54 int line_size)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
55 {
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
56 int i = 8;
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
57 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
505
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
58
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
59 do {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
60 uint64_t shorts0, shorts1;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
61
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
62 shorts0 = ldq(block);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
63 shorts0 = maxsw4(shorts0, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
64 shorts0 = minsw4(shorts0, clampmask);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
65 stl(pkwb(shorts0), pixels);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
66
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
67 shorts1 = ldq(block + 4);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
68 shorts1 = maxsw4(shorts1, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
69 shorts1 = minsw4(shorts1, clampmask);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
70 stl(pkwb(shorts1), pixels + 4);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
71
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
72 pixels += line_size;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
73 block += 8;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
74 } while (--i);
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
75 }
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
76
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
77 void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
78 int line_size)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
79 {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
80 int h = 8;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
81 /* Keep this function a leaf function by generating the constants
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
82 manually (mainly for the hack value ;-). */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
83 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
84 uint64_t signmask = zap(-1, 0x33);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
85 signmask ^= signmask >> 1; /* 0x8000800080008000 */
505
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
86
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
87 do {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
88 uint64_t shorts0, pix0, signs0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
89 uint64_t shorts1, pix1, signs1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
90
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
91 shorts0 = ldq(block);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
92 shorts1 = ldq(block + 4);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
93
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
94 pix0 = unpkbw(ldl(pixels));
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
95 /* Signed subword add (MMX paddw). */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
96 signs0 = shorts0 & signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
97 shorts0 &= ~signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
98 shorts0 += pix0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
99 shorts0 ^= signs0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
100 /* Clamp. */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
101 shorts0 = maxsw4(shorts0, 0);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
102 shorts0 = minsw4(shorts0, clampmask);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
103
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
104 /* Next 4. */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
105 pix1 = unpkbw(ldl(pixels + 4));
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
106 signs1 = shorts1 & signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
107 shorts1 &= ~signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
108 shorts1 += pix1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
109 shorts1 ^= signs1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
110 shorts1 = maxsw4(shorts1, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
111 shorts1 = minsw4(shorts1, clampmask);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
112
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
113 stl(pkwb(shorts0), pixels);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
114 stl(pkwb(shorts1), pixels + 4);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
115
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
116 pixels += line_size;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
117 block += 8;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
118 } while (--h);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
119 }
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
120 #endif
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
121
518
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
122 static void clear_blocks_axp(DCTELEM *blocks) {
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
123 uint64_t *p = (uint64_t *) blocks;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
124 int n = sizeof(DCTELEM) * 6 * 64;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
125
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
126 do {
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
127 p[0] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
128 p[1] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
129 p[2] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
130 p[3] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
131 p[4] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
132 p[5] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
133 p[6] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
134 p[7] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
135 p += 8;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
136 n -= 8 * 8;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
137 } while (n);
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
138 }
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
139
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
140 static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
141 {
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
142 return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
143 }
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
144
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
145 static inline uint64_t avg2(uint64_t a, uint64_t b)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
146 {
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
147 return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
148 }
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
149
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
150 #if 0
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
151 /* The XY2 routines basically utilize this scheme, but reuse parts in
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
152 each iteration. */
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
153 static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
154 {
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
155 uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
156 + ((l2 & ~BYTE_VEC(0x03)) >> 2)
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
157 + ((l3 & ~BYTE_VEC(0x03)) >> 2)
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
158 + ((l4 & ~BYTE_VEC(0x03)) >> 2);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
159 uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
160 + (l2 & BYTE_VEC(0x03))
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
161 + (l3 & BYTE_VEC(0x03))
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
162 + (l4 & BYTE_VEC(0x03))
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
163 + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
164 return r1 + r2;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
165 }
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
166 #endif
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
167
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
168 #define OP(LOAD, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
169 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
170 STORE(LOAD(pixels), block); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
171 pixels += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
172 block += line_size; \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
173 } while (--h)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
174
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
175 #define OP_X2(LOAD, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
176 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
177 uint64_t pix1, pix2; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
178 \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
179 pix1 = LOAD(pixels); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
180 pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
181 STORE(AVG2(pix1, pix2), block); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
182 pixels += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
183 block += line_size; \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
184 } while (--h)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
185
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
186 #define OP_Y2(LOAD, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
187 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
188 uint64_t pix = LOAD(pixels); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
189 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
190 uint64_t next_pix; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
191 \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
192 pixels += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
193 next_pix = LOAD(pixels); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
194 STORE(AVG2(pix, next_pix), block); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
195 block += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
196 pix = next_pix; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
197 } while (--h); \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
198 } while (0)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
199
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
200 #define OP_XY2(LOAD, STORE) \
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
201 do { \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
202 uint64_t pix1 = LOAD(pixels); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
203 uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
204 uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
205 + (pix2 & BYTE_VEC(0x03)); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
206 uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
207 + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
208 \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
209 do { \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
210 uint64_t npix1, npix2; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
211 uint64_t npix_l, npix_h; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
212 uint64_t avg; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
213 \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
214 pixels += line_size; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
215 npix1 = LOAD(pixels); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
216 npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
217 npix_l = (npix1 & BYTE_VEC(0x03)) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
218 + (npix2 & BYTE_VEC(0x03)); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
219 npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
220 + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
221 avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
222 + pix_h + npix_h; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
223 STORE(avg, block); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
224 \
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
225 block += line_size; \
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
226 pix_l = npix_l; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
227 pix_h = npix_h; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
228 } while (--h); \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
229 } while (0)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
230
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
231 #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
232 static void OPNAME ## _pixels ## SUFF ## _axp \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
233 (uint8_t *restrict block, const uint8_t *restrict pixels, \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
234 int line_size, int h) \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
235 { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
236 if ((size_t) pixels & 0x7) { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
237 OPKIND(uldq, STORE); \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
238 } else { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
239 OPKIND(ldq, STORE); \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
240 } \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
241 } \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
242 \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
243 static void OPNAME ## _pixels16 ## SUFF ## _axp \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
244 (uint8_t *restrict block, const uint8_t *restrict pixels, \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
245 int line_size, int h) \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
246 { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
247 OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
248 OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
249 }
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
250
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
251 #define PIXOP(OPNAME, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
252 MAKE_OP(OPNAME, , OP, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
253 MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
254 MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
255 MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
256
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
257 /* Rounding primitives. */
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
258 #define AVG2 avg2
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
259 #define AVG4 avg4
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
260 #define AVG4_ROUNDER BYTE_VEC(0x02)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
261 #define STORE(l, b) stq(l, b)
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
262 PIXOP(put, STORE);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
263
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
264 #undef STORE
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
265 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
266 PIXOP(avg, STORE);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
267
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
268 /* Not rounding primitives. */
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
269 #undef AVG2
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
270 #undef AVG4
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
271 #undef AVG4_ROUNDER
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
272 #undef STORE
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
273 #define AVG2 avg2_no_rnd
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
274 #define AVG4 avg4_no_rnd
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
275 #define AVG4_ROUNDER BYTE_VEC(0x01)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
276 #define STORE(l, b) stq(l, b)
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
277 PIXOP(put_no_rnd, STORE);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
278
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
279 #undef STORE
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
280 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
281 PIXOP(avg_no_rnd, STORE);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
282
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
283 void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
284 int line_size, int h)
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
285 {
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
286 put_pixels_axp_asm(block, pixels, line_size, h);
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
287 put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
288 }
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
289
986
be3ffaaf5a6d Implement sad8x8 and sad16x16 with pix_abs.
mellum
parents: 897
diff changeset
290 static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
be3ffaaf5a6d Implement sad8x8 and sad16x16 with pix_abs.
mellum
parents: 897
diff changeset
291 {
be3ffaaf5a6d Implement sad8x8 and sad16x16 with pix_abs.
mellum
parents: 897
diff changeset
292 return pix_abs16x16_mvi_asm(a, b, stride);
be3ffaaf5a6d Implement sad8x8 and sad16x16 with pix_abs.
mellum
parents: 897
diff changeset
293 }
be3ffaaf5a6d Implement sad8x8 and sad16x16 with pix_abs.
mellum
parents: 897
diff changeset
294
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
295 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
296 {
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
297 c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
298 c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
299 c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
300 c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
301
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
302 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
303 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
304 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
305 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
306
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
307 c->avg_pixels_tab[0][0] = avg_pixels16_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
308 c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
309 c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
310 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
311
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
312 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
313 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
314 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
315 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
316
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
317 c->put_pixels_tab[1][0] = put_pixels_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
318 c->put_pixels_tab[1][1] = put_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
319 c->put_pixels_tab[1][2] = put_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
320 c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
321
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
322 c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
323 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
324 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
325 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
326
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
327 c->avg_pixels_tab[1][0] = avg_pixels_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
328 c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
329 c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
330 c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
331
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
332 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
333 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
334 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
335 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp;
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
336
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
337 c->clear_blocks = clear_blocks_axp;
518
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
338
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
339 /* amask clears all bits that correspond to present features. */
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
340 if (amask(AMASK_MVI) == 0) {
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
341 c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
342 c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
343
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
344 c->get_pixels = get_pixels_mvi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
345 c->diff_pixels = diff_pixels_mvi;
986
be3ffaaf5a6d Implement sad8x8 and sad16x16 with pix_abs.
mellum
parents: 897
diff changeset
346 c->sad[0] = sad16x16_mvi;
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
347 c->sad[1] = pix_abs8x8_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
348 // c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
349 c->pix_abs[0][0] = sad16x16_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
350 c->pix_abs[1][0] = pix_abs8x8_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
351 c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
352 c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
353 c->pix_abs[0][3] = pix_abs16x16_xy2_mvi;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
354 }
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
355
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
356 put_pixels_clamped_axp_p = c->put_pixels_clamped;
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
357 add_pixels_clamped_axp_p = c->add_pixels_clamped;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
358
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
359 c->idct_put = simple_idct_put_axp;
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
360 c->idct_add = simple_idct_add_axp;
1333
a1cc1810d58f build error on Alpha patch by (Sam Hocevar <sam at zoy dot org>)
michaelni
parents: 1324
diff changeset
361 c->idct = simple_idct_axp;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
362 }