annotate alpha/dsputil_alpha.c @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 9e7d38743146
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
1 /*
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
2 * Alpha optimized DSP utils
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
15 * Lesser General Public License for more details.
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
16 *
429
718a22dc121f license/copyright change
glantau
parents: 214
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2967
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
20 */
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
21
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 5010
diff changeset
22 #include "libavcodec/dsputil.h"
11396
9e7d38743146 Alpha: move dsputil prototypes to a header file
mru
parents: 8625
diff changeset
23 #include "dsputil_alpha.h"
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
24 #include "asm.h"
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
25
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
26 void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
27 int line_size);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
28 void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
29 int line_size);
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
30
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
31 #if 0
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
32 /* These functions were the base for the optimized assembler routines,
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
33 and remain here for documentation purposes. */
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
34 static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
35 int line_size)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
36 {
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
37 int i = 8;
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
38 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
505
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
39
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
40 do {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
41 uint64_t shorts0, shorts1;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
42
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
43 shorts0 = ldq(block);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
44 shorts0 = maxsw4(shorts0, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
45 shorts0 = minsw4(shorts0, clampmask);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
46 stl(pkwb(shorts0), pixels);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
47
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
48 shorts1 = ldq(block + 4);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
49 shorts1 = maxsw4(shorts1, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
50 shorts1 = minsw4(shorts1, clampmask);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
51 stl(pkwb(shorts1), pixels + 4);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
52
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
53 pixels += line_size;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
54 block += 8;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
55 } while (--i);
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
56 }
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
57
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
58 void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
59 int line_size)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
60 {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
61 int h = 8;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
62 /* Keep this function a leaf function by generating the constants
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
63 manually (mainly for the hack value ;-). */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
64 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
65 uint64_t signmask = zap(-1, 0x33);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
66 signmask ^= signmask >> 1; /* 0x8000800080008000 */
505
7a976bf93394 Ugly hack to make the assembler accept MVI instructions.
mellum
parents: 429
diff changeset
67
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
68 do {
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
69 uint64_t shorts0, pix0, signs0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
70 uint64_t shorts1, pix1, signs1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
71
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
72 shorts0 = ldq(block);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
73 shorts1 = ldq(block + 4);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
74
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
75 pix0 = unpkbw(ldl(pixels));
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
76 /* Signed subword add (MMX paddw). */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
77 signs0 = shorts0 & signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
78 shorts0 &= ~signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
79 shorts0 += pix0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
80 shorts0 ^= signs0;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
81 /* Clamp. */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
82 shorts0 = maxsw4(shorts0, 0);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
83 shorts0 = minsw4(shorts0, clampmask);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
84
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
85 /* Next 4. */
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
86 pix1 = unpkbw(ldl(pixels + 4));
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
87 signs1 = shorts1 & signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
88 shorts1 &= ~signmask;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
89 shorts1 += pix1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
90 shorts1 ^= signs1;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
91 shorts1 = maxsw4(shorts1, 0);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
92 shorts1 = minsw4(shorts1, clampmask);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
93
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
94 stl(pkwb(shorts0), pixels);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
95 stl(pkwb(shorts1), pixels + 4);
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
96
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
97 pixels += line_size;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
98 block += 8;
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
99 } while (--h);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
100 }
509
cab79946302f Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents: 505
diff changeset
101 #endif
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
102
518
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
103 static void clear_blocks_axp(DCTELEM *blocks) {
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
104 uint64_t *p = (uint64_t *) blocks;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
105 int n = sizeof(DCTELEM) * 6 * 64;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
106
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
107 do {
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
108 p[0] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
109 p[1] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
110 p[2] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
111 p[3] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
112 p[4] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
113 p[5] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
114 p[6] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
115 p[7] = 0;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
116 p += 8;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
117 n -= 8 * 8;
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
118 } while (n);
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
119 }
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
120
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
121 static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
122 {
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
123 return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
124 }
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
125
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
126 static inline uint64_t avg2(uint64_t a, uint64_t b)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
127 {
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
128 return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
129 }
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
130
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
131 #if 0
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
132 /* The XY2 routines basically utilize this scheme, but reuse parts in
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
133 each iteration. */
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
134 static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
135 {
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
136 uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
137 + ((l2 & ~BYTE_VEC(0x03)) >> 2)
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
138 + ((l3 & ~BYTE_VEC(0x03)) >> 2)
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
139 + ((l4 & ~BYTE_VEC(0x03)) >> 2);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
140 uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
141 + (l2 & BYTE_VEC(0x03))
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
142 + (l3 & BYTE_VEC(0x03))
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
143 + (l4 & BYTE_VEC(0x03))
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
144 + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
145 return r1 + r2;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
146 }
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
147 #endif
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
148
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
149 #define OP(LOAD, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
150 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
151 STORE(LOAD(pixels), block); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
152 pixels += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
153 block += line_size; \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
154 } while (--h)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
155
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
156 #define OP_X2(LOAD, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
157 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
158 uint64_t pix1, pix2; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
159 \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
160 pix1 = LOAD(pixels); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
161 pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
162 STORE(AVG2(pix1, pix2), block); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
163 pixels += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
164 block += line_size; \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
165 } while (--h)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
166
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
167 #define OP_Y2(LOAD, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
168 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
169 uint64_t pix = LOAD(pixels); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
170 do { \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
171 uint64_t next_pix; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
172 \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
173 pixels += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
174 next_pix = LOAD(pixels); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
175 STORE(AVG2(pix, next_pix), block); \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
176 block += line_size; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
177 pix = next_pix; \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
178 } while (--h); \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
179 } while (0)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
180
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
181 #define OP_XY2(LOAD, STORE) \
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
182 do { \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
183 uint64_t pix1 = LOAD(pixels); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
184 uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
185 uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
186 + (pix2 & BYTE_VEC(0x03)); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
187 uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
188 + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
189 \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
190 do { \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
191 uint64_t npix1, npix2; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
192 uint64_t npix_l, npix_h; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
193 uint64_t avg; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
194 \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
195 pixels += line_size; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
196 npix1 = LOAD(pixels); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
197 npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
198 npix_l = (npix1 & BYTE_VEC(0x03)) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
199 + (npix2 & BYTE_VEC(0x03)); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
200 npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
201 + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
202 avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
203 + pix_h + npix_h; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
204 STORE(avg, block); \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
205 \
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
206 block += line_size; \
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
207 pix_l = npix_l; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
208 pix_h = npix_h; \
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
209 } while (--h); \
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
210 } while (0)
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
211
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
212 #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
213 static void OPNAME ## _pixels ## SUFF ## _axp \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
214 (uint8_t *restrict block, const uint8_t *restrict pixels, \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
215 int line_size, int h) \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
216 { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
217 if ((size_t) pixels & 0x7) { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
218 OPKIND(uldq, STORE); \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
219 } else { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
220 OPKIND(ldq, STORE); \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
221 } \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
222 } \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
223 \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
224 static void OPNAME ## _pixels16 ## SUFF ## _axp \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
225 (uint8_t *restrict block, const uint8_t *restrict pixels, \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
226 int line_size, int h) \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
227 { \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
228 OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
229 OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
230 }
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
231
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
232 #define PIXOP(OPNAME, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
233 MAKE_OP(OPNAME, , OP, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
234 MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
235 MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
236 MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
237
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
238 /* Rounding primitives. */
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
239 #define AVG2 avg2
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
240 #define AVG4 avg4
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
241 #define AVG4_ROUNDER BYTE_VEC(0x02)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
242 #define STORE(l, b) stq(l, b)
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
243 PIXOP(put, STORE);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
244
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
245 #undef STORE
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
246 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
247 PIXOP(avg, STORE);
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
248
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
249 /* Not rounding primitives. */
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
250 #undef AVG2
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
251 #undef AVG4
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
252 #undef AVG4_ROUNDER
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
253 #undef STORE
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
254 #define AVG2 avg2_no_rnd
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
255 #define AVG4 avg4_no_rnd
546
8cefba09f2e8 * Improve xy2 routines slightly
mellum
parents: 518
diff changeset
256 #define AVG4_ROUNDER BYTE_VEC(0x01)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
257 #define STORE(l, b) stq(l, b)
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
258 PIXOP(put_no_rnd, STORE);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
259
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
260 #undef STORE
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
261 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
548
3f05be811b5a Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents: 546
diff changeset
262 PIXOP(avg_no_rnd, STORE);
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
263
11396
9e7d38743146 Alpha: move dsputil prototypes to a header file
mru
parents: 8625
diff changeset
264 static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
9e7d38743146 Alpha: move dsputil prototypes to a header file
mru
parents: 8625
diff changeset
265 int line_size, int h)
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
266 {
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
267 put_pixels_axp_asm(block, pixels, line_size, h);
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
268 put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
269 }
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
270
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 986
diff changeset
271 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
272 {
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
273 c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
274 c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
275 c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
276 c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
277
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
278 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
279 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
280 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
281 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
282
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
283 c->avg_pixels_tab[0][0] = avg_pixels16_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
284 c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
285 c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
286 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
287
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
288 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
289 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
290 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
291 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_axp;
670
340e3ba84119 Synthesize pixels16 functions from pixels functions.
mellum
parents: 663
diff changeset
292
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
293 c->put_pixels_tab[1][0] = put_pixels_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
294 c->put_pixels_tab[1][1] = put_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
295 c->put_pixels_tab[1][2] = put_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
296 c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
297
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
298 c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
299 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
300 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
301 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
302
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
303 c->avg_pixels_tab[1][0] = avg_pixels_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
304 c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
305 c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
306 c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
307
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
308 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
309 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels_x2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
310 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels_y2_axp;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
311 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp;
513
fb670ca9f8eb Use updated motion compensation routines.
mellum
parents: 511
diff changeset
312
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
313 c->clear_blocks = clear_blocks_axp;
518
70113647b50d Implement clear_blocks_axp.
mellum
parents: 513
diff changeset
314
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
315 /* amask clears all bits that correspond to present features. */
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
316 if (amask(AMASK_MVI) == 0) {
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
317 c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
318 c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
586
54b1c94977d5 MVI optimizations for motion estimation.
mellum
parents: 548
diff changeset
319
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
320 c->get_pixels = get_pixels_mvi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 744
diff changeset
321 c->diff_pixels = diff_pixels_mvi;
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8621
diff changeset
322 c->sad[0] = pix_abs16x16_mvi_asm;
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
323 c->sad[1] = pix_abs8x8_mvi;
8625
6f1b210e58d1 Alpha: fix pix_abs16
mru
parents: 8621
diff changeset
324 c->pix_abs[0][0] = pix_abs16x16_mvi_asm;
1708
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
325 c->pix_abs[1][0] = pix_abs8x8_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
326 c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
327 c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
dea5b2946999 interlaced motion estimation
michael
parents: 1465
diff changeset
328 c->pix_abs[0][3] = pix_abs16x16_xy2_mvi;
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
329 }
897
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
330
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
331 put_pixels_clamped_axp_p = c->put_pixels_clamped;
3dbbdc2f8bd3 Kludge around compilation failure on Alpha.
mellum
parents: 856
diff changeset
332 add_pixels_clamped_axp_p = c->add_pixels_clamped;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1708
diff changeset
333
8619
ce9b3d6a0abf Alpha: proper IDCT selection
mru
parents: 8250
diff changeset
334 if (!avctx->lowres &&
ce9b3d6a0abf Alpha: proper IDCT selection
mru
parents: 8250
diff changeset
335 (avctx->idct_algo == FF_IDCT_AUTO ||
ce9b3d6a0abf Alpha: proper IDCT selection
mru
parents: 8250
diff changeset
336 avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) {
8621
72e109759617 Alpha: add ff_ prefix to idct functions
mru
parents: 8620
diff changeset
337 c->idct_put = ff_simple_idct_put_axp;
72e109759617 Alpha: add ff_ prefix to idct functions
mru
parents: 8620
diff changeset
338 c->idct_add = ff_simple_idct_add_axp;
72e109759617 Alpha: add ff_ prefix to idct functions
mru
parents: 8620
diff changeset
339 c->idct = ff_simple_idct_axp;
8619
ce9b3d6a0abf Alpha: proper IDCT selection
mru
parents: 8250
diff changeset
340 }
214
73df666cacc7 Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff changeset
341 }