annotate mlib/dsputil_mlib.c @ 2262:7a1c3178d759 libavcodec

optimizing 4x4 idct
author michael
date Sun, 26 Sep 2004 16:33:39 +0000
parents c9b2debe7b2b
children ef2149182f1c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
1 /*
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
2 * Sun mediaLib optimized DSP utils
429
718a22dc121f license/copyright change
glantau
parents: 90
diff changeset
3 * Copyright (c) 2001 Fabrice Bellard.
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
4 *
429
718a22dc121f license/copyright change
glantau
parents: 90
diff changeset
5 * This library is free software; you can redistribute it and/or
718a22dc121f license/copyright change
glantau
parents: 90
diff changeset
6 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 90
diff changeset
7 * License as published by the Free Software Foundation; either
718a22dc121f license/copyright change
glantau
parents: 90
diff changeset
8 * version 2 of the License, or (at your option) any later version.
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
9 *
429
718a22dc121f license/copyright change
glantau
parents: 90
diff changeset
10 * This library is distributed in the hope that it will be useful,
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429
718a22dc121f license/copyright change
glantau
parents: 90
diff changeset
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 90
diff changeset
13 * Lesser General Public License for more details.
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
14 *
429
718a22dc121f license/copyright change
glantau
parents: 90
diff changeset
15 * You should have received a copy of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 90
diff changeset
16 * License along with this library; if not, write to the Free Software
718a22dc121f license/copyright change
glantau
parents: 90
diff changeset
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
18 */
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
19
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
20 #include "../dsputil.h"
628
f596db4aa871 sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents: 429
diff changeset
21 #include "../mpegvideo.h"
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
22
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
23 #include <mlib_types.h>
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
24 #include <mlib_status.h>
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
25 #include <mlib_sys.h>
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
26 #include <mlib_algebra.h>
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
27 #include <mlib_video.h>
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
28
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
29 /* misc */
735
b4bf95260ffe 16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents: 730
diff changeset
30
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
31 static void get_pixels_mlib(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
735
b4bf95260ffe 16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents: 730
diff changeset
32 {
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
33 int i;
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
34
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
35 for (i=0;i<8;i++) {
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
36 mlib_VectorConvert_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)pixels, 8);
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
37
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
38 pixels += line_size;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
39 block += 8;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
40 }
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
41 }
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
42
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
43 static void diff_pixels_mlib(DCTELEM *restrict block, const uint8_t *s1, const uint8_t *s2, int line_size)
735
b4bf95260ffe 16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents: 730
diff changeset
44 {
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
45 int i;
735
b4bf95260ffe 16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents: 730
diff changeset
46
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
47 for (i=0;i<8;i++) {
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
48 mlib_VectorSub_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)s1, (mlib_u8 *)s2, 8);
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
49
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
50 s1 += line_size;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
51 s2 += line_size;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
52 block += 8;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
53 }
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
54 }
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
55
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 929
diff changeset
56 static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int line_size)
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
57 {
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
58 mlib_VideoAddBlock_U8_S16(pixels, (mlib_s16 *)block, line_size);
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
59 }
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
60
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
61 /* put block, width 16 pixel, height 8/16 */
735
b4bf95260ffe 16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents: 730
diff changeset
62
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * Copy a 16-pixel-wide block from `ref` to `dest`.
 *
 * height must be 8 or 16; it selects mlib_VideoCopyRef_U8_U8_16x8 or
 * mlib_VideoCopyRef_U8_U8_16x16. Any other height trips assert(0).
 * stride is forwarded unchanged to the mediaLib routine.
 */
static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    if (height == 8) {
        mlib_VideoCopyRef_U8_U8_16x8(dest, (uint8_t *)ref, stride);
    } else if (height == 16) {
        mlib_VideoCopyRef_U8_U8_16x16(dest, (uint8_t *)ref, stride);
    } else {
        /* Unsupported block height. */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
79
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * Write a 16-pixel-wide block to `dest` using the mediaLib InterpX
 * routines on `ref`.
 *
 * height must be 8 or 16; it selects mlib_VideoInterpX_U8_U8_16x8 or
 * mlib_VideoInterpX_U8_U8_16x16. Any other height trips assert(0).
 * stride is passed for both stride arguments of the routine.
 */
static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    if (height == 8) {
        mlib_VideoInterpX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        /* Unsupported block height. */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
96
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * Write a 16-pixel-wide block to `dest` using the mediaLib InterpY
 * routines on `ref`.
 *
 * height must be 8 or 16; it selects mlib_VideoInterpY_U8_U8_16x8 or
 * mlib_VideoInterpY_U8_U8_16x16. Any other height trips assert(0).
 * stride is passed for both stride arguments of the routine.
 */
static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    if (height == 8) {
        mlib_VideoInterpY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        /* Unsupported block height. */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
113
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * Write a 16-pixel-wide block to `dest` using the mediaLib InterpXY
 * routines on `ref`.
 *
 * height must be 8 or 16; it selects mlib_VideoInterpXY_U8_U8_16x8 or
 * mlib_VideoInterpXY_U8_U8_16x16. Any other height trips assert(0).
 * stride is passed for both stride arguments of the routine.
 */
static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    if (height == 8) {
        mlib_VideoInterpXY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpXY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        /* Unsupported block height. */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
130
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
131 /* put block, width 8 pixel, height 4/8/16 */
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
132
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * Copy an 8-pixel-wide block from `ref` to `dest`.
 *
 * height must be 4, 8 or 16; it selects mlib_VideoCopyRef_U8_U8_8x4,
 * _8x8 or _8x16 respectively. Any other height trips assert(0).
 * stride is forwarded unchanged to the mediaLib routine.
 */
static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    if (height == 4) {
        mlib_VideoCopyRef_U8_U8_8x4(dest, (uint8_t *)ref, stride);
    } else if (height == 8) {
        mlib_VideoCopyRef_U8_U8_8x8(dest, (uint8_t *)ref, stride);
    } else if (height == 16) {
        mlib_VideoCopyRef_U8_U8_8x16(dest, (uint8_t *)ref, stride);
    } else {
        /* Unsupported block height. */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
153
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * Write an 8-pixel-wide block to `dest` using the mediaLib InterpX
 * routines on `ref`.
 *
 * height must be 4, 8 or 16; it selects mlib_VideoInterpX_U8_U8_8x4,
 * _8x8 or _8x16 respectively. Any other height trips assert(0).
 * stride is passed for both stride arguments of the routine.
 */
static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    if (height == 4) {
        mlib_VideoInterpX_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpX_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        /* Unsupported block height. */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
174
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * Write an 8-pixel-wide block to `dest` using the mediaLib InterpY
 * routines on `ref`.
 *
 * height must be 4, 8 or 16; it selects mlib_VideoInterpY_U8_U8_8x4,
 * _8x8 or _8x16 respectively. Any other height trips assert(0).
 * stride is passed for both stride arguments of the routine.
 */
static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    if (height == 4) {
        mlib_VideoInterpY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        /* Unsupported block height. */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
195
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * Write an 8-pixel-wide block to `dest` using the mediaLib InterpXY
 * routines on `ref`.
 *
 * height must be 4, 8 or 16; it selects mlib_VideoInterpXY_U8_U8_8x4,
 * _8x8 or _8x16 respectively. Any other height trips assert(0).
 * stride is passed for both stride arguments of the routine.
 */
static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    if (height == 4) {
        mlib_VideoInterpXY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpXY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        /* Unsupported block height. */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
216
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
217 /* average block, width 16 pixel, height 8/16 */
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
218
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * Update a 16-pixel-wide block at `dest` from `ref` using the mediaLib
 * CopyRefAve routines.
 *
 * height must be 8 or 16; it selects mlib_VideoCopyRefAve_U8_U8_16x8 or
 * mlib_VideoCopyRefAve_U8_U8_16x16. Any other height trips assert(0).
 * stride is forwarded unchanged to the mediaLib routine.
 */
static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    if (height == 8) {
        mlib_VideoCopyRefAve_U8_U8_16x8(dest, (uint8_t *)ref, stride);
    } else if (height == 16) {
        mlib_VideoCopyRefAve_U8_U8_16x16(dest, (uint8_t *)ref, stride);
    } else {
        /* Unsupported block height. */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
235
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * Update a 16-pixel-wide block at `dest` from `ref` using the mediaLib
 * InterpAveX routines.
 *
 * height must be 8 or 16; it selects mlib_VideoInterpAveX_U8_U8_16x8 or
 * mlib_VideoInterpAveX_U8_U8_16x16. Any other height trips assert(0).
 * stride is passed for both stride arguments of the routine.
 */
static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    if (height == 8) {
        mlib_VideoInterpAveX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
    } else {
        /* Unsupported block height. */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
252
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * 16-pixel-wide averaging, "y2" variant.
 *
 * Forwards to mlib_VideoInterpAveY_U8_U8_16x8 or _16x16 depending on
 * height, handing stride to both stride arguments of the mediaLib call.
 * Any height other than 8 or 16 reaches assert(0).
 *
 * NOTE(review): presumably vertical half-pel interpolation averaged
 * into dest -- confirm against the mediaLib documentation.
 */
static void avg_pixels16_y2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    /* ref is passed through a non-const cast, exactly as before */
    uint8_t *src = (uint8_t *)ref;

    if (height == 8) {
        mlib_VideoInterpAveY_U8_U8_16x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveY_U8_U8_16x16(dest, src, stride, stride);
    } else {
        /* unsupported block height */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
269
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * 16-pixel-wide averaging, "xy2" variant.
 *
 * Forwards to mlib_VideoInterpAveXY_U8_U8_16x8 or _16x16 depending on
 * height, handing stride to both stride arguments of the mediaLib call.
 * Any height other than 8 or 16 reaches assert(0).
 *
 * NOTE(review): presumably diagonal (horizontal + vertical) half-pel
 * interpolation averaged into dest -- confirm against the mediaLib
 * documentation.
 */
static void avg_pixels16_xy2_mlib(uint8_t *dest, const uint8_t *ref,
                                  int stride, int height)
{
    /* ref is passed through a non-const cast, exactly as before */
    uint8_t *src = (uint8_t *)ref;

    if (height == 8) {
        mlib_VideoInterpAveXY_U8_U8_16x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveXY_U8_U8_16x16(dest, src, stride, stride);
    } else {
        /* unsupported block height */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
286
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
287 /* average block, width 8 pixel, height 4/8/16 */
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
288
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * 8-pixel-wide averaging without interpolation.
 *
 * Forwards to mlib_VideoCopyRefAve_U8_U8_8x4, _8x8 or _8x16 depending on
 * height, passing dest, ref and stride.  Any height other than 4, 8 or 16
 * reaches assert(0).
 *
 * NOTE(review): presumably averages the reference block into dest --
 * confirm against the mediaLib documentation.
 */
static void avg_pixels8_mlib(uint8_t *dest, const uint8_t *ref,
                             int stride, int height)
{
    /* ref is passed through a non-const cast, exactly as before */
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoCopyRefAve_U8_U8_8x4(dest, src, stride);
    } else if (height == 8) {
        mlib_VideoCopyRefAve_U8_U8_8x8(dest, src, stride);
    } else if (height == 16) {
        mlib_VideoCopyRefAve_U8_U8_8x16(dest, src, stride);
    } else {
        /* unsupported block height */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
309
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * 8-pixel-wide averaging, "x2" variant.
 *
 * Forwards to mlib_VideoInterpAveX_U8_U8_8x4, _8x8 or _8x16 depending on
 * height, handing stride to both stride arguments of the mediaLib call.
 * Any height other than 4, 8 or 16 reaches assert(0).
 *
 * NOTE(review): presumably horizontal half-pel interpolation averaged
 * into dest -- confirm against the mediaLib documentation.
 */
static void avg_pixels8_x2_mlib(uint8_t *dest, const uint8_t *ref,
                                int stride, int height)
{
    /* ref is passed through a non-const cast, exactly as before */
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoInterpAveX_U8_U8_8x4(dest, src, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpAveX_U8_U8_8x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveX_U8_U8_8x16(dest, src, stride, stride);
    } else {
        /* unsupported block height */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
330
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * 8-pixel-wide averaging, "y2" variant.
 *
 * Forwards to mlib_VideoInterpAveY_U8_U8_8x4, _8x8 or _8x16 depending on
 * height, handing stride to both stride arguments of the mediaLib call.
 * Any height other than 4, 8 or 16 reaches assert(0).
 *
 * NOTE(review): presumably vertical half-pel interpolation averaged
 * into dest -- confirm against the mediaLib documentation.
 */
static void avg_pixels8_y2_mlib(uint8_t *dest, const uint8_t *ref,
                                int stride, int height)
{
    /* ref is passed through a non-const cast, exactly as before */
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoInterpAveY_U8_U8_8x4(dest, src, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpAveY_U8_U8_8x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveY_U8_U8_8x16(dest, src, stride, stride);
    } else {
        /* unsupported block height */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
351
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * 8-pixel-wide averaging, "xy2" variant.
 *
 * Forwards to mlib_VideoInterpAveXY_U8_U8_8x4, _8x8 or _8x16 depending on
 * height, handing stride to both stride arguments of the mediaLib call.
 * Any height other than 4, 8 or 16 reaches assert(0).
 *
 * NOTE(review): presumably diagonal (horizontal + vertical) half-pel
 * interpolation averaged into dest -- confirm against the mediaLib
 * documentation.
 */
static void avg_pixels8_xy2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    /* ref is passed through a non-const cast, exactly as before */
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoInterpAveXY_U8_U8_8x4(dest, src, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpAveXY_U8_U8_8x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveXY_U8_U8_8x16(dest, src, stride, stride);
    } else {
        /* unsupported block height */
        assert(0);
    }
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
372
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
373 /* swap byte order of a buffer */
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
374
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
/*
 * Byte-swap a buffer of 32-bit words.
 *
 * Thin wrapper: hands dst, src and w straight to
 * mlib_VectorReverseByteOrder_U32_U32.
 * NOTE(review): w is presumably the number of 32-bit elements, not
 * bytes -- confirm against the mediaLib documentation.
 */
static void bswap_buf_mlib(uint32_t *dst, uint32_t *src, int w)
{
    mlib_VectorReverseByteOrder_U32_U32(dst, src, w);
}
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
379
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
380 /* transformations */
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
381
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 929
diff changeset
382 static void ff_idct_put_mlib(uint8_t *dest, int line_size, DCTELEM *data)
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
383 {
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
384 int i;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
385 uint8_t *cm = cropTbl + MAX_NEG_CROP;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
386
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
387 mlib_VideoIDCT8x8_S16_S16 (data, data);
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
388
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
389 for(i=0;i<8;i++) {
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
390 dest[0] = cm[data[0]];
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
391 dest[1] = cm[data[1]];
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
392 dest[2] = cm[data[2]];
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
393 dest[3] = cm[data[3]];
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
394 dest[4] = cm[data[4]];
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
395 dest[5] = cm[data[5]];
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
396 dest[6] = cm[data[6]];
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
397 dest[7] = cm[data[7]];
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
398
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
399 dest += line_size;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
400 data += 8;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
401 }
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
402 }
735
b4bf95260ffe 16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents: 730
diff changeset
403
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 929
diff changeset
404 static void ff_idct_add_mlib(uint8_t *dest, int line_size, DCTELEM *data)
730
b24ea84b23e2 trying to fix the non-x86 IDCTs (untested)
michaelni
parents: 682
diff changeset
405 {
b24ea84b23e2 trying to fix the non-x86 IDCTs (untested)
michaelni
parents: 682
diff changeset
406 mlib_VideoIDCT8x8_S16_S16 (data, data);
929
c35f06d95e64 mlib fix patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents: 856
diff changeset
407 mlib_VideoAddBlock_U8_S16(dest, (mlib_s16 *)data, line_size);
730
b24ea84b23e2 trying to fix the non-x86 IDCTs (untested)
michaelni
parents: 682
diff changeset
408 }
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
409
1889
c9b2debe7b2b update the API for the idct function
melanson
parents: 1515
diff changeset
410 static void ff_idct_mlib(DCTELEM *data)
1324
7d328fd9d8a5 the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents: 1276
diff changeset
411 {
7d328fd9d8a5 the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents: 1276
diff changeset
412 mlib_VideoIDCT8x8_S16_S16 (data, data);
7d328fd9d8a5 the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents: 1276
diff changeset
413 }
7d328fd9d8a5 the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents: 1276
diff changeset
414
735
b4bf95260ffe 16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents: 730
diff changeset
415 static void ff_fdct_mlib(DCTELEM *data)
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
416 {
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
417 mlib_VideoDCT8x8_S16_S16 (data, data);
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
418 }
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
419
1276
d5719a953ee0 * compile fixes by Mitch at Bits.COM
kabi
parents: 1064
diff changeset
420 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
421 {
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
422 c->get_pixels = get_pixels_mlib;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
423 c->diff_pixels = diff_pixels_mlib;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
424 c->add_pixels_clamped = add_pixels_clamped_mlib;
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
425
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
426 c->put_pixels_tab[0][0] = put_pixels16_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
427 c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
428 c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
429 c->put_pixels_tab[0][3] = put_pixels16_xy2_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
430 c->put_pixels_tab[1][0] = put_pixels8_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
431 c->put_pixels_tab[1][1] = put_pixels8_x2_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
432 c->put_pixels_tab[1][2] = put_pixels8_y2_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
433 c->put_pixels_tab[1][3] = put_pixels8_xy2_mlib;
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
434
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
435 c->avg_pixels_tab[0][0] = avg_pixels16_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
436 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
437 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
438 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
439 c->avg_pixels_tab[1][0] = avg_pixels8_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
440 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
441 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
442 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
443
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
444 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
445 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 768
diff changeset
446
1515
e94e299aee40 optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents: 1324
diff changeset
447 c->bswap_buf = bswap_buf_mlib;
88
06f63b58d2a8 mlib merge
glantau
parents:
diff changeset
448 }
628
f596db4aa871 sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents: 429
diff changeset
449
f596db4aa871 sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents: 429
diff changeset
450 void MPV_common_init_mlib(MpegEncContext *s)
f596db4aa871 sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents: 429
diff changeset
451 {
f596db4aa871 sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents: 429
diff changeset
452 if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){
1276
d5719a953ee0 * compile fixes by Mitch at Bits.COM
kabi
parents: 1064
diff changeset
453 s->dsp.fdct = ff_fdct_mlib;
628
f596db4aa871 sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents: 429
diff changeset
454 }
730
b24ea84b23e2 trying to fix the non-x86 IDCTs (untested)
michaelni
parents: 682
diff changeset
455
735
b4bf95260ffe 16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents: 730
diff changeset
456 if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){
1276
d5719a953ee0 * compile fixes by Mitch at Bits.COM
kabi
parents: 1064
diff changeset
457 s->dsp.idct_put= ff_idct_put_mlib;
d5719a953ee0 * compile fixes by Mitch at Bits.COM
kabi
parents: 1064
diff changeset
458 s->dsp.idct_add= ff_idct_add_mlib;
1324
7d328fd9d8a5 the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents: 1276
diff changeset
459 s->dsp.idct = ff_idct_mlib;
1276
d5719a953ee0 * compile fixes by Mitch at Bits.COM
kabi
parents: 1064
diff changeset
460 s->dsp.idct_permutation_type= FF_NO_IDCT_PERM;
730
b24ea84b23e2 trying to fix the non-x86 IDCTs (untested)
michaelni
parents: 682
diff changeset
461 }
628
f596db4aa871 sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents: 429
diff changeset
462 }