Mercurial > libavcodec.hg
annotate mlib/dsputil_mlib.c @ 12497:c5ffa8b81f9c libavcodec
Move sse16_sse2() from inline asm to yasm. It is one of the functions causing
Win64/FATE issues.
author | rbultje |
---|---|
date | Fri, 17 Sep 2010 01:44:17 +0000 |
parents | 04423b2f6e0b |
children |
rev | line source |
---|---|
88 | 1 /* |
2 * Sun mediaLib optimized DSP utils | |
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
6763
diff
changeset
|
3 * Copyright (c) 2001 Fabrice Bellard |
88 | 4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
429 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
88 | 11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
88 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 * Lesser General Public License for more details. | |
88 | 16 * |
429 | 17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
88 | 20 */ |
21 | |
6763 | 22 #include "libavcodec/dsputil.h" |
23 #include "libavcodec/mpegvideo.h" | |
88 | 24 |
25 #include <mlib_types.h> | |
26 #include <mlib_status.h> | |
27 #include <mlib_sys.h> | |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
28 #include <mlib_algebra.h> |
88 | 29 #include <mlib_video.h> |
30 | |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
31 /* misc */ |
735
b4bf95260ffe
16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents:
730
diff
changeset
|
32 |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
33 static void get_pixels_mlib(DCTELEM *restrict block, const uint8_t *pixels, int line_size) |
735
b4bf95260ffe
16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents:
730
diff
changeset
|
34 { |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
35 int i; |
88 | 36 |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
37 for (i=0;i<8;i++) { |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
38 mlib_VectorConvert_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)pixels, 8); |
88 | 39 |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
40 pixels += line_size; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
41 block += 8; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
42 } |
88 | 43 } |
44 | |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
45 static void diff_pixels_mlib(DCTELEM *restrict block, const uint8_t *s1, const uint8_t *s2, int line_size) |
735
b4bf95260ffe
16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents:
730
diff
changeset
|
46 { |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
47 int i; |
735
b4bf95260ffe
16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents:
730
diff
changeset
|
48 |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
49 for (i=0;i<8;i++) { |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
50 mlib_VectorSub_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)s1, (mlib_u8 *)s2, 8); |
88 | 51 |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
52 s1 += line_size; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
53 s2 += line_size; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
54 block += 8; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
55 } |
88 | 56 } |
57 | |
1064 | 58 static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int line_size) |
88 | 59 { |
60 mlib_VideoAddBlock_U8_S16(pixels, (mlib_s16 *)block, line_size); | |
61 } | |
62 | |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
63 /* put block, width 16 pixel, height 8/16 */ |
735
b4bf95260ffe
16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents:
730
diff
changeset
|
64 |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/**
 * Select the 16-wide mlib_VideoCopyRef_U8_U8 routine that matches height.
 *
 * Height 8 uses the 16x8 variant and height 16 the 16x16 variant.
 * Any other height reaches assert(0).
 *
 * @param dest   destination buffer, passed first to the mediaLib call
 * @param ref    reference buffer, passed second (const is cast away)
 * @param stride passed through as the third argument
 * @param height block height; only 8 and 16 are handled
 */
static void put_pixels16_mlib(uint8_t *dest, const uint8_t *ref,
                              int stride, int height)
{
    uint8_t *src = (uint8_t *)ref;

    if (height == 8) {
        mlib_VideoCopyRef_U8_U8_16x8(dest, src, stride);
    } else if (height == 16) {
        mlib_VideoCopyRef_U8_U8_16x16(dest, src, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
81 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/**
 * Select the 16-wide mlib_VideoInterpX_U8_U8 routine that matches height.
 *
 * Height 8 uses the 16x8 variant and height 16 the 16x16 variant.
 * Any other height reaches assert(0).
 * stride is supplied for both trailing arguments of the mediaLib call.
 *
 * @param dest   destination buffer, passed first to the mediaLib call
 * @param ref    reference buffer, passed second (const is cast away)
 * @param stride passed as both the third and fourth argument
 * @param height block height; only 8 and 16 are handled
 */
static void put_pixels16_x2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    uint8_t *src = (uint8_t *)ref;

    if (height == 8) {
        mlib_VideoInterpX_U8_U8_16x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpX_U8_U8_16x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
98 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/**
 * Select the 16-wide mlib_VideoInterpY_U8_U8 routine that matches height.
 *
 * Height 8 uses the 16x8 variant and height 16 the 16x16 variant.
 * Any other height reaches assert(0).
 * stride is supplied for both trailing arguments of the mediaLib call.
 *
 * @param dest   destination buffer, passed first to the mediaLib call
 * @param ref    reference buffer, passed second (const is cast away)
 * @param stride passed as both the third and fourth argument
 * @param height block height; only 8 and 16 are handled
 */
static void put_pixels16_y2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    uint8_t *src = (uint8_t *)ref;

    if (height == 8) {
        mlib_VideoInterpY_U8_U8_16x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpY_U8_U8_16x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
115 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/**
 * Select the 16-wide mlib_VideoInterpXY_U8_U8 routine that matches height.
 *
 * Height 8 uses the 16x8 variant and height 16 the 16x16 variant.
 * Any other height reaches assert(0).
 * stride is supplied for both trailing arguments of the mediaLib call.
 *
 * @param dest   destination buffer, passed first to the mediaLib call
 * @param ref    reference buffer, passed second (const is cast away)
 * @param stride passed as both the third and fourth argument
 * @param height block height; only 8 and 16 are handled
 */
static void put_pixels16_xy2_mlib(uint8_t *dest, const uint8_t *ref,
                                  int stride, int height)
{
    uint8_t *src = (uint8_t *)ref;

    if (height == 8) {
        mlib_VideoInterpXY_U8_U8_16x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpXY_U8_U8_16x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
132 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
133 /* put block, width 8 pixel, height 4/8/16 */ |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
134 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/**
 * Select the 8-wide mlib_VideoCopyRef_U8_U8 routine that matches height.
 *
 * Heights 4, 8 and 16 use the 8x4, 8x8 and 8x16 variants respectively.
 * Any other height reaches assert(0).
 *
 * @param dest   destination buffer, passed first to the mediaLib call
 * @param ref    reference buffer, passed second (const is cast away)
 * @param stride passed through as the third argument
 * @param height block height; only 4, 8 and 16 are handled
 */
static void put_pixels8_mlib(uint8_t *dest, const uint8_t *ref,
                             int stride, int height)
{
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoCopyRef_U8_U8_8x4(dest, src, stride);
    } else if (height == 8) {
        mlib_VideoCopyRef_U8_U8_8x8(dest, src, stride);
    } else if (height == 16) {
        mlib_VideoCopyRef_U8_U8_8x16(dest, src, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
155 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/**
 * Select the 8-wide mlib_VideoInterpX_U8_U8 routine that matches height.
 *
 * Heights 4, 8 and 16 use the 8x4, 8x8 and 8x16 variants respectively.
 * Any other height reaches assert(0).
 * stride is supplied for both trailing arguments of the mediaLib call.
 *
 * @param dest   destination buffer, passed first to the mediaLib call
 * @param ref    reference buffer, passed second (const is cast away)
 * @param stride passed as both the third and fourth argument
 * @param height block height; only 4, 8 and 16 are handled
 */
static void put_pixels8_x2_mlib(uint8_t *dest, const uint8_t *ref,
                                int stride, int height)
{
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoInterpX_U8_U8_8x4(dest, src, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpX_U8_U8_8x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpX_U8_U8_8x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
176 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/**
 * Select the 8-wide mlib_VideoInterpY_U8_U8 routine that matches height.
 *
 * Heights 4, 8 and 16 use the 8x4, 8x8 and 8x16 variants respectively.
 * Any other height reaches assert(0).
 * stride is supplied for both trailing arguments of the mediaLib call.
 *
 * @param dest   destination buffer, passed first to the mediaLib call
 * @param ref    reference buffer, passed second (const is cast away)
 * @param stride passed as both the third and fourth argument
 * @param height block height; only 4, 8 and 16 are handled
 */
static void put_pixels8_y2_mlib(uint8_t *dest, const uint8_t *ref,
                                int stride, int height)
{
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoInterpY_U8_U8_8x4(dest, src, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpY_U8_U8_8x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpY_U8_U8_8x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
197 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/**
 * Select the 8-wide mlib_VideoInterpXY_U8_U8 routine that matches height.
 *
 * Heights 4, 8 and 16 use the 8x4, 8x8 and 8x16 variants respectively.
 * Any other height reaches assert(0).
 * stride is supplied for both trailing arguments of the mediaLib call.
 *
 * @param dest   destination buffer, passed first to the mediaLib call
 * @param ref    reference buffer, passed second (const is cast away)
 * @param stride passed as both the third and fourth argument
 * @param height block height; only 4, 8 and 16 are handled
 */
static void put_pixels8_xy2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoInterpXY_U8_U8_8x4(dest, src, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpXY_U8_U8_8x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpXY_U8_U8_8x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
218 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
219 /* average block, width 16 pixel, height 8/16 */ |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
220 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/**
 * Select the 16-wide mlib_VideoCopyRefAve_U8_U8 routine that matches height.
 *
 * Height 8 uses the 16x8 variant and height 16 the 16x16 variant.
 * Any other height reaches assert(0).
 *
 * @param dest   destination buffer, passed first to the mediaLib call
 * @param ref    reference buffer, passed second (const is cast away)
 * @param stride passed through as the third argument
 * @param height block height; only 8 and 16 are handled
 */
static void avg_pixels16_mlib(uint8_t *dest, const uint8_t *ref,
                              int stride, int height)
{
    uint8_t *src = (uint8_t *)ref;

    if (height == 8) {
        mlib_VideoCopyRefAve_U8_U8_16x8(dest, src, stride);
    } else if (height == 16) {
        mlib_VideoCopyRefAve_U8_U8_16x16(dest, src, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
237 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/**
 * Select the 16-wide mlib_VideoInterpAveX_U8_U8 routine that matches height.
 *
 * Height 8 uses the 16x8 variant and height 16 the 16x16 variant.
 * Any other height reaches assert(0).
 * stride is supplied for both trailing arguments of the mediaLib call.
 *
 * @param dest   destination buffer, passed first to the mediaLib call
 * @param ref    reference buffer, passed second (const is cast away)
 * @param stride passed as both the third and fourth argument
 * @param height block height; only 8 and 16 are handled
 */
static void avg_pixels16_x2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    uint8_t *src = (uint8_t *)ref;

    if (height == 8) {
        mlib_VideoInterpAveX_U8_U8_16x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveX_U8_U8_16x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
254 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/*
 * 16-pixel-wide "avg" motion-compensation wrapper, y2 variant.
 * Forwards to the mediaLib InterpAveY routine matching the block height;
 * the same stride is passed for both stride arguments.
 * Only heights 8 and 16 are supported, anything else trips the assert.
 */
static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    /* the mediaLib call is handed a non-const pointer */
    uint8_t *src = (uint8_t *)ref;

    if (height == 8) {
        mlib_VideoInterpAveY_U8_U8_16x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveY_U8_U8_16x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
271 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/*
 * 16-pixel-wide "avg" motion-compensation wrapper, xy2 variant.
 * Forwards to the mediaLib InterpAveXY routine matching the block height;
 * the same stride is passed for both stride arguments.
 * Only heights 8 and 16 are supported, anything else trips the assert.
 */
static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    /* the mediaLib call is handed a non-const pointer */
    uint8_t *src = (uint8_t *)ref;

    if (height == 8) {
        mlib_VideoInterpAveXY_U8_U8_16x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveXY_U8_U8_16x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
288 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
289 /* average block, width 8 pixels, height 4/8/16 */ |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
290 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/*
 * 8-pixel-wide "avg" motion-compensation wrapper, full-pel variant.
 * Forwards to the mediaLib CopyRefAve routine matching the block height.
 * Only heights 4, 8 and 16 are supported, anything else trips the assert.
 */
static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    /* the mediaLib call is handed a non-const pointer */
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoCopyRefAve_U8_U8_8x4(dest, src, stride);
    } else if (height == 8) {
        mlib_VideoCopyRefAve_U8_U8_8x8(dest, src, stride);
    } else if (height == 16) {
        mlib_VideoCopyRefAve_U8_U8_8x16(dest, src, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
311 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/*
 * 8-pixel-wide "avg" motion-compensation wrapper, x2 variant.
 * Forwards to the mediaLib InterpAveX routine matching the block height;
 * the same stride is passed for both stride arguments.
 * Only heights 4, 8 and 16 are supported, anything else trips the assert.
 */
static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    /* the mediaLib call is handed a non-const pointer */
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoInterpAveX_U8_U8_8x4(dest, src, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpAveX_U8_U8_8x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveX_U8_U8_8x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
332 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/*
 * 8-pixel-wide "avg" motion-compensation wrapper, y2 variant.
 * Forwards to the mediaLib InterpAveY routine matching the block height;
 * the same stride is passed for both stride arguments.
 * Only heights 4, 8 and 16 are supported, anything else trips the assert.
 */
static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    /* the mediaLib call is handed a non-const pointer */
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoInterpAveY_U8_U8_8x4(dest, src, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpAveY_U8_U8_8x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveY_U8_U8_8x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
353 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
/*
 * 8-pixel-wide "avg" motion-compensation wrapper, xy2 variant.
 * Forwards to the mediaLib InterpAveXY routine matching the block height;
 * the same stride is passed for both stride arguments.
 * Only heights 4, 8 and 16 are supported, anything else trips the assert.
 */
static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    /* the mediaLib call is handed a non-const pointer */
    uint8_t *src = (uint8_t *)ref;

    if (height == 4) {
        mlib_VideoInterpAveXY_U8_U8_8x4(dest, src, stride, stride);
    } else if (height == 8) {
        mlib_VideoInterpAveXY_U8_U8_8x8(dest, src, stride, stride);
    } else if (height == 16) {
        mlib_VideoInterpAveXY_U8_U8_8x16(dest, src, stride, stride);
    } else {
        assert(0);
    }
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
374 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
375 /* swap byte order of a buffer */ |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
376 |
/*
 * Byte-swap a buffer of 32-bit words via mediaLib.
 * dst, src and w are handed straight to
 * mlib_VectorReverseByteOrder_U32_U32(); w is presumably the number of
 * 32-bit elements -- confirm against the mediaLib reference.
 */
static void bswap_buf_mlib(uint32_t *dst, const uint32_t *src, int w)
{
    mlib_VectorReverseByteOrder_U32_U32(dst,
                                        src,
                                        w);
}
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
381 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
382 /* transformations */ |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
383 |
1064 | 384 static void ff_idct_put_mlib(uint8_t *dest, int line_size, DCTELEM *data) |
88 | 385 { |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
386 int i; |
4176 | 387 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
388 |
88 | 389 mlib_VideoIDCT8x8_S16_S16 (data, data); |
2967 | 390 |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
391 for(i=0;i<8;i++) { |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
392 dest[0] = cm[data[0]]; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
393 dest[1] = cm[data[1]]; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
394 dest[2] = cm[data[2]]; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
395 dest[3] = cm[data[3]]; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
396 dest[4] = cm[data[4]]; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
397 dest[5] = cm[data[5]]; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
398 dest[6] = cm[data[6]]; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
399 dest[7] = cm[data[7]]; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
400 |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
401 dest += line_size; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
402 data += 8; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
403 } |
88 | 404 } |
735
b4bf95260ffe
16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents:
730
diff
changeset
|
405 |
1064 | 406 static void ff_idct_add_mlib(uint8_t *dest, int line_size, DCTELEM *data) |
730 | 407 { |
408 mlib_VideoIDCT8x8_S16_S16 (data, data); | |
929
c35f06d95e64
mlib fix patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents:
856
diff
changeset
|
409 mlib_VideoAddBlock_U8_S16(dest, (mlib_s16 *)data, line_size); |
730 | 410 } |
88 | 411 |
1889 | 412 static void ff_idct_mlib(DCTELEM *data) |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1276
diff
changeset
|
413 { |
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1276
diff
changeset
|
414 mlib_VideoIDCT8x8_S16_S16 (data, data); |
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1276
diff
changeset
|
415 } |
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1276
diff
changeset
|
416 |
735
b4bf95260ffe
16x{8,16} {avg,put}_pixels support patch by (Juergen Keil <jk at tools dot de>)
michaelni
parents:
730
diff
changeset
|
417 static void ff_fdct_mlib(DCTELEM *data) |
88 | 418 { |
419 mlib_VideoDCT8x8_S16_S16 (data, data); | |
420 } | |
421 | |
1276 | 422 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx) |
88 | 423 { |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
424 c->get_pixels = get_pixels_mlib; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
425 c->diff_pixels = diff_pixels_mlib; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
426 c->add_pixels_clamped = add_pixels_clamped_mlib; |
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
427 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
428 c->put_pixels_tab[0][0] = put_pixels16_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
429 c->put_pixels_tab[0][1] = put_pixels16_x2_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
430 c->put_pixels_tab[0][2] = put_pixels16_y2_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
431 c->put_pixels_tab[0][3] = put_pixels16_xy2_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
432 c->put_pixels_tab[1][0] = put_pixels8_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
433 c->put_pixels_tab[1][1] = put_pixels8_x2_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
434 c->put_pixels_tab[1][2] = put_pixels8_y2_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
435 c->put_pixels_tab[1][3] = put_pixels8_xy2_mlib; |
88 | 436 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
437 c->avg_pixels_tab[0][0] = avg_pixels16_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
438 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
439 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
440 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
441 c->avg_pixels_tab[1][0] = avg_pixels8_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
442 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
443 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
444 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
445 |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
446 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
447 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
768
diff
changeset
|
448 |
1515
e94e299aee40
optimised versions of the get_pixels, diff_pixels, and bswap_buf
melanson
parents:
1324
diff
changeset
|
449 c->bswap_buf = bswap_buf_mlib; |
88 | 450 } |
628
f596db4aa871
sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents:
429
diff
changeset
|
451 |
f596db4aa871
sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents:
429
diff
changeset
|
452 void MPV_common_init_mlib(MpegEncContext *s) |
f596db4aa871
sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents:
429
diff
changeset
|
453 { |
f596db4aa871
sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents:
429
diff
changeset
|
454 if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){ |
2979 | 455 s->dsp.fdct = ff_fdct_mlib; |
628
f596db4aa871
sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents:
429
diff
changeset
|
456 } |
730 | 457 |
6560
ad2cf074632a
Do not use the mlib IDCT by default. We do not want videos which are only
michael
parents:
6241
diff
changeset
|
458 if(s->avctx->idct_algo==FF_IDCT_MLIB){ |
1276 | 459 s->dsp.idct_put= ff_idct_put_mlib; |
460 s->dsp.idct_add= ff_idct_add_mlib; | |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1276
diff
changeset
|
461 s->dsp.idct = ff_idct_mlib; |
1276 | 462 s->dsp.idct_permutation_type= FF_NO_IDCT_PERM; |
730 | 463 } |
628
f596db4aa871
sun solaris compilation bugfix, patch by (Martin Olschewski <olschewski at zpr dot uni-koeln dot de>)
michaelni
parents:
429
diff
changeset
|
464 } |