Mercurial > libavcodec.hg
annotate dsputil.h @ 2497:69adfbbdcdeb libavcodec
- samples from mplayer ftp in the "adv" profile seem to have profile=2,
which isn't the advanced one; and indeed, using adv. profile parser fails.
Using normal parser works, and that's what is done
- attempt at taking care of stride for NORM2 bitplane decoding
- duplication of much code from msmpeg4.c; this code isn't yet used, but
goes down as far as the block layer (mainly Transform Type stuff, the
remains are wild editing without checking). Unusable yet, and lacks the AC
decoding (but a step further in bitstream parsing)
patch by anonymous
author | michael |
---|---|
date | Fri, 04 Feb 2005 02:20:38 +0000 |
parents | db2cf6005d19 |
children | 72e6ffa1f3a5 |
rev | line source |
---|---|
429 | 1 /* |
2 * DSP utils | |
3 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard. | |
1739
07a484280a82
copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents:
1729
diff
changeset
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
429 | 5 * |
6 * This library is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2 of the License, or (at your option) any later version. | |
10 * | |
11 * This library is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with this library; if not, write to the Free Software | |
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
19 */ | |
1102 | 20 |
21 /** | |
22 * @file dsputil.h | |
1106 | 23 * DSP utils. |
1213 | 24 * Note: many functions in here may use MMX, which trashes the FPU state; it is |
25 * absolutely necessary to call emms_c() between dsp and float/double code. | |
1102 | 26 */ |
27 | |
0 | 28 #ifndef DSPUTIL_H |
29 #define DSPUTIL_H | |
30 | |
31 #include "common.h" | |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
32 #include "avcodec.h" |
0 | 33 |
1102 | 34 |
255 | 35 //#define DEBUG |
0 | 36 /* dct code */ |
37 typedef short DCTELEM; | |
38 | |
474
11dbd00682fc
avoid name clash with libjpeg - added missing externs
bellard
parents:
429
diff
changeset
|
39 void fdct_ifast (DCTELEM *data); |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1567
diff
changeset
|
40 void fdct_ifast248 (DCTELEM *data); |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
623
diff
changeset
|
41 void ff_jpeg_fdct_islow (DCTELEM *data); |
1567 | 42 void ff_fdct248_islow (DCTELEM *data); |
0 | 43 |
44 void j_rev_dct (DCTELEM *data); | |
2256 | 45 void j_rev_dct4 (DCTELEM *data); |
2257 | 46 void j_rev_dct2 (DCTELEM *data); |
2259 | 47 void j_rev_dct1 (DCTELEM *data); |
0 | 48 |
687
9abb13c21fbe
fdct_mmx -> ff_fdct_mmx (renamed to avoid namespace conflict with xvid)
arpi_esp
parents:
675
diff
changeset
|
49 void ff_fdct_mmx(DCTELEM *block); |
1565 | 50 void ff_fdct_mmx2(DCTELEM *block); |
1765
e31754bc5b65
SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents:
1739
diff
changeset
|
51 void ff_fdct_sse2(DCTELEM *block); |
0 | 52 |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
53 void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
54 void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
55 void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
56 |
34 | 57 /* encoding scans */ |
1064 | 58 extern const uint8_t ff_alternate_horizontal_scan[64]; |
59 extern const uint8_t ff_alternate_vertical_scan[64]; | |
60 extern const uint8_t ff_zigzag_direct[64]; | |
1567 | 61 extern const uint8_t ff_zigzag248_direct[64]; |
190
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
62 |
0 | 63 /* pixel operations */ |
2090 | 64 #define MAX_NEG_CROP 1024 |
0 | 65 |
66 /* temporary */ | |
1064 | 67 extern uint32_t squareTbl[512]; |
68 extern uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; | |
0 | 69 |
1866
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
70 /* VP3 DSP functions */ |
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
71 void vp3_dsp_init_c(void); |
1977 | 72 void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix, |
73 int coeff_count, DCTELEM *output_data); | |
1866
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
74 |
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
75 void vp3_dsp_init_mmx(void); |
1977 | 76 void vp3_idct_mmx(int16_t *input_data, int16_t *dequant_matrix, |
77 int coeff_count, DCTELEM *output_data); | |
1866
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
78 |
1972 | 79 void vp3_dsp_init_sse2(void); |
1977 | 80 void vp3_idct_sse2(int16_t *input_data, int16_t *dequant_matrix, |
81 int coeff_count, DCTELEM *output_data); | |
0 | 82 |
675 | 83 /* minimum alignment rules ;) |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
84 if you notice errors in the alignment rules, need more alignment for some asm code on some CPU, |
675 | 85 or need to use a function with less aligned data, then send a mail to the ffmpeg-dev list, ... |
86 | |
87 !warning: these alignments might not match reality (an attribute((align)) may be missing somewhere); | |
88 I (michael) didn't check them, these are just the alignments which I think could be reached easily ... | |
89 | |
90 !future video codecs might need functions with less strict alignment | |
91 */ | |
0 | 92 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
93 /* |
1064 | 94 void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size); |
95 void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); | |
96 void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); | |
97 void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); | |
296 | 98 void clear_blocks_c(DCTELEM *blocks); |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
99 */ |
0 | 100 |
101 /* add and put pixel (decoding) */ | |
675 | 102 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 |
1709 | 103 //h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4 |
1064 | 104 typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h); |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
105 typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); |
1064 | 106 typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); |
1168 | 107 typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); |
2415 | 108 typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); |
109 typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets); | |
0 | 110 |
984 | 111 #define DEF_OLD_QPEL(name)\ |
1064 | 112 void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ |
113 void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ | |
114 void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
115 |
984 | 116 DEF_OLD_QPEL(qpel16_mc11_old_c) |
117 DEF_OLD_QPEL(qpel16_mc31_old_c) | |
118 DEF_OLD_QPEL(qpel16_mc12_old_c) | |
119 DEF_OLD_QPEL(qpel16_mc32_old_c) | |
120 DEF_OLD_QPEL(qpel16_mc13_old_c) | |
121 DEF_OLD_QPEL(qpel16_mc33_old_c) | |
122 DEF_OLD_QPEL(qpel8_mc11_old_c) | |
123 DEF_OLD_QPEL(qpel8_mc31_old_c) | |
124 DEF_OLD_QPEL(qpel8_mc12_old_c) | |
125 DEF_OLD_QPEL(qpel8_mc32_old_c) | |
126 DEF_OLD_QPEL(qpel8_mc13_old_c) | |
127 DEF_OLD_QPEL(qpel8_mc33_old_c) | |
651 | 128 |
129 #define CALL_2X_PIXELS(a, b, n)\ | |
130 static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
131 b(block , pixels , line_size, h);\ | |
132 b(block+n, pixels+n, line_size, h);\ | |
133 } | |
255 | 134 |
0 | 135 /* motion estimation */ |
1709 | 136 // h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2 |
137 // although currently h<4 is not used, as functions with width <8 are neither used nor implemented | |
1708 | 138 typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; |
936 | 139 |
1168 | 140 |
1102 | 141 /** |
142 * DSPContext. | |
143 */ | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
144 typedef struct DSPContext { |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
145 /* pixel ops : interface with DCT */ |
1064 | 146 void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); |
147 void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); | |
148 void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
149 void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); |
1064 | 150 void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); |
1136 | 151 /** |
152 * translational global motion compensation. | |
153 */ | |
1064 | 154 void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); |
1136 | 155 /** |
156 * global motion compensation. | |
157 */ | |
1064 | 158 void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
159 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
160 void (*clear_blocks)(DCTELEM *blocks/*align 16*/); |
1064 | 161 int (*pix_sum)(uint8_t * pix, int line_size); |
162 int (*pix_norm1)(uint8_t * pix, int line_size); | |
1708 | 163 // 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4 |
164 | |
1729 | 165 me_cmp_func sad[5]; /* identical to pix_absAxA except additional void * */ |
166 me_cmp_func sse[5]; | |
167 me_cmp_func hadamard8_diff[5]; | |
168 me_cmp_func dct_sad[5]; | |
169 me_cmp_func quant_psnr[5]; | |
170 me_cmp_func bit[5]; | |
171 me_cmp_func rd[5]; | |
172 me_cmp_func vsad[5]; | |
173 me_cmp_func vsse[5]; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
174 me_cmp_func nsse[5]; |
2184 | 175 me_cmp_func w53[5]; |
176 me_cmp_func w97[5]; | |
2382 | 177 me_cmp_func dct_max[5]; |
936 | 178 |
1708 | 179 me_cmp_func me_pre_cmp[5]; |
180 me_cmp_func me_cmp[5]; | |
181 me_cmp_func me_sub_cmp[5]; | |
182 me_cmp_func mb_cmp[5]; | |
1729 | 183 me_cmp_func ildct_cmp[5]; //only width 16 used |
2382 | 184 me_cmp_func frame_skip_cmp[5]; //only width 8 used |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
185 |
1136 | 186 /** |
187 * Halfpel motion compensation with rounding (a+b+1)>>1. | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
188 * this is an array[4][4] of motion compensation functions for 4 |
1713 | 189 * horizontal blocksizes (8,16) and the 4 halfpel positions<br> |
1213 | 190 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
1136 | 191 * @param block destination where the result is stored |
192 * @param pixels source | |
193 * @param line_size number of bytes in a horizontal line of block | |
194 * @param h height | |
195 */ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
196 op_pixels_func put_pixels_tab[4][4]; |
1136 | 197 |
198 /** | |
199 * Halfpel motion compensation with rounding (a+b+1)>>1. | |
1320 | 200 * This is an array[4][4] of motion compensation functions for 4 |
1713 | 201 * horizontal blocksizes (8,16) and the 4 halfpel positions<br> |
1213 | 202 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
1136 | 203 * @param block destination into which the result is averaged (a+b+1)>>1 |
204 * @param pixels source | |
205 * @param line_size number of bytes in a horizontal line of block | |
206 * @param h height | |
207 */ | |
1319 | 208 op_pixels_func avg_pixels_tab[4][4]; |
1136 | 209 |
210 /** | |
211 * Halfpel motion compensation with no rounding (a+b)>>1. | |
1225 | 212 * this is an array[4][4] of motion compensation functions; the entries for the 2 |
213 * horizontal blocksizes (8,16) and the 4 halfpel positions are indexed as<br> | |
1213 | 214 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
1136 | 215 * @param block destination where the result is stored |
216 * @param pixels source | |
217 * @param line_size number of bytes in a horizontal line of block | |
218 * @param h height | |
219 */ | |
2075 | 220 op_pixels_func put_no_rnd_pixels_tab[4][4]; |
1136 | 221 |
222 /** | |
223 * Halfpel motion compensation with no rounding (a+b)>>1. | |
1225 | 224 * this is an array[4][4] of motion compensation functions; the entries for the 2 |
225 * horizontal blocksizes (8,16) and the 4 halfpel positions are indexed as<br> | |
1213 | 226 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
1136 | 227 * @param block destination into which the result is averaged (a+b)>>1 |
228 * @param pixels source | |
229 * @param line_size number of bytes in a horizontal line of block | |
230 * @param h height | |
231 */ | |
2075 | 232 op_pixels_func avg_no_rnd_pixels_tab[4][4]; |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
233 |
1864 | 234 void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h); |
235 | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
236 /** |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
237 * Thirdpel motion compensation with rounding (a+b+1)>>1. |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
238 * this is an array[11] of motion compensation functions for the 9 thirdpel positions<br> |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
239 * *pixels_tab[ xthirdpel + 4*ythirdpel ] |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
240 * @param block destination where the result is stored |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
241 * @param pixels source |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
242 * @param line_size number of bytes in a horizontal line of block |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
243 * @param h height |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
244 */ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
245 tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width? |
1319 | 246 tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width? |
247 | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
248 qpel_mc_func put_qpel_pixels_tab[2][16]; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
249 qpel_mc_func avg_qpel_pixels_tab[2][16]; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
250 qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
251 qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; |
936 | 252 qpel_mc_func put_mspel_pixels_tab[8]; |
1168 | 253 |
254 /** | |
255 * h264 chroma MC | |
256 */ | |
257 h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; | |
258 h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
259 |
1168 | 260 qpel_mc_func put_h264_qpel_pixels_tab[3][16]; |
261 qpel_mc_func avg_h264_qpel_pixels_tab[3][16]; | |
262 | |
2415 | 263 h264_weight_func weight_h264_pixels_tab[10]; |
264 h264_biweight_func biweight_h264_pixels_tab[10]; | |
265 | |
1708 | 266 me_cmp_func pix_abs[2][4]; |
866 | 267 |
268 /* huffyuv specific */ | |
269 void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); | |
936 | 270 void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); |
1527 | 271 /** |
272 * subtract huffyuv's variant of median prediction | |
273 * note, this might read from src1[-1], src2[-1] | |
274 */ | |
275 void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top); | |
1273 | 276 void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w); |
1092 | 277 |
1644 | 278 void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); |
279 void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); | |
280 | |
2045 | 281 void (*h261_loop_filter)(uint8_t *src, int stride); |
2044
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
2024
diff
changeset
|
282 |
1092 | 283 /* (I)DCT */ |
284 void (*fdct)(DCTELEM *block/* align 16*/); | |
1567 | 285 void (*fdct248)(DCTELEM *block/* align 16*/); |
1102 | 286 |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1320
diff
changeset
|
287 /* IDCT really*/ |
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1320
diff
changeset
|
288 void (*idct)(DCTELEM *block/* align 16*/); |
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1320
diff
changeset
|
289 |
1102 | 290 /** |
1104 | 291 * block -> idct -> clip to unsigned 8 bit -> dest. |
1102 | 292 * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...) |
1111 | 293 * @param line_size size in bytes of a horizontal line of dest |
1102 | 294 */ |
1092 | 295 void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); |
1102 | 296 |
297 /** | |
298 * block -> idct -> add dest -> clip to unsigned 8 bit -> dest. | |
1111 | 299 * @param line_size size in bytes of a horizontal line of dest |
1102 | 300 */ |
1092 | 301 void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); |
1102 | 302 |
303 /** | |
1104 | 304 * idct input permutation. |
1241 | 305 * several optimized IDCTs need a permuted input (relative to the normal order of the reference |
306 * IDCT). | |
307 * This permutation must be performed before the idct_put/add; note that normally this can be merged | |
308 * with the zigzag/alternate scan<br> | |
1102 | 309 * an example to avoid confusion: |
310 * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...) | |
311 * - (x -> reference dct -> reference idct -> x) | |
312 * - (x -> reference dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x) | |
313 * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...) | |
314 */ | |
1092 | 315 uint8_t idct_permutation[64]; |
316 int idct_permutation_type; | |
317 #define FF_NO_IDCT_PERM 1 | |
318 #define FF_LIBMPEG2_IDCT_PERM 2 | |
319 #define FF_SIMPLE_IDCT_PERM 3 | |
320 #define FF_TRANSPOSE_IDCT_PERM 4 | |
321 | |
1784 | 322 int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); |
323 void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); | |
324 #define BASIS_SHIFT 16 | |
325 #define RECON_SHIFT 6 | |
326 | |
1866
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
327 /** |
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
328 * This function handles any initialization for the VP3 DSP functions. |
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
329 */ |
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
330 void (*vp3_dsp_init)(void); |
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
331 |
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
332 /** |
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
333 * This function is responsible for taking a block of zigzag'd, |
1977 | 334 * quantized DCT coefficients and reconstructing the original block of |
335 * samples. | |
1866
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
336 * @param input_data 64 zigzag'd, quantized DCT coefficients |
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
337 * @param dequant_matrix 64 zigzag'd quantizer coefficients |
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
338 * @param coeff_count index of the last coefficient |
1977 | 339 * @param output_samples space for 64 DCTELEMs where the transformed |
340 * samples will be stored | |
1866
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
341 */ |
1977 | 342 void (*vp3_idct)(int16_t *input_data, int16_t *dequant_matrix, |
343 int coeff_count, DCTELEM *output_samples); | |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
344 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
345 void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride); |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
346 } DSPContext; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
347 |
1201 | 348 void dsputil_static_init(void); |
1092 | 349 void dsputil_init(DSPContext* p, AVCodecContext *avctx); |
0 | 350 |
764 | 351 /** |
352 * permute block according to permutation. | |
353 * @param last last non zero element in scantable order | |
354 */ | |
1064 | 355 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last); |
34 | 356 |
1729 | 357 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); |
358 | |
1264 | 359 #define BYTE_VEC32(c) ((c)*0x01010101UL) |
360 | |
361 static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) | |
362 { | |
363 return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); | |
364 } | |
365 | |
366 static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b) | |
367 { | |
368 return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); | |
369 } | |
370 | |
2184 | 371 static inline int get_penalty_factor(int lambda, int lambda2, int type){ |
372 switch(type&0xFF){ | |
373 default: | |
374 case FF_CMP_SAD: | |
375 return lambda>>FF_LAMBDA_SHIFT; | |
376 case FF_CMP_DCT: | |
377 return (3*lambda)>>(FF_LAMBDA_SHIFT+1); | |
378 case FF_CMP_W53: | |
379 return (4*lambda)>>(FF_LAMBDA_SHIFT); | |
380 case FF_CMP_W97: | |
381 return (2*lambda)>>(FF_LAMBDA_SHIFT); | |
382 case FF_CMP_SATD: | |
383 return (2*lambda)>>FF_LAMBDA_SHIFT; | |
384 case FF_CMP_RD: | |
385 case FF_CMP_PSNR: | |
386 case FF_CMP_SSE: | |
387 case FF_CMP_NSSE: | |
388 return lambda2>>FF_LAMBDA_SHIFT; | |
389 case FF_CMP_BIT: | |
390 return 1; | |
391 } | |
392 } | |
393 | |
1102 | 394 /** |
1104 | 395 * Empty mmx state. |
1102 | 396 * this must be called between any dsp function and float/double code. |
397 * for example sin(); dsp->idct_put(); emms_c(); cos() | |
398 */ | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
399 #define emms_c() |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
400 |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
984
diff
changeset
|
401 /* should be defined by architectures supporting |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
984
diff
changeset
|
402 one or more MultiMedia extension */ |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
984
diff
changeset
|
403 int mm_support(void); |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
984
diff
changeset
|
404 |
1974
8c5489b2cf3e
move __align16 some place where non-MMX machines can see it
melanson
parents:
1972
diff
changeset
|
405 #define __align16 __attribute__ ((aligned (16))) |
8c5489b2cf3e
move __align16 some place where non-MMX machines can see it
melanson
parents:
1972
diff
changeset
|
406 |
62 | 407 #if defined(HAVE_MMX) |
0 | 408 |
862 | 409 #undef emms_c |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
410 |
0 | 411 #define MM_MMX 0x0001 /* standard MMX */ |
412 #define MM_3DNOW 0x0004 /* AMD 3DNOW */ | |
413 #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ | |
414 #define MM_SSE 0x0008 /* SSE functions */ | |
415 #define MM_SSE2 0x0010 /* PIV SSE2 functions */ | |
2388 | 416 #define MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ |
0 | 417 |
418 extern int mm_flags; | |
419 | |
1064 | 420 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); |
421 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
422 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); |
0 | 423 |
424 static inline void emms(void) | |
425 { | |
6
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
426 __asm __volatile ("emms;":::"memory"); |
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
427 } |
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
428 |
936 | 429 |
6
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
430 #define emms_c() \ |
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
431 {\ |
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
432 if (mm_flags & MM_MMX)\ |
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
433 emms();\ |
0 | 434 } |
435 | |
436 #define __align8 __attribute__ ((aligned (8))) | |
2324 | 437 #define STRIDE_ALIGN 8 |
0 | 438 |
1092 | 439 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); |
440 void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx); | |
1065 | 441 |
62 | 442 #elif defined(ARCH_ARMV4L) |
443 | |
444 /* This is to use 4 bytes read to the IDCT pointers for some 'zero' | |
1974
8c5489b2cf3e
move __align16 some place where non-MMX machines can see it
melanson
parents:
1972
diff
changeset
|
445 line optimizations */ |
62 | 446 #define __align8 __attribute__ ((aligned (4))) |
2324 | 447 #define STRIDE_ALIGN 4 |
62 | 448 |
1092 | 449 void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx); |
62 | 450 |
88 | 451 #elif defined(HAVE_MLIB) |
452 | |
453 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ | |
454 #define __align8 __attribute__ ((aligned (8))) | |
2324 | 455 #define STRIDE_ALIGN 8 |
88 | 456 |
1092 | 457 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); |
88 | 458 |
1959
55b7435c59b8
VIS optimized motion compensation code. by (David S. Miller <davem at redhat dot com>)
michael
parents:
1879
diff
changeset
|
459 #elif defined(ARCH_SPARC) |
55b7435c59b8
VIS optimized motion compensation code. by (David S. Miller <davem at redhat dot com>)
michael
parents:
1879
diff
changeset
|
460 |
55b7435c59b8
VIS optimized motion compensation code. by (David S. Miller <davem at redhat dot com>)
michael
parents:
1879
diff
changeset
|
461 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ |
55b7435c59b8
VIS optimized motion compensation code. by (David S. Miller <davem at redhat dot com>)
michael
parents:
1879
diff
changeset
|
462 #define __align8 __attribute__ ((aligned (8))) |
2324 | 463 #define STRIDE_ALIGN 8 |
1959
55b7435c59b8
VIS optimized motion compensation code. by (David S. Miller <davem at redhat dot com>)
michael
parents:
1879
diff
changeset
|
464 void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); |
55b7435c59b8
VIS optimized motion compensation code. by (David S. Miller <davem at redhat dot com>)
michael
parents:
1879
diff
changeset
|
465 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
466 #elif defined(ARCH_ALPHA) |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
467 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
468 #define __align8 __attribute__ ((aligned (8))) |
2324 | 469 #define STRIDE_ALIGN 8 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
470 |
1092 | 471 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
472 |
623
92e99e506920
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
612
diff
changeset
|
473 #elif defined(ARCH_POWERPC) |
92e99e506920
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
612
diff
changeset
|
474 |
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
884
diff
changeset
|
475 #define MM_ALTIVEC 0x0001 /* standard AltiVec */ |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
884
diff
changeset
|
476 |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
884
diff
changeset
|
477 extern int mm_flags; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
884
diff
changeset
|
478 |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1008
diff
changeset
|
479 #if defined(HAVE_ALTIVEC) && !defined(CONFIG_DARWIN) |
1653 | 480 #define pixel altivec_pixel |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1008
diff
changeset
|
481 #include <altivec.h> |
1653 | 482 #undef pixel |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1008
diff
changeset
|
483 #endif |
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1008
diff
changeset
|
484 |
623
92e99e506920
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
612
diff
changeset
|
485 #define __align8 __attribute__ ((aligned (16))) |
2324 | 486 #define STRIDE_ALIGN 16 |
623
92e99e506920
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
612
diff
changeset
|
487 |
1092 | 488 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); |
623
92e99e506920
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
612
diff
changeset
|
489 |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
687
diff
changeset
|
490 #elif defined(HAVE_MMI) |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
687
diff
changeset
|
491 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
687
diff
changeset
|
492 #define __align8 __attribute__ ((aligned (16))) |
2324 | 493 #define STRIDE_ALIGN 16 |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
687
diff
changeset
|
494 |
1092 | 495 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
687
diff
changeset
|
496 |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1241
diff
changeset
|
497 #elif defined(ARCH_SH4) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1241
diff
changeset
|
498 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1241
diff
changeset
|
499 #define __align8 __attribute__ ((aligned (8))) |
2324 | 500 #define STRIDE_ALIGN 8 |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1241
diff
changeset
|
501 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1241
diff
changeset
|
502 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1241
diff
changeset
|
503 |
0 | 504 #else |
505 | |
2324 | 506 #define __align8 __attribute__ ((aligned (8))) |
507 #define STRIDE_ALIGN 8 | |
0 | 508 |
509 #endif | |
510 | |
493
873b9075d853
move unaligned access macros to dsputil.h - added unaligned 32 bit store
bellard
parents:
480
diff
changeset
|
/*
 * Unaligned memory access helpers.
 *
 * LD16/LD32/LD64 read a 16/32/64-bit value from an arbitrary address and
 * ST32 stores a 32-bit value to one.  Every macro expands to a single fully
 * parenthesized expression, so it can be used inside larger expressions
 * (ST32 yields the stored value, like a plain assignment).
 */
#ifdef __GNUC__

/* Packed wrappers: the compiler must assume alignment 1 for the member and
   emits whatever access sequence the target needs. */
struct unaligned_64 { uint64_t l; } __attribute__((packed));
struct unaligned_32 { uint32_t l; } __attribute__((packed));
struct unaligned_16 { uint16_t l; } __attribute__((packed));

#define LD16(a) (((const struct unaligned_16 *) (a))->l)
#define LD32(a) (((const struct unaligned_32 *) (a))->l)
#define LD64(a) (((const struct unaligned_64 *) (a))->l)

#define ST32(a, b) ((((struct unaligned_32 *) (a))->l) = (b))

#else /* __GNUC__ */

/* Plain pointer casts; NOTE(review): this is only valid on targets that
   tolerate unaligned loads/stores - confirm for any new non-GCC port. */
#define LD16(a) (*((const uint16_t*)(a)))
#define LD32(a) (*((const uint32_t*)(a)))
#define LD64(a) (*((const uint64_t*)(a)))

#define ST32(a, b) (*((uint32_t*)(a)) = (b))

#endif /* !__GNUC__ */
873b9075d853
move unaligned access macros to dsputil.h - added unaligned 32 bit store
bellard
parents:
480
diff
changeset
|
532 |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
533 /* PSNR */ |
1064 | 534 void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
535 int orig_linesize[3], int coded_linesize, |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
536 AVCodecContext *avctx); |
781 | 537 |
538 /* FFT computation */ | |
539 | |
540 /* NOTE: soon integer code will be added, so you must use the | |
541 FFTSample type */ | |
/* Scalar type of every FFT/MDCT sample. */
typedef float FFTSample;

/* One complex value: real part first, imaginary part second. */
typedef struct FFTComplex {
    FFTSample re;
    FFTSample im;
} FFTComplex;

/* State of one FFT instance. */
typedef struct FFTContext {
    int nbits;              /* NOTE(review): presumably log2 of the transform size - confirm in ff_fft_init() */
    int inverse;
    uint16_t *revtab;
    FFTComplex *exptab;
    FFTComplex *exptab1;    /* only used by SSE code */
    /* implementation that ff_fft_calc() dispatches to */
    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
} FFTContext;
556 | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1866
diff
changeset
|
557 int ff_fft_init(FFTContext *s, int nbits, int inverse); |
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1866
diff
changeset
|
558 void ff_fft_permute(FFTContext *s, FFTComplex *z); |
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1866
diff
changeset
|
559 void ff_fft_calc_c(FFTContext *s, FFTComplex *z); |
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1866
diff
changeset
|
560 void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); |
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1866
diff
changeset
|
561 void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z); |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
954
diff
changeset
|
562 |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1866
diff
changeset
|
563 static inline void ff_fft_calc(FFTContext *s, FFTComplex *z) |
781 | 564 { |
565 s->fft_calc(s, z); | |
566 } | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1866
diff
changeset
|
567 void ff_fft_end(FFTContext *s); |
781 | 568 |
569 /* MDCT computation */ | |
570 | |
571 typedef struct MDCTContext { | |
572 int n; /* size of MDCT (i.e. number of input data * 2) */ | |
573 int nbits; /* n = 2^nbits */ | |
574 /* pre/post rotation tables */ | |
575 FFTSample *tcos; | |
576 FFTSample *tsin; | |
577 FFTContext fft; | |
578 } MDCTContext; | |
579 | |
794 | 580 int ff_mdct_init(MDCTContext *s, int nbits, int inverse); |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
581 void ff_imdct_calc(MDCTContext *s, FFTSample *output, |
781 | 582 const FFTSample *input, FFTSample *tmp); |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
583 void ff_mdct_calc(MDCTContext *s, FFTSample *out, |
781 | 584 const FFTSample *input, FFTSample *tmp); |
794 | 585 void ff_mdct_end(MDCTContext *s); |
781 | 586 |
/*
 * Define a static function name16 that calls name8 on two horizontally
 * adjacent halves (byte offsets 0 and 8, same stride and height h) and
 * returns the sum of the two results.
 */
#define WARPER8_16(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    const int left  = name8(s, dst    , src    , stride, h);\
    const int right = name8(s, dst + 8, src + 8, stride, h);\
    return left + right;\
}
592 | |
/*
 * Define a static function name16 that sums name8 over 8-row squares:
 * always the two squares of the top row (byte offsets 0 and 8), and, only
 * when h is 16, also the two squares starting 8 lines further down.
 * name8 is always called with a height of 8.
 */
#define WARPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    int total = name8(s, dst, src, stride, 8);\
    total += name8(s, dst + 8, src + 8, stride, 8);\
    if (h != 16)\
        return total;\
    dst += 8*stride;\
    src += 8*stride;\
    total += name8(s, dst, src, stride, 8);\
    total += name8(s, dst + 8, src + 8, stride, 8);\
    return total;\
}
606 | |
838
b78812db886f
lrintf detection (based upon a patch by Franois Revol <revol at free dot fr>)
michaelni
parents:
802
diff
changeset
|
607 #ifndef HAVE_LRINTF |
796
8a5b70c68fbd
added lrintf for non ISOC libcs (fixme: find a better test)
bellard
parents:
794
diff
changeset
|
608 /* XXX: add ISOC specific test to avoid specific BSD testing. */ |
8a5b70c68fbd
added lrintf for non ISOC libcs (fixme: find a better test)
bellard
parents:
794
diff
changeset
|
609 /* better than nothing implementation. */ |
802 | 610 /* btw, rintf() is existing on fbsd too -- alex */ |
2185 | 611 static always_inline long int lrintf(float x) |
796
8a5b70c68fbd
added lrintf for non ISOC libcs (fixme: find a better test)
bellard
parents:
794
diff
changeset
|
612 { |
1040
998d5035b15b
win32: rint() does not seem to be defined with mingw32-gcc 2.95 - do you have a better solution ?
bellard
parents:
1033
diff
changeset
|
613 #ifdef CONFIG_WIN32 |
2185 | 614 # ifdef ARCH_X86 |
615 int32_t i; | |
616 asm volatile( | |
617 "fistpl %0\n\t" | |
618 : "=m" (i) : "t" (x) : "st" | |
619 ); | |
620 return i; | |
621 # else | |
1040
998d5035b15b
win32: rint() does not seem to be defined with mingw32-gcc 2.95 - do you have a better solution ?
bellard
parents:
1033
diff
changeset
|
622 /* XXX: incorrect, but make it compile */ |
2185 | 623 return (int)(x + (x < 0 ? -0.5 : 0.5)); |
624 # endif | |
1040
998d5035b15b
win32: rint() does not seem to be defined with mingw32-gcc 2.95 - do you have a better solution ?
bellard
parents:
1033
diff
changeset
|
625 #else |
796
8a5b70c68fbd
added lrintf for non ISOC libcs (fixme: find a better test)
bellard
parents:
794
diff
changeset
|
626 return (int)(rint(x)); |
1040
998d5035b15b
win32: rint() does not seem to be defined with mingw32-gcc 2.95 - do you have a better solution ?
bellard
parents:
1033
diff
changeset
|
627 #endif |
796
8a5b70c68fbd
added lrintf for non ISOC libcs (fixme: find a better test)
bellard
parents:
794
diff
changeset
|
628 } |
2024
f65d87bfdd5a
some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
1984
diff
changeset
|
629 #else |
f65d87bfdd5a
some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
1984
diff
changeset
|
630 #ifndef _ISOC9X_SOURCE |
f65d87bfdd5a
some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
1984
diff
changeset
|
631 #define _ISOC9X_SOURCE |
f65d87bfdd5a
some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
1984
diff
changeset
|
632 #endif |
f65d87bfdd5a
some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
1984
diff
changeset
|
633 #include <math.h> |
0 | 634 #endif |
796
8a5b70c68fbd
added lrintf for non ISOC libcs (fixme: find a better test)
bellard
parents:
794
diff
changeset
|
635 |
8a5b70c68fbd
added lrintf for non ISOC libcs (fixme: find a better test)
bellard
parents:
794
diff
changeset
|
636 #endif |