Mercurial > libavcodec.hg
annotate dsputil.h @ 11034:fd5921186064 libavcodec
Make the fast loop filter path work with unavailable left MBs.
This prevents the issue with having to switch between slow and
fast code paths in each row.
0.5% faster loopfilter for cathedral
author | michael |
---|---|
date | Thu, 28 Jan 2010 02:15:25 +0000 |
parents | 34a65026fa06 |
children | 7b3f6955462b |
rev | line source |
---|---|
429 | 1 /* |
2 * DSP utils | |
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
8590
diff
changeset
|
3 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard |
1739
07a484280a82
copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents:
1729
diff
changeset
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
429 | 5 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
6 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
7 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
429 | 9 * modify it under the terms of the GNU Lesser General Public |
10 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
429 | 12 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
429 | 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 * Lesser General Public License for more details. | |
17 * | |
18 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3029
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
429 | 21 */ |
1102 | 22 |
23 /** | |
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
8694
diff
changeset
|
24 * @file libavcodec/dsputil.h |
1106 | 25 * DSP utils. |
1213 | 26 * Note: many functions in here may use MMX, which trashes the FPU state; it is |
27 * absolutely necessary to call emms_c() between dsp & float/double code | |
1102 | 28 */ |
29 | |
7760 | 30 #ifndef AVCODEC_DSPUTIL_H |
31 #define AVCODEC_DSPUTIL_H | |
0 | 32 |
8573
2acf0ae7b041
Fix build: Add intreadwrite.h and bswap.h #includes where necessary.
diego
parents:
8567
diff
changeset
|
33 #include "libavutil/intreadwrite.h" |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
34 #include "avcodec.h" |
0 | 35 |
1102 | 36 |
255 | 37 //#define DEBUG |
0 | 38 /* dct code */ |
39 typedef short DCTELEM; | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3175
diff
changeset
|
40 typedef int DWTELEM; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5573
diff
changeset
|
41 typedef short IDWTELEM; |
0 | 42 |
474
11dbd00682fc
avoid name clash with libjpeg - added missing externs
bellard
parents:
429
diff
changeset
|
43 void fdct_ifast (DCTELEM *data); |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1567
diff
changeset
|
44 void fdct_ifast248 (DCTELEM *data); |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
623
diff
changeset
|
45 void ff_jpeg_fdct_islow (DCTELEM *data); |
1567 | 46 void ff_fdct248_islow (DCTELEM *data); |
0 | 47 |
48 void j_rev_dct (DCTELEM *data); | |
2256 | 49 void j_rev_dct4 (DCTELEM *data); |
2257 | 50 void j_rev_dct2 (DCTELEM *data); |
2259 | 51 void j_rev_dct1 (DCTELEM *data); |
5887 | 52 void ff_wmv2_idct_c(DCTELEM *data); |
0 | 53 |
687
9abb13c21fbe
fdct_mmx -> ff_fdct_mmx (renamed to avoid namespace conflict with xvid)
arpi_esp
parents:
675
diff
changeset
|
54 void ff_fdct_mmx(DCTELEM *block); |
1565 | 55 void ff_fdct_mmx2(DCTELEM *block); |
1765
e31754bc5b65
SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents:
1739
diff
changeset
|
56 void ff_fdct_sse2(DCTELEM *block); |
0 | 57 |
2755 | 58 void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride); |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
59 void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); |
3105
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3089
diff
changeset
|
60 void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3089
diff
changeset
|
61 void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
62 void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
63 void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); |
8375
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
64 void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
65 void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
66 void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
67 void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
68 |
7261 | 69 void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, |
70 const float *win, float add_bias, int len); | |
7218 | 71 void ff_float_to_int16_c(int16_t *dst, const float *src, long len); |
7446 | 72 void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels); |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3555
diff
changeset
|
73 |
34 | 74 /* encoding scans */ |
1064 | 75 extern const uint8_t ff_alternate_horizontal_scan[64]; |
76 extern const uint8_t ff_alternate_vertical_scan[64]; | |
77 extern const uint8_t ff_zigzag_direct[64]; | |
1567 | 78 extern const uint8_t ff_zigzag248_direct[64]; |
190
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
79 |
0 | 80 /* pixel operations */ |
2090 | 81 #define MAX_NEG_CROP 1024 |
0 | 82 |
83 /* temporary */ | |
4179 | 84 extern uint32_t ff_squareTbl[512]; |
4176 | 85 extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP]; |
0 | 86 |
1866
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
87 /* VP3 DSP functions */ |
2693 | 88 void ff_vp3_idct_c(DCTELEM *block/* align 16*/); |
89 void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |
90 void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |
0 | 91 |
7995 | 92 void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values); |
93 void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values); | |
94 | |
8785
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
95 /* VP6 DSP functions */ |
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
96 void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride, |
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
97 const int16_t *h_weights, const int16_t *v_weights); |
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
98 |
3245 | 99 /* 1/2^n downscaling functions from imgconvert.c */ |
100 void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | |
101 void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | |
102 void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | |
103 void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | |
104 | |
3248
7aa9f80e7954
mmx implementation of 3-point GMC. (5x faster than C)
lorenm
parents:
3245
diff
changeset
|
105 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, |
7aa9f80e7954
mmx implementation of 3-point GMC. (5x faster than C)
lorenm
parents:
3245
diff
changeset
|
106 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); |
7aa9f80e7954
mmx implementation of 3-point GMC. (5x faster than C)
lorenm
parents:
3245
diff
changeset
|
107 |
675 | 108 /* minimum alignment rules ;) |
6486 | 109 If you notice errors in the align stuff, need more alignment for some ASM code |
110 for some CPU or need to use a function with less aligned data then send a mail | |
111 to the ffmpeg-devel mailing list, ... | |
675 | 112 |
6486 | 113 !warning These alignments might not match reality, (missing attribute((align)) |
114 stuff somewhere possible). | |
6488 | 115 I (Michael) did not check them, these are just the alignments which I think |
6486 | 116 could be reached easily ... |
675 | 117 |
118 !future video codecs might need functions with less strict alignment | |
119 */ | |
0 | 120 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
121 /* |
1064 | 122 void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size); |
123 void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); | |
124 void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); | |
125 void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); | |
296 | 126 void clear_blocks_c(DCTELEM *blocks); |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
127 */ |
0 | 128 |
129 /* add and put pixel (decoding) */ | |
675 | 130 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 |
1709 | 131 //h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4 |
1064 | 132 typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h); |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
133 typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); |
1064 | 134 typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); |
1168 | 135 typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); |
2415 | 136 typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); |
3029 | 137 typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset); |
0 | 138 |
984 | 139 #define DEF_OLD_QPEL(name)\ |
1064 | 140 void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ |
141 void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ | |
142 void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
143 |
984 | 144 DEF_OLD_QPEL(qpel16_mc11_old_c) |
145 DEF_OLD_QPEL(qpel16_mc31_old_c) | |
146 DEF_OLD_QPEL(qpel16_mc12_old_c) | |
147 DEF_OLD_QPEL(qpel16_mc32_old_c) | |
148 DEF_OLD_QPEL(qpel16_mc13_old_c) | |
149 DEF_OLD_QPEL(qpel16_mc33_old_c) | |
150 DEF_OLD_QPEL(qpel8_mc11_old_c) | |
151 DEF_OLD_QPEL(qpel8_mc31_old_c) | |
152 DEF_OLD_QPEL(qpel8_mc12_old_c) | |
153 DEF_OLD_QPEL(qpel8_mc32_old_c) | |
154 DEF_OLD_QPEL(qpel8_mc13_old_c) | |
155 DEF_OLD_QPEL(qpel8_mc33_old_c) | |
651 | 156 |
157 #define CALL_2X_PIXELS(a, b, n)\ | |
158 static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
159 b(block , pixels , line_size, h);\ | |
160 b(block+n, pixels+n, line_size, h);\ | |
161 } | |
255 | 162 |
0 | 163 /* motion estimation */ |
1709 | 164 // h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2 |
5127 | 165 // although currently h<4 is not used as functions with width <8 are neither used nor implemented |
1708 | 166 typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; |
936 | 167 |
1168 | 168 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3175
diff
changeset
|
169 // for snow slices |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3175
diff
changeset
|
170 typedef struct slice_buffer_s slice_buffer; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3175
diff
changeset
|
171 |
1102 | 172 /** |
6438 | 173 * Scantable. |
174 */ | |
175 typedef struct ScanTable{ | |
176 const uint8_t *scantable; | |
177 uint8_t permutated[64]; | |
178 uint8_t raster_end[64]; | |
8590 | 179 #if ARCH_PPC |
6438 | 180 /** Used by dct_quantize_altivec to find last-non-zero */ |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10944
diff
changeset
|
181 DECLARE_ALIGNED(16, uint8_t, inverse)[64]; |
6438 | 182 #endif |
183 } ScanTable; | |
184 | |
185 void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable); | |
186 | |
6445 | 187 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, |
188 int block_w, int block_h, | |
189 int src_x, int src_y, int w, int h); | |
190 | |
6438 | 191 /** |
1102 | 192 * DSPContext. |
193 */ | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
194 typedef struct DSPContext { |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
195 /* pixel ops : interface with DCT */ |
1064 | 196 void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); |
197 void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); | |
198 void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
199 void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); |
1064 | 200 void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); |
2763 | 201 void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size); |
202 void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size); | |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4962
diff
changeset
|
203 int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/); |
1136 | 204 /** |
205 * translational global motion compensation. | |
206 */ | |
1064 | 207 void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); |
1136 | 208 /** |
209 * global motion compensation. | |
210 */ | |
1064 | 211 void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, |
2979 | 212 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); |
8288 | 213 void (*clear_block)(DCTELEM *block/*align 16*/); |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
214 void (*clear_blocks)(DCTELEM *blocks/*align 16*/); |
1064 | 215 int (*pix_sum)(uint8_t * pix, int line_size); |
216 int (*pix_norm1)(uint8_t * pix, int line_size); | |
1708 | 217 // 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4 |
2967 | 218 |
8976
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
219 me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */ |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
220 me_cmp_func sse[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
221 me_cmp_func hadamard8_diff[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
222 me_cmp_func dct_sad[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
223 me_cmp_func quant_psnr[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
224 me_cmp_func bit[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
225 me_cmp_func rd[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
226 me_cmp_func vsad[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
227 me_cmp_func vsse[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
228 me_cmp_func nsse[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
229 me_cmp_func w53[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
230 me_cmp_func w97[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
231 me_cmp_func dct_max[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
232 me_cmp_func dct264_sad[6]; |
936 | 233 |
8976
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
234 me_cmp_func me_pre_cmp[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
235 me_cmp_func me_cmp[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
236 me_cmp_func me_sub_cmp[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
237 me_cmp_func mb_cmp[6]; |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
238 me_cmp_func ildct_cmp[6]; //only width 16 used |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
239 me_cmp_func frame_skip_cmp[6]; //only width 8 used |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
240 |
5255 | 241 int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, |
242 int size); | |
4749 | 243 |
1136 | 244 /** |
245 * Halfpel motion compensation with rounding (a+b+1)>>1. | |
4751 | 246 * this is an array[4][4] of motion compensation functions for 4 |
1713 | 247 * horizontal blocksizes (8,16) and the 4 halfpel positions<br> |
1213 | 248 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
1136 | 249 * @param block destination where the result is stored |
250 * @param pixels source | |
251 * @param line_size number of bytes in a horizontal line of block | |
252 * @param h height | |
253 */ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
254 op_pixels_func put_pixels_tab[4][4]; |
1136 | 255 |
256 /** | |
257 * Halfpel motion compensation with rounding (a+b+1)>>1. | |
2967 | 258 * This is an array[4][4] of motion compensation functions for 4 |
1713 | 259 * horizontal blocksizes (8,16) and the 4 halfpel positions<br> |
1213 | 260 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
1136 | 261 * @param block destination into which the result is averaged (a+b+1)>>1 |
262 * @param pixels source | |
263 * @param line_size number of bytes in a horizontal line of block | |
264 * @param h height | |
265 */ | |
1319 | 266 op_pixels_func avg_pixels_tab[4][4]; |
1136 | 267 |
268 /** | |
269 * Halfpel motion compensation with no rounding (a+b)>>1. | |
4751 | 270 * this is an array[2][4] of motion compensation functions for 2 |
1225 | 271 * horizontal blocksizes (8,16) and the 4 halfpel positions<br> |
1213 | 272 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
1136 | 273 * @param block destination where the result is stored |
274 * @param pixels source | |
275 * @param line_size number of bytes in a horizontal line of block | |
276 * @param h height | |
277 */ | |
2075 | 278 op_pixels_func put_no_rnd_pixels_tab[4][4]; |
1136 | 279 |
280 /** | |
281 * Halfpel motion compensation with no rounding (a+b)>>1. | |
4751 | 282 * this is an array[2][4] of motion compensation functions for 2 |
1225 | 283 * horizontal blocksizes (8,16) and the 4 halfpel positions<br> |
1213 | 284 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] |
1136 | 285 * @param block destination into which the result is averaged (a+b)>>1 |
286 * @param pixels source | |
287 * @param line_size number of bytes in a horizontal line of block | |
288 * @param h height | |
289 */ | |
2075 | 290 op_pixels_func avg_no_rnd_pixels_tab[4][4]; |
2967 | 291 |
1864 | 292 void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h); |
2967 | 293 |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
294 /** |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
295 * Thirdpel motion compensation with rounding (a+b+1)>>1. |
4751 | 296 * this is an array[11] of motion compensation functions for the 9 thirdpel |
297 * positions<br> | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
298 * *pixels_tab[ xthirdpel + 4*ythirdpel ] |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
299 * @param block destination where the result is stored |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
300 * @param pixels source |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
301 * @param line_size number of bytes in a horizontal line of block |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
302 * @param h height |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
303 */ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
304 tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width? |
1319 | 305 tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width? |
306 | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
307 qpel_mc_func put_qpel_pixels_tab[2][16]; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
308 qpel_mc_func avg_qpel_pixels_tab[2][16]; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
309 qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
310 qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; |
936 | 311 qpel_mc_func put_mspel_pixels_tab[8]; |
2967 | 312 |
1168 | 313 /** |
4751 | 314 * h264 Chroma MC |
1168 | 315 */ |
316 h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; | |
9439
ef3a7b711cc0
Rename put_no_rnd_h264_chroma* to reflect its usage in VC1 only
conrad
parents:
9437
diff
changeset
|
317 h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; |
3663 | 318 /* This is really one func used in VC-1 decoding */ |
9439
ef3a7b711cc0
Rename put_no_rnd_h264_chroma* to reflect its usage in VC1 only
conrad
parents:
9437
diff
changeset
|
319 h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3]; |
9440 | 320 h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3]; |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
321 |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3010
diff
changeset
|
322 qpel_mc_func put_h264_qpel_pixels_tab[4][16]; |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3010
diff
changeset
|
323 qpel_mc_func avg_h264_qpel_pixels_tab[4][16]; |
2967 | 324 |
3807
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3746
diff
changeset
|
325 qpel_mc_func put_2tap_qpel_pixels_tab[4][16]; |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3746
diff
changeset
|
326 qpel_mc_func avg_2tap_qpel_pixels_tab[4][16]; |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3746
diff
changeset
|
327 |
2415 | 328 h264_weight_func weight_h264_pixels_tab[10]; |
329 h264_biweight_func biweight_h264_pixels_tab[10]; | |
2967 | 330 |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3279
diff
changeset
|
331 /* AVS specific */ |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3279
diff
changeset
|
332 qpel_mc_func put_cavs_qpel_pixels_tab[2][16]; |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3279
diff
changeset
|
333 qpel_mc_func avg_cavs_qpel_pixels_tab[2][16]; |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3279
diff
changeset
|
334 void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3279
diff
changeset
|
335 void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3279
diff
changeset
|
336 void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3279
diff
changeset
|
337 void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3279
diff
changeset
|
338 void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3279
diff
changeset
|
339 |
1708 | 340 me_cmp_func pix_abs[2][4]; |
2967 | 341 |
866 | 342 /* huffyuv specific */ |
343 void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); | |
6384 | 344 void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w); |
936 | 345 void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); |
1527 | 346 /** |
347 * subtract huffyuv's variant of median prediction | |
348 * note, this might read from src1[-1], src2[-1] | |
349 */ | |
10431 | 350 void (*sub_hfyu_median_prediction)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top); |
351 void (*add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top); | |
10430 | 352 int (*add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left); |
10878
a8620b001ed3
Implement alpha channel decoding for BGR HuffYUV.
astrange
parents:
10827
diff
changeset
|
353 void (*add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha); |
6384 | 354 /* this might write to dst[w] */ |
355 void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp); | |
6241 | 356 void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); |
2633 | 357 |
7579 | 358 void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); |
359 void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0); | |
360 /* v/h_loop_filter_luma_intra: align 16 */ | |
8395
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
361 void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
362 void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); |
7579 | 363 void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); |
364 void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0); | |
365 void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); | |
366 void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); | |
3645
47821be55b6c
mmx implementation of deblocking strength decision.
lorenm
parents:
3574
diff
changeset
|
367 // h264_loop_filter_strength: simd only. the C version is inlined in h264.c |
47821be55b6c
mmx implementation of deblocking strength decision.
lorenm
parents:
3574
diff
changeset
|
368 void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], |
7327
483421b11d98
Fix h264_loop_filter_strength_mmx2() so it works with PAFF.
michael
parents:
7286
diff
changeset
|
369 int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); |
2967 | 370 |
1644 | 371 void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); |
372 void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); | |
373 | |
2045 | 374 void (*h261_loop_filter)(uint8_t *src, int stride); |
2044
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
2024
diff
changeset
|
375 |
5887 | 376 void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale); |
377 void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale); | |
378 | |
7995 | 379 void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values); |
380 void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values); | |
381 | |
8785
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
382 void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride, |
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
383 const int16_t *h_weights,const int16_t *v_weights); |
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
384 |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3555
diff
changeset
|
385 /* assume len is a multiple of 4, and arrays are 16-byte aligned */ |
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
386 void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); |
7563 | 387 void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); |
5737 | 388 /* no alignment needed */ |
10424
94595d0e617c
Move autocorrelation function from flacenc.c to lpc.c. Also rename the
jbr
parents:
10420
diff
changeset
|
389 void (*lpc_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc); |
3574 | 390 /* assume len is a multiple of 8, and arrays are 16-byte aligned */ |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3555
diff
changeset
|
391 void (*vector_fmul)(float *dst, const float *src, int len); |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3555
diff
changeset
|
392 void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3555
diff
changeset
|
393 /* assume len is a multiple of 8, and src arrays are 16-byte aligned */ |
10300
4d1b9ca628fc
Drop unused args from vector_fmul_add_add, simpify code, and rename
mru
parents:
10219
diff
changeset
|
394 void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len); |
7261 | 395 /* assume len is a multiple of 4, and arrays are 16-byte aligned */ |
396 void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len); | |
7564 | 397 /* assume len is a multiple of 8, and arrays are 16-byte aligned */ |
398 void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len); | |
10105 | 399 void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); |
10219 | 400 /** |
401 * Multiply a vector of floats by a scalar float. Source and | |
402 * destination vectors must overlap exactly or not at all. | |
403 * @param dst result vector, 16-byte aligned | |
404 * @param src input vector, 16-byte aligned | |
405 * @param mul scalar value | |
406 * @param len length of vector, multiple of 4 | |
407 */ | |
408 void (*vector_fmul_scalar)(float *dst, const float *src, float mul, | |
409 int len); | |
410 /** | |
411 * Multiply a vector of floats by concatenated short vectors of | |
412 * floats and by a scalar float. Source and destination vectors | |
413 * must overlap exactly or not at all. | |
414 * [0]: short vectors of length 2, 8-byte aligned | |
415 * [1]: short vectors of length 4, 16-byte aligned | |
416 * @param dst output vector, 16-byte aligned | |
417 * @param src input vector, 16-byte aligned | |
418 * @param sv array of pointers to short vectors | |
419 * @param mul scalar value | |
420 * @param len number of elements in src and dst, multiple of 4 | |
421 */ | |
422 void (*vector_fmul_sv_scalar[2])(float *dst, const float *src, | |
423 const float **sv, float mul, int len); | |
424 /** | |
425 * Multiply short vectors of floats by a scalar float, store | |
426 * concatenated result. | |
427 * [0]: short vectors of length 2, 8-byte aligned | |
428 * [1]: short vectors of length 4, 16-byte aligned | |
429 * @param dst output vector, 16-byte aligned | |
430 * @param sv array of pointers to short vectors | |
431 * @param mul scalar value | |
432 * @param len number of output elements, multiple of 4 | |
433 */ | |
434 void (*sv_fmul_scalar[2])(float *dst, const float **sv, | |
435 float mul, int len); | |
436 /** | |
437 * Calculate the scalar product of two vectors of floats. | |
438 * @param v1 first vector, 16-byte aligned | |
439 * @param v2 second vector, 16-byte aligned | |
440 * @param len length of vectors, multiple of 4 | |
441 */ | |
442 float (*scalarproduct_float)(const float *v1, const float *v2, int len); | |
443 /** | |
444 * Calculate the sum and difference of two vectors of floats. | |
445 * @param v1 first input vector, sum output, 16-byte aligned | |
446 * @param v2 second input vector, difference output, 16-byte aligned | |
447 * @param len length of vectors, multiple of 4 | |
448 */ | |
449 void (*butterflies_float)(float *restrict v1, float *restrict v2, int len); | |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3555
diff
changeset
|
450 |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3555
diff
changeset
|
451 /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767] |
3660
7e1ee254a3ee
Align the input buffer in ffplay, introduce a public macro for aligned declarations
lu_zero
parents:
3656
diff
changeset
|
452 * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */ |
7218 | 453 void (*float_to_int16)(int16_t *dst, const float *src, long len); |
7286
e267f2519248
float_to_int16_interleave: change src to an array of pointers instead of assuming it's contiguous.
lorenm
parents:
7263
diff
changeset
|
454 void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels); |
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
455 |
1092 | 456 /* (I)DCT */ |
457 void (*fdct)(DCTELEM *block/* align 16*/); | |
1567 | 458 void (*fdct248)(DCTELEM *block/* align 16*/); |
2967 | 459 |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1320
diff
changeset
|
460 /* IDCT really*/ |
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1320
diff
changeset
|
461 void (*idct)(DCTELEM *block/* align 16*/); |
2967 | 462 |
1102 | 463 /** |
1104 | 464 * block -> idct -> clip to unsigned 8 bit -> dest. |
1102 | 465 * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...) |
5728 | 466 * @param line_size size in bytes of a horizontal line of dest |
1102 | 467 */ |
1092 | 468 void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); |
2967 | 469 |
1102 | 470 /** |
471 * block -> idct -> add dest -> clip to unsigned 8 bit -> dest. | |
5728 | 472 * @param line_size size in bytes of a horizontal line of dest |
1102 | 473 */ |
1092 | 474 void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); |
2967 | 475 |
1102 | 476 /** |
1104 | 477 * idct input permutation. |
1241 | 478 * several optimized IDCTs need a permuted input (relative to the normal order of the reference |
479 * IDCT) | |
480 * this permutation must be performed before the idct_put/add, note, normally this can be merged | |
481 * with the zigzag/alternate scan<br> | |
1102 | 482 * an example to avoid confusion: |
483 * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...) | |
484 * - (x -> referece dct -> reference idct -> x) | |
485 * - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x) | |
486 * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...) | |
487 */ | |
1092 | 488 uint8_t idct_permutation[64]; |
489 int idct_permutation_type; | |
490 #define FF_NO_IDCT_PERM 1 | |
491 #define FF_LIBMPEG2_IDCT_PERM 2 | |
492 #define FF_SIMPLE_IDCT_PERM 3 | |
493 #define FF_TRANSPOSE_IDCT_PERM 4 | |
2696
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
494 #define FF_PARTTRANS_IDCT_PERM 5 |
6600
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6488
diff
changeset
|
495 #define FF_SSE2_IDCT_PERM 6 |
1092 | 496 |
1784 | 497 int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); |
498 void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); | |
499 #define BASIS_SHIFT 16 | |
500 #define RECON_SHIFT 6 | |
2967 | 501 |
6437 | 502 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w); |
6449
208074826b34
move EDGE_WIDTH definition allong with draw_edges where it belongs
aurel
parents:
6445
diff
changeset
|
503 #define EDGE_WIDTH 16 |
6437 | 504 |
4268 | 505 /* h264 functions */ |
8403 | 506 /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them |
507 NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them | |
508 The reason for above, is that no 2 out of one list may use a different permutation. | |
509 */ | |
7678 | 510 void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); |
511 void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); | |
512 void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); | |
513 void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); | |
4279 | 514 void (*h264_dct)(DCTELEM block[4][4]); |
8375
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
515 void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); |
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
516 void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); |
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
517 void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); |
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
518 void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3175
diff
changeset
|
519 |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3175
diff
changeset
|
520 /* snow wavelet */ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5573
diff
changeset
|
521 void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5573
diff
changeset
|
522 void (*horizontal_compose97i)(IDWTELEM *b, int width); |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4311
diff
changeset
|
523 void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); |
3215
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3198
diff
changeset
|
524 |
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3198
diff
changeset
|
525 void (*prefetch)(void *mem, int stride, int h); |
3245 | 526 |
527 void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | |
3526 | 528 |
9585 | 529 /* mlp/truehd functions */ |
9647
d0fe5dc427f0
mlp: Simplify adressing of state and coeffs arrays for both filters by making
ramiro
parents:
9585
diff
changeset
|
530 void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff, |
d0fe5dc427f0
mlp: Simplify adressing of state and coeffs arrays for both filters by making
ramiro
parents:
9585
diff
changeset
|
531 int firorder, int iirorder, |
9585 | 532 unsigned int filter_shift, int32_t mask, int blocksize, |
533 int32_t *sample_buffer); | |
534 | |
3526 | 535 /* vc1 functions */ |
536 void (*vc1_inv_trans_8x8)(DCTELEM *b); | |
5997
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5907
diff
changeset
|
537 void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block); |
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5907
diff
changeset
|
538 void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block); |
90de28dfd8d6
Switch VC-1 decoder to output decoded residual immediately.
kostya
parents:
5907
diff
changeset
|
539 void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block); |
9859
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9658
diff
changeset
|
540 void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block); |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9658
diff
changeset
|
541 void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block); |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9658
diff
changeset
|
542 void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block); |
7a116de63777
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
darkshikari
parents:
9658
diff
changeset
|
543 void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block); |
4239 | 544 void (*vc1_v_overlap)(uint8_t* src, int stride); |
545 void (*vc1_h_overlap)(uint8_t* src, int stride); | |
9443
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
546 void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq); |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
547 void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq); |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
548 void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq); |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
549 void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq); |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
550 void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq); |
3970fe47fea3
Split VC1 loop filter into separate functions for h/v and size
conrad
parents:
9442
diff
changeset
|
551 void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq); |
3526 | 552 /* put 8x8 block with bicubic interpolation and quarterpel precision |
553 * last argument is actually round value instead of height | |
554 */ | |
555 op_pixels_func put_vc1_mspel_pixels_tab[16]; | |
9437 | 556 op_pixels_func avg_vc1_mspel_pixels_tab[16]; |
5887 | 557 |
558 /* intrax8 functions */ | |
5907
fbd10e6dfbe1
cosmetics: Fix spacial --> spatial typo in function names.
diego
parents:
5887
diff
changeset
|
559 void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize); |
fbd10e6dfbe1
cosmetics: Fix spacial --> spatial typo in function names.
diego
parents:
5887
diff
changeset
|
560 void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize, |
5887 | 561 int * range, int * sum, int edges); |
562 | |
7203
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
7135
diff
changeset
|
563 /** |
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
7135
diff
changeset
|
564 * Calculate scalar product of two vectors. |
7232
cc55dd004819
Monkey's Audio decoder vector functions work on input with length
kostya
parents:
7218
diff
changeset
|
565 * @param len length of vectors, should be multiple of 16 |
7203
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
7135
diff
changeset
|
566 * @param shift number of bits to discard from product |
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
7135
diff
changeset
|
567 */ |
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
7135
diff
changeset
|
568 int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift); |
10644 | 569 /* ape functions */ |
570 /** | |
571 * Calculate scalar product of v1 and v2, | |
572 * and v1[i] += v3[i] * mul | |
573 * @param len length of vectors, should be multiple of 16 | |
574 */ | |
575 int32_t (*scalarproduct_and_madd_int16)(int16_t *v1/*align 16*/, int16_t *v2, int16_t *v3, int len, int mul); | |
8232 | 576 |
8233 | 577 /* rv30 functions */ |
578 qpel_mc_func put_rv30_tpel_pixels_tab[4][16]; | |
579 qpel_mc_func avg_rv30_tpel_pixels_tab[4][16]; | |
580 | |
8232 | 581 /* rv40 functions */ |
582 qpel_mc_func put_rv40_qpel_pixels_tab[4][16]; | |
583 qpel_mc_func avg_rv40_qpel_pixels_tab[4][16]; | |
584 h264_chroma_mc_func put_rv40_chroma_pixels_tab[3]; | |
585 h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3]; | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
586 } DSPContext; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
587 |
4197 | 588 void dsputil_static_init(void); |
1092 | 589 void dsputil_init(DSPContext* p, AVCodecContext *avctx); |
0 | 590 |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4279
diff
changeset
|
591 int ff_check_alignment(void); |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4279
diff
changeset
|
592 |
764 | 593 /** |
594 * permute block according to permutation. | |
595 * @param last last non zero element in scantable order | |
596 */ | |
1064 | 597 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last); |
34 | 598 |
1729 | 599 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); |
600 | |
2979 | 601 #define BYTE_VEC32(c) ((c)*0x01010101UL) |
1264 | 602 |
/**
 * Average the four bytes packed in a with the four bytes packed in b,
 * byte lane by byte lane, rounding halves up.
 *
 * For each lane (a | b) - ((a ^ b) >> 1) equals (a + b + 1) >> 1. The low
 * bit of every byte of a ^ b is cleared with ~BYTE_VEC32(0x01) before the
 * shift so that no bit is shifted into the neighbouring lane.
 *
 * @param a first group of four packed bytes
 * @param b second group of four packed bytes
 * @return the four rounded-up per-byte averages, packed the same way
 */
603 static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) | |
604 { | |
605 return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); | |
606 } | |
607 | |
/**
 * Average the four bytes packed in a with the four bytes packed in b,
 * byte lane by byte lane, rounding halves down.
 *
 * For each lane (a & b) + ((a ^ b) >> 1) equals (a + b) >> 1. The low bit
 * of every byte of a ^ b is cleared with ~BYTE_VEC32(0x01) before the shift
 * so that no bit is shifted into the neighbouring lane.
 *
 * @param a first group of four packed bytes
 * @param b second group of four packed bytes
 * @return the four truncated per-byte averages, packed the same way
 */
608 static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b) | |
609 { | |
610 return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); | |
611 } | |
612 | |
/**
 * Pick the penalty factor that goes with a comparison function type.
 *
 * Only the low 8 bits of type are examined. A value that matches none of
 * the listed FF_CMP_* cases takes the default label and is handled like
 * FF_CMP_SAD.
 *
 * @param lambda  value used for the SAD, DCT, W53, W97, SATD and DCT264 cases
 * @param lambda2 value used for the RD, PSNR, SSE and NSSE cases
 *                (presumably the squared-error counterpart of lambda; confirm
 *                against the callers)
 * @param type    FF_CMP_* selector; bits above the low 8 are ignored
 * @return lambda or lambda2, scaled by a per-type constant and shifted right
 *         by FF_LAMBDA_SHIFT (one bit more for FF_CMP_DCT); 1 for FF_CMP_BIT
 */
2184 | 613 static inline int get_penalty_factor(int lambda, int lambda2, int type){ |
614 switch(type&0xFF){ | |
615 default: | |
616 case FF_CMP_SAD: | |
617 return lambda>>FF_LAMBDA_SHIFT; | |
618 case FF_CMP_DCT: | |
619 return (3*lambda)>>(FF_LAMBDA_SHIFT+1); | |
620 case FF_CMP_W53: | |
621 return (4*lambda)>>(FF_LAMBDA_SHIFT); | |
622 case FF_CMP_W97: | |
623 return (2*lambda)>>(FF_LAMBDA_SHIFT); | |
624 case FF_CMP_SATD: | |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
625 case FF_CMP_DCT264: |
2184 | 626 return (2*lambda)>>FF_LAMBDA_SHIFT; |
627 case FF_CMP_RD: | |
628 case FF_CMP_PSNR: | |
629 case FF_CMP_SSE: | |
630 case FF_CMP_NSSE: | |
631 return lambda2>>FF_LAMBDA_SHIFT; | |
632 case FF_CMP_BIT: | |
633 return 1; | |
634 } | |
635 } | |
636 | |
1102 | 637 /** |
1104 | 638 * Empty mmx state. |
1102 | 639 * this must be called between any dsp function and float/double code. |
640 * for example sin(); dsp->idct_put(); emms_c(); cos() | |
641 */ | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
642 #define emms_c() |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
643 |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
984
diff
changeset
|
644 /* should be defined by architectures supporting |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
984
diff
changeset
|
645 one or more MultiMedia extension */ |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
984
diff
changeset
|
646 int mm_support(void); |
10115 | 647 extern int mm_flags; |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
984
diff
changeset
|
648 |
5149 | 649 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); |
8359 | 650 void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx); |
5149 | 651 void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); |
652 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); | |
653 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); | |
654 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); | |
655 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); | |
656 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); | |
657 void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); | |
658 | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10944
diff
changeset
|
659 #define DECLARE_ALIGNED_16(t, v, ...) DECLARE_ALIGNED(16, t, v) |
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10944
diff
changeset
|
660 #define DECLARE_ALIGNED_8(t, v, ...) DECLARE_ALIGNED(8, t, v) |
1974
8c5489b2cf3e
move __align16 some place where non-MMX machines can see it
melanson
parents:
1972
diff
changeset
|
661 |
8590 | 662 #if HAVE_MMX |
0 | 663 |
862 | 664 #undef emms_c |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
838
diff
changeset
|
665 |
/**
 * Issue the x86 "emms" instruction unconditionally.
 * The asm statement is volatile and lists a "memory" clobber, so the
 * compiler will neither drop it nor move memory accesses across it.
 */
0 | 666 static inline void emms(void) |
667 { | |
8031 | 668 __asm__ volatile ("emms;":::"memory"); |
6
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
669 } |
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
670 |
936 | 671 |
6
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
672 #define emms_c() \ |
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
673 {\ |
8104
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
674 if (mm_flags & FF_MM_MMX)\ |
6
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
675 emms();\ |
0 | 676 } |
677 | |
8590 | 678 #elif ARCH_ARM |
62 | 679 |
8590 | 680 #if HAVE_NEON |
7687
e5b5a9af1b68
ARM: set STRIDE_ALIGN and DECLARE_ALIGNED_8 to 16 for NEON
mru
parents:
7678
diff
changeset
|
681 # define STRIDE_ALIGN 16 |
e5b5a9af1b68
ARM: set STRIDE_ALIGN and DECLARE_ALIGNED_8 to 16 for NEON
mru
parents:
7678
diff
changeset
|
682 #endif |
e5b5a9af1b68
ARM: set STRIDE_ALIGN and DECLARE_ALIGNED_8 to 16 for NEON
mru
parents:
7678
diff
changeset
|
683 |
8590 | 684 #elif ARCH_PPC |
623
92e99e506920
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
612
diff
changeset
|
685 |
2324 | 686 #define STRIDE_ALIGN 16 |
623
92e99e506920
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
612
diff
changeset
|
687 |
8590 | 688 #elif HAVE_MMI |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
687
diff
changeset
|
689 |
2324 | 690 #define STRIDE_ALIGN 16 |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
687
diff
changeset
|
691 |
7135
6bd6a2da306e
Define mm_flags/support to be 0 on architectures where they don't exist.
astrange
parents:
7094
diff
changeset
|
692 #else |
6bd6a2da306e
Define mm_flags/support to be 0 on architectures where they don't exist.
astrange
parents:
7094
diff
changeset
|
693 |
6bd6a2da306e
Define mm_flags/support to be 0 on architectures where they don't exist.
astrange
parents:
7094
diff
changeset
|
694 #define mm_flags 0 |
6bd6a2da306e
Define mm_flags/support to be 0 on architectures where they don't exist.
astrange
parents:
7094
diff
changeset
|
695 #define mm_support() 0 |
6bd6a2da306e
Define mm_flags/support to be 0 on architectures where they don't exist.
astrange
parents:
7094
diff
changeset
|
696 |
6363
7ebd1cdb2142
clean up definition of DECLARE_ALIGNED_8 and STRIDE_ALIGN
mru
parents:
6241
diff
changeset
|
697 #endif |
3728 | 698 |
6363
7ebd1cdb2142
clean up definition of DECLARE_ALIGNED_8 and STRIDE_ALIGN
mru
parents:
6241
diff
changeset
|
699 #ifndef STRIDE_ALIGN |
7ebd1cdb2142
clean up definition of DECLARE_ALIGNED_8 and STRIDE_ALIGN
mru
parents:
6241
diff
changeset
|
700 # define STRIDE_ALIGN 8 |
0 | 701 #endif |
702 | |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
703 /* PSNR */ |
1064 | 704 void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
705 int orig_linesize[3], int coded_linesize, |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
706 AVCodecContext *avctx); |
781 | 707 |
708 /* FFT computation */ | |
709 | |
710 /* NOTE: soon integer code will be added, so you must use the | |
711 FFTSample type */ | |
712 typedef float FFTSample; | |
713 | |
714 typedef struct FFTComplex { | |
715 FFTSample re, im; | |
716 } FFTComplex; | |
717 | |
718 typedef struct FFTContext { | |
719 int nbits; | |
720 int inverse; | |
721 uint16_t *revtab; | |
722 FFTComplex *exptab; | |
723 FFTComplex *exptab1; /* only used by SSE code */ | |
7542 | 724 FFTComplex *tmp_buf; |
10199 | 725 int mdct_size; /* size of MDCT (i.e. number of input data * 2) */ |
726 int mdct_bits; /* log2 of the MDCT size, i.e. mdct_size = 2^mdct_bits */ |
727 /* pre/post rotation tables */ | |
728 FFTSample *tcos; | |
729 FFTSample *tsin; | |
7542 | 730 void (*fft_permute)(struct FFTContext *s, FFTComplex *z); |
781 | 731 void (*fft_calc)(struct FFTContext *s, FFTComplex *z); |
10199 | 732 void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); |
733 void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input); | |
734 void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); | |
10175
5cf49858179a
Move per-arch fft init bits into the corresponding subdirs
mru
parents:
10174
diff
changeset
|
735 int split_radix; |
10204
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
736 int permutation; |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
737 #define FF_MDCT_PERM_NONE 0 |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
738 #define FF_MDCT_PERM_INTERLEAVE 1 |
781 | 739 } FFTContext; |
740 | |
10400
866dffa620d1
Use hardcoded instead of runtime-calculated ff_cos_* tables if
reimar
parents:
10370
diff
changeset
|
741 #if CONFIG_HARDCODED_TABLES |
10407
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
742 #define COSTABLE_CONST const |
10408 | 743 #define SINTABLE_CONST const |
10827
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
744 #define SINETABLE_CONST const |
10400
866dffa620d1
Use hardcoded instead of runtime-calculated ff_cos_* tables if
reimar
parents:
10370
diff
changeset
|
745 #else |
10407
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
746 #define COSTABLE_CONST |
10408 | 747 #define SINTABLE_CONST |
10827
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
748 #define SINETABLE_CONST |
10400
866dffa620d1
Use hardcoded instead of runtime-calculated ff_cos_* tables if
reimar
parents:
10370
diff
changeset
|
749 #endif |
8694
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
750 |
10407
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
751 #define COSTABLE(size) \ |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10944
diff
changeset
|
752 COSTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_cos_##size)[size/2] |
10407
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
753 #define SINTABLE(size) \ |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10944
diff
changeset
|
754 SINTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_sin_##size)[size/2] |
10827
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
755 #define SINETABLE(size) \ |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10944
diff
changeset
|
756 SINETABLE_CONST DECLARE_ALIGNED_16(float, ff_sine_##size)[size] |
10407
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
757 extern COSTABLE(16); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
758 extern COSTABLE(32); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
759 extern COSTABLE(64); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
760 extern COSTABLE(128); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
761 extern COSTABLE(256); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
762 extern COSTABLE(512); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
763 extern COSTABLE(1024); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
764 extern COSTABLE(2048); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
765 extern COSTABLE(4096); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
766 extern COSTABLE(8192); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
767 extern COSTABLE(16384); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
768 extern COSTABLE(32768); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
769 extern COSTABLE(65536); |
10492
63910f7ba293
Pad ff_cos_tabs and ff_sin_tabs so that index n points to the table for n bits.
reimar
parents:
10431
diff
changeset
|
770 extern COSTABLE_CONST FFTSample* const ff_cos_tabs[17]; |
10407
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
771 |
10496
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
772 /** |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
773 * Initializes the cosine table in ff_cos_tabs[index] |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
774 * @param index index in ff_cos_tabs array of the table to initialize |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
775 */ |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
776 void ff_init_ff_cos_tabs(int index); |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
777 |
10407
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
778 extern SINTABLE(16); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
779 extern SINTABLE(32); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
780 extern SINTABLE(64); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
781 extern SINTABLE(128); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
782 extern SINTABLE(256); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
783 extern SINTABLE(512); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
784 extern SINTABLE(1024); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
785 extern SINTABLE(2048); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
786 extern SINTABLE(4096); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
787 extern SINTABLE(8192); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
788 extern SINTABLE(16384); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
789 extern SINTABLE(32768); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
790 extern SINTABLE(65536); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10402
diff
changeset
|
791 |
8636 | 792 /** |
793 * Sets up a complex FFT. | |
794 * @param nbits log2 of the length of the input array | |
795 * @param inverse if 0 perform the forward transform, if 1 perform the inverse | |
796 */ | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1866
diff
changeset
|
797 int ff_fft_init(FFTContext *s, int nbits, int inverse); |
7542 | 798 void ff_fft_permute_c(FFTContext *s, FFTComplex *z); |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1866
diff
changeset
|
799 void ff_fft_calc_c(FFTContext *s, FFTComplex *z); |
10175
5cf49858179a
Move per-arch fft init bits into the corresponding subdirs
mru
parents:
10174
diff
changeset
|
800 |
5cf49858179a
Move per-arch fft init bits into the corresponding subdirs
mru
parents:
10174
diff
changeset
|
801 void ff_fft_init_altivec(FFTContext *s); |
5cf49858179a
Move per-arch fft init bits into the corresponding subdirs
mru
parents:
10174
diff
changeset
|
802 void ff_fft_init_mmx(FFTContext *s); |
10176 | 803 void ff_fft_init_arm(FFTContext *s); |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
954
diff
changeset
|
804 |
8636 | 805 /** |
806 * Do the permutation needed BEFORE calling ff_fft_calc(). | |
807 */ | |
7542 | 808 static inline void ff_fft_permute(FFTContext *s, FFTComplex *z) |
809 { | |
810 s->fft_permute(s, z); | |
811 } | |
8636 | 812 /** |
813 * Do a complex FFT with the parameters defined in ff_fft_init(). The | |
814 * input data must be permuted beforehand with ff_fft_permute(). No 1.0/sqrt(n) normalization is done. |
815 */ | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1866
diff
changeset
|
816 static inline void ff_fft_calc(FFTContext *s, FFTComplex *z) |
781 | 817 { |
818 s->fft_calc(s, z); | |
819 } | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1866
diff
changeset
|
820 void ff_fft_end(FFTContext *s); |
781 | 821 |
822 /* MDCT computation */ | |
823 | |
10199 | 824 static inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input) |
7547 | 825 { |
10199 | 826 s->imdct_calc(s, output, input); |
7547 | 827 } |
10199 | 828 static inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input) |
7547 | 829 { |
10199 | 830 s->imdct_half(s, output, input); |
7547 | 831 } |
832 | |
10199 | 833 static inline void ff_mdct_calc(FFTContext *s, FFTSample *output, |
10161 | 834 const FFTSample *input) |
835 { | |
10199 | 836 s->mdct_calc(s, output, input); |
10161 | 837 } |
838 | |
6139
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
6056
diff
changeset
|
839 /** |
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
6056
diff
changeset
|
840 * Generate a Kaiser-Bessel Derived Window. |
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
6056
diff
changeset
|
841 * @param window pointer to half window |
6142
a35b838ab955
Add variable alpha and size of half window for Kaiser-Bessel Derived window
superdump
parents:
6139
diff
changeset
|
842 * @param alpha determines window shape |
a35b838ab955
Add variable alpha and size of half window for Kaiser-Bessel Derived window
superdump
parents:
6139
diff
changeset
|
843 * @param n size of half window |
6139
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
6056
diff
changeset
|
844 */ |
6142
a35b838ab955
Add variable alpha and size of half window for Kaiser-Bessel Derived window
superdump
parents:
6139
diff
changeset
|
845 void ff_kbd_window_init(float *window, float alpha, int n); |
6139
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
6056
diff
changeset
|
846 |
7094
b0820b8bd4dd
Add generic ff_sine_window_init function and implement in codecs appropriately
superdump
parents:
6600
diff
changeset
|
847 /** |
b0820b8bd4dd
Add generic ff_sine_window_init function and implement in codecs appropriately
superdump
parents:
6600
diff
changeset
|
848 * Generate a sine window. |
b0820b8bd4dd
Add generic ff_sine_window_init function and implement in codecs appropriately
superdump
parents:
6600
diff
changeset
|
849 * @param window pointer to half window |
b0820b8bd4dd
Add generic ff_sine_window_init function and implement in codecs appropriately
superdump
parents:
6600
diff
changeset
|
850 * @param n size of half window |
b0820b8bd4dd
Add generic ff_sine_window_init function and implement in codecs appropriately
superdump
parents:
6600
diff
changeset
|
851 */ |
b0820b8bd4dd
Add generic ff_sine_window_init function and implement in codecs appropriately
superdump
parents:
6600
diff
changeset
|
852 void ff_sine_window_init(float *window, int n); |
10827
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
853 /** |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
854 * Initializes the specified entry of ff_sine_windows. |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
855 */ |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
856 void ff_init_ff_sine_windows(int index); |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
857 extern SINETABLE( 32); |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
858 extern SINETABLE( 64); |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
859 extern SINETABLE( 128); |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
860 extern SINETABLE( 256); |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
861 extern SINETABLE( 512); |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
862 extern SINETABLE(1024); |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
863 extern SINETABLE(2048); |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
864 extern SINETABLE(4096); |
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10644
diff
changeset
|
865 extern SINETABLE_CONST float * const ff_sine_windows[13]; |
7094
b0820b8bd4dd
Add generic ff_sine_window_init function and implement in codecs appropriately
superdump
parents:
6600
diff
changeset
|
866 |
10199 | 867 int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale); |
868 void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input); | |
869 void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input); | |
870 void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input); | |
871 void ff_mdct_end(FFTContext *s); | |
781 | 872 |
8694
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
873 /* Real Discrete Fourier Transform */ |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
874 |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
875 enum RDFTransformType { |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
876 RDFT, |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
877 IRDFT, |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
878 RIDFT, |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
879 IRIDFT, |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
880 }; |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
881 |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
882 typedef struct { |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
883 int nbits; |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
884 int inverse; |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
885 int sign_convention; |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
886 |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
887 /* pre/post rotation tables */ |
10402
06bdadf410a9
Do not initialize ff_cos_* tables again in rdft_init, they are already
reimar
parents:
10400
diff
changeset
|
888 const FFTSample *tcos; |
10408 | 889 SINTABLE_CONST FFTSample *tsin; |
8694
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
890 FFTContext fft; |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
891 } RDFTContext; |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
892 |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
893 /** |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
894 * Sets up a real FFT. |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
895 * @param nbits log2 of the length of the input array |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
896 * @param trans the type of transform |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
897 */ |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
898 int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans); |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
899 void ff_rdft_calc(RDFTContext *s, FFTSample *data); |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
900 void ff_rdft_end(RDFTContext *s); |
68fd157bab48
Add the rdft family of transforms (fft/ifft of an all real sequence) to dsputil.
alexc
parents:
8636
diff
changeset
|
901 |
10944 | 902 /* Discrete Cosine Transform */ |
903 | |
904 typedef struct { | |
905 int nbits; | |
906 int inverse; | |
907 FFTSample *data; | |
908 RDFTContext rdft; | |
909 const float *costab; | |
910 FFTSample *csc2; | |
911 } DCTContext; | |
912 | |
913 /** | |
914 * Sets up (Inverse)DCT. | |
915 * @param nbits log2 of the length of the input array | |
916 * @param inverse >0 forward transform, <0 inverse transform | |
917 */ | |
918 int ff_dct_init(DCTContext *s, int nbits, int inverse); | |
919 void ff_dct_calc(DCTContext *s, FFTSample *data); | |
920 void ff_dct_end (DCTContext *s); | |
921 | |
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
5997
diff
changeset
|
922 #define WRAPPER8_16(name8, name16)\ |
1708 | 923 static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ |
924 return name8(s, dst , src , stride, h)\ | |
925 +name8(s, dst+8 , src+8 , stride, h);\ | |
926 } | |
927 | |
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
5997
diff
changeset
|
928 #define WRAPPER8_16_SQ(name8, name16)\ |
1708 | 929 static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ |
930 int score=0;\ | |
931 score +=name8(s, dst , src , stride, 8);\ | |
932 score +=name8(s, dst+8 , src+8 , stride, 8);\ | |
933 if(h==16){\ | |
934 dst += 8*stride;\ | |
935 src += 8*stride;\ | |
936 score +=name8(s, dst , src , stride, 8);\ | |
937 score +=name8(s, dst+8 , src+8 , stride, 8);\ | |
938 }\ | |
939 return score;\ | |
936 | 940 } |
941 | |
4240 | 942 |
9436 | 943 static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) |
4240 | 944 { |
945 int i; | |
946 for(i=0; i<h; i++) | |
947 { | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
948 AV_WN16(dst , AV_RN16(src )); |
4240 | 949 dst+=dstStride; |
950 src+=srcStride; | |
951 } | |
952 } | |
953 | |
9436 | 954 static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) |
4240 | 955 { |
956 int i; | |
957 for(i=0; i<h; i++) | |
958 { | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
959 AV_WN32(dst , AV_RN32(src )); |
4240 | 960 dst+=dstStride; |
961 src+=srcStride; | |
962 } | |
963 } | |
964 | |
9436 | 965 static inline void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) |
4240 | 966 { |
967 int i; | |
968 for(i=0; i<h; i++) | |
969 { | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
970 AV_WN32(dst , AV_RN32(src )); |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
971 AV_WN32(dst+4 , AV_RN32(src+4 )); |
4240 | 972 dst+=dstStride; |
973 src+=srcStride; | |
974 } | |
975 } | |
976 | |
9436 | 977 static inline void copy_block9(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) |
4240 | 978 { |
979 int i; | |
980 for(i=0; i<h; i++) | |
981 { | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
982 AV_WN32(dst , AV_RN32(src )); |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
983 AV_WN32(dst+4 , AV_RN32(src+4 )); |
4240 | 984 dst[8]= src[8]; |
985 dst+=dstStride; | |
986 src+=srcStride; | |
987 } | |
988 } | |
989 | |
9436 | 990 static inline void copy_block16(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) |
4240 | 991 { |
992 int i; | |
993 for(i=0; i<h; i++) | |
994 { | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
995 AV_WN32(dst , AV_RN32(src )); |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
996 AV_WN32(dst+4 , AV_RN32(src+4 )); |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
997 AV_WN32(dst+8 , AV_RN32(src+8 )); |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
998 AV_WN32(dst+12, AV_RN32(src+12)); |
4240 | 999 dst+=dstStride; |
1000 src+=srcStride; | |
1001 } | |
1002 } | |
1003 | |
9436 | 1004 static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) |
4240 | 1005 { |
1006 int i; | |
1007 for(i=0; i<h; i++) | |
1008 { | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
1009 AV_WN32(dst , AV_RN32(src )); |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
1010 AV_WN32(dst+4 , AV_RN32(src+4 )); |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
1011 AV_WN32(dst+8 , AV_RN32(src+8 )); |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5291
diff
changeset
|
1012 AV_WN32(dst+12, AV_RN32(src+12)); |
4240 | 1013 dst[16]= src[16]; |
1014 dst+=dstStride; | |
1015 src+=srcStride; | |
1016 } | |
1017 } | |
1018 | |
7760 | 1019 #endif /* AVCODEC_DSPUTIL_H */ |