Mercurial > libavcodec.hg
annotate arm/dsputil_arm.c @ 8964:d08dc32559e7 libavcodec
Handle conditional compilation of CPU-specific FFT code in the Makefile.
Also replace one coarse MMX condition with more fine-grained
3DNow!/3DNowExt!/SSE conditions.
author | diego |
---|---|
date | Wed, 18 Feb 2009 00:11:49 +0000 |
parents | 04423b2f6e0b |
children | 2c1c28f26a27 |
rev | line source |
---|---|
61 | 1 /* |
8359 | 2 * ARM optimized DSP utils |
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
8596
diff
changeset
|
3 * Copyright (c) 2001 Lionel Ulmer |
61 | 4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
429 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
61 | 11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
61 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 * Lesser General Public License for more details. | |
61 | 16 * |
429 | 17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
61 | 20 */ |
21 | |
6763 | 22 #include "libavcodec/dsputil.h" |
8590 | 23 #if HAVE_IPP |
6760
e6a7be7d85f2
ipp.h is a system header, use <> when #including it.
diego
parents:
6180
diff
changeset
|
24 #include <ipp.h> |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
25 #endif |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
26 |
8250 | 27 void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx); |
28 void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx); | |
8334 | 29 void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx); |
61 | 30 |
8250 | 31 void j_rev_dct_ARM(DCTELEM *data); |
32 void simple_idct_ARM(DCTELEM *data); | |
61 | 33 |
8250 | 34 void simple_idct_armv5te(DCTELEM *data); |
35 void simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data); | |
36 void simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data); | |
3769 | 37 |
8250 | 38 void ff_simple_idct_armv6(DCTELEM *data); |
39 void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); | |
40 void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); | |
4427 | 41 |
8335 | 42 void ff_simple_idct_neon(DCTELEM *data); |
43 void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); | |
44 void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); | |
45 | |
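1092 | 46 /* XXX: local hack: file-scope copies of c->put_pixels_clamped and c->add_pixels_clamped, set in dsputil_init_arm() and used by the IDCT put/add wrappers below */ |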
47 static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); | |
48 static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); | |
49 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
50 void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
51 void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
52 void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
53 void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
54 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
55 void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
56 void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
57 void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
58 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
59 void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
60 |
8250 | 61 void ff_prefetch_arm(void *mem, int stride, int h); |
8070 | 62 |
2735 | 63 CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8) |
64 CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8) | |
65 CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8) | |
66 CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8) | |
67 CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8) | |
68 CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8) | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
69 |
8250 | 70 void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest, |
8072 | 71 int line_size); |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
72 |
1092 | 73 /* XXX: these wrapper functions should be removed as soon as all IDCTs are |
74 converted */ | |
1347
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
75 static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block) |
1092 | 76 { |
77 j_rev_dct_ARM (block); | |
78 ff_put_pixels_clamped(block, dest, line_size); | |
79 } | |
1347
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
80 static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block) |
61 | 81 { |
1092 | 82 j_rev_dct_ARM (block); |
83 ff_add_pixels_clamped(block, dest, line_size); | |
61 | 84 } |
1347
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
85 static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block) |
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
86 { |
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
87 simple_idct_ARM (block); |
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
88 ff_put_pixels_clamped(block, dest, line_size); |
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
89 } |
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
90 static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block) |
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
91 { |
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
92 simple_idct_ARM (block); |
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
93 ff_add_pixels_clamped(block, dest, line_size); |
cca26199ab17
Optimized simple idct for arm by Frederic 'dilb' Boulay <dilb@handhelds.org>. Currently licensed under the GPLv2, but the author allowed to license it under the LGPL, feel free to change
al3x
parents:
1324
diff
changeset
|
94 } |
3726 | 95 |
8590 | 96 #if HAVE_IPP |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
97 static void simple_idct_ipp(DCTELEM *block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
98 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
99 ippiDCT8x8Inv_Video_16s_C1I(block); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
100 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
101 static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
102 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
103 ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
104 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
105 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
106 void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
107 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
108 static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
109 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
110 ippiDCT8x8Inv_Video_16s_C1I(block); |
8590 | 111 #if HAVE_IWMMXT |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
112 add_pixels_clamped_iwmmxt(block, dest, line_size); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
113 #else |
8072 | 114 ff_add_pixels_clamped_ARM(block, dest, line_size); |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
115 #endif |
3726 | 116 } |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
117 #endif |
1092 | 118 |
7165 | 119 int mm_support(void) |
120 { | |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
121 return HAVE_IWMMXT * FF_MM_IWMMXT; |
7165 | 122 } |
123 | |
8359 | 124 void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) |
1092 | 125 { |
3726 | 126 int idct_algo= avctx->idct_algo; |
1092 | 127 |
128 ff_put_pixels_clamped = c->put_pixels_clamped; | |
129 ff_add_pixels_clamped = c->add_pixels_clamped; | |
130 | |
6179 | 131 if (avctx->lowres == 0) { |
6180 | 132 if(idct_algo == FF_IDCT_AUTO){ |
8590 | 133 #if HAVE_IPP |
6180 | 134 idct_algo = FF_IDCT_IPP; |
8590 | 135 #elif HAVE_NEON |
8335 | 136 idct_algo = FF_IDCT_SIMPLENEON; |
8590 | 137 #elif HAVE_ARMV6 |
6180 | 138 idct_algo = FF_IDCT_SIMPLEARMV6; |
8590 | 139 #elif HAVE_ARMV5TE |
6180 | 140 idct_algo = FF_IDCT_SIMPLEARMV5TE; |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
141 #else |
6180 | 142 idct_algo = FF_IDCT_ARM; |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
143 #endif |
6180 | 144 } |
3726 | 145 |
6180 | 146 if(idct_algo==FF_IDCT_ARM){ |
147 c->idct_put= j_rev_dct_ARM_put; | |
148 c->idct_add= j_rev_dct_ARM_add; | |
149 c->idct = j_rev_dct_ARM; | |
8267 | 150 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; |
6180 | 151 } else if (idct_algo==FF_IDCT_SIMPLEARM){ |
152 c->idct_put= simple_idct_ARM_put; | |
153 c->idct_add= simple_idct_ARM_add; | |
154 c->idct = simple_idct_ARM; | |
155 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
8590 | 156 #if HAVE_ARMV6 |
6180 | 157 } else if (idct_algo==FF_IDCT_SIMPLEARMV6){ |
158 c->idct_put= ff_simple_idct_put_armv6; | |
159 c->idct_add= ff_simple_idct_add_armv6; | |
160 c->idct = ff_simple_idct_armv6; | |
161 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | |
4427 | 162 #endif |
8590 | 163 #if HAVE_ARMV5TE |
6180 | 164 } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){ |
165 c->idct_put= simple_idct_put_armv5te; | |
166 c->idct_add= simple_idct_add_armv5te; | |
167 c->idct = simple_idct_armv5te; | |
168 c->idct_permutation_type = FF_NO_IDCT_PERM; | |
3769 | 169 #endif |
8590 | 170 #if HAVE_IPP |
6180 | 171 } else if (idct_algo==FF_IDCT_IPP){ |
172 c->idct_put= simple_idct_ipp_put; | |
173 c->idct_add= simple_idct_ipp_add; | |
174 c->idct = simple_idct_ipp; | |
175 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
3726 | 176 #endif |
8590 | 177 #if HAVE_NEON |
8335 | 178 } else if (idct_algo==FF_IDCT_SIMPLENEON){ |
179 c->idct_put= ff_simple_idct_put_neon; | |
180 c->idct_add= ff_simple_idct_add_neon; | |
181 c->idct = ff_simple_idct_neon; | |
182 c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; | |
183 #endif | |
6180 | 184 } |
6179 | 185 } |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
186 |
5641
1e93e637fa21
Fix put_*_xy2_arm bug and enable put_pixels16_arm and put_pixels8_y2_arm.
diego
parents:
5010
diff
changeset
|
187 c->put_pixels_tab[0][0] = put_pixels16_arm; |
8267 | 188 c->put_pixels_tab[0][1] = put_pixels16_x2_arm; |
189 c->put_pixels_tab[0][2] = put_pixels16_y2_arm; | |
5641
1e93e637fa21
Fix put_*_xy2_arm bug and enable put_pixels16_arm and put_pixels8_y2_arm.
diego
parents:
5010
diff
changeset
|
190 c->put_pixels_tab[0][3] = put_pixels16_xy2_arm; |
1e93e637fa21
Fix put_*_xy2_arm bug and enable put_pixels16_arm and put_pixels8_y2_arm.
diego
parents:
5010
diff
changeset
|
191 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm; |
8267 | 192 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm; |
193 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm; | |
5641
1e93e637fa21
Fix put_*_xy2_arm bug and enable put_pixels16_arm and put_pixels8_y2_arm.
diego
parents:
5010
diff
changeset
|
194 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm; |
8267 | 195 c->put_pixels_tab[1][0] = put_pixels8_arm; |
196 c->put_pixels_tab[1][1] = put_pixels8_x2_arm; | |
5641
1e93e637fa21
Fix put_*_xy2_arm bug and enable put_pixels16_arm and put_pixels8_y2_arm.
diego
parents:
5010
diff
changeset
|
197 c->put_pixels_tab[1][2] = put_pixels8_y2_arm; |
1e93e637fa21
Fix put_*_xy2_arm bug and enable put_pixels16_arm and put_pixels8_y2_arm.
diego
parents:
5010
diff
changeset
|
198 c->put_pixels_tab[1][3] = put_pixels8_xy2_arm; |
8267 | 199 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm; |
200 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm; | |
201 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; | |
5641
1e93e637fa21
Fix put_*_xy2_arm bug and enable put_pixels16_arm and put_pixels8_y2_arm.
diego
parents:
5010
diff
changeset
|
202 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm; |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
203 |
8590 | 204 #if HAVE_ARMV5TE |
8070 | 205 c->prefetch = ff_prefetch_arm; |
7688
160d5c1ae60a
ARM: add prefetch function using ARMv5 PLD instruction
mru
parents:
7166
diff
changeset
|
206 #endif |
160d5c1ae60a
ARM: add prefetch function using ARMv5 PLD instruction
mru
parents:
7166
diff
changeset
|
207 |
8590 | 208 #if HAVE_IWMMXT |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
209 dsputil_init_iwmmxt(c, avctx); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
1347
diff
changeset
|
210 #endif |
8590 | 211 #if HAVE_ARMVFP |
6786
18084aaa277b
Add some initial optimizations for ARM VFP (floating
gpoirier
parents:
6763
diff
changeset
|
212 ff_float_init_arm_vfp(c, avctx); |
18084aaa277b
Add some initial optimizations for ARM VFP (floating
gpoirier
parents:
6763
diff
changeset
|
213 #endif |
8590 | 214 #if HAVE_NEON |
8334 | 215 ff_dsputil_init_neon(c, avctx); |
216 #endif | |
1092 | 217 } |