Mercurial > libavcodec.hg
comparison arm/dsputil_iwmmxt.c @ 8359:9281a8a9387a libavcodec
ARM: replace "armv4l" with "arm"
author | mru |
---|---|
date | Wed, 17 Dec 2008 00:54:54 +0000 |
parents | armv4l/dsputil_iwmmxt.c@0d108ec85620 |
children | 08573f5f587d |
comparison
equal
deleted
inserted
replaced
8358:c30b92cf446b | 8359:9281a8a9387a |
---|---|
1 /* | |
2 * iWMMXt optimized DSP utils | |
3 * Copyright (c) 2004 AGAWA Koji | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 #include "libavcodec/dsputil.h" | |
23 | |
24 #define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt | |
25 #define SET_RND(regd) __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); | |
26 #define WAVG2B "wavg2b" | |
27 #include "dsputil_iwmmxt_rnd_template.c" | |
28 #undef DEF | |
29 #undef SET_RND | |
30 #undef WAVG2B | |
31 | |
32 #define DEF(x, y) x ## _ ## y ##_iwmmxt | |
33 #define SET_RND(regd) __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); | |
34 #define WAVG2B "wavg2br" | |
35 #include "dsputil_iwmmxt_rnd_template.c" | |
36 #undef DEF | |
37 #undef SET_RND | |
38 #undef WAVG2BR | |
39 | |
40 // need scheduling | |
41 #define OP(AVG) \ | |
42 __asm__ volatile ( \ | |
43 /* alignment */ \ | |
44 "and r12, %[pixels], #7 \n\t" \ | |
45 "bic %[pixels], %[pixels], #7 \n\t" \ | |
46 "tmcr wcgr1, r12 \n\t" \ | |
47 \ | |
48 "wldrd wr0, [%[pixels]] \n\t" \ | |
49 "wldrd wr1, [%[pixels], #8] \n\t" \ | |
50 "add %[pixels], %[pixels], %[line_size] \n\t" \ | |
51 "walignr1 wr4, wr0, wr1 \n\t" \ | |
52 \ | |
53 "1: \n\t" \ | |
54 \ | |
55 "wldrd wr2, [%[pixels]] \n\t" \ | |
56 "wldrd wr3, [%[pixels], #8] \n\t" \ | |
57 "add %[pixels], %[pixels], %[line_size] \n\t" \ | |
58 "pld [%[pixels]] \n\t" \ | |
59 "walignr1 wr5, wr2, wr3 \n\t" \ | |
60 AVG " wr6, wr4, wr5 \n\t" \ | |
61 "wstrd wr6, [%[block]] \n\t" \ | |
62 "add %[block], %[block], %[line_size] \n\t" \ | |
63 \ | |
64 "wldrd wr0, [%[pixels]] \n\t" \ | |
65 "wldrd wr1, [%[pixels], #8] \n\t" \ | |
66 "add %[pixels], %[pixels], %[line_size] \n\t" \ | |
67 "walignr1 wr4, wr0, wr1 \n\t" \ | |
68 "pld [%[pixels]] \n\t" \ | |
69 AVG " wr6, wr4, wr5 \n\t" \ | |
70 "wstrd wr6, [%[block]] \n\t" \ | |
71 "add %[block], %[block], %[line_size] \n\t" \ | |
72 \ | |
73 "subs %[h], %[h], #2 \n\t" \ | |
74 "bne 1b \n\t" \ | |
75 : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \ | |
76 : [line_size]"r"(line_size) \ | |
77 : "memory", "r12"); | |
78 void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) | |
79 { | |
80 OP("wavg2br"); | |
81 } | |
82 void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) | |
83 { | |
84 OP("wavg2b"); | |
85 } | |
86 #undef OP | |
87 | |
88 void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size) | |
89 { | |
90 uint8_t *pixels2 = pixels + line_size; | |
91 | |
92 __asm__ volatile ( | |
93 "mov r12, #4 \n\t" | |
94 "1: \n\t" | |
95 "pld [%[pixels], %[line_size2]] \n\t" | |
96 "pld [%[pixels2], %[line_size2]] \n\t" | |
97 "wldrd wr4, [%[pixels]] \n\t" | |
98 "wldrd wr5, [%[pixels2]] \n\t" | |
99 "pld [%[block], #32] \n\t" | |
100 "wunpckelub wr6, wr4 \n\t" | |
101 "wldrd wr0, [%[block]] \n\t" | |
102 "wunpckehub wr7, wr4 \n\t" | |
103 "wldrd wr1, [%[block], #8] \n\t" | |
104 "wunpckelub wr8, wr5 \n\t" | |
105 "wldrd wr2, [%[block], #16] \n\t" | |
106 "wunpckehub wr9, wr5 \n\t" | |
107 "wldrd wr3, [%[block], #24] \n\t" | |
108 "add %[block], %[block], #32 \n\t" | |
109 "waddhss wr10, wr0, wr6 \n\t" | |
110 "waddhss wr11, wr1, wr7 \n\t" | |
111 "waddhss wr12, wr2, wr8 \n\t" | |
112 "waddhss wr13, wr3, wr9 \n\t" | |
113 "wpackhus wr14, wr10, wr11 \n\t" | |
114 "wpackhus wr15, wr12, wr13 \n\t" | |
115 "wstrd wr14, [%[pixels]] \n\t" | |
116 "add %[pixels], %[pixels], %[line_size2] \n\t" | |
117 "subs r12, r12, #1 \n\t" | |
118 "wstrd wr15, [%[pixels2]] \n\t" | |
119 "add %[pixels2], %[pixels2], %[line_size2] \n\t" | |
120 "bne 1b \n\t" | |
121 : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2) | |
122 : [line_size2]"r"(line_size << 1) | |
123 : "cc", "memory", "r12"); | |
124 } | |
125 | |
126 static void clear_blocks_iwmmxt(DCTELEM *blocks) | |
127 { | |
128 __asm__ volatile( | |
129 "wzero wr0 \n\t" | |
130 "mov r1, #(128 * 6 / 32) \n\t" | |
131 "1: \n\t" | |
132 "wstrd wr0, [%0] \n\t" | |
133 "wstrd wr0, [%0, #8] \n\t" | |
134 "wstrd wr0, [%0, #16] \n\t" | |
135 "wstrd wr0, [%0, #24] \n\t" | |
136 "subs r1, r1, #1 \n\t" | |
137 "add %0, %0, #32 \n\t" | |
138 "bne 1b \n\t" | |
139 : "+r"(blocks) | |
140 : | |
141 : "r1" | |
142 ); | |
143 } | |
144 | |
/*
 * Do-nothing function with a pixel-operation signature: every argument is
 * ignored and nothing is read or written.
 * NOTE(review): not referenced anywhere in the visible part of this file.
 */
static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    (void)block;
    (void)pixels;
    (void)line_size;
    (void)h;
}
149 | |
150 /* A run time test is not simple. If this file is compiled in | |
151 * then we should install the functions | |
152 */ | |
153 int mm_flags = FF_MM_IWMMXT; /* multimedia extension flags */ | |
154 | |
155 void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) | |
156 { | |
157 if (avctx->dsp_mask) { | |
158 if (avctx->dsp_mask & FF_MM_FORCE) | |
159 mm_flags |= (avctx->dsp_mask & 0xffff); | |
160 else | |
161 mm_flags &= ~(avctx->dsp_mask & 0xffff); | |
162 } | |
163 | |
164 if (!(mm_flags & FF_MM_IWMMXT)) return; | |
165 | |
166 c->add_pixels_clamped = add_pixels_clamped_iwmmxt; | |
167 | |
168 c->clear_blocks = clear_blocks_iwmmxt; | |
169 | |
170 c->put_pixels_tab[0][0] = put_pixels16_iwmmxt; | |
171 c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt; | |
172 c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt; | |
173 c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt; | |
174 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt; | |
175 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt; | |
176 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt; | |
177 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt; | |
178 | |
179 c->put_pixels_tab[1][0] = put_pixels8_iwmmxt; | |
180 c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt; | |
181 c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt; | |
182 c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt; | |
183 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt; | |
184 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt; | |
185 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt; | |
186 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt; | |
187 | |
188 c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt; | |
189 c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt; | |
190 c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt; | |
191 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt; | |
192 c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt; | |
193 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt; | |
194 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt; | |
195 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt; | |
196 | |
197 c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt; | |
198 c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt; | |
199 c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt; | |
200 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt; | |
201 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt; | |
202 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt; | |
203 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt; | |
204 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt; | |
205 } |