Mercurial > libavcodec.hg
comparison bfin/dsputil_bfin.c @ 4765:85298e8c55c4 libavcodec
bfin dsputils: basic pixel operations, SADs, diffs, motion compensation,
and standard IEEE 8x8 block transforms
patch by Marc Hoffman, mmh pleasantst com
author | diego |
---|---|
date | Sun, 01 Apr 2007 22:28:45 +0000 |
parents | c8c591fe26f8 |
children | 65ee324848ac |
comparison
equal
deleted
inserted
replaced
4764:da0598df2e53 | 4765:85298e8c55c4 |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2006 Michael Benjamin | 2 * BlackFin DSPUTILS |
3 * | |
4 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> | |
5 * Copyright (c) 2006 Michael Benjamin <michael.benjamin@analog.com> | |
3 * | 6 * |
4 * This file is part of FFmpeg. | 7 * This file is part of FFmpeg. |
5 * | 8 * |
6 * FFmpeg is free software; you can redistribute it and/or | 9 * FFmpeg is free software; you can redistribute it and/or |
7 * modify it under the terms of the GNU Lesser General Public | 10 * modify it under the terms of the GNU Lesser General Public |
16 * You should have received a copy of the GNU Lesser General Public | 19 * You should have received a copy of the GNU Lesser General Public |
17 * License along with FFmpeg; if not, write to the Free Software | 20 * License along with FFmpeg; if not, write to the Free Software |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 */ | 22 */ |
20 | 23 |
24 #include <unistd.h> | |
25 #include <bits/bfin_sram.h> | |
21 #include "../avcodec.h" | 26 #include "../avcodec.h" |
22 #include "../dsputil.h" | 27 #include "../dsputil.h" |
23 | 28 |
24 static int sad8x8_bfin( void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h ) | 29 #define USE_L1CODE |
25 { | 30 |
26 int sum; | 31 #ifdef USE_L1CODE |
27 __asm__ __volatile__ ( | 32 #define L1CODE __attribute__ ((l1_text)) |
28 "P0 = %1;" // blk1 | 33 #else |
29 "P1 = %2;" // blk2 | 34 #define L1CODE |
30 "P2 = %3;\n" // h | 35 #endif |
31 "I0 = P0;" | 36 int off; |
32 "I1 = P1;\n" | 37 |
33 "A0 = 0;" | 38 |
34 "A1 = 0;\n" | 39 extern void ff_bfin_idct (DCTELEM *block) L1CODE; |
35 "M0 = P2;\n" | 40 extern void ff_bfin_fdct (DCTELEM *block) L1CODE; |
36 "P3 = 32;\n" | 41 extern void ff_bfin_add_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) L1CODE; |
37 "LSETUP (sad8x8LoopBegin, sad8x8LoopEnd) LC0=P3;\n" | 42 extern void ff_bfin_put_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) L1CODE; |
38 "sad8x8LoopBegin:\n" | 43 extern void ff_bfin_diff_pixels (DCTELEM *block, uint8_t *s1, uint8_t *s2, int stride) L1CODE; |
39 " DISALGNEXCPT || R0 = [I0] || R2 = [I1];\n" | 44 extern void ff_bfin_get_pixels (DCTELEM *restrict block, const uint8_t *pixels, int line_size) L1CODE; |
40 " DISALGNEXCPT || R1 = [I0++] || R3 = [I1++];\n" | 45 extern int ff_bfin_pix_norm1 (uint8_t * pix, int line_size) L1CODE; |
41 "sad8x8LoopEnd:\n" | 46 extern int ff_bfin_z_sad8x8 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) L1CODE; |
42 " SAA ( R1:0 , R3:2 );\n" | 47 extern int ff_bfin_z_sad16x16 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) L1CODE; |
43 "R3 = A1.L + A1.H, R2 = A0.L + A0.H;\n" | 48 |
44 "%0 = R2 + R3 (S);\n" | 49 extern void ff_bfin_z_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) L1CODE; |
45 : "=&d" (sum) | 50 extern void ff_bfin_z_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) L1CODE; |
46 : "m"(blk1), "m"(blk2), "m"(h) | 51 extern void ff_bfin_put_pixels16_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) L1CODE; |
47 : "P0","P1","P2","I0","I1","A0","A1","R0","R1","R2","R3"); | 52 extern void ff_bfin_put_pixels8_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) L1CODE; |
48 return sum; | 53 |
49 } | 54 |
55 extern int ff_bfin_pix_sum (uint8_t *p, int stride) L1CODE; | |
56 | |
57 extern void ff_bfin_put_pixels8uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) L1CODE; | |
58 extern void ff_bfin_put_pixels16uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) L1CODE; | |
59 extern void ff_bfin_put_pixels8uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) L1CODE; | |
60 extern void ff_bfin_put_pixels16uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) L1CODE; | |
61 | |
62 extern int ff_bfin_sse4 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) L1CODE; | |
63 extern int ff_bfin_sse8 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) L1CODE; | |
64 extern int ff_bfin_sse16 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) L1CODE; | |
65 | |
66 | |
67 #if 0 | |
68 void pblk (uint8_t *p, int w, int h, int s) | |
69 { | |
70 int i,j; | |
71 av_log (0,0,"0x%08x:\n", p); | |
72 for (i = 0;i<h;i++) { | |
73 for (j=0;j<w;j++) | |
74 av_log (0,0,"%3d ", p[j]); | |
75 p+=s; | |
76 av_log (0,0,"\n"); | |
77 } | |
78 av_log (0,0,"\n"); | |
79 } | |
80 #endif | |
81 | |
82 static void bfin_idct_add (uint8_t *dest, int line_size, DCTELEM *block) | |
83 { | |
84 ff_bfin_idct (block); | |
85 ff_bfin_add_pixels_clamped (block, dest, line_size); | |
86 } | |
87 | |
88 static void bfin_idct_put (uint8_t *dest, int line_size, DCTELEM *block) | |
89 { | |
90 ff_bfin_idct (block); | |
91 ff_bfin_put_pixels_clamped (block, dest, line_size); | |
92 } | |
93 | |
94 | |
95 static void bfin_clear_blocks (DCTELEM *blocks) | |
96 { | |
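97 // This is just a simple memset: the hardware loop below runs 192 times, | |
98 // storing the zero held in R0 through I0 with post-increment on each pass. | |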
99 asm("P0=192; " | |
100 "I0=%0; " | |
101 "R0=0; " | |
102 "LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;" | |
103 "clear_blocks_blkfn_lab:" | |
104 "[I0++]=R0;" | |
105 ::"a" (blocks):"P0","I0","R0"); | |
106 } | |
107 | |
108 | |
109 | |
110 static void bfin_put_pixels8 (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
111 { | |
112 ff_bfin_put_pixels8uc (block, pixels, pixels, line_size, line_size, h); | |
113 } | |
114 | |
115 static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
116 { | |
117 ff_bfin_put_pixels8uc (block, pixels, pixels+1, line_size, line_size, h); | |
118 } | |
119 | |
120 static void bfin_put_pixels8_y2 (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
121 { | |
122 ff_bfin_put_pixels8uc (block, pixels, pixels+line_size, line_size, line_size, h); | |
123 } | |
124 | |
125 static void bfin_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int line_size, int h) | |
126 { | |
127 ff_bfin_z_put_pixels8_xy2 (block,s0,line_size, line_size, h); | |
128 } | |
129 | |
130 static void bfin_put_pixels16 (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
131 { | |
132 ff_bfin_put_pixels16uc (block, pixels, pixels, line_size, line_size, h); | |
133 } | |
134 | |
135 static void bfin_put_pixels16_x2 (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
136 { | |
137 ff_bfin_put_pixels16uc (block, pixels, pixels+1, line_size, line_size, h); | |
138 } | |
139 | |
140 static void bfin_put_pixels16_y2 (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
141 { | |
142 ff_bfin_put_pixels16uc (block, pixels, pixels+line_size, line_size, line_size, h); | |
143 } | |
144 | |
145 static void bfin_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int line_size, int h) | |
146 { | |
147 ff_bfin_z_put_pixels16_xy2 (block,s0,line_size, line_size, h); | |
148 } | |
149 | |
150 void bfin_put_pixels8_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
151 { | |
152 ff_bfin_put_pixels8uc_nornd (block, pixels, pixels, line_size, h); | |
153 } | |
154 | |
155 static void bfin_put_pixels8_x2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
156 { | |
157 ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+1, line_size, h); | |
158 } | |
159 | |
160 static void bfin_put_pixels8_y2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
161 { | |
162 ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+line_size, line_size, h); | |
163 } | |
164 | |
165 | |
166 void bfin_put_pixels16_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
167 { | |
168 ff_bfin_put_pixels16uc_nornd (block, pixels, pixels, line_size, h); | |
169 } | |
170 | |
171 static void bfin_put_pixels16_x2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
172 { | |
173 ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+1, line_size, h); | |
174 } | |
175 | |
176 static void bfin_put_pixels16_y2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
177 { | |
178 ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+line_size, line_size, h); | |
179 } | |
180 | |
181 static int bfin_pix_abs16 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) | |
182 { | |
183 return ff_bfin_z_sad16x16 (blk1,blk2,line_size,line_size,h); | |
184 } | |
185 | |
186 static uint8_t vtmp_blk[256] __attribute__((l1_data_B)); | |
187 | |
188 static int bfin_pix_abs16_x2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) | |
189 { | |
190 ff_bfin_put_pixels16uc (vtmp_blk, blk2, blk2+1, 16, line_size, h); | |
191 return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h); | |
192 } | |
193 | |
194 static int bfin_pix_abs16_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) | |
195 { | |
196 ff_bfin_put_pixels16uc (vtmp_blk, blk2, blk2+line_size, 16, line_size, h); | |
197 return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h); | |
198 } | |
199 | |
200 static int bfin_pix_abs16_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) | |
201 { | |
202 ff_bfin_z_put_pixels16_xy2 (vtmp_blk, blk2, 16, line_size, h); | |
203 return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h); | |
204 } | |
205 | |
206 static int bfin_pix_abs8 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) | |
207 { | |
208 return ff_bfin_z_sad8x8 (blk1,blk2,line_size,line_size, h); | |
209 } | |
210 | |
211 static int bfin_pix_abs8_x2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) | |
212 { | |
213 ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+1, 8, line_size, h); | |
214 return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h); | |
215 } | |
216 | |
217 static int bfin_pix_abs8_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) | |
218 { | |
219 ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+line_size, 8, line_size, h); | |
220 return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h); | |
221 } | |
222 | |
223 static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) | |
224 { | |
225 ff_bfin_z_put_pixels8_xy2 (vtmp_blk, blk2, 8, line_size, h); | |
226 return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h); | |
227 } | |
228 | |
229 | |
230 /* | |
231 decoder optimization | |
232 started on 2/11: 100 frames of 352x240@25, compiled with no optimization (-g debugging) | |
233 9.824s ~ 2.44x off | |
234 6.360s ~ 1.58x off with -O2 | |
235 5.740s ~ 1.43x off with idcts | |
236 | |
237 2.64s on 2/20, same sman.mp4, decode only | |
238 | |
239 */ | |
50 | 240 |
51 void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx ) | 241 void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx ) |
52 { | 242 { |
53 c->pix_abs[1][0] = sad8x8_bfin; | 243 c->get_pixels = ff_bfin_get_pixels; |
54 c->sad[1] = sad8x8_bfin; | 244 c->diff_pixels = ff_bfin_diff_pixels; |
55 } | 245 c->put_pixels_clamped = ff_bfin_put_pixels_clamped; |
246 c->add_pixels_clamped = ff_bfin_add_pixels_clamped; | |
247 | |
248 c->clear_blocks = bfin_clear_blocks; | |
249 c->pix_sum = ff_bfin_pix_sum; | |
250 c->pix_norm1 = ff_bfin_pix_norm1; | |
251 | |
252 c->sad[0] = bfin_pix_abs16; | |
253 c->sad[1] = bfin_pix_abs8; | |
254 | |
255 /* TODO: first index [0] = 16-pixel functions, [1] = 8-pixel functions */ | |
256 c->pix_abs[0][0] = bfin_pix_abs16; | |
257 c->pix_abs[0][1] = bfin_pix_abs16_x2; | |
258 c->pix_abs[0][2] = bfin_pix_abs16_y2; | |
259 c->pix_abs[0][3] = bfin_pix_abs16_xy2; | |
260 | |
261 c->pix_abs[1][0] = bfin_pix_abs8; | |
262 c->pix_abs[1][1] = bfin_pix_abs8_x2; | |
263 c->pix_abs[1][2] = bfin_pix_abs8_y2; | |
264 c->pix_abs[1][3] = bfin_pix_abs8_xy2; | |
265 | |
266 | |
267 c->sse[0] = ff_bfin_sse16; | |
268 c->sse[1] = ff_bfin_sse8; | |
269 c->sse[2] = ff_bfin_sse4; | |
270 | |
271 | |
272 /** | |
273 * Halfpel motion compensation with rounding (a+b+1)>>1. | |
274 * This is an array[4][4] of motion compensation functions; only the two | |
275 * horizontal blocksizes (16, 8) are set here, each for the 4 halfpel positions: | |
276 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] | |
277 * @param block destination where the result is stored | |
278 * @param pixels source | |
279 * @param line_size number of bytes in a horizontal line of block | |
280 * @param h height | |
281 */ | |
282 | |
283 c->put_pixels_tab[0][0] = bfin_put_pixels16; | |
284 c->put_pixels_tab[0][1] = bfin_put_pixels16_x2; | |
285 c->put_pixels_tab[0][2] = bfin_put_pixels16_y2; | |
286 c->put_pixels_tab[0][3] = bfin_put_pixels16_xy2; | |
287 | |
288 c->put_pixels_tab[1][0] = bfin_put_pixels8; | |
289 c->put_pixels_tab[1][1] = bfin_put_pixels8_x2; | |
290 c->put_pixels_tab[1][2] = bfin_put_pixels8_y2; | |
291 c->put_pixels_tab[1][3] = bfin_put_pixels8_xy2; | |
292 | |
293 c->put_no_rnd_pixels_tab[1][0] = bfin_put_pixels8_nornd; | |
294 c->put_no_rnd_pixels_tab[1][1] = bfin_put_pixels8_x2_nornd; | |
295 c->put_no_rnd_pixels_tab[1][2] = bfin_put_pixels8_y2_nornd; | |
296 c->put_no_rnd_pixels_tab[1][3] = ff_bfin_put_pixels8_xy2_nornd; | |
297 | |
298 c->put_no_rnd_pixels_tab[0][0] = bfin_put_pixels16_nornd; | |
299 c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd; | |
300 c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd; | |
301 c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; | |
302 | |
303 c->fdct = ff_bfin_fdct; | |
304 c->idct = ff_bfin_idct; | |
305 c->idct_add = bfin_idct_add; | |
306 c->idct_put = bfin_idct_put; | |
307 } | |
308 | |
309 | |
310 |