Mercurial > libavcodec.hg
annotate sh4/dsputil_sh4.c @ 3603:42b6cefc6c1a libavcodec
replacing MULH by asm for x86
about 30% faster imdct36()
author | michael |
---|---|
date | Tue, 22 Aug 2006 11:51:09 +0000 |
parents | 0b546eab515d |
children | c8c591fe26f8 |
rev | line source |
---|---|
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
1 /* |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
2 * sh4 dsputil |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
3 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
4 * Copyright (c) 2003 BERO <bero@geocities.co.jp> |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
5 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
6 * This library is free software; you can redistribute it and/or |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
7 * modify it under the terms of the GNU Lesser General Public |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
8 * License as published by the Free Software Foundation; either |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
9 * version 2 of the License, or (at your option) any later version. |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
10 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
11 * This library is distributed in the hope that it will be useful, |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
14 * Lesser General Public License for more details. |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
15 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
16 * You should have received a copy of the GNU Lesser General Public |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
17 * License along with this library; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
19 */ |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
20 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
21 #include "../avcodec.h" |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
22 #include "../dsputil.h" |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
23 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
/**
 * Zero a buffer in chunks of 32 bytes (four 8-byte stores per chunk).
 *
 * @param dst  start of the buffer. It is written with 8-byte (double)
 *             stores, so it has to be suitably aligned for them.
 * @param size length in bytes. Only the leading size/32 whole chunks are
 *             cleared; a remainder smaller than 32 bytes is left untouched,
 *             and a size below 32 clears nothing.
 */
static void memzero_align8(void *dst, size_t size)
{
    size_t chunks = size / (8 * 4);

    /* Both loops below decrement the counter before testing it, so entering
     * them with a zero count would wrap around and write far past the
     * buffer. */
    if (chunks == 0)
        return;

#if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
    /* The asm stores with pre-decrement addressing (@-Rn), so it starts one
     * byte past the last chunk and walks down to dst. */
    char *end = (char *)dst + chunks * (8 * 4);

    /* Both operands are modified by the asm (pointer pre-decrement, "dt" on
     * the counter), hence "+r"; fr0/fr1 and the T bit are overwritten too.
     * volatile keeps the statement alive although its outputs are unused. */
    __asm__ volatile (
#if defined(__SH4__)
        " fschg\n" //single float mode
#endif
        " fldi0 fr0\n"
        " fldi0 fr1\n"
        " fschg\n" // double
        "1: \n"
        " dt %1\n"
        " fmov dr0,@-%0\n"
        " fmov dr0,@-%0\n"
        " fmov dr0,@-%0\n"
        " bf.s 1b\n"
        " fmov dr0,@-%0\n"   /* executed in the branch delay slot */
#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
        " fschg" //back to single
#endif
        : "+r"(end), "+r"(chunks)
        :
        : "fr0", "fr1", "t", "memory");
#else
    /* Portable fallback: same chunking, written front to back. */
    double *d = dst;

    do {
        d[0] = 0.0;
        d[1] = 0.0;
        d[2] = 0.0;
        d[3] = 0.0;
        d += 4;
    } while (--chunks);
#endif
}
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
59 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
60 static void clear_blocks_sh4(DCTELEM *blocks) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
61 { |
2979 | 62 // if (((int)blocks&7)==0) |
63 memzero_align8(blocks,sizeof(DCTELEM)*6*64); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
64 } |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
65 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
66 extern void idct_sh4(DCTELEM *block); |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
67 static void idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
68 { |
2979 | 69 idct_sh4(block); |
70 int i; | |
71 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
72 for(i=0;i<8;i++) { | |
73 dest[0] = cm[block[0]]; | |
74 dest[1] = cm[block[1]]; | |
75 dest[2] = cm[block[2]]; | |
76 dest[3] = cm[block[3]]; | |
77 dest[4] = cm[block[4]]; | |
78 dest[5] = cm[block[5]]; | |
79 dest[6] = cm[block[6]]; | |
80 dest[7] = cm[block[7]]; | |
81 dest+=line_size; | |
82 block+=8; | |
83 } | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
84 } |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
85 static void idct_add(uint8_t *dest, int line_size, DCTELEM *block) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
86 { |
2979 | 87 idct_sh4(block); |
88 int i; | |
89 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
90 for(i=0;i<8;i++) { | |
91 dest[0] = cm[dest[0]+block[0]]; | |
92 dest[1] = cm[dest[1]+block[1]]; | |
93 dest[2] = cm[dest[2]+block[2]]; | |
94 dest[3] = cm[dest[3]+block[3]]; | |
95 dest[4] = cm[dest[4]+block[4]]; | |
96 dest[5] = cm[dest[5]+block[5]]; | |
97 dest[6] = cm[dest[6]+block[6]]; | |
98 dest[7] = cm[dest[7]+block[7]]; | |
99 dest+=line_size; | |
100 block+=8; | |
101 } | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
102 } |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
103 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
104 extern void dsputil_init_align(DSPContext* c, AVCodecContext *avctx); |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
105 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
106 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
107 { |
2979 | 108 const int idct_algo= avctx->idct_algo; |
109 dsputil_init_align(c,avctx); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
110 |
2979 | 111 c->clear_blocks = clear_blocks_sh4; |
112 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){ | |
113 c->idct_put = idct_put; | |
114 c->idct_add = idct_add; | |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1260
diff
changeset
|
115 c->idct = idct_sh4; |
2979 | 116 c->idct_permutation_type= FF_NO_IDCT_PERM; //FF_SIMPLE_IDCT_PERM; //FF_LIBMPEG2_IDCT_PERM; |
117 } | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
118 } |