annotate sh4/dsputil_sh4.c @ 3603:42b6cefc6c1a libavcodec

replacing MULH by asm for x86 about 30% faster imdct36()
author michael
date Tue, 22 Aug 2006 11:51:09 +0000
parents 0b546eab515d
children c8c591fe26f8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
1 /*
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
2 * sh4 dsputil
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
3 *
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
4 * Copyright (c) 2003 BERO <bero@geocities.co.jp>
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
5 *
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
6 * This library is free software; you can redistribute it and/or
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
9 * version 2 of the License, or (at your option) any later version.
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
10 *
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
11 * This library is distributed in the hope that it will be useful,
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
14 * Lesser General Public License for more details.
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
15 *
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
17 * License along with this library; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
19 */
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
20
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
21 #include "../avcodec.h"
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
22 #include "../dsputil.h"
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
23
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
24 static void memzero_align8(void *dst,size_t size)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
25 {
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
26 #if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
27 (char*)dst+=size;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
28 size/=8*4;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
29 asm(
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
30 #if defined(__SH4__)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
31 " fschg\n" //single float mode
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
32 #endif
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
33 " fldi0 fr0\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
34 " fldi0 fr1\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
35 " fschg\n" // double
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
36 "1: \n" \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
37 " dt %1\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
38 " fmov dr0,@-%0\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
39 " fmov dr0,@-%0\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
40 " fmov dr0,@-%0\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
41 " bf.s 1b\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
42 " fmov dr0,@-%0\n"
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
43 #if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
44 " fschg" //back to single
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
45 #endif
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
46 : : "r"(dst),"r"(size): "memory" );
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
47 #else
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
48 double *d = dst;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
49 size/=8*4;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
50 do {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
51 d[0] = 0.0;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
52 d[1] = 0.0;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
53 d[2] = 0.0;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
54 d[3] = 0.0;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
55 d+=4;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
56 } while(--size);
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
57 #endif
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
58 }
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
59
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
60 static void clear_blocks_sh4(DCTELEM *blocks)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
61 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
62 // if (((int)blocks&7)==0)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
63 memzero_align8(blocks,sizeof(DCTELEM)*6*64);
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
64 }
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
65
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
66 extern void idct_sh4(DCTELEM *block);
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
67 static void idct_put(uint8_t *dest, int line_size, DCTELEM *block)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
68 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
69 idct_sh4(block);
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
70 int i;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
71 uint8_t *cm = cropTbl + MAX_NEG_CROP;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
72 for(i=0;i<8;i++) {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
73 dest[0] = cm[block[0]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
74 dest[1] = cm[block[1]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
75 dest[2] = cm[block[2]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
76 dest[3] = cm[block[3]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
77 dest[4] = cm[block[4]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
78 dest[5] = cm[block[5]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
79 dest[6] = cm[block[6]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
80 dest[7] = cm[block[7]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
81 dest+=line_size;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
82 block+=8;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
83 }
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
84 }
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
85 static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
86 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
87 idct_sh4(block);
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
88 int i;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
89 uint8_t *cm = cropTbl + MAX_NEG_CROP;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
90 for(i=0;i<8;i++) {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
91 dest[0] = cm[dest[0]+block[0]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
92 dest[1] = cm[dest[1]+block[1]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
93 dest[2] = cm[dest[2]+block[2]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
94 dest[3] = cm[dest[3]+block[3]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
95 dest[4] = cm[dest[4]+block[4]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
96 dest[5] = cm[dest[5]+block[5]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
97 dest[6] = cm[dest[6]+block[6]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
98 dest[7] = cm[dest[7]+block[7]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
99 dest+=line_size;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
100 block+=8;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
101 }
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
102 }
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
103
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
104 extern void dsputil_init_align(DSPContext* c, AVCodecContext *avctx);
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
105
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
106 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
107 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
108 const int idct_algo= avctx->idct_algo;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
109 dsputil_init_align(c,avctx);
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
110
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
111 c->clear_blocks = clear_blocks_sh4;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
112 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
113 c->idct_put = idct_put;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
114 c->idct_add = idct_add;
1324
7d328fd9d8a5 the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents: 1260
diff changeset
115 c->idct = idct_sh4;
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
116 c->idct_permutation_type= FF_NO_IDCT_PERM; //FF_SIMPLE_IDCT_PERM; //FF_LIBMPEG2_IDCT_PERM;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
117 }
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
118 }