annotate sh4/dsputil_sh4.c @ 3990:746a60ba3177 libavcodec

enable CMOV_IS_FAST as its faster or equal speed on every cpu (duron, athlon, PM, P3) from which ive seen benchmarks, it might be slower on P4 but noone has posted benchmarks ...
author michael
date Wed, 11 Oct 2006 12:23:40 +0000
parents c8c591fe26f8
children 23da44e8fd05
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
1 /*
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
2 * sh4 dsputil
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
3 *
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
4 * Copyright (c) 2003 BERO <bero@geocities.co.jp>
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
5 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
6 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
7 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
8 * FFmpeg is free software; you can redistribute it and/or
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
9 * modify it under the terms of the GNU Lesser General Public
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
10 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
11 * version 2.1 of the License, or (at your option) any later version.
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
12 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
13 * FFmpeg is distributed in the hope that it will be useful,
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
16 * Lesser General Public License for more details.
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
17 *
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
18 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
19 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
21 */
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
22
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
23 #include "../avcodec.h"
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
24 #include "../dsputil.h"
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
25
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
26 static void memzero_align8(void *dst,size_t size)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
27 {
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
28 #if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
29 (char*)dst+=size;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
30 size/=8*4;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
31 asm(
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
32 #if defined(__SH4__)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
33 " fschg\n" //single float mode
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
34 #endif
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
35 " fldi0 fr0\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
36 " fldi0 fr1\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
37 " fschg\n" // double
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
38 "1: \n" \
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
39 " dt %1\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
40 " fmov dr0,@-%0\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
41 " fmov dr0,@-%0\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
42 " fmov dr0,@-%0\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
43 " bf.s 1b\n"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
44 " fmov dr0,@-%0\n"
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
45 #if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
46 " fschg" //back to single
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
47 #endif
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
48 : : "r"(dst),"r"(size): "memory" );
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
49 #else
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
50 double *d = dst;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
51 size/=8*4;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
52 do {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
53 d[0] = 0.0;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
54 d[1] = 0.0;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
55 d[2] = 0.0;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
56 d[3] = 0.0;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
57 d+=4;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
58 } while(--size);
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
59 #endif
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
60 }
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
61
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
62 static void clear_blocks_sh4(DCTELEM *blocks)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
63 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
64 // if (((int)blocks&7)==0)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
65 memzero_align8(blocks,sizeof(DCTELEM)*6*64);
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
66 }
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
67
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
68 extern void idct_sh4(DCTELEM *block);
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
69 static void idct_put(uint8_t *dest, int line_size, DCTELEM *block)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
70 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
71 idct_sh4(block);
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
72 int i;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
73 uint8_t *cm = cropTbl + MAX_NEG_CROP;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
74 for(i=0;i<8;i++) {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
75 dest[0] = cm[block[0]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
76 dest[1] = cm[block[1]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
77 dest[2] = cm[block[2]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
78 dest[3] = cm[block[3]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
79 dest[4] = cm[block[4]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
80 dest[5] = cm[block[5]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
81 dest[6] = cm[block[6]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
82 dest[7] = cm[block[7]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
83 dest+=line_size;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
84 block+=8;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
85 }
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
86 }
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
87 static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
88 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
89 idct_sh4(block);
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
90 int i;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
91 uint8_t *cm = cropTbl + MAX_NEG_CROP;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
92 for(i=0;i<8;i++) {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
93 dest[0] = cm[dest[0]+block[0]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
94 dest[1] = cm[dest[1]+block[1]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
95 dest[2] = cm[dest[2]+block[2]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
96 dest[3] = cm[dest[3]+block[3]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
97 dest[4] = cm[dest[4]+block[4]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
98 dest[5] = cm[dest[5]+block[5]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
99 dest[6] = cm[dest[6]+block[6]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
100 dest[7] = cm[dest[7]+block[7]];
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
101 dest+=line_size;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
102 block+=8;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
103 }
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
104 }
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
105
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
106 extern void dsputil_init_align(DSPContext* c, AVCodecContext *avctx);
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
107
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
108 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
109 {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
110 const int idct_algo= avctx->idct_algo;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
111 dsputil_init_align(c,avctx);
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
112
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
113 c->clear_blocks = clear_blocks_sh4;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
114 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
115 c->idct_put = idct_put;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
116 c->idct_add = idct_add;
1324
7d328fd9d8a5 the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents: 1260
diff changeset
117 c->idct = idct_sh4;
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
118 c->idct_permutation_type= FF_NO_IDCT_PERM; //FF_SIMPLE_IDCT_PERM; //FF_LIBMPEG2_IDCT_PERM;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
119 }
1259
e8c3884f2c7e sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff changeset
120 }