Mercurial > libavcodec.hg
annotate ps2/dsputil_mmi.c @ 10102:76eeb9e3599b libavcodec
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
1.3x and 3% on G4.
Though I think only part of this speedup is due to my optimizations per se;
some of it is that I got a better roll on the GCC random code generator.
Trivial reorderings of this function have a disproportionate effect on speed.
author | lorenm |
---|---|
date | Thu, 27 Aug 2009 13:41:11 +0000 |
parents | 04423b2f6e0b |
children |
rev | line source |
---|---|
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
1 /* |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
2 * MMI optimized DSP utils |
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
8031
diff
changeset
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
4 * |
5214 | 5 * MMI optimization by Leon van Stuivenberg |
6 * clear_blocks_mmi() by BroadQ | |
7 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
8 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
9 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
10 * FFmpeg is free software; you can redistribute it and/or |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
11 * modify it under the terms of the GNU Lesser General Public |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
12 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
13 * version 2.1 of the License, or (at your option) any later version. |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
14 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
15 * FFmpeg is distributed in the hope that it will be useful, |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
18 * Lesser General Public License for more details. |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
19 * |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
20 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
21 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
23 */ |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
24 |
6763 | 25 #include "libavcodec/dsputil.h" |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
26 #include "mmi.h" |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
27 |
1092 | 28 void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block); |
29 void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block); | |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1146
diff
changeset
|
30 void ff_mmi_idct(DCTELEM *block); |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
31 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
32 static void clear_blocks_mmi(DCTELEM * blocks) |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
33 { |
8031 | 34 __asm__ volatile( |
1143
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
35 ".set noreorder \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
36 "addiu $9, %0, 768 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
37 "nop \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
38 "1: \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
39 "sq $0, 0(%0) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
40 "move $8, %0 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
41 "addi %0, %0, 64 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
42 "sq $0, 16($8) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
43 "slt $10, %0, $9 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
44 "sq $0, 32($8) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
45 "bnez $10, 1b \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
46 "sq $0, 48($8) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
47 ".set reorder \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
48 : "+r" (blocks) :: "$8", "$9", "memory" ); |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
49 } |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
50 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
51 |
1064 | 52 static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size) |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
53 { |
8031 | 54 __asm__ volatile( |
772
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
55 ".set push \n\t" |
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
56 ".set mips3 \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
57 "ld $8, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
58 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
59 "ld $9, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
60 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
61 "ld $10, 0(%0) \n\t" |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
62 "pextlb $8, $0, $8 \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
63 "sq $8, 0(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
64 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
65 "ld $8, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
66 "pextlb $9, $0, $9 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
67 "sq $9, 16(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
68 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
69 "ld $9, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
70 "pextlb $10, $0, $10 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
71 "sq $10, 32(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
72 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
73 "ld $10, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
74 "pextlb $8, $0, $8 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
75 "sq $8, 48(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
76 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
77 "ld $8, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
78 "pextlb $9, $0, $9 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
79 "sq $9, 64(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
80 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
81 "ld $9, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
82 "pextlb $10, $0, $10 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
83 "sq $10, 80(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
84 "pextlb $8, $0, $8 \n\t" |
2979 | 85 "sq $8, 96(%1) \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
86 "pextlb $9, $0, $9 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
87 "sq $9, 112(%1) \n\t" |
772
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
88 ".set pop \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
89 : "+r" (pixels) : "r" (block), "r" (line_size) : "$8", "$9", "$10", "memory" ); |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
90 } |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
91 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
92 |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
/**
 * Copy h rows of eight bytes from pixels to block.
 *
 * Every row is read with an ldr/ldl pair at offsets 0 and 7 (the usual MIPS
 * idiom for a 64-bit load from a possibly unaligned address) and written
 * with a single sd, after which both pointers advance by line_size.
 * The loop body runs before h is tested, so at least one row is always
 * copied.
 * NOTE(review): sd is expected to need an 8-byte aligned destination --
 * confirm against callers.
 *
 * @param block     destination rows
 * @param pixels    source rows
 * @param line_size byte distance between consecutive rows, applied to both
 *                  source and destination
 * @param h         number of rows to copy
 */
static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    __asm__ volatile(
        ".set   push                        \n\t"
        ".set   mips3                       \n\t"
        "1:                                 \n\t"
        "ldr    $8, 0(%[src])               \n\t"
        "addiu  %[rows], %[rows], -1        \n\t"
        "ldl    $8, 7(%[src])               \n\t"
        "add    %[src], %[src], %[stride]   \n\t"
        "sd     $8, 0(%[dst])               \n\t"
        "add    %[dst], %[dst], %[stride]   \n\t"
        "bgtz   %[rows], 1b                 \n\t"
        ".set   pop                         \n\t"
        : [dst] "+r" (block), [src] "+r" (pixels), [rows] "+r" (h)
        : [stride] "r" (line_size)
        : "$8", "memory" );
}
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
110 |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
111 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
/**
 * Copy rows of sixteen bytes from pixels to block, two rows per loop
 * iteration.
 *
 * For each row the two 8-byte halves are read with ldr/ldl pairs (offsets
 * 0/7 and 8/15), merged into one 128-bit register with pcpyld and written
 * with a single sq. $11 and $10 hold the addresses of the second source and
 * destination row of the pair. h is decremented by 2 per iteration and the
 * loop continues while it stays positive, so an odd h copies one row more
 * than requested and at least two rows are always copied.
 * NOTE(review): sq is expected to need a 16-byte aligned destination --
 * confirm against callers.
 *
 * @param block     destination rows
 * @param pixels    source rows
 * @param line_size byte distance between consecutive rows, applied to both
 *                  source and destination
 * @param h         number of rows to copy
 */
static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    __asm__ volatile (
        ".set   push                        \n\t"
        ".set   mips3                       \n\t"
        "1:                                 \n\t"
        "ldr    $8, 0(%[src])               \n\t"
        "add    $11, %[src], %[stride]      \n\t" /* $11 = second source row */
        "ldl    $8, 7(%[src])               \n\t"
        "add    $10, %[dst], %[stride]      \n\t" /* $10 = second destination row */
        "ldr    $9, 8(%[src])               \n\t"
        "ldl    $9, 15(%[src])              \n\t"
        "ldr    $12, 0($11)                 \n\t"
        "add    %[src], $11, %[stride]      \n\t"
        "ldl    $12, 7($11)                 \n\t"
        "pcpyld $8, $9, $8                  \n\t"
        "sq     $8, 0(%[dst])               \n\t"
        "ldr    $13, 8($11)                 \n\t"
        "addiu  %[rows], %[rows], -2        \n\t"
        "ldl    $13, 15($11)                \n\t"
        "add    %[dst], $10, %[stride]      \n\t"
        "pcpyld $12, $13, $12               \n\t"
        "sq     $12, 0($10)                 \n\t"
        "bgtz   %[rows], 1b                 \n\t"
        ".set   pop                         \n\t"
        : [dst] "+r" (block), [src] "+r" (pixels), [rows] "+r" (h)
        : [stride] "r" (line_size)
        : "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
}
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
140 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
141 |
1092 | 142 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx) |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
143 { |
1092 | 144 const int idct_algo= avctx->idct_algo; |
145 | |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
146 c->clear_blocks = clear_blocks_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
147 |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
148 c->put_pixels_tab[1][0] = put_pixels8_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
149 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
150 |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
151 c->put_pixels_tab[0][0] = put_pixels16_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
152 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
153 |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
154 c->get_pixels = get_pixels_mmi; |
2967 | 155 |
1092 | 156 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){ |
157 c->idct_put= ff_mmi_idct_put; | |
158 c->idct_add= ff_mmi_idct_add; | |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1146
diff
changeset
|
159 c->idct = ff_mmi_idct; |
1092 | 160 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; |
161 } | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
162 } |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
163 |