Mercurial > libavcodec.hg
annotate ps2/dsputil_mmi.c @ 3851:a778f86c28e2 libavcodec
Original Commit: r49 | ods15 | 2006-09-23 12:52:21 +0300 (Sat, 23 Sep 2006) | 3 lines
fix off-by-one in both encoder and decoder.
libvorbis and tremor work now, ffvorbis still doesn't
author | ods15 |
---|---|
date | Mon, 02 Oct 2006 06:07:28 +0000 |
parents | 0b546eab515d |
children | c8c591fe26f8 |
rev | line source |
---|---|
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
1 /* |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
2 * MMI optimized DSP utils |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
4 * |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
5 * This library is free software; you can redistribute it and/or |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
6 * modify it under the terms of the GNU Lesser General Public |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
7 * License as published by the Free Software Foundation; either |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
8 * version 2 of the License, or (at your option) any later version. |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
9 * |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
10 * This library is distributed in the hope that it will be useful, |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
13 * Lesser General Public License for more details. |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
14 * |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
15 * You should have received a copy of the GNU Lesser General Public |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
16 * License along with this library; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
18 * |
1876 | 19 * MMI optimization by Leon van Stuivenberg |
1143
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
20 * clear_blocks_mmi() by BroadQ |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
21 */ |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
22 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
23 #include "../dsputil.h" |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
24 #include "mmi.h" |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
25 |
1092 | 26 void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block); |
27 void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block); | |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1146
diff
changeset
|
28 void ff_mmi_idct(DCTELEM *block); |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
29 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
30 static void clear_blocks_mmi(DCTELEM * blocks) |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
31 { |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
32 asm volatile( |
1143
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
33 ".set noreorder \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
34 "addiu $9, %0, 768 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
35 "nop \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
36 "1: \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
37 "sq $0, 0(%0) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
38 "move $8, %0 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
39 "addi %0, %0, 64 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
40 "sq $0, 16($8) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
41 "slt $10, %0, $9 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
42 "sq $0, 32($8) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
43 "bnez $10, 1b \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
44 "sq $0, 48($8) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
45 ".set reorder \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
46 : "+r" (blocks) :: "$8", "$9", "memory" ); |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
47 } |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
48 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
49 |
1064 | 50 static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size) |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
51 { |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
52 asm volatile( |
772
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
53 ".set push \n\t" |
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
54 ".set mips3 \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
55 "ld $8, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
56 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
57 "ld $9, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
58 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
59 "ld $10, 0(%0) \n\t" |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
60 "pextlb $8, $0, $8 \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
61 "sq $8, 0(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
62 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
63 "ld $8, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
64 "pextlb $9, $0, $9 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
65 "sq $9, 16(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
66 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
67 "ld $9, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
68 "pextlb $10, $0, $10 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
69 "sq $10, 32(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
70 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
71 "ld $10, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
72 "pextlb $8, $0, $8 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
73 "sq $8, 48(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
74 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
75 "ld $8, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
76 "pextlb $9, $0, $9 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
77 "sq $9, 64(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
78 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
79 "ld $9, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
80 "pextlb $10, $0, $10 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
81 "sq $10, 80(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
82 "pextlb $8, $0, $8 \n\t" |
2979 | 83 "sq $8, 96(%1) \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
84 "pextlb $9, $0, $9 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
85 "sq $9, 112(%1) \n\t" |
772
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
86 ".set pop \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
87 : "+r" (pixels) : "r" (block), "r" (line_size) : "$8", "$9", "$10", "memory" ); |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
88 } |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
89 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
90 |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
91 static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
92 { |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
93 asm volatile( |
772
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
94 ".set push \n\t" |
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
95 ".set mips3 \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
96 "1: \n\t" |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
97 "ldr $8, 0(%1) \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
98 "addiu %2, %2, -1 \n\t" |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
99 "ldl $8, 7(%1) \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
100 "add %1, %1, %3 \n\t" |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
101 "sd $8, 0(%0) \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
102 "add %0, %0, %3 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
103 "bgtz %2, 1b \n\t" |
772
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
104 ".set pop \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
105 : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size) |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
106 : "$8", "memory" ); |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
107 } |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
108 |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
109 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
110 static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
111 { |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
112 asm volatile ( |
772
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
113 ".set push \n\t" |
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
114 ".set mips3 \n\t" |
2979 | 115 "1: \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
116 "ldr $8, 0(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
117 "add $11, %1, %3 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
118 "ldl $8, 7(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
119 "add $10, %0, %3 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
120 "ldr $9, 8(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
121 "ldl $9, 15(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
122 "ldr $12, 0($11) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
123 "add %1, $11, %3 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
124 "ldl $12, 7($11) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
125 "pcpyld $8, $9, $8 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
126 "sq $8, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
127 "ldr $13, 8($11) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
128 "addiu %2, %2, -2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
129 "ldl $13, 15($11) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
130 "add %0, $10, %3 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
131 "pcpyld $12, $13, $12 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
132 "sq $12, 0($10) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
133 "bgtz %2, 1b \n\t" |
772
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
134 ".set pop \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
135 : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size) |
2979 | 136 : "$8", "$9", "$10", "$11", "$12", "$13", "memory" ); |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
137 } |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
138 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
139 |
1092 | 140 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx) |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
141 { |
1092 | 142 const int idct_algo= avctx->idct_algo; |
143 | |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
144 c->clear_blocks = clear_blocks_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
145 |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
146 c->put_pixels_tab[1][0] = put_pixels8_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
147 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
148 |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
149 c->put_pixels_tab[0][0] = put_pixels16_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
150 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
151 |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
152 c->get_pixels = get_pixels_mmi; |
2967 | 153 |
1092 | 154 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){ |
155 c->idct_put= ff_mmi_idct_put; | |
156 c->idct_add= ff_mmi_idct_add; | |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1146
diff
changeset
|
157 c->idct = ff_mmi_idct; |
1092 | 158 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; |
159 } | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
160 } |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
161 |