Mercurial > libavcodec.hg
annotate ps2/dsputil_mmi.c @ 12454:f4355cd85faa libavcodec
Port latest x264 deblock asm (before they moved to using NV12 as internal
format), LGPL'ed with permission from Jason and Loren. This includes mmx2
code, so remove inline asm from h264dsp_mmx.c accordingly.
author | rbultje |
---|---|
date | Fri, 03 Sep 2010 16:52:46 +0000 |
parents | 04423b2f6e0b |
children |
rev | line source |
---|---|
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
1 /* |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
2 * MMI optimized DSP utils |
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
8031
diff
changeset
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
4 * |
5214 | 5 * MMI optimization by Leon van Stuivenberg |
6 * clear_blocks_mmi() by BroadQ | |
7 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
8 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
9 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
10 * FFmpeg is free software; you can redistribute it and/or |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
11 * modify it under the terms of the GNU Lesser General Public |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
12 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
13 * version 2.1 of the License, or (at your option) any later version. |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
14 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
15 * FFmpeg is distributed in the hope that it will be useful, |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
18 * Lesser General Public License for more details. |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
19 * |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
20 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
21 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
23 */ |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
24 |
6763 | 25 #include "libavcodec/dsputil.h" |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
26 #include "mmi.h" |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
27 |
1092 | 28 void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block); |
29 void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block); | |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1146
diff
changeset
|
30 void ff_mmi_idct(DCTELEM *block); |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
31 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
32 static void clear_blocks_mmi(DCTELEM * blocks) |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
33 { |
8031 | 34 __asm__ volatile( |
1143
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
35 ".set noreorder \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
36 "addiu $9, %0, 768 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
37 "nop \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
38 "1: \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
39 "sq $0, 0(%0) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
40 "move $8, %0 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
41 "addi %0, %0, 64 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
42 "sq $0, 16($8) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
43 "slt $10, %0, $9 \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
44 "sq $0, 32($8) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
45 "bnez $10, 1b \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
46 "sq $0, 48($8) \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
47 ".set reorder \n" |
a4facfd78935
clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1092
diff
changeset
|
48 : "+r" (blocks) :: "$8", "$9", "memory" ); |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
49 } |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
50 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
51 |
1064 | 52 static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size) |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
53 { |
8031 | 54 __asm__ volatile( |
772
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
55 ".set push \n\t" |
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
56 ".set mips3 \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
57 "ld $8, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
58 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
59 "ld $9, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
60 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
61 "ld $10, 0(%0) \n\t" |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
62 "pextlb $8, $0, $8 \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
63 "sq $8, 0(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
64 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
65 "ld $8, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
66 "pextlb $9, $0, $9 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
67 "sq $9, 16(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
68 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
69 "ld $9, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
70 "pextlb $10, $0, $10 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
71 "sq $10, 32(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
72 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
73 "ld $10, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
74 "pextlb $8, $0, $8 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
75 "sq $8, 48(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
76 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
77 "ld $8, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
78 "pextlb $9, $0, $9 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
79 "sq $9, 64(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
80 "add %0, %0, %2 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
81 "ld $9, 0(%0) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
82 "pextlb $10, $0, $10 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
83 "sq $10, 80(%1) \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
84 "pextlb $8, $0, $8 \n\t" |
2979 | 85 "sq $8, 96(%1) \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
86 "pextlb $9, $0, $9 \n\t" |
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
87 "sq $9, 112(%1) \n\t" |
772
311ac50375e4
dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
721
diff
changeset
|
88 ".set pop \n\t" |
1146
868650fc345a
new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
1143
diff
changeset
|
89 : "+r" (pixels) : "r" (block), "r" (line_size) : "$8", "$9", "$10", "memory" ); |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
90 } |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
91 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
92 |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
/**
 * Copy h rows of 8 bytes from pixels to block.
 *
 * Both pointers advance by line_size after every row. The source row is
 * fetched with an ldr/ldl pair, so it may be unaligned; the destination is
 * written with a single sd.
 * The row counter is decremented before it is tested, so the loop body runs
 * at least once whatever h is.
 *
 * NOTE(review): sd is expected to need an 8-byte aligned destination --
 * confirm against the callers.
 *
 * @param block     destination of the first row
 * @param pixels    source of the first row
 * @param line_size byte distance between rows, used for source and destination
 * @param h         number of rows to copy
 */
static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    __asm__ volatile(
        ".set   push                            \n\t"
        ".set   mips3                           \n\t"
        "1:                                     \n\t"
        "ldr    $8, 0(%[src])                   \n\t"
        "addiu  %[rows], %[rows], -1            \n\t"
        "ldl    $8, 7(%[src])                   \n\t"
        "add    %[src], %[src], %[stride]       \n\t"
        "sd     $8, 0(%[dst])                   \n\t"
        "add    %[dst], %[dst], %[stride]       \n\t"
        "bgtz   %[rows], 1b                     \n\t"
        ".set   pop                             \n\t"
        : [dst] "+r" (block), [src] "+r" (pixels), [rows] "+r" (h)
        : [stride] "r" (line_size)
        : "$8", "memory" );
}
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
110 |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
111 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
/**
 * Copy rows of 16 bytes from pixels to block, two rows per loop iteration.
 *
 * Every source row is fetched as two 64-bit halves with ldr/ldl pairs (so
 * the source may be unaligned), the halves are packed into one quadword with
 * pcpyld and written with a single sq. $11 and $10 hold the addresses of the
 * second source and destination row of the current pair.
 * The row counter drops by 2 per iteration and is tested afterwards, so at
 * least two rows are always copied.
 *
 * NOTE(review): sq is expected to need a 16-byte aligned destination, and an
 * odd h copies one row more than requested -- confirm against the callers.
 *
 * @param block     destination of the first row
 * @param pixels    source of the first row
 * @param line_size byte distance between rows, used for source and destination
 * @param h         number of rows to copy
 */
static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    __asm__ volatile (
        ".set   push                            \n\t"
        ".set   mips3                           \n\t"
        "1:                                     \n\t"
        "ldr    $8, 0(%[src])                   \n\t"
        "add    $11, %[src], %[stride]          \n\t"  /* $11 = second source row */
        "ldl    $8, 7(%[src])                   \n\t"
        "add    $10, %[dst], %[stride]          \n\t"  /* $10 = second destination row */
        "ldr    $9, 8(%[src])                   \n\t"
        "ldl    $9, 15(%[src])                  \n\t"
        "ldr    $12, 0($11)                     \n\t"
        "add    %[src], $11, %[stride]          \n\t"
        "ldl    $12, 7($11)                     \n\t"
        "pcpyld $8, $9, $8                      \n\t"
        "sq     $8, 0(%[dst])                   \n\t"
        "ldr    $13, 8($11)                     \n\t"
        "addiu  %[rows], %[rows], -2            \n\t"
        "ldl    $13, 15($11)                    \n\t"
        "add    %[dst], $10, %[stride]          \n\t"
        "pcpyld $12, $13, $12                   \n\t"
        "sq     $12, 0($10)                     \n\t"
        "bgtz   %[rows], 1b                     \n\t"
        ".set   pop                             \n\t"
        : [dst] "+r" (block), [src] "+r" (pixels), [rows] "+r" (h)
        : [stride] "r" (line_size)
        : "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
}
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
140 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
141 |
1092 | 142 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx) |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
143 { |
1092 | 144 const int idct_algo= avctx->idct_algo; |
145 | |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
146 c->clear_blocks = clear_blocks_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
147 |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
148 c->put_pixels_tab[1][0] = put_pixels8_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
149 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
150 |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
151 c->put_pixels_tab[0][0] = put_pixels16_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
152 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
153 |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
772
diff
changeset
|
154 c->get_pixels = get_pixels_mmi; |
2967 | 155 |
1092 | 156 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){ |
157 c->idct_put= ff_mmi_idct_put; | |
158 c->idct_add= ff_mmi_idct_add; | |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1146
diff
changeset
|
159 c->idct = ff_mmi_idct; |
1092 | 160 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; |
161 } | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
162 } |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
163 |