annotate ps2/dsputil_mmi.c @ 1968:19c2344e800a libavcodec

support reusing mb types and field select values of the source file, but use motion vectors just as additional predictors minor cleanup segfault fix
author michael
date Sun, 25 Apr 2004 02:09:47 +0000
parents 72ac356803ea
children ef2149182f1c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
689
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
1 /*
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
2 * MMI optimized DSP utils
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
3 * Copyright (c) 2000, 2001 Fabrice Bellard.
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
4 *
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
5 * This library is free software; you can redistribute it and/or
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
6 * modify it under the terms of the GNU Lesser General Public
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
7 * License as published by the Free Software Foundation; either
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
8 * version 2 of the License, or (at your option) any later version.
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
9 *
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
10 * This library is distributed in the hope that it will be useful,
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
13 * Lesser General Public License for more details.
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
14 *
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
15 * You should have received a copy of the GNU Lesser General Public
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
16 * License along with this library; if not, write to the Free Software
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
18 *
1876
72ac356803ea credit update
melanson
parents: 1324
diff changeset
19 * MMI optimization by Leon van Stuivenberg
1143
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
20 * clear_blocks_mmi() by BroadQ
689
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
21 */
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
22
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
23 #include "../dsputil.h"
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
24 #include "mmi.h"
689
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
25
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1064
diff changeset
26 void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1064
diff changeset
27 void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
1324
7d328fd9d8a5 the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents: 1146
diff changeset
28 void ff_mmi_idct(DCTELEM *block);
689
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
29
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
30 static void clear_blocks_mmi(DCTELEM * blocks)
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
31 {
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
32 asm volatile(
1143
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
33 ".set noreorder \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
34 "addiu $9, %0, 768 \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
35 "nop \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
36 "1: \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
37 "sq $0, 0(%0) \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
38 "move $8, %0 \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
39 "addi %0, %0, 64 \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
40 "sq $0, 16($8) \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
41 "slt $10, %0, $9 \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
42 "sq $0, 32($8) \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
43 "bnez $10, 1b \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
44 "sq $0, 48($8) \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
45 ".set reorder \n"
a4facfd78935 clear_blocks_mmi() optimization by BroadQ (and patch by Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1092
diff changeset
46 : "+r" (blocks) :: "$8", "$9", "memory" );
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
47 }
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
48
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
49
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 856
diff changeset
50 static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
51 {
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
52 asm volatile(
772
311ac50375e4 dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 721
diff changeset
53 ".set push \n\t"
311ac50375e4 dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 721
diff changeset
54 ".set mips3 \n\t"
1146
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
55 "ld $8, 0(%0) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
56 "add %0, %0, %2 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
57 "ld $9, 0(%0) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
58 "add %0, %0, %2 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
59 "ld $10, 0(%0) \n\t"
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
60 "pextlb $8, $0, $8 \n\t"
1146
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
61 "sq $8, 0(%1) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
62 "add %0, %0, %2 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
63 "ld $8, 0(%0) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
64 "pextlb $9, $0, $9 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
65 "sq $9, 16(%1) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
66 "add %0, %0, %2 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
67 "ld $9, 0(%0) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
68 "pextlb $10, $0, $10 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
69 "sq $10, 32(%1) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
70 "add %0, %0, %2 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
71 "ld $10, 0(%0) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
72 "pextlb $8, $0, $8 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
73 "sq $8, 48(%1) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
74 "add %0, %0, %2 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
75 "ld $8, 0(%0) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
76 "pextlb $9, $0, $9 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
77 "sq $9, 64(%1) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
78 "add %0, %0, %2 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
79 "ld $9, 0(%0) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
80 "pextlb $10, $0, $10 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
81 "sq $10, 80(%1) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
82 "pextlb $8, $0, $8 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
83 "sq $8, 96(%1) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
84 "pextlb $9, $0, $9 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
85 "sq $9, 112(%1) \n\t"
772
311ac50375e4 dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 721
diff changeset
86 ".set pop \n\t"
1146
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
87 : "+r" (pixels) : "r" (block), "r" (line_size) : "$8", "$9", "$10", "memory" );
689
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
88 }
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
89
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
90
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
91 static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
689
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
92 {
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
93 asm volatile(
772
311ac50375e4 dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 721
diff changeset
94 ".set push \n\t"
311ac50375e4 dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 721
diff changeset
95 ".set mips3 \n\t"
1146
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
96 "1: \n\t"
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
97 "ldr $8, 0(%1) \n\t"
1146
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
98 "addiu %2, %2, -1 \n\t"
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
99 "ldl $8, 7(%1) \n\t"
1146
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
100 "add %1, %1, %3 \n\t"
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
101 "sd $8, 0(%0) \n\t"
1146
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
102 "add %0, %0, %3 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
103 "bgtz %2, 1b \n\t"
772
311ac50375e4 dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 721
diff changeset
104 ".set pop \n\t"
1146
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
105 : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
106 : "$8", "memory" );
689
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
107 }
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
108
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
109
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
110 static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
689
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
111 {
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
112 asm volatile (
772
311ac50375e4 dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 721
diff changeset
113 ".set push \n\t"
311ac50375e4 dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 721
diff changeset
114 ".set mips3 \n\t"
1146
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
115 "1: \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
116 "ldr $8, 0(%1) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
117 "add $11, %1, %3 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
118 "ldl $8, 7(%1) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
119 "add $10, %0, %3 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
120 "ldr $9, 8(%1) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
121 "ldl $9, 15(%1) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
122 "ldr $12, 0($11) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
123 "add %1, $11, %3 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
124 "ldl $12, 7($11) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
125 "pcpyld $8, $9, $8 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
126 "sq $8, 0(%0) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
127 "ldr $13, 8($11) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
128 "addiu %2, %2, -2 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
129 "ldl $13, 15($11) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
130 "add %0, $10, %3 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
131 "pcpyld $12, $13, $12 \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
132 "sq $12, 0($10) \n\t"
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
133 "bgtz %2, 1b \n\t"
772
311ac50375e4 dsputil mmi cleanup patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 721
diff changeset
134 ".set pop \n\t"
1146
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
135 : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
868650fc345a new versions; put_pix's about 20% faster, get_pix 3.5x faster. patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 1143
diff changeset
136 : "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
689
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
137 }
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
138
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
139
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1064
diff changeset
140 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
689
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
141 {
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1064
diff changeset
142 const int idct_algo= avctx->idct_algo;
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1064
diff changeset
143
856
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 772
diff changeset
144 c->clear_blocks = clear_blocks_mmi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 772
diff changeset
145
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 772
diff changeset
146 c->put_pixels_tab[1][0] = put_pixels8_mmi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 772
diff changeset
147 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 772
diff changeset
148
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 772
diff changeset
149 c->put_pixels_tab[0][0] = put_pixels16_mmi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 772
diff changeset
150 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 772
diff changeset
151
3c6df37177dd * using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents: 772
diff changeset
152 c->get_pixels = get_pixels_mmi;
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1064
diff changeset
153
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1064
diff changeset
154 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1064
diff changeset
155 c->idct_put= ff_mmi_idct_put;
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1064
diff changeset
156 c->idct_add= ff_mmi_idct_add;
1324
7d328fd9d8a5 the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents: 1146
diff changeset
157 c->idct = ff_mmi_idct;
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1064
diff changeset
158 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1064
diff changeset
159 }
689
efcbfbd18864 ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff changeset
160 }
721
71f669e9f633 ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents: 689
diff changeset
161