annotate ppc/vc1dsp_altivec.c @ 8790:1045a26cb90d libavcodec

Fix crash when encoding using libschroedinger. Currently only pixel and half-pixel motion vector precisions are supported in libschroedinger. Setting the mv_precision field to 2 (i.e. quarter pixel) causes a crash in the libschroedinger encoder calls. By not setting this parameter, we fall back to the default value used in libschroedinger. patch by Anuradha Suraparaju, anuradha rd.bbc.co uk
author diego
date Tue, 10 Feb 2009 14:27:16 +0000
parents f7cbb7733146
children 7cee7292d5cc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
1 /*
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
2 * VC-1 and WMV3 decoder - DSP functions AltiVec-optimized
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
3 * Copyright (c) 2006 Konstantin Shishkov
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3537
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3537
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3537
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3537
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3537
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
15 * Lesser General Public License for more details.
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
16 *
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3537
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
20 */
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
21
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 6028
diff changeset
22 #include "libavcodec/dsputil.h"
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
23
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
24 #include "gcc_fixes.h"
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
25
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents: 5215
diff changeset
26 #include "util_altivec.h"
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
27
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
/*
 * Main steps of the 8-point inverse transform, applied in place to the eight
 * operands s0..s7 (each must be a modifiable lvalue; every one is read and
 * then overwritten).
 *
 * Even part (built from s0, s2, s4, s6 with shifts and adds only):
 *     t4 = 12*(s0 + s4) + vec_rnd + (16*s2 +  6*s6)
 *     t5 = 12*(s0 - s4) + vec_rnd + ( 6*s2 - 16*s6)
 *     t6 = 12*(s0 - s4) + vec_rnd - ( 6*s2 - 16*s6)
 *     t7 = 12*(s0 + s4) + vec_rnd - (16*s2 +  6*s6)
 * Odd part (built from s1, s3, s5, s7):
 *     t0 = 16*s1 + 15*s3 +  9*s5 +  4*s7
 *     t1 = 15*s1 -  4*s3 - 16*s5 -  9*s7
 *     t2 =  9*s1 - 16*s3 +  4*s5 + 15*s7
 *     t3 =  4*s1 -  9*s3 + 15*s5 - 16*s7
 * Outputs are the butterflies s0..s3 = t4..t7 + t0..t3 and
 * s4..s7 = t7..t4 - t3..t0. No final right shift is done here; that is left
 * to SHIFT_HOR8 / SHIFT_VERT8.
 *
 * The macro is unhygienic: the expansion site must provide the temporaries
 * t0..t7 and the shift counts vec_1, vec_2, vec_3 and vec_4.
 * vec_rnd is the rounding bias added to the even part.
 */
#define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \
do { \
    t0 = vec_sl(vec_add(s0, s4), vec_2); \
    t0 = vec_add(vec_sl(t0, vec_1), t0); \
    t0 = vec_add(t0, vec_rnd); \
    t1 = vec_sl(vec_sub(s0, s4), vec_2); \
    t1 = vec_add(vec_sl(t1, vec_1), t1); \
    t1 = vec_add(t1, vec_rnd); \
    t2 = vec_add(vec_sl(s6, vec_2), vec_sl(s6, vec_1)); \
    t2 = vec_add(t2, vec_sl(s2, vec_4)); \
    t3 = vec_add(vec_sl(s2, vec_2), vec_sl(s2, vec_1)); \
    t3 = vec_sub(t3, vec_sl(s6, vec_4)); \
    t4 = vec_add(t0, t2); \
    t5 = vec_add(t1, t3); \
    t6 = vec_sub(t1, t3); \
    t7 = vec_sub(t0, t2); \
\
    t0 = vec_sl(vec_add(s1, s3), vec_4); \
    t0 = vec_add(t0, vec_sl(s5, vec_3)); \
    t0 = vec_add(t0, vec_sl(s7, vec_2)); \
    t0 = vec_add(t0, vec_sub(s5, s3)); \
\
    t1 = vec_sl(vec_sub(s1, s5), vec_4); \
    t1 = vec_sub(t1, vec_sl(s7, vec_3)); \
    t1 = vec_sub(t1, vec_sl(s3, vec_2)); \
    t1 = vec_sub(t1, vec_add(s1, s7)); \
\
    t2 = vec_sl(vec_sub(s7, s3), vec_4); \
    t2 = vec_add(t2, vec_sl(s1, vec_3)); \
    t2 = vec_add(t2, vec_sl(s5, vec_2)); \
    t2 = vec_add(t2, vec_sub(s1, s7)); \
\
    t3 = vec_sl(vec_sub(s5, s7), vec_4); \
    t3 = vec_sub(t3, vec_sl(s3, vec_3)); \
    t3 = vec_add(t3, vec_sl(s1, vec_2)); \
    t3 = vec_sub(t3, vec_add(s3, s5)); \
\
    s0 = vec_add(t4, t0); \
    s1 = vec_add(t5, t1); \
    s2 = vec_add(t6, t2); \
    s3 = vec_add(t7, t3); \
    s4 = vec_sub(t7, t3); \
    s5 = vec_sub(t6, t2); \
    s6 = vec_sub(t5, t1); \
    s7 = vec_sub(t4, t0); \
} while (0)
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
75
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
/*
 * Arithmetic right shift by 3 of each of the eight operands, in place.
 * Used after STEP8 in the pass whose rounding bias is 4.
 * The expansion site must provide the shift count vec_3.
 */
#define SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7) \
do { \
    s0 = vec_sra(s0, vec_3); \
    s1 = vec_sra(s1, vec_3); \
    s2 = vec_sra(s2, vec_3); \
    s3 = vec_sra(s3, vec_3); \
    s4 = vec_sra(s4, vec_3); \
    s5 = vec_sra(s5, vec_3); \
    s6 = vec_sra(s6, vec_3); \
    s7 = vec_sra(s7, vec_3); \
} while (0)
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
87
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
/*
 * Arithmetic right shift by 7 of each of the eight operands, in place.
 * The first four operands are shifted as they are; the last four get an
 * extra +1 (vec_1s) before the shift.
 * NOTE(review): the asymmetric +1 presumably mirrors the scalar VC-1
 * second-stage rounding -- confirm against the C reference implementation.
 * The expansion site must provide vec_7 (shift count) and vec_1s (signed 1).
 */
#define SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7) \
do { \
    s0 = vec_sra(s0, vec_7); \
    s1 = vec_sra(s1, vec_7); \
    s2 = vec_sra(s2, vec_7); \
    s3 = vec_sra(s3, vec_7); \
    s4 = vec_sra(vec_add(s4, vec_1s), vec_7); \
    s5 = vec_sra(vec_add(s5, vec_1s), vec_7); \
    s6 = vec_sra(vec_add(s6, vec_1s), vec_7); \
    s7 = vec_sra(vec_add(s7, vec_1s), vec_7); \
} while (0)
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
99
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
/*
 * Main steps of the 4-point inverse transform, applied in place to the four
 * operands s0..s3 (each must be a modifiable lvalue).
 *
 * Written out in coefficient form the result is
 *     s0' = 17*s0 + 22*s1 + 17*s2 + 10*s3 + vec_rnd
 *     s1' = 17*s0 + 10*s1 - 17*s2 - 22*s3 + vec_rnd
 *     s2' = 17*s0 - 10*s1 - 17*s2 + 22*s3 + vec_rnd
 *     s3' = 17*s0 - 22*s1 + 17*s2 - 10*s3 + vec_rnd
 * computed with shifts and adds only. No final right shift is done here;
 * that is left to SHIFT_HOR4 / SHIFT_VERT4.
 *
 * The macro is unhygienic: the expansion site must provide the temporaries
 * t0..t3 and the shift counts vec_1, vec_2, vec_3, vec_4 and vec_5.
 */
#define STEP4(s0, s1, s2, s3, vec_rnd) \
do { \
    t1 = vec_add(vec_sl(s0, vec_4), s0); \
    t1 = vec_add(t1, vec_rnd); \
    t2 = vec_add(vec_sl(s2, vec_4), s2); \
    t0 = vec_add(t1, t2); \
    t1 = vec_sub(t1, t2); \
    t3 = vec_sl(vec_sub(s3, s1), vec_1); \
    t3 = vec_add(t3, vec_sl(t3, vec_2)); \
    t2 = vec_add(t3, vec_sl(s1, vec_5)); \
    t3 = vec_add(t3, vec_sl(s3, vec_3)); \
    t3 = vec_add(t3, vec_sl(s3, vec_2)); \
    s0 = vec_add(t0, t2); \
    s1 = vec_sub(t1, t3); \
    s2 = vec_add(t1, t3); \
    s3 = vec_sub(t0, t2); \
} while (0)
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
118
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
/*
 * Arithmetic right shift by 3 of each of the four operands, in place.
 * Wrapped in do { } while (0), like SHIFT_HOR8, so that the expansion is a
 * single statement and is safe inside an unbraced if/else. Invoke it with a
 * trailing semicolon.
 * The expansion site must provide the shift count vec_3.
 */
#define SHIFT_HOR4(s0, s1, s2, s3) \
do { \
    s0 = vec_sra(s0, vec_3); \
    s1 = vec_sra(s1, vec_3); \
    s2 = vec_sra(s2, vec_3); \
    s3 = vec_sra(s3, vec_3); \
} while (0)
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
124
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
/*
 * Arithmetic right shift by 7 of each of the four operands, in place.
 * Wrapped in do { } while (0), like SHIFT_VERT8, so that the expansion is a
 * single statement and is safe inside an unbraced if/else. Invoke it with a
 * trailing semicolon.
 * The expansion site must provide the shift count vec_7.
 */
#define SHIFT_VERT4(s0, s1, s2, s3) \
do { \
    s0 = vec_sra(s0, vec_7); \
    s1 = vec_sra(s1, vec_7); \
    s2 = vec_sra(s2, vec_7); \
    s3 = vec_sra(s3, vec_7); \
} while (0)
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
130
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
131 /** Do inverse transform on 8x8 block
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
132 */
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
133 static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
134 {
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
135 vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
136 vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
137 vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
138 vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
139 const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
140 const vector unsigned int vec_7 = vec_splat_u32(7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
141 const vector unsigned int vec_4 = vec_splat_u32(4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
142 const vector signed int vec_4s = vec_splat_s32(4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
143 const vector unsigned int vec_3 = vec_splat_u32(3);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
144 const vector unsigned int vec_2 = vec_splat_u32(2);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
145 const vector signed int vec_1s = vec_splat_s32(1);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
146 const vector unsigned int vec_1 = vec_splat_u32(1);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
147
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
148
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
149 src0 = vec_ld( 0, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
150 src1 = vec_ld( 16, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
151 src2 = vec_ld( 32, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
152 src3 = vec_ld( 48, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
153 src4 = vec_ld( 64, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
154 src5 = vec_ld( 80, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
155 src6 = vec_ld( 96, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
156 src7 = vec_ld(112, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
157
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
158 TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
159 s0 = vec_unpackl(src0);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
160 s1 = vec_unpackl(src1);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
161 s2 = vec_unpackl(src2);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
162 s3 = vec_unpackl(src3);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
163 s4 = vec_unpackl(src4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
164 s5 = vec_unpackl(src5);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
165 s6 = vec_unpackl(src6);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
166 s7 = vec_unpackl(src7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
167 s8 = vec_unpackh(src0);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
168 s9 = vec_unpackh(src1);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
169 sA = vec_unpackh(src2);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
170 sB = vec_unpackh(src3);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
171 sC = vec_unpackh(src4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
172 sD = vec_unpackh(src5);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
173 sE = vec_unpackh(src6);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
174 sF = vec_unpackh(src7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
175 STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
176 SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
177 STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
178 SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
179 src0 = vec_pack(s8, s0);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
180 src1 = vec_pack(s9, s1);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
181 src2 = vec_pack(sA, s2);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
182 src3 = vec_pack(sB, s3);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
183 src4 = vec_pack(sC, s4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
184 src5 = vec_pack(sD, s5);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
185 src6 = vec_pack(sE, s6);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
186 src7 = vec_pack(sF, s7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
187 TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
188
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
189 s0 = vec_unpackl(src0);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
190 s1 = vec_unpackl(src1);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
191 s2 = vec_unpackl(src2);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
192 s3 = vec_unpackl(src3);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
193 s4 = vec_unpackl(src4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
194 s5 = vec_unpackl(src5);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
195 s6 = vec_unpackl(src6);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
196 s7 = vec_unpackl(src7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
197 s8 = vec_unpackh(src0);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
198 s9 = vec_unpackh(src1);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
199 sA = vec_unpackh(src2);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
200 sB = vec_unpackh(src3);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
201 sC = vec_unpackh(src4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
202 sD = vec_unpackh(src5);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
203 sE = vec_unpackh(src6);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
204 sF = vec_unpackh(src7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
205 STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_64);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
206 SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
207 STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_64);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
208 SHIFT_VERT8(s8, s9, sA, sB, sC, sD, sE, sF);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
209 src0 = vec_pack(s8, s0);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
210 src1 = vec_pack(s9, s1);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
211 src2 = vec_pack(sA, s2);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
212 src3 = vec_pack(sB, s3);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
213 src4 = vec_pack(sC, s4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
214 src5 = vec_pack(sD, s5);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
215 src6 = vec_pack(sE, s6);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
216 src7 = vec_pack(sF, s7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
217
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
218 vec_st(src0, 0, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
219 vec_st(src1, 16, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
220 vec_st(src2, 32, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
221 vec_st(src3, 48, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
222 vec_st(src4, 64, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
223 vec_st(src5, 80, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
224 vec_st(src6, 96, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
225 vec_st(src7,112, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
226 }
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
227
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
228 /** Do inverse transform on 8x4 part of block
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
229 */
5999
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
230 static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, DCTELEM *block)
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
231 {
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
232 vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
233 vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
234 vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
235 vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
236 const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
237 const vector unsigned int vec_7 = vec_splat_u32(7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
238 const vector unsigned int vec_5 = vec_splat_u32(5);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
239 const vector unsigned int vec_4 = vec_splat_u32(4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
240 const vector signed int vec_4s = vec_splat_s32(4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
241 const vector unsigned int vec_3 = vec_splat_u32(3);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
242 const vector unsigned int vec_2 = vec_splat_u32(2);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
243 const vector unsigned int vec_1 = vec_splat_u32(1);
5999
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
244 vector unsigned char tmp;
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
245 vector signed short tmp2, tmp3;
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
246 vector unsigned char perm0, perm1, p0, p1, p;
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
247
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
248 src0 = vec_ld( 0, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
249 src1 = vec_ld( 16, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
250 src2 = vec_ld( 32, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
251 src3 = vec_ld( 48, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
252 src4 = vec_ld( 64, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
253 src5 = vec_ld( 80, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
254 src6 = vec_ld( 96, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
255 src7 = vec_ld(112, block);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
256
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
257 TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
258 s0 = vec_unpackl(src0);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
259 s1 = vec_unpackl(src1);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
260 s2 = vec_unpackl(src2);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
261 s3 = vec_unpackl(src3);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
262 s4 = vec_unpackl(src4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
263 s5 = vec_unpackl(src5);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
264 s6 = vec_unpackl(src6);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
265 s7 = vec_unpackl(src7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
266 s8 = vec_unpackh(src0);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
267 s9 = vec_unpackh(src1);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
268 sA = vec_unpackh(src2);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
269 sB = vec_unpackh(src3);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
270 sC = vec_unpackh(src4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
271 sD = vec_unpackh(src5);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
272 sE = vec_unpackh(src6);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
273 sF = vec_unpackh(src7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
274 STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
275 SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
276 STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
277 SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
278 src0 = vec_pack(s8, s0);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
279 src1 = vec_pack(s9, s1);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
280 src2 = vec_pack(sA, s2);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
281 src3 = vec_pack(sB, s3);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
282 src4 = vec_pack(sC, s4);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
283 src5 = vec_pack(sD, s5);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
284 src6 = vec_pack(sE, s6);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
285 src7 = vec_pack(sF, s7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
286 TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
287
6000
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
288 s0 = vec_unpackh(src0);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
289 s1 = vec_unpackh(src1);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
290 s2 = vec_unpackh(src2);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
291 s3 = vec_unpackh(src3);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
292 s8 = vec_unpackl(src0);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
293 s9 = vec_unpackl(src1);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
294 sA = vec_unpackl(src2);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
295 sB = vec_unpackl(src3);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
296 STEP4(s0, s1, s2, s3, vec_64);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
297 SHIFT_VERT4(s0, s1, s2, s3);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
298 STEP4(s8, s9, sA, sB, vec_64);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
299 SHIFT_VERT4(s8, s9, sA, sB);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
300 src0 = vec_pack(s0, s8);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
301 src1 = vec_pack(s1, s9);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
302 src2 = vec_pack(s2, sA);
791240825ac4 Reindent after last commit
kostya
parents: 5999
diff changeset
303 src3 = vec_pack(s3, sB);
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
304
5999
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
305 p0 = vec_lvsl (0, dest);
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
306 p1 = vec_lvsl (stride, dest);
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
307 p = vec_splat_u8 (-1);
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
308 perm0 = vec_mergeh (p, p0);
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
309 perm1 = vec_mergeh (p, p1);
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
310
5999
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
311 #define ADD(dest,src,perm) \
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
312 /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
313 tmp = vec_ld (0, dest); \
6028
1ba8ee13e5b9 Make strict altivec parsers happy (gcc-4.3 and others)
lu_zero
parents: 6000
diff changeset
314 tmp2 = (vector signed short)vec_perm (tmp, vec_splat_u8(0), perm); \
5999
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
315 tmp3 = vec_adds (tmp2, src); \
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
316 tmp = vec_packsu (tmp3, tmp3); \
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
317 vec_ste ((vector unsigned int)tmp, 0, (unsigned int *)dest); \
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
318 vec_ste ((vector unsigned int)tmp, 4, (unsigned int *)dest);
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
319
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
320 ADD (dest, src0, perm0) dest += stride;
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
321 ADD (dest, src1, perm1) dest += stride;
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
322 ADD (dest, src2, perm0) dest += stride;
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
323 ADD (dest, src3, perm1)
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
324 }
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
325
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
326
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
/**
 * Install the AltiVec VC-1 inverse-transform routines into a DSPContext.
 *
 * Overwrites the vc1_inv_trans_8x8 and vc1_inv_trans_8x4 function pointers
 * of dsp with the AltiVec implementations defined in this file.
 *
 * @param dsp   context whose two function pointers are replaced
 * @param avctx not referenced in this function
 *
 * NOTE(review): no AltiVec capability check is performed here; presumably
 * the caller only invokes this when AltiVec is usable -- confirm.
 */
327 void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) {
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
328 dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec; /* 8x8 inverse transform hook */
5999
f22faee96323 Update Altivec variant of vc1_inv_trans_8x4
kostya
parents: 5997
diff changeset
329 dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec; /* 8x4 inverse transform hook */
3537
f52e3f60481b Some AltiVec optimizations for VC-1
kostya
parents:
diff changeset
330 }