annotate ppc/int_altivec.c @ 9003:b595a8a59967 libavcodec

Change the type of pblocks from pointers to short arrays into pointers to arrays of 64 DCTELEM, similar to the other block fields. This also gets rid of some casts and fixes a warning.
author iive
date Sun, 22 Feb 2009 09:02:06 +0000
parents e9d9d946f213
children 7cee7292d5cc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4838
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
1 /*
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
2 * Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org>
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
3 *
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
4 * This file is part of FFmpeg.
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
5 *
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
10 *
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
14 * Lesser General Public License for more details.
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
15 *
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
19 */
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
20
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
21 /**
8718
e9d9d946f213 Use full internal pathname in doxygen @file directives.
diego
parents: 8494
diff changeset
22 ** @file libavcodec/ppc/int_altivec.c
4838
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
23 ** integer misc ops.
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
24 **/
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
25
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 5255
diff changeset
26 #include "libavcodec/dsputil.h"
4838
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
27
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
28 #include "gcc_fixes.h"
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
29
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
30 #include "dsputil_altivec.h"
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
31
7204
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
32 #include "types_altivec.h"
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
33
5255
669a97223dc7 make arguments to ssd_int8_vs_int16() const
mru
parents: 5010
diff changeset
/**
 * Sum of squared differences between an int8_t array and an int16_t array.
 *
 * Computes the sum over i of (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]).
 * Full groups of 16 elements are handled with AltiVec vectors; the remaining
 * (size % 16) elements are handled by the scalar loop at the end.
 *
 * @param pix1 first input, 8-bit signed samples (read through an unaligned
 *             load sequence, so no particular alignment is required by the
 *             code shown here)
 * @param pix2 second input, 16-bit signed samples (same unaligned handling)
 * @param size number of elements in each array
 * @return the accumulated sum, taken from element 3 of the vector accumulator
 */
34 static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
669a97223dc7 make arguments to ssd_int8_vs_int16() const
mru
parents: 5010
diff changeset
35 int size) {
4838
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
36 int i, size16;
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
37 vector signed char vpix1;
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
38 vector signed short vpix2, vdiff, vpix1l,vpix1h;
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
/* The union gives scalar access to the vector accumulator: the tail loop
 * and the return statement both use u.score[3]. */
39 union { vector signed int vscore;
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
40 int32_t score[4];
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 7204
diff changeset
41 } u;
4838
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
42 u.vscore = vec_splat_s32(0);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
43 //
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
44 //XXX lazy way, fix it later
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
45
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
/* Unaligned 16-byte load: two vec_ld results (offsets 0 and 15) are merged
 * with the permute vector from vec_lvsl.
 * NOTE(review): the expansion ends in a semicolon, so every use below expands
 * to a statement followed by an empty statement; the macro is also not
 * #undef-ed anywhere in the visible code. */
46 #define vec_unaligned_load(b) \
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
47 vec_perm(vec_ld(0,b),vec_ld(15,b),vec_lvsl(0, b));
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
48
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
/* Number of full 16-element groups processed by the vector loop. */
49 size16 = size >> 4;
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
50 while(size16) {
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
51 // score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
52 //load pix1 and the first batch of pix2
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
53
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
54 vpix1 = vec_unaligned_load(pix1);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
55 vpix2 = vec_unaligned_load(pix2);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
56 pix2 += 8;
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
57 //unpack
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
58 vpix1h = vec_unpackh(vpix1);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
59 vdiff = vec_sub(vpix1h, vpix2);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
60 vpix1l = vec_unpackl(vpix1);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
61 // load another batch from pix2
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
62 vpix2 = vec_unaligned_load(pix2);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
63 u.vscore = vec_msum(vdiff, vdiff, u.vscore);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
64 vdiff = vec_sub(vpix1l, vpix2);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
65 u.vscore = vec_msum(vdiff, vdiff, u.vscore);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
66 pix1 += 16;
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
67 pix2 += 8;
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
68 size16--;
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
69 }
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
/* Collapse the four partial sums into one value. The code below continues
 * accumulating in u.score[3], so the total is expected in element 3
 * (presumably where vec_sums places it -- confirm against the AltiVec PIM). */
70 u.vscore = vec_sums(u.vscore, vec_splat_s32(0));
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
71
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
/* Scalar tail for the leftover (size % 16) elements; pix1 and pix2 were
 * already advanced past the vectorized part inside the loop above. */
72 size %= 16;
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
73 for (i = 0; i < size; i++) {
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
74 u.score[3] += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
75 }
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
76 return u.score[3];
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
77 }
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
78
7204
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
/**
 * In-place vector addition of 16-bit integers: v1[i] += v2[i].
 *
 * Elements are processed in groups of 8 (one 16-byte vector per iteration).
 * The loop condition is i < order with a step of 8, so an order that is not
 * a multiple of 8 still processes a full final group.
 *
 * @param v1    destination and first operand; accessed with vec_ld/vec_st at
 *              offset 0 and no permute -- presumably must be 16-byte aligned,
 *              confirm against callers
 * @param v2    second operand; loaded through a vec_lvsl/vec_perm sequence,
 *              so it may be unaligned
 * @param order number of 16-bit elements to process
 */
79 static void add_int16_altivec(int16_t * v1, int16_t * v2, int order)
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
80 {
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
81 int i;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7333
diff changeset
82 register vec_s16 vec, *pv;
7204
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
83
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
84 for(i = 0; i < order; i += 8){
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7333
diff changeset
85 pv = (vec_s16*)v2;
7204
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
/* Unaligned load of 8 elements from v2.
 * NOTE(review): pv[1] is read on every iteration, i.e. the 16 bytes that
 * follow pv[0]; verify that callers keep that memory readable on the last
 * iteration. */
86 vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2));
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
87 vec_st(vec_add(vec_ld(0, v1), vec), 0, v1);
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
88 v1 += 8;
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
89 v2 += 8;
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
90 }
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
91 }
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
92
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
/**
 * In-place vector subtraction of 16-bit integers: v1[i] -= v2[i].
 *
 * Elements are processed in groups of 8 (one 16-byte vector per iteration).
 * The loop condition is i < order with a step of 8, so an order that is not
 * a multiple of 8 still processes a full final group.
 *
 * @param v1    destination and minuend; accessed with vec_ld/vec_st at
 *              offset 0 and no permute -- presumably must be 16-byte aligned,
 *              confirm against callers
 * @param v2    subtrahend; loaded through a vec_lvsl/vec_perm sequence, so it
 *              may be unaligned
 * @param order number of 16-bit elements to process
 */
93 static void sub_int16_altivec(int16_t * v1, int16_t * v2, int order)
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
94 {
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
95 int i;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7333
diff changeset
96 register vec_s16 vec, *pv;
7204
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
97
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
98 for(i = 0; i < order; i += 8){
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7333
diff changeset
99 pv = (vec_s16*)v2;
7204
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
/* Unaligned load of 8 elements from v2.
 * NOTE(review): pv[1] is read on every iteration, i.e. the 16 bytes that
 * follow pv[0]; verify that callers keep that memory readable on the last
 * iteration. */
100 vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2));
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
101 vec_st(vec_sub(vec_ld(0, v1), vec), 0, v1);
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
102 v1 += 8;
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
103 v2 += 8;
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
104 }
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
105 }
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
106
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
/**
 * Scalar product of two int16_t vectors with a right shift applied to the
 * intermediate sums.
 *
 * Each iteration multiplies 8 elements of v1 with 8 elements of v2 through
 * vec_msum, shifts the resulting 32-bit values right by the shift amount and
 * folds them into the running total with vec_sums. The shift is therefore
 * applied to the vec_msum outputs, not to each individual product.
 *
 * @param v1    first operand; loaded through a vec_lvsl/vec_perm sequence, so
 *              it may be unaligned
 * @param v2    second operand; loaded with a plain vec_ld at offset 0 --
 *              presumably must be 16-byte aligned, confirm against callers
 * @param order number of 16-bit elements; processed in groups of 8, and the
 *              loop still runs a full final group when order is not a
 *              multiple of 8
 * @param shift right-shift amount; only bits 0..4 are examined below
 * @return element 3 of the accumulated vector, stored out through vec_ste
 */
107 static int32_t scalarproduct_int16_altivec(int16_t * v1, int16_t * v2, int order, const int shift)
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
108 {
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
109 int i;
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
/* LOAD_ZERO is defined outside this file; presumably it provides the
 * zero_u32v and zero_s32v values used below -- confirm in the headers. */
110 LOAD_ZERO;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7333
diff changeset
111 register vec_s16 vec1, *pv;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7333
diff changeset
112 register vec_s32 res = vec_splat_s32(0), t;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7333
diff changeset
113 register vec_u32 shifts;
7204
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
114 DECLARE_ALIGNED_16(int32_t, ires);
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
115
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
/* Build the shift-count vector by adding one splatted power of two per set
 * bit of shift (bits 0x10 down to 0x01). The value 16 is formed as 8 << 1,
 * presumably because vec_splat_u32 only accepts a small literal -- confirm
 * against the AltiVec PIM. */
116 shifts = zero_u32v;
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
117 if(shift & 0x10) shifts = vec_add(shifts, vec_sl(vec_splat_u32(0x08), vec_splat_u32(0x1)));
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
118 if(shift & 0x08) shifts = vec_add(shifts, vec_splat_u32(0x08));
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
119 if(shift & 0x04) shifts = vec_add(shifts, vec_splat_u32(0x04));
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
120 if(shift & 0x02) shifts = vec_add(shifts, vec_splat_u32(0x02));
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
121 if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01));
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
122
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
123 for(i = 0; i < order; i += 8){
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7333
diff changeset
124 pv = (vec_s16*)v1;
7204
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
/* Unaligned load of 8 elements from v1.
 * NOTE(review): pv[1] is read on every iteration, i.e. the 16 bytes that
 * follow pv[0]; verify that callers keep that memory readable on the last
 * iteration. */
125 vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
126 t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
/* NOTE(review): vec_sr appears to be a logical (zero-filling) shift per the
 * AltiVec PIM, while t holds signed sums; confirm whether an arithmetic
 * shift (vec_sra) was intended for negative values. */
127 t = vec_sr(t, shifts);
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
128 res = vec_sums(t, res);
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
129 v1 += 8;
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
130 v2 += 8;
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
131 }
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
/* Replicate element 3 (the running total) into every lane so that the
 * single-element store below writes it regardless of which lane is chosen. */
132 res = vec_splat(res, 3);
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
133 vec_ste(res, 0, &ires);
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
134 return ires;
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
135 }
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
136
4838
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
/**
 * Install the AltiVec integer routines from this file into a DSPContext.
 *
 * Sets the ssd_int8_vs_int16, add_int16, sub_int16 and scalarproduct_int16
 * function pointers of c to the static AltiVec implementations defined above.
 *
 * @param c     DSP context whose function pointers are overwritten
 * @param avctx codec context; not used by this function
 */
137 void int_init_altivec(DSPContext* c, AVCodecContext *avctx)
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
138 {
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
139 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
7204
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
140 c->add_int16 = add_int16_altivec;
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
141 c->sub_int16 = sub_int16_altivec;
83ffe1bed06e Altivec implementation of APE vector functions
kostya
parents: 6763
diff changeset
142 c->scalarproduct_int16 = scalarproduct_int16_altivec;
4838
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff changeset
143 }