Mercurial > libavcodec.hg
comparison ppc/int_altivec.c @ 7204:83ffe1bed06e libavcodec
Altivec implementation of APE vector functions
author | kostya |
---|---|
date | Sun, 06 Jul 2008 07:33:09 +0000 |
parents | f7cbb7733146 |
children | a8a79f5385f6 |
Comparison legend: equal, deleted, inserted, replaced
7203:87b1dfb5a98d | 7204:83ffe1bed06e |
---|---|
26 #include "libavcodec/dsputil.h" | 26 #include "libavcodec/dsputil.h" |
27 | 27 |
28 #include "gcc_fixes.h" | 28 #include "gcc_fixes.h" |
29 | 29 |
30 #include "dsputil_altivec.h" | 30 #include "dsputil_altivec.h" |
31 | |
32 #include "types_altivec.h" | |
31 | 33 |
32 static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2, | 34 static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2, |
33 int size) { | 35 int size) { |
34 int i, size16; | 36 int i, size16; |
35 vector signed char vpix1; | 37 vector signed char vpix1; |
72 u.score[3] += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]); | 74 u.score[3] += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]); |
73 } | 75 } |
74 return u.score[3]; | 76 return u.score[3]; |
75 } | 77 } |
76 | 78 |
79 static void add_int16_altivec(int16_t * v1, int16_t * v2, int order) | |
80 { | |
81 int i; | |
82 register vec_s16_t vec, *pv; | |
83 | |
84 for(i = 0; i < order; i += 8){ | |
85 pv = (vec_s16_t*)v2; | |
86 vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2)); | |
87 vec_st(vec_add(vec_ld(0, v1), vec), 0, v1); | |
88 v1 += 8; | |
89 v2 += 8; | |
90 } | |
91 } | |
92 | |
93 static void sub_int16_altivec(int16_t * v1, int16_t * v2, int order) | |
94 { | |
95 int i; | |
96 register vec_s16_t vec, *pv; | |
97 | |
98 for(i = 0; i < order; i += 8){ | |
99 pv = (vec_s16_t*)v2; | |
100 vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2)); | |
101 vec_st(vec_sub(vec_ld(0, v1), vec), 0, v1); | |
102 v1 += 8; | |
103 v2 += 8; | |
104 } | |
105 } | |
106 | |
107 static int32_t scalarproduct_int16_altivec(int16_t * v1, int16_t * v2, int order, const int shift) | |
108 { | |
109 int i; | |
110 LOAD_ZERO; | |
111 register vec_s16_t vec1, *pv; | |
112 register vec_s32_t res = vec_splat_s32(0), t; | |
113 register vec_u32_t shifts; | |
114 DECLARE_ALIGNED_16(int32_t, ires); | |
115 | |
116 shifts = zero_u32v; | |
117 if(shift & 0x10) shifts = vec_add(shifts, vec_sl(vec_splat_u32(0x08), vec_splat_u32(0x1))); | |
118 if(shift & 0x08) shifts = vec_add(shifts, vec_splat_u32(0x08)); | |
119 if(shift & 0x04) shifts = vec_add(shifts, vec_splat_u32(0x04)); | |
120 if(shift & 0x02) shifts = vec_add(shifts, vec_splat_u32(0x02)); | |
121 if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01)); | |
122 | |
123 for(i = 0; i < order; i += 8){ | |
124 pv = (vec_s16_t*)v1; | |
125 vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1)); | |
126 t = vec_msum(vec1, vec_ld(0, v2), zero_s32v); | |
127 t = vec_sr(t, shifts); | |
128 res = vec_sums(t, res); | |
129 v1 += 8; | |
130 v2 += 8; | |
131 } | |
132 res = vec_splat(res, 3); | |
133 vec_ste(res, 0, &ires); | |
134 return ires; | |
135 } | |
136 | |
77 void int_init_altivec(DSPContext* c, AVCodecContext *avctx) | 137 void int_init_altivec(DSPContext* c, AVCodecContext *avctx) |
78 { | 138 { |
79 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec; | 139 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec; |
140 c->add_int16 = add_int16_altivec; | |
141 c->sub_int16 = sub_int16_altivec; | |
142 c->scalarproduct_int16 = scalarproduct_int16_altivec; | |
80 } | 143 } |