Mercurial > libavcodec.hg
comparison ppc/dsputil_altivec.c @ 9421:dd2b5e52336a libavcodec
Remove gcc_fixes.h. It only contains workarounds for unsupported gcc versions.
author | diego |
---|---|
date | Sun, 12 Apr 2009 21:45:26 +0000 |
parents | 68e959302527 |
children | 5a730e38f1b3 |
comparison
equal
deleted
inserted
replaced
9420:5b68d22e5ec9 | 9421:dd2b5e52336a |
---|---|
18 * You should have received a copy of the GNU Lesser General Public | 18 * You should have received a copy of the GNU Lesser General Public |
19 * License along with FFmpeg; if not, write to the Free Software | 19 * License along with FFmpeg; if not, write to the Free Software |
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 */ | 21 */ |
22 | 22 |
| | 23 #include "config.h" |
| | 24 #if HAVE_ALTIVEC_H |
| | 25 #include <altivec.h> |
| | 26 #endif |
23 #include "libavcodec/dsputil.h" | 27 #include "libavcodec/dsputil.h" |
24 | |
25 #include "gcc_fixes.h" | |
26 | |
27 #include "dsputil_ppc.h" | 28 #include "dsputil_ppc.h" |
28 #include "util_altivec.h" | 29 #include "util_altivec.h" |
29 #include "types_altivec.h" | 30 #include "types_altivec.h" |
30 | 31 |
31 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 32 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
1122 */ | 1123 */ |
1123 | 1124 |
1124 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { | 1125 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { |
1125 int sum; | 1126 int sum; |
1126 register vector signed short | 1127 register vector signed short |
1127 temp0 REG_v(v0), | 1128 temp0 __asm__ ("v0"), |
1128 temp1 REG_v(v1), | 1129 temp1 __asm__ ("v1"), |
1129 temp2 REG_v(v2), | 1130 temp2 __asm__ ("v2"), |
1130 temp3 REG_v(v3), | 1131 temp3 __asm__ ("v3"), |
1131 temp4 REG_v(v4), | 1132 temp4 __asm__ ("v4"), |
1132 temp5 REG_v(v5), | 1133 temp5 __asm__ ("v5"), |
1133 temp6 REG_v(v6), | 1134 temp6 __asm__ ("v6"), |
1134 temp7 REG_v(v7); | 1135 temp7 __asm__ ("v7"); |
1135 register vector signed short | 1136 register vector signed short |
1136 temp0S REG_v(v8), | 1137 temp0S __asm__ ("v8"), |
1137 temp1S REG_v(v9), | 1138 temp1S __asm__ ("v9"), |
1138 temp2S REG_v(v10), | 1139 temp2S __asm__ ("v10"), |
1139 temp3S REG_v(v11), | 1140 temp3S __asm__ ("v11"), |
1140 temp4S REG_v(v12), | 1141 temp4S __asm__ ("v12"), |
1141 temp5S REG_v(v13), | 1142 temp5S __asm__ ("v13"), |
1142 temp6S REG_v(v14), | 1143 temp6S __asm__ ("v14"), |
1143 temp7S REG_v(v15); | 1144 temp7S __asm__ ("v15"); |
1144 register const vector unsigned char vzero REG_v(v31)= | 1145 register const vector unsigned char vzero __asm__ ("v31") = |
1145 (const vector unsigned char)vec_splat_u8(0); | 1146 (const vector unsigned char)vec_splat_u8(0); |
1146 { | 1147 { |
1147 register const vector signed short vprod1 REG_v(v16)= | 1148 register const vector signed short vprod1 __asm__ ("v16") = |
1148 (const vector signed short){ 1,-1, 1,-1, 1,-1, 1,-1 }; | 1149 (const vector signed short){ 1,-1, 1,-1, 1,-1, 1,-1 }; |
1149 register const vector signed short vprod2 REG_v(v17)= | 1150 register const vector signed short vprod2 __asm__ ("v17") = |
1150 (const vector signed short){ 1, 1,-1,-1, 1, 1,-1,-1 }; | 1151 (const vector signed short){ 1, 1,-1,-1, 1, 1,-1,-1 }; |
1151 register const vector signed short vprod3 REG_v(v18)= | 1152 register const vector signed short vprod3 __asm__ ("v18") = |
1152 (const vector signed short){ 1, 1, 1, 1,-1,-1,-1,-1 }; | 1153 (const vector signed short){ 1, 1, 1, 1,-1,-1,-1,-1 }; |
1153 register const vector unsigned char perm1 REG_v(v19)= | 1154 register const vector unsigned char perm1 __asm__ ("v19") = |
1154 (const vector unsigned char) | 1155 (const vector unsigned char) |
1155 {0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, | 1156 {0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, |
1156 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D}; | 1157 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D}; |
1157 register const vector unsigned char perm2 REG_v(v20)= | 1158 register const vector unsigned char perm2 __asm__ ("v20") = |
1158 (const vector unsigned char) | 1159 (const vector unsigned char) |
1159 {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, | 1160 {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, |
1160 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B}; | 1161 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B}; |
1161 register const vector unsigned char perm3 REG_v(v21)= | 1162 register const vector unsigned char perm3 __asm__ ("v21") = |
1162 (const vector unsigned char) | 1163 (const vector unsigned char) |
1163 {0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, | 1164 {0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, |
1164 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; | 1165 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; |
1165 | 1166 |
1166 #define ONEITERBUTTERFLY(i, res1, res2) \ | 1167 #define ONEITERBUTTERFLY(i, res1, res2) \ |
1167 { \ | 1168 { \ |
1168 register vector unsigned char src1 REG_v(v22), \ | 1169 register vector unsigned char src1 __asm__ ("v22"), \ |
1169 src2 REG_v(v23), \ | 1170 src2 __asm__ ("v23"), \ |
1170 dst1 REG_v(v24), \ | 1171 dst1 __asm__ ("v24"), \ |
1171 dst2 REG_v(v25), \ | 1172 dst2 __asm__ ("v25"), \ |
1172 srcO REG_v(v22), \ | 1173 srcO __asm__ ("v22"), \ |
1173 dstO REG_v(v23); \ | 1174 dstO __asm__ ("v23"); \ |
1174 \ | 1175 \ |
1175 register vector signed short srcV REG_v(v24), \ | 1176 register vector signed short srcV __asm__ ("v24"), \ |
1176 dstV REG_v(v25), \ | 1177 dstV __asm__ ("v25"), \ |
1177 srcW REG_v(v26), \ | 1178 srcW __asm__ ("v26"), \ |
1178 dstW REG_v(v27), \ | 1179 dstW __asm__ ("v27"), \ |
1179 but0 REG_v(v28), \ | 1180 but0 __asm__ ("v28"), \ |
1180 but0S REG_v(v29), \ | 1181 but0S __asm__ ("v29"), \ |
1181 op1 REG_v(v30), \ | 1182 op1 __asm__ ("v30"), \ |
1182 but1 REG_v(v22), \ | 1183 but1 __asm__ ("v22"), \ |
1183 op1S REG_v(v23), \ | 1184 op1S __asm__ ("v23"), \ |
1184 but1S REG_v(v24), \ | 1185 but1S __asm__ ("v24"), \ |
1185 op2 REG_v(v25), \ | 1186 op2 __asm__ ("v25"), \ |
1186 but2 REG_v(v26), \ | 1187 but2 __asm__ ("v26"), \ |
1187 op2S REG_v(v27), \ | 1188 op2S __asm__ ("v27"), \ |
1188 but2S REG_v(v28), \ | 1189 but2S __asm__ ("v28"), \ |
1189 op3 REG_v(v29), \ | 1190 op3 __asm__ ("v29"), \ |
1190 op3S REG_v(v30); \ | 1191 op3S __asm__ ("v30"); \ |
1191 \ | 1192 \ |
1192 src1 = vec_ld(stride * i, src); \ | 1193 src1 = vec_ld(stride * i, src); \ |
1193 src2 = vec_ld((stride * i) + 16, src); \ | 1194 src2 = vec_ld((stride * i) + 16, src); \ |
1194 srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ | 1195 srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ |
1195 dst1 = vec_ld(stride * i, dst); \ | 1196 dst1 = vec_ld(stride * i, dst); \ |