comparison ppc/dsputil_altivec.c @ 9421:dd2b5e52336a libavcodec

Remove gcc_fixes.h. It only contains workarounds for unsupported gcc versions.
author diego
date Sun, 12 Apr 2009 21:45:26 +0000
parents 68e959302527
children 5a730e38f1b3
comparison
legend: equal | deleted | inserted | replaced
9420:5b68d22e5ec9 9421:dd2b5e52336a
18 * You should have received a copy of the GNU Lesser General Public 18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software 19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */ 21 */
22 22
23 #include "config.h"
24 #if HAVE_ALTIVEC_H
25 #include <altivec.h>
26 #endif
23 #include "libavcodec/dsputil.h" 27 #include "libavcodec/dsputil.h"
24
25 #include "gcc_fixes.h"
26
27 #include "dsputil_ppc.h" 28 #include "dsputil_ppc.h"
28 #include "util_altivec.h" 29 #include "util_altivec.h"
29 #include "types_altivec.h" 30 #include "types_altivec.h"
30 31
31 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 32 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1122 */ 1123 */
1123 1124
1124 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { 1125 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) {
1125 int sum; 1126 int sum;
1126 register vector signed short 1127 register vector signed short
1127 temp0 REG_v(v0), 1128 temp0 __asm__ ("v0"),
1128 temp1 REG_v(v1), 1129 temp1 __asm__ ("v1"),
1129 temp2 REG_v(v2), 1130 temp2 __asm__ ("v2"),
1130 temp3 REG_v(v3), 1131 temp3 __asm__ ("v3"),
1131 temp4 REG_v(v4), 1132 temp4 __asm__ ("v4"),
1132 temp5 REG_v(v5), 1133 temp5 __asm__ ("v5"),
1133 temp6 REG_v(v6), 1134 temp6 __asm__ ("v6"),
1134 temp7 REG_v(v7); 1135 temp7 __asm__ ("v7");
1135 register vector signed short 1136 register vector signed short
1136 temp0S REG_v(v8), 1137 temp0S __asm__ ("v8"),
1137 temp1S REG_v(v9), 1138 temp1S __asm__ ("v9"),
1138 temp2S REG_v(v10), 1139 temp2S __asm__ ("v10"),
1139 temp3S REG_v(v11), 1140 temp3S __asm__ ("v11"),
1140 temp4S REG_v(v12), 1141 temp4S __asm__ ("v12"),
1141 temp5S REG_v(v13), 1142 temp5S __asm__ ("v13"),
1142 temp6S REG_v(v14), 1143 temp6S __asm__ ("v14"),
1143 temp7S REG_v(v15); 1144 temp7S __asm__ ("v15");
1144 register const vector unsigned char vzero REG_v(v31)= 1145 register const vector unsigned char vzero __asm__ ("v31") =
1145 (const vector unsigned char)vec_splat_u8(0); 1146 (const vector unsigned char)vec_splat_u8(0);
1146 { 1147 {
1147 register const vector signed short vprod1 REG_v(v16)= 1148 register const vector signed short vprod1 __asm__ ("v16") =
1148 (const vector signed short){ 1,-1, 1,-1, 1,-1, 1,-1 }; 1149 (const vector signed short){ 1,-1, 1,-1, 1,-1, 1,-1 };
1149 register const vector signed short vprod2 REG_v(v17)= 1150 register const vector signed short vprod2 __asm__ ("v17") =
1150 (const vector signed short){ 1, 1,-1,-1, 1, 1,-1,-1 }; 1151 (const vector signed short){ 1, 1,-1,-1, 1, 1,-1,-1 };
1151 register const vector signed short vprod3 REG_v(v18)= 1152 register const vector signed short vprod3 __asm__ ("v18") =
1152 (const vector signed short){ 1, 1, 1, 1,-1,-1,-1,-1 }; 1153 (const vector signed short){ 1, 1, 1, 1,-1,-1,-1,-1 };
1153 register const vector unsigned char perm1 REG_v(v19)= 1154 register const vector unsigned char perm1 __asm__ ("v19") =
1154 (const vector unsigned char) 1155 (const vector unsigned char)
1155 {0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, 1156 {0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
1156 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D}; 1157 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D};
1157 register const vector unsigned char perm2 REG_v(v20)= 1158 register const vector unsigned char perm2 __asm__ ("v20") =
1158 (const vector unsigned char) 1159 (const vector unsigned char)
1159 {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 1160 {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
1160 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B}; 1161 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B};
1161 register const vector unsigned char perm3 REG_v(v21)= 1162 register const vector unsigned char perm3 __asm__ ("v21") =
1162 (const vector unsigned char) 1163 (const vector unsigned char)
1163 {0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 1164 {0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
1164 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; 1165 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
1165 1166
1166 #define ONEITERBUTTERFLY(i, res1, res2) \ 1167 #define ONEITERBUTTERFLY(i, res1, res2) \
1167 { \ 1168 { \
1168 register vector unsigned char src1 REG_v(v22), \ 1169 register vector unsigned char src1 __asm__ ("v22"), \
1169 src2 REG_v(v23), \ 1170 src2 __asm__ ("v23"), \
1170 dst1 REG_v(v24), \ 1171 dst1 __asm__ ("v24"), \
1171 dst2 REG_v(v25), \ 1172 dst2 __asm__ ("v25"), \
1172 srcO REG_v(v22), \ 1173 srcO __asm__ ("v22"), \
1173 dstO REG_v(v23); \ 1174 dstO __asm__ ("v23"); \
1174 \ 1175 \
1175 register vector signed short srcV REG_v(v24), \ 1176 register vector signed short srcV __asm__ ("v24"), \
1176 dstV REG_v(v25), \ 1177 dstV __asm__ ("v25"), \
1177 srcW REG_v(v26), \ 1178 srcW __asm__ ("v26"), \
1178 dstW REG_v(v27), \ 1179 dstW __asm__ ("v27"), \
1179 but0 REG_v(v28), \ 1180 but0 __asm__ ("v28"), \
1180 but0S REG_v(v29), \ 1181 but0S __asm__ ("v29"), \
1181 op1 REG_v(v30), \ 1182 op1 __asm__ ("v30"), \
1182 but1 REG_v(v22), \ 1183 but1 __asm__ ("v22"), \
1183 op1S REG_v(v23), \ 1184 op1S __asm__ ("v23"), \
1184 but1S REG_v(v24), \ 1185 but1S __asm__ ("v24"), \
1185 op2 REG_v(v25), \ 1186 op2 __asm__ ("v25"), \
1186 but2 REG_v(v26), \ 1187 but2 __asm__ ("v26"), \
1187 op2S REG_v(v27), \ 1188 op2S __asm__ ("v27"), \
1188 but2S REG_v(v28), \ 1189 but2S __asm__ ("v28"), \
1189 op3 REG_v(v29), \ 1190 op3 __asm__ ("v29"), \
1190 op3S REG_v(v30); \ 1191 op3S __asm__ ("v30"); \
1191 \ 1192 \
1192 src1 = vec_ld(stride * i, src); \ 1193 src1 = vec_ld(stride * i, src); \
1193 src2 = vec_ld((stride * i) + 16, src); \ 1194 src2 = vec_ld((stride * i) + 16, src); \
1194 srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ 1195 srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
1195 dst1 = vec_ld(stride * i, dst); \ 1196 dst1 = vec_ld(stride * i, dst); \