# HG changeset patch # User bellard # Date 1043620187 0 # Node ID b4172ff70d27643e7e6ad19108790ec43c93a601 # Parent 8f440ca8e0b0c898579148bfd9e082b27843caaf Altivec on non darwin systems patch by Romain Dolbeau diff -r 8f440ca8e0b0 -r b4172ff70d27 Makefile --- a/Makefile Thu Jan 23 23:18:42 2003 +0000 +++ b/Makefile Sun Jan 26 22:29:47 2003 +0000 @@ -86,7 +86,11 @@ endif ifeq ($(TARGET_ALTIVEC),yes) +ifeq ($(TARGET_OS),Darwin) CFLAGS += -faltivec +else +CFLAGS += -maltivec -mabi=altivec +endif OBJS += ppc/dsputil_altivec.o ppc/mpegvideo_altivec.o ppc/idct_altivec.o \ ppc/fft_altivec.o ppc/gmc_altivec.o endif diff -r 8f440ca8e0b0 -r b4172ff70d27 dsputil.h --- a/dsputil.h Thu Jan 23 23:18:42 2003 +0000 +++ b/dsputil.h Sun Jan 26 22:29:47 2003 +0000 @@ -224,6 +224,10 @@ extern int mm_flags; +#if defined(HAVE_ALTIVEC) && !defined(CONFIG_DARWIN) +#include <altivec.h> +#endif + #define __align8 __attribute__ ((aligned (16))) void dsputil_init_ppc(DSPContext* c, unsigned mask); diff -r 8f440ca8e0b0 -r b4172ff70d27 ppc/dsputil_altivec.c --- a/ppc/dsputil_altivec.c Thu Jan 23 23:18:42 2003 +0000 +++ b/ppc/dsputil_altivec.c Sun Jan 26 22:29:47 2003 +0000 @@ -21,22 +21,39 @@ #include "../dsputil.h" #include "dsputil_altivec.h" -#if CONFIG_DARWIN +#ifdef CONFIG_DARWIN #include <sys/sysctl.h> -#endif +#else /* CONFIG_DARWIN */ +#include <signal.h> +#include <setjmp.h> + +static sigjmp_buf jmpbuf; +static volatile sig_atomic_t canjump = 0; + +static void sigill_handler (int sig) +{ + if (!canjump) { + signal (sig, SIG_DFL); + raise (sig); + } + + canjump = 0; + siglongjmp (jmpbuf, 1); +} +#endif /* CONFIG_DARWIN */ int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) { int i; int s __attribute__((aligned(16))); - const vector unsigned char zero = (const vector unsigned char)(0); + const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); vector unsigned char *tv; vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; vector unsigned int sad; vector signed int sumdiffs; s = 0; - sad = 
(vector unsigned int)(0); + sad = (vector unsigned int)vec_splat_u32(0); for(i=0;i<16;i++) { /* Read unaligned pixels into our vectors. The vectors are as follows: @@ -76,7 +93,7 @@ { int i; int s __attribute__((aligned(16))); - const vector unsigned char zero = (const vector unsigned char)(0); + const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); vector unsigned char *tv; vector unsigned char pix1v, pix2v, pix3v, avgv, t5; vector unsigned int sad; @@ -84,7 +101,7 @@ uint8_t *pix3 = pix2 + line_size; s = 0; - sad = (vector unsigned int)(0); + sad = (vector unsigned int)vec_splat_u32(0); /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one @@ -137,8 +154,8 @@ int i; int s __attribute__((aligned(16))); uint8_t *pix3 = pix2 + line_size; - const vector unsigned char zero = (const vector unsigned char)(0); - const vector unsigned short two = (const vector unsigned short)(2); + const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2); vector unsigned char *tv, avgv, t5; vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; @@ -148,7 +165,7 @@ vector unsigned int sad; vector signed int sumdiffs; - sad = (vector unsigned int)(0); + sad = (vector unsigned int)vec_splat_u32(0); s = 0; @@ -237,13 +254,13 @@ { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)(0); + const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sad; vector signed int sumdiffs; - sad = (vector unsigned int) (0); + sad = (vector unsigned int)vec_splat_u32(0); for(i=0;i<16;i++) { @@ -279,14 +296,18 @@ { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)(0); + const 
vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sad; vector signed int sumdiffs; - sad = (vector unsigned int)(0); - permclear = (vector unsigned char) (255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); + sad = (vector unsigned int)vec_splat_u32(0); +#ifdef CONFIG_DARWIN + permclear = (vector unsigned char)(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); +#else + permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0}; +#endif for(i=0;i<8;i++) { /* Read potentially unaligned pixels into t1 and t2 @@ -323,13 +344,13 @@ { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)(0); + const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); vector unsigned char *tv; vector unsigned char pixv; vector unsigned int sv; vector signed int sum; - sv = (vector unsigned int)(0); + sv = (vector unsigned int)vec_splat_u32(0); s = 0; for (i = 0; i < 16; i++) { @@ -359,14 +380,18 @@ { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)(0); + const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sum; vector signed int sumsqr; - sum = (vector unsigned int)(0); - permclear = (vector unsigned char)(0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00); + sum = (vector unsigned int)vec_splat_u32(0); +#ifdef CONFIG_DARWIN + permclear = (vector unsigned char)(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); +#else + permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0}; +#endif for(i=0;i<8;i++) { /* Read potentially unaligned pixels into t1 and t2 @@ -413,13 +438,13 @@ { int i; int s 
__attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)(0); + const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sum; vector signed int sumsqr; - sum = (vector unsigned int)(0); + sum = (vector unsigned int)vec_splat_u32(0); for(i=0;i<16;i++) { /* Read potentially unaligned pixels into t1 and t2 */ @@ -457,7 +482,7 @@ int pix_sum_altivec(UINT8 * pix, int line_size) { - const vector unsigned int zero = (const vector unsigned int)(0); + const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); vector unsigned char perm, *pixv; vector unsigned char t1; vector unsigned int sad; @@ -466,7 +491,7 @@ int i; int s __attribute__((aligned(16))); - sad = (vector unsigned int) (0); + sad = (vector unsigned int)vec_splat_u32(0); for (i = 0; i < 16; i++) { /* Read the potentially unaligned 16 pixels into t1 */ @@ -492,7 +517,7 @@ { int i; vector unsigned char perm, bytes, *pixv; - const vector unsigned char zero = (const vector unsigned char) (0); + const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); vector signed short shorts; for(i=0;i<8;i++) @@ -519,7 +544,7 @@ { int i; vector unsigned char perm, bytes, *pixv; - const vector unsigned char zero = (const vector unsigned char) (0); + const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); vector signed short shorts1, shorts2; for(i=0;i<4;i++) @@ -830,8 +855,8 @@ blockv, temp1, temp2; register vector unsigned short pixelssum1, pixelssum2, temp3; - register const vector unsigned char vczero = (const vector unsigned char)(0); - register const vector unsigned short vctwo = (const vector unsigned short)(2); + register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); + register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 
temp1 = vec_ld(0, pixels); temp2 = vec_ld(16, pixels); @@ -945,9 +970,9 @@ blockv, temp1, temp2; register vector unsigned short pixelssum1, pixelssum2, temp3; - register const vector unsigned char vczero = (const vector unsigned char)(0); - register const vector unsigned short vcone = (const vector unsigned short)(1); - register const vector unsigned short vctwo = (const vector unsigned short)(2); + register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); + register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); + register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); temp1 = vec_ld(0, pixels); temp2 = vec_ld(16, pixels); @@ -1061,8 +1086,8 @@ register vector unsigned short pixelssum1, pixelssum2, temp3, pixelssum3, pixelssum4, temp4; - register const vector unsigned char vczero = (const vector unsigned char)(0); - register const vector unsigned short vctwo = (const vector unsigned short)(2); + register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); + register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); temp1 = vec_ld(0, pixels); temp2 = vec_ld(16, pixels); @@ -1181,9 +1206,9 @@ register vector unsigned short pixelssum1, pixelssum2, temp3, pixelssum3, pixelssum4, temp4; - register const vector unsigned char vczero = (const vector unsigned char)(0); - register const vector unsigned short vcone = (const vector unsigned short)(1); - register const vector unsigned short vctwo = (const vector unsigned short)(2); + register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); + register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); + register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); temp1 = vec_ld(0, pixels); temp2 = vec_ld(16, pixels); @@ -1254,7 +1279,7 @@ int has_altivec(void) { -#if 
CONFIG_DARWIN +#ifdef CONFIG_DARWIN int sels[2] = {CTL_HW, HW_VECTORUNIT}; int has_vu = 0; size_t len = sizeof(has_vu); @@ -1263,6 +1288,25 @@ err = sysctl(sels, 2, &has_vu, &len, NULL, 0); if (err == 0) return (has_vu != 0); -#endif +#else /* CONFIG_DARWIN */ +/* no Darwin, do it the brute-force way */ +/* this is borrowed from the libmpeg2 library */ + { + signal (SIGILL, sigill_handler); + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, SIG_DFL); + } else { + canjump = 1; + + asm volatile ("mtspr 256, %0\n\t" + "vand %%v0, %%v0, %%v0" + : + : "r" (-1)); + + signal (SIGILL, SIG_DFL); + return 1; + } + } +#endif /* CONFIG_DARWIN */ return 0; } diff -r 8f440ca8e0b0 -r b4172ff70d27 ppc/dsputil_altivec.h --- a/ppc/dsputil_altivec.h Thu Jan 23 23:18:42 2003 +0000 +++ b/ppc/dsputil_altivec.h Sun Jan 26 22:29:47 2003 +0000 @@ -63,7 +63,11 @@ #define WORD_s2 0x18,0x19,0x1a,0x1b #define WORD_s3 0x1c,0x1d,0x1e,0x1f +#ifdef CONFIG_DARWIN #define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) +#else +#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} +#endif // vcprmle is used to keep the same index as in the SSE version. // it's the same as vcprm, with the index inversed @@ -75,7 +79,12 @@ #define FLOAT_n -1. #define FLOAT_p 1. 
+ +#ifdef CONFIG_DARWIN #define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) +#else +#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} +#endif #else /* HAVE_ALTIVEC */ #ifdef ALTIVEC_USE_REFERENCE_C_CODE diff -r 8f440ca8e0b0 -r b4172ff70d27 ppc/dsputil_ppc.c --- a/ppc/dsputil_ppc.c Thu Jan 23 23:18:42 2003 +0000 +++ b/ppc/dsputil_ppc.c Sun Jan 26 22:29:47 2003 +0000 @@ -120,7 +120,6 @@ ((unsigned long*)blocks)[1] = 0L; ((unsigned long*)blocks)[2] = 0L; ((unsigned long*)blocks)[3] = 0L; - vec_st((vector short)(0), 0, blocks); i += 16; } for ( ; i < sizeof(DCTELEM)*6*64 ; i += 32) { @@ -142,18 +141,17 @@ /* check dcbz report how many bytes are set to 0 by dcbz */ long check_dcbz_effect(void) { - register char *fakedata = (char*)malloc(1024); + register char *fakedata = (char*)av_malloc(1024); register char *fakedata_middle; register long zero = 0; register long i = 0; long count = 0; - if (fakedata == NULL) + if (!fakedata) { return 0L; } - fakedata_middle = (fakedata + 512); memset(fakedata, 0xFF, 1024); @@ -166,7 +164,7 @@ count++; } - free(fakedata); + av_free(fakedata); return count; } diff -r 8f440ca8e0b0 -r b4172ff70d27 ppc/fft_altivec.c --- a/ppc/fft_altivec.c Thu Jan 23 23:18:42 2003 +0000 +++ b/ppc/fft_altivec.c Sun Jan 26 22:29:47 2003 +0000 @@ -138,7 +138,11 @@ POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ +#ifdef CONFIG_DARWIN register const vector float vczero = (const vector float)(0.); +#else + register const vector float vczero = (const vector float){0.,0.,0.,0.}; +#endif int ln = s->nbits; int j, np, np2; diff -r 8f440ca8e0b0 -r b4172ff70d27 ppc/gmc_altivec.c --- a/ppc/gmc_altivec.c Thu Jan 23 23:18:42 2003 +0000 +++ b/ppc/gmc_altivec.c Sun Jan 26 22:29:47 2003 +0000 @@ -66,8 +66,8 @@ ( x16)*( y16), /* D */ 0, 0, 0, 0 /* padding */ }; - register const vector unsigned char vczero = (const vector unsigned char)(0); - 
register const vector unsigned short vcsr8 = (const vector unsigned short)(8); + register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); + register const vector unsigned short vcsr8 = (const vector unsigned short)vec_splat_u16(8); register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD; register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD; int i; diff -r 8f440ca8e0b0 -r b4172ff70d27 ppc/idct_altivec.c --- a/ppc/idct_altivec.c Thu Jan 23 23:18:42 2003 +0000 +++ b/ppc/idct_altivec.c Sun Jan 26 22:29:47 2003 +0000 @@ -151,6 +151,8 @@ vx6 = vec_sra (vy6, shift); \ vx7 = vec_sra (vy7, shift); + +#ifdef CONFIG_DARWIN static const vector_s16_t constants[5] = { (vector_s16_t)(23170, 13573, 6518, 21895, -23170, -21895, 32, 31), (vector_s16_t)(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725), @@ -158,6 +160,16 @@ (vector_s16_t)(21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692), (vector_s16_t)(19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722) }; +#else +// broken gcc +static const vector_s16_t constants[5] = { + (vector_s16_t){23170, 13573, 6518, 21895, -23170, -21895, 32, 31}, + (vector_s16_t){16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725}, + (vector_s16_t){22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521}, + (vector_s16_t){21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692}, + (vector_s16_t){19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722} +}; +#endif void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block) { diff -r 8f440ca8e0b0 -r b4172ff70d27 ppc/mpegvideo_altivec.c --- a/ppc/mpegvideo_altivec.c Thu Jan 23 23:18:42 2003 +0000 +++ b/ppc/mpegvideo_altivec.c Sun Jan 26 22:29:47 2003 +0000 @@ -90,6 +90,13 @@ vec = vec_splat(vec, 0); \ } + +#ifdef CONFIG_DARWIN +#define FOUROF(a) (a) +#else +// slower, for dumb non-apple GCC +#define FOUROF(a) {a,a,a,a} +#endif int dct_quantize_altivec(MpegEncContext* s, DCTELEM* data, int n, 
int qscale, int* overflow) @@ -97,7 +104,7 @@ int lastNonZero; vector float row0, row1, row2, row3, row4, row5, row6, row7; vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7; - const vector float zero = (const vector float)(0.0f); + const vector float zero = (const vector float)FOUROF(0.); // Load the data into the row/alt vectors { @@ -141,18 +148,18 @@ // in the vector local variables, as floats, which we'll use during the // quantize step... { - const vector float vec_0_298631336 = (vector float)(0.298631336f); - const vector float vec_0_390180644 = (vector float)(-0.390180644f); - const vector float vec_0_541196100 = (vector float)(0.541196100f); - const vector float vec_0_765366865 = (vector float)(0.765366865f); - const vector float vec_0_899976223 = (vector float)(-0.899976223f); - const vector float vec_1_175875602 = (vector float)(1.175875602f); - const vector float vec_1_501321110 = (vector float)(1.501321110f); - const vector float vec_1_847759065 = (vector float)(-1.847759065f); - const vector float vec_1_961570560 = (vector float)(-1.961570560f); - const vector float vec_2_053119869 = (vector float)(2.053119869f); - const vector float vec_2_562915447 = (vector float)(-2.562915447f); - const vector float vec_3_072711026 = (vector float)(3.072711026f); + const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f); + const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f); + const vector float vec_0_541196100 = (vector float)FOUROF(0.541196100f); + const vector float vec_0_765366865 = (vector float)FOUROF(0.765366865f); + const vector float vec_0_899976223 = (vector float)FOUROF(-0.899976223f); + const vector float vec_1_175875602 = (vector float)FOUROF(1.175875602f); + const vector float vec_1_501321110 = (vector float)FOUROF(1.501321110f); + const vector float vec_1_847759065 = (vector float)FOUROF(-1.847759065f); + const vector float vec_1_961570560 = (vector float)FOUROF(-1.961570560f); + const vector float 
vec_2_053119869 = (vector float)FOUROF(2.053119869f); + const vector float vec_2_562915447 = (vector float)FOUROF(-2.562915447f); + const vector float vec_3_072711026 = (vector float)FOUROF(3.072711026f); int whichPass, whichHalf; @@ -306,7 +313,7 @@ // rounding when we convert to int, instead of flooring.) { vector signed int biasInt; - const vector float negOneFloat = (vector float)(-1.0f); + const vector float negOneFloat = (vector float)FOUROF(-1.0f); LOAD4(biasInt, biasAddr); bias = vec_ctf(biasInt, QUANT_BIAS_SHIFT); negBias = vec_madd(bias, negOneFloat, zero); @@ -503,6 +510,7 @@ return lastNonZero; } +#undef FOUROF /* AltiVec version of dct_unquantize_h263 @@ -551,7 +559,7 @@ } #else /* ALTIVEC_USE_REFERENCE_C_CODE */ { - register const vector short vczero = (const vector short)(0); + register const vector short vczero = (const vector short)vec_splat_s16(0); short __attribute__ ((aligned(16))) qmul8[] = { qmul, qmul, qmul, qmul,