Mercurial > libavcodec.hg
annotate ppc/dsputil_altivec.h @ 1553:541681146f83 libavcodec
move q_*_matrix out of MpegEncContext (40k ->23k) dct_quantize() is even slightly faster now, dont ask my why ...
author | michael |
---|---|
date | Wed, 22 Oct 2003 10:59:39 +0000 |
parents | b32afefe7d33 |
children | dea5b2946999 |
rev | line source |
---|---|
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
1 /* |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
2 * Copyright (c) 2002 Brian Foley |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
3 * Copyright (c) 2002 Dieter Shirley |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
4 * |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
5 * This library is free software; you can redistribute it and/or |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
6 * modify it under the terms of the GNU Lesser General Public |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
7 * License as published by the Free Software Foundation; either |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
8 * version 2 of the License, or (at your option) any later version. |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
9 * |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
10 * This library is distributed in the hope that it will be useful, |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
13 * Lesser General Public License for more details. |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
14 * |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
15 * You should have received a copy of the GNU Lesser General Public |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
16 * License along with this library; if not, write to the Free Software |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
638
diff
changeset
|
18 */ |
878
6ea69518e5f7
altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
828
diff
changeset
|
19 |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
20 #ifndef _DSPUTIL_ALTIVEC_ |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
21 #define _DSPUTIL_ALTIVEC_ |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
22 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
23 #include "dsputil_ppc.h" |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
24 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
25 #ifdef HAVE_ALTIVEC |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
26 |
878
6ea69518e5f7
altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
828
diff
changeset
|
27 extern int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); |
6ea69518e5f7
altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
828
diff
changeset
|
28 extern int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); |
6ea69518e5f7
altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
828
diff
changeset
|
29 extern int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
30 extern int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); |
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
31 extern int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
32 extern int sad16x16_altivec(void *s, uint8_t *a, uint8_t *b, int stride); |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
33 extern int sad8x8_altivec(void *s, uint8_t *a, uint8_t *b, int stride); |
878
6ea69518e5f7
altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
828
diff
changeset
|
34 extern int pix_norm1_altivec(uint8_t *pix, int line_size); |
981 | 35 extern int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size); |
36 extern int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size); | |
1064 | 37 extern int pix_sum_altivec(uint8_t * pix, int line_size); |
38 extern void diff_pixels_altivec(DCTELEM* block, const uint8_t* s1, const uint8_t* s2, int stride); | |
39 extern void get_pixels_altivec(DCTELEM* block, const uint8_t * pixels, int line_size); | |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
40 |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
41 extern void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w); |
1064 | 42 extern void put_pixels_clamped_altivec(const DCTELEM *block, uint8_t *restrict pixels, int line_size); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
43 extern void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
44 extern void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
45 extern void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
46 extern void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
47 extern void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); |
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
48 extern void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); |
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
49 extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
50 |
1064 | 51 extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder); |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
52 |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
53 extern int has_altivec(void); |
981 | 54 |
55 // used to build registers permutation vectors (vcprm) | |
56 // the 's' are for words in the _s_econd vector | |
57 #define WORD_0 0x00,0x01,0x02,0x03 | |
58 #define WORD_1 0x04,0x05,0x06,0x07 | |
59 #define WORD_2 0x08,0x09,0x0a,0x0b | |
60 #define WORD_3 0x0c,0x0d,0x0e,0x0f | |
61 #define WORD_s0 0x10,0x11,0x12,0x13 | |
62 #define WORD_s1 0x14,0x15,0x16,0x17 | |
63 #define WORD_s2 0x18,0x19,0x1a,0x1b | |
64 #define WORD_s3 0x1c,0x1d,0x1e,0x1f | |
65 | |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
66 #ifdef CONFIG_DARWIN |
981 | 67 #define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
68 #else |
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
69 #define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} |
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
70 #endif |
981 | 71 |
72 // vcprmle is used to keep the same index as in the SSE version. | |
73 // it's the same as vcprm, with the index inversed | |
74 // ('le' is Little Endian) | |
75 #define vcprmle(a,b,c,d) vcprm(d,c,b,a) | |
76 | |
77 // used to build inverse/identity vectors (vcii) | |
78 // n is _n_egative, p is _p_ositive | |
79 #define FLOAT_n -1. | |
80 #define FLOAT_p 1. | |
81 | |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
82 |
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
83 #ifdef CONFIG_DARWIN |
981 | 84 #define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
85 #else |
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
86 #define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} |
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
87 #endif |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
88 |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
89 #else /* HAVE_ALTIVEC */ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
90 #ifdef ALTIVEC_USE_REFERENCE_C_CODE |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
91 #error "I can't use ALTIVEC_USE_REFERENCE_C_CODE if I don't use HAVE_ALTIVEC" |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
92 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
93 #endif /* HAVE_ALTIVEC */ |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
94 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
95 #endif /* _DSPUTIL_ALTIVEC_ */ |