annotate ppc/h264_altivec.c @ 12454:f4355cd85faa libavcodec

Port latest x264 deblock asm (before they moved to using NV12 as internal format), LGPL'ed with permission from Jason and Loren. This includes mmx2 code, so remove inline asm from h264dsp_mmx.c accordingly.
author rbultje
date Fri, 03 Sep 2010 16:52:46 +0000
parents 3cd4cd0509cd
children 06abedae2906
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
1 /*
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
3 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
4 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
5 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
10 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
14 * Lesser General Public License for more details.
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
15 *
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
19 */
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
20
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 6077
diff changeset
21 #include "libavcodec/dsputil.h"
8461
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
22 #include "libavcodec/h264data.h"
11499
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
23 #include "libavcodec/h264dsp.h"
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
24
6077
8baa533764d4 Add necessary #include, fixes the warnings:
diego
parents: 5750
diff changeset
25 #include "dsputil_altivec.h"
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents: 5586
diff changeset
26 #include "util_altivec.h"
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
27 #include "types_altivec.h"
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
28
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
/* "put" store op: d is assigned s unchanged; the dst argument is not used. */
29 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
/* "avg" store op: d is assigned vec_avg(dst, s), i.e. s is combined with dst
 * through the AltiVec vec_avg intrinsic before being stored. */
30 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
31
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
/*
 * First inclusion of h264_template_altivec.c: OP_U8_ALTIVEC is bound to the
 * PUT store op and every PREFIX_* macro to its put_ / altivec_put_ spelling
 * before the template is included.
 * NOTE(review): the template is not visible here; presumably it defines the
 * functions named by the PREFIX_*_altivec macros -- confirm against that file.
 */
32 #define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
33 #define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9439
diff changeset
34 #define PREFIX_no_rnd_vc1_chroma_mc8_altivec put_no_rnd_vc1_chroma_mc8_altivec
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
35 #define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
36 #define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
37 #define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
38 #define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
39 #define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
40 #define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
41 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
3577
5be5a936c8a9 Clean up:make dsputil subfile names consistent
lu_zero
parents: 3544
diff changeset
42 #include "h264_template_altivec.c"
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
/* Remove the PUT bindings again; the same macro names are re-defined further
 * down with their avg_ spellings before the template is included a second time. */
43 #undef OP_U8_ALTIVEC
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
44 #undef PREFIX_h264_chroma_mc8_altivec
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9439
diff changeset
45 #undef PREFIX_no_rnd_vc1_chroma_mc8_altivec
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
46 #undef PREFIX_h264_chroma_mc8_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
47 #undef PREFIX_h264_qpel16_h_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
48 #undef PREFIX_h264_qpel16_h_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
49 #undef PREFIX_h264_qpel16_v_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
50 #undef PREFIX_h264_qpel16_v_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
51 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
52 #undef PREFIX_h264_qpel16_hv_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
53
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
/*
 * Second inclusion of h264_template_altivec.c: OP_U8_ALTIVEC is bound to the
 * AVG store op and every PREFIX_* macro to its avg_ / altivec_avg_ spelling
 * before the template is included.
 * NOTE(review): the template is not visible here; presumably it defines the
 * functions named by the PREFIX_*_altivec macros -- confirm against that file.
 */
54 #define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
55 #define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9439
diff changeset
56 #define PREFIX_no_rnd_vc1_chroma_mc8_altivec avg_no_rnd_vc1_chroma_mc8_altivec
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
57 #define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
58 #define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
59 #define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
60 #define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
61 #define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
62 #define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
63 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
3577
5be5a936c8a9 Clean up:make dsputil subfile names consistent
lu_zero
parents: 3544
diff changeset
64 #include "h264_template_altivec.c"
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
/* Remove the AVG bindings once the template has been included. */
65 #undef OP_U8_ALTIVEC
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
66 #undef PREFIX_h264_chroma_mc8_altivec
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9439
diff changeset
67 #undef PREFIX_no_rnd_vc1_chroma_mc8_altivec
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
68 #undef PREFIX_h264_chroma_mc8_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
69 #undef PREFIX_h264_qpel16_h_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
70 #undef PREFIX_h264_qpel16_h_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
71 #undef PREFIX_h264_qpel16_v_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
72 #undef PREFIX_h264_qpel16_v_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
73 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
74 #undef PREFIX_h264_qpel16_hv_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
75
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
/*
 * H264_MC(OPNAME, SIZE, CODETYPE) expands to sixteen static functions
 *   OPNAME##h264_qpel##SIZE##_mcXY_##CODETYPE(uint8_t *dst, uint8_t *src, int stride)
 * for XY in 00 10 20 30, 01 02 03, 11 31 13 33, 22, 21 23 12 32.
 *
 * How each one is built, as written below:
 *  - mc00 forwards to OPNAME##pixels##SIZE##_##CODETYPE with a height of SIZE.
 *  - mc20, mc02 and mc22 call the OPNAME h, v and hv lowpass routine directly
 *    on dst; mc22 also hands over an int16_t tmp[SIZE*(SIZE+8)] scratch array.
 *  - every other position first runs the put_ lowpass routine(s) into
 *    SIZE*SIZE stack buffers declared with DECLARE_ALIGNED(16, ...), written
 *    with a row stride of SIZE, and then merges two inputs through
 *    OPNAME##pixels##SIZE##_l2_##CODETYPE:
 *      mc10, mc30: src (mc30: src+1)       with the h lowpass of src
 *      mc01, mc03: src (mc03: src+stride)  with the v lowpass of src
 *      mc11: h lowpass of src         with v lowpass of src
 *      mc31: h lowpass of src         with v lowpass of src+1
 *      mc13: h lowpass of src+stride  with v lowpass of src
 *      mc33: h lowpass of src+stride  with v lowpass of src+1
 *      mc21: h lowpass of src         with hv lowpass of src
 *      mc23: h lowpass of src+stride  with hv lowpass of src
 *      mc12: v lowpass of src         with hv lowpass of src
 *      mc32: v lowpass of src+1       with hv lowpass of src
 *
 * NOTE(review): the lowpass and pixels helpers are defined outside this macro;
 * presumably XY is the quarter-pel offset (X horizontal, Y vertical) -- confirm
 * against the code that installs these functions.
 */
76 #define H264_MC(OPNAME, SIZE, CODETYPE) \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
77 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
78 OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
79 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
80 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
81 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
82 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
83 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
84 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
85 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
86 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
87 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
88 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
89 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
90 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
91 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
92 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
93 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
94 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
95 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
96 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
97 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
98 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
99 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
100 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
101 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
102 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
103 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
104 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
105 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
106 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
107 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
108 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
109 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
110 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
111 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
112 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
113 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
114 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
115 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
116 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
117 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
118 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
119 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
120 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
121 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
122 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
123 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
124 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
125 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
126 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
127 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
128 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
129 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
130 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
131 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
132 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
133 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
134 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
135 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
136 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
137 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
138 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
139 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
140 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
141 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
142 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
143 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
144 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
145 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
146 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
147 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
148 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
149 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
150 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
151 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
152 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
153 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
154 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
155 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
156 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
157 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
158 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
159 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
160 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
161 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
162 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
163 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
164 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
165 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
166 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
167 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
168 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
169 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
170 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
171 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
172 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
173 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
174 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
175 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
176 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
177 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
178 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
179 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
180 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
181 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
182 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
183 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
184 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
185
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/**
 * Write a 16-byte-wide, h-row block to dst in which every byte is
 * vec_avg() of the matching bytes of src1 and src2.
 *
 * @param dst         destination; advanced by dst_stride after each row.
 *                    It may be unaligned: the two aligned vectors covering
 *                    dst[0..15] are read, merged with the result and
 *                    written back.
 * @param src1        first source, rows are src_stride1 bytes apart
 * @param src2        second source, rows are read at a fixed 16-byte pitch
 * @param dst_stride  distance in bytes between destination rows
 * @param src_stride1 distance in bytes between src1 rows
 * @param h           number of rows to process
 */
186 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
187 const uint8_t * src2, int dst_stride,
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
188 int src_stride1, int h)
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
189 {
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
190 int i;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
191 vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
192
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* src2 is stepped by exactly 16 bytes per row below, so its alignment
 * permute is the same for every row and is computed once. */
193 mask_ = vec_lvsl(0, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
194
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
195 for (i = 0; i < h; i++) {
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
196
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* a = the 16 (possibly unaligned) bytes of row i of src1 */
197 tmp1 = vec_ld(i * src_stride1, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
198 mask = vec_lvsl(i * src_stride1, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
199 tmp2 = vec_ld(i * src_stride1 + 15, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
200
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
201 a = vec_perm(tmp1, tmp2, mask);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
202
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* b = the 16 bytes of row i of src2 */
203 tmp1 = vec_ld(i * 16, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
204 tmp2 = vec_ld(i * 16 + 15, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
205
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
206 b = vec_perm(tmp1, tmp2, mask_);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
207
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* Fetch the two aligned vectors that straddle dst[0..15]. */
208 tmp1 = vec_ld(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
209 mask = vec_lvsl(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
210 tmp2 = vec_ld(15, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
211
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
212 d = vec_avg(a, b);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
213
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* edges = the bytes of those two vectors lying outside dst[0..15];
 * they are merged back below so the stores leave them unchanged. */
214 edges = vec_perm(tmp2, tmp1, mask);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
215
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
216 align = vec_lvsr(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
217
3659
dd55fb216497 Proper fix for the corner case that would have been corrected before, praise&blame to me and exg in equal shares
lu_zero
parents: 3658
diff changeset
218 tmp2 = vec_perm(d, edges, align);
3658
2a113750d778 Revert previous commit
lu_zero
parents: 3583
diff changeset
219 tmp1 = vec_perm(edges, d, align);
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
220
3659
dd55fb216497 Proper fix for the corner case that would have been corrected before, praise&blame to me and exg in equal shares
lu_zero
parents: 3658
diff changeset
/* Store order matters: when dst is 16-byte aligned both stores hit the
 * same vector, and the later store at offset 0 is the one that carries
 * the result. */
221 vec_st(tmp2, 15, dst);
3583
562758eaf7bf 10l, thanks to Emanuele Giaquinta <exg@gentoo.org> for testing and finding the issue
lu_zero
parents: 3577
diff changeset
222 vec_st(tmp1, 0 , dst);
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
223
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
224 dst += dst_stride;
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
225 }
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
226 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
227
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/**
 * Blend a 16-byte-wide, h-row block into dst: each output byte is
 * vec_avg(old dst byte, vec_avg(src1 byte, src2 byte)), i.e. the two
 * averages are applied one after the other.
 *
 * @param dst         destination, read and rewritten; advanced by
 *                    dst_stride after each row. It may be unaligned: the
 *                    two aligned vectors covering dst[0..15] are read,
 *                    merged with the result and written back.
 * @param src1        first source, rows are src_stride1 bytes apart
 * @param src2        second source, rows are read at a fixed 16-byte pitch
 * @param dst_stride  distance in bytes between destination rows
 * @param src_stride1 distance in bytes between src1 rows
 * @param h           number of rows to process
 */
228 static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
229 const uint8_t * src2, int dst_stride,
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
230 int src_stride1, int h)
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
231 {
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
232 int i;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
233 vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
234
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* src2 is stepped by exactly 16 bytes per row below, so its alignment
 * permute is the same for every row and is computed once. */
235 mask_ = vec_lvsl(0, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
236
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
237 for (i = 0; i < h; i++) {
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
238
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* a = the 16 (possibly unaligned) bytes of row i of src1 */
239 tmp1 = vec_ld(i * src_stride1, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
240 mask = vec_lvsl(i * src_stride1, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
241 tmp2 = vec_ld(i * src_stride1 + 15, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
242
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
243 a = vec_perm(tmp1, tmp2, mask);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
244
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* b = the 16 bytes of row i of src2 */
245 tmp1 = vec_ld(i * 16, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
246 tmp2 = vec_ld(i * 16 + 15, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
247
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
248 b = vec_perm(tmp1, tmp2, mask_);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
249
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* Fetch the two aligned vectors that straddle dst[0..15]. */
250 tmp1 = vec_ld(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
251 mask = vec_lvsl(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
252 tmp2 = vec_ld(15, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
253
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* vec_perm(tmp1, tmp2, mask) is the current content of dst[0..15];
 * it is averaged with the average of the two sources. */
254 d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
255
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* edges = the bytes of those two vectors lying outside dst[0..15];
 * they are merged back below so the stores leave them unchanged. */
256 edges = vec_perm(tmp2, tmp1, mask);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
257
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
258 align = vec_lvsr(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
259
3659
dd55fb216497 Proper fix for the corner case that would have been corrected before, praise&blame to me and exg in equal shares
lu_zero
parents: 3658
diff changeset
260 tmp2 = vec_perm(d, edges, align);
3658
2a113750d778 Revert previous commit
lu_zero
parents: 3583
diff changeset
261 tmp1 = vec_perm(edges, d, align);
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
262
3659
dd55fb216497 Proper fix for the corner case that would have been corrected before, praise&blame to me and exg in equal shares
lu_zero
parents: 3658
diff changeset
/* Store order matters: when dst is 16-byte aligned both stores hit the
 * same vector, and the later store at offset 0 is the one that carries
 * the result. */
263 vec_st(tmp2, 15, dst);
3583
562758eaf7bf 10l, thanks to Emanuele Giaquinta <exg@gentoo.org> for testing and finding the issue
lu_zero
parents: 3577
diff changeset
264 vec_st(tmp1, 0 , dst);
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
265
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
266 dst += dst_stride;
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
267 }
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
268 }
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
269
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
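270 /* The AltiVec versions are implemented above but could be faster; these disabled macros would forward to put_pixels16_l2/avg_pixels16_l2 instead: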
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
271 #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
272 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
273 */
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
274
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
/* Expand the H264_MC function family twice for size 16 with the altivec
 * suffix: once with the put_ prefix and once with the avg_ prefix.
 * NOTE(review): the macro header is not visible here; the argument order is
 * presumably (OPNAME, SIZE, CODETYPE) -- confirm against the definition. */
275 H264_MC(put_, 16, altivec)
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
276 H264_MC(avg_, 16, altivec)
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
277
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
278
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
279 /****************************************************************************
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
280 * IDCT transform:
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
281 ****************************************************************************/
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
282
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
/**
 * One 1-D pass of the 4-point butterfly used by ff_h264_idct_add_altivec.
 *
 * Inputs vb0..vb3 are read only; results are written to va0..va3.
 * The macro also assigns vz0..vz3, which are NOT parameters: they must be
 * declared as vectors in the scope where the macro is expanded.
 * The expansion has no trailing semicolon; the caller supplies it.
 */
283 #define VEC_1D_DCT(vb0,vb1,vb2,vb3,va0,va1,va2,va3) \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
284 /* 1st stage */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
285 vz0 = vec_add(vb0,vb2); /* temp[0] = Y[0] + Y[2] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
286 vz1 = vec_sub(vb0,vb2); /* temp[1] = Y[0] - Y[2] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
287 vz2 = vec_sra(vb1,vec_splat_u16(1)); \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
288 vz2 = vec_sub(vz2,vb3); /* temp[2] = (Y[1] >> 1) - Y[3] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
289 vz3 = vec_sra(vb3,vec_splat_u16(1)); \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
290 vz3 = vec_add(vb1,vz3); /* temp[3] = Y[1] + (Y[3] >> 1) */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
291 /* 2nd stage: output */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
292 va0 = vec_add(vz0,vz3); /* x[0] = temp[0] + temp[3] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
293 va1 = vec_add(vz1,vz2); /* x[1] = temp[1] + temp[2] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
294 va2 = vec_sub(vz1,vz2); /* x[2] = temp[1] - temp[2] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
295 va3 = vec_sub(vz0,vz3) /* x[3] = temp[0] - temp[3] */
5094
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
296
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/**
 * Transpose a 4x4 matrix held in the first four elements of a0..a3,
 * leaving the transposed rows in the first four elements of b0..b3.
 *
 * Built from three rounds of vec_mergeh/vec_mergel. With 8-element
 * 16-bit vectors (the way it is used in this file) elements 4..7 of the
 * outputs only hold copies of a0 data and carry no meaning.
 * a0..a3 are used as scratch and are overwritten.
 * The expansion has no trailing semicolon; the caller supplies it.
 */
297 #define VEC_TRANSPOSE_4(a0,a1,a2,a3,b0,b1,b2,b3) \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
298 b0 = vec_mergeh( a0, a0 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
299 b1 = vec_mergeh( a1, a0 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
300 b2 = vec_mergeh( a2, a0 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
301 b3 = vec_mergeh( a3, a0 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
302 a0 = vec_mergeh( b0, b2 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
303 a1 = vec_mergel( b0, b2 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
304 a2 = vec_mergeh( b1, b3 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
305 a3 = vec_mergel( b1, b3 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
306 b0 = vec_mergeh( a0, a2 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
307 b1 = vec_mergel( a0, a2 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
308 b2 = vec_mergeh( a1, a3 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
309 b3 = vec_mergel( a1, a3 )
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
310
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/**
 * Add the 16-bit values in va to the pixels at dst and store one 32-bit
 * word (4 bytes) of the saturated result back to dst.
 *
 * Steps: load the aligned vector containing dst, move the dst bytes to the
 * front with vdst_mask, widen them to 16 bit by merging with zero, add va,
 * pack back to unsigned 8 bit with saturation, splat the first word and
 * store a single element with vec_ste.
 *
 * va is overwritten with the sum. Besides its argument the macro relies on
 * these names being in scope at the expansion site: dst, element,
 * vdst_orig, vdst, vdst_mask, vdst_ss, va_u8, va_u32, zero_u8v, zero_s16v.
 * Unlike the other helper macros in this file, the expansion already ends
 * with a semicolon.
 */
311 #define VEC_LOAD_U8_ADD_S16_STORE_U8(va) \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
312 vdst_orig = vec_ld(0, dst); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
313 vdst = vec_perm(vdst_orig, zero_u8v, vdst_mask); \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
314 vdst_ss = (vec_s16) vec_mergeh(zero_u8v, vdst); \
5094
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
315 va = vec_add(va, vdst_ss); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
316 va_u8 = vec_packsu(va, zero_s16v); \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
317 va_u32 = vec_splat((vec_u32)va_u8, 0); \
5094
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
318 vec_ste(va_u32, element, (uint32_t*)dst);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
319
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/**
 * Run the two-pass 4x4 transform (VEC_1D_DCT, transpose, VEC_1D_DCT) on
 * the 16 coefficients in block, shift the result right by 6 and add it to
 * the 4x4 pixel area starting at dst (four rows, stride bytes apart, four
 * bytes stored per row).
 *
 * @param dst    destination pixels, updated in place
 * @param block  16 coefficients, read with two aligned vector loads;
 *               block[0] is modified (32 is added for rounding)
 * @param stride distance in bytes between destination rows
 */
320 static void ff_h264_idct_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
321 {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
322 vec_s16 va0, va1, va2, va3;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
/* vz0..vz3 are scratch registers assigned inside VEC_1D_DCT. */
323 vec_s16 vz0, vz1, vz2, vz3;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
324 vec_s16 vtmp0, vtmp1, vtmp2, vtmp3;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
325 vec_u8 va_u8;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
326 vec_u32 va_u32;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
327 vec_s16 vdst_ss;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
328 const vec_u16 v6us = vec_splat_u16(6);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
329 vec_u8 vdst, vdst_orig;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
/* NOTE(review): the mask is derived from the initial dst only and reused
 * for all four rows -- this presumably assumes stride keeps dst's offset
 * within a 16-byte vector unchanged; confirm against callers. */
330 vec_u8 vdst_mask = vec_lvsl(0, dst);
5094
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/* Index (0..3) of the 32-bit word inside the 16-byte vector that dst
 * points into.
 * NOTE(review): it is later passed as the second argument of vec_ste,
 * and the stored vector is a splat -- confirm this value is needed. */
331 int element = ((unsigned long)dst & 0xf) >> 2;
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
332 LOAD_ZERO;
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
333
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
334 block[0] += 32; /* add 32 as a DC-level for rounding */
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
335
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/* Each 16-byte load holds two rows of four coefficients; rotating the
 * vector by 8 bytes brings the second row of the pair to the front. */
336 vtmp0 = vec_ld(0,block);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
337 vtmp1 = vec_sld(vtmp0, vtmp0, 8);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
338 vtmp2 = vec_ld(16,block);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
339 vtmp3 = vec_sld(vtmp2, vtmp2, 8);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
340
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/* first 1-D pass, transpose, second 1-D pass */
341 VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
342 VEC_TRANSPOSE_4(va0,va1,va2,va3,vtmp0,vtmp1,vtmp2,vtmp3);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
343 VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
344
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/* final scaling: arithmetic shift right by 6 (the +32 above rounds it) */
345 va0 = vec_sra(va0,v6us);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
346 va1 = vec_sra(va1,v6us);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
347 va2 = vec_sra(va2,v6us);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
348 va3 = vec_sra(va3,v6us);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
349
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/* add each result row to the matching destination row */
350 VEC_LOAD_U8_ADD_S16_STORE_U8(va0);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
351 dst += stride;
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
352 VEC_LOAD_U8_ADD_S16_STORE_U8(va1);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
353 dst += stride;
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
354 VEC_LOAD_U8_ADD_S16_STORE_U8(va2);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
355 dst += stride;
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
356 VEC_LOAD_U8_ADD_S16_STORE_U8(va3);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
357 }
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
358
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
359 #define IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7) {\
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
360 /* a0 = SRC(0) + SRC(4); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
361 vec_s16 a0v = vec_add(s0, s4); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
362 /* a2 = SRC(0) - SRC(4); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
363 vec_s16 a2v = vec_sub(s0, s4); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
364 /* a4 = (SRC(2)>>1) - SRC(6); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
365 vec_s16 a4v = vec_sub(vec_sra(s2, onev), s6); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
366 /* a6 = (SRC(6)>>1) + SRC(2); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
367 vec_s16 a6v = vec_add(vec_sra(s6, onev), s2); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
368 /* b0 = a0 + a6; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
369 vec_s16 b0v = vec_add(a0v, a6v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
370 /* b2 = a2 + a4; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
371 vec_s16 b2v = vec_add(a2v, a4v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
372 /* b4 = a2 - a4; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
373 vec_s16 b4v = vec_sub(a2v, a4v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
374 /* b6 = a0 - a6; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
375 vec_s16 b6v = vec_sub(a0v, a6v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
376 /* a1 = SRC(5) - SRC(3) - SRC(7) - (SRC(7)>>1); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
377 /* a1 = (SRC(5)-SRC(3)) - (SRC(7) + (SRC(7)>>1)); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
378 vec_s16 a1v = vec_sub( vec_sub(s5, s3), vec_add(s7, vec_sra(s7, onev)) ); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
379 /* a3 = SRC(7) + SRC(1) - SRC(3) - (SRC(3)>>1); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
380 /* a3 = (SRC(7)+SRC(1)) - (SRC(3) + (SRC(3)>>1)); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
381 vec_s16 a3v = vec_sub( vec_add(s7, s1), vec_add(s3, vec_sra(s3, onev)) );\
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
382 /* a5 = SRC(7) - SRC(1) + SRC(5) + (SRC(5)>>1); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
383 /* a5 = (SRC(7)-SRC(1)) + SRC(5) + (SRC(5)>>1); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
384 vec_s16 a5v = vec_add( vec_sub(s7, s1), vec_add(s5, vec_sra(s5, onev)) );\
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
385 /* a7 = SRC(5)+SRC(3) + SRC(1) + (SRC(1)>>1); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
386 vec_s16 a7v = vec_add( vec_add(s5, s3), vec_add(s1, vec_sra(s1, onev)) );\
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
387 /* b1 = (a7>>2) + a1; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
388 vec_s16 b1v = vec_add( vec_sra(a7v, twov), a1v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
389 /* b3 = a3 + (a5>>2); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
390 vec_s16 b3v = vec_add(a3v, vec_sra(a5v, twov)); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
391 /* b5 = (a3>>2) - a5; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
392 vec_s16 b5v = vec_sub( vec_sra(a3v, twov), a5v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
393 /* b7 = a7 - (a1>>2); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
394 vec_s16 b7v = vec_sub( a7v, vec_sra(a1v, twov)); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
395 /* DST(0, b0 + b7); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
396 d0 = vec_add(b0v, b7v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
397 /* DST(1, b2 + b5); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
398 d1 = vec_add(b2v, b5v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
399 /* DST(2, b4 + b3); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
400 d2 = vec_add(b4v, b3v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
401 /* DST(3, b6 + b1); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
402 d3 = vec_add(b6v, b1v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
403 /* DST(4, b6 - b1); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
404 d4 = vec_sub(b6v, b1v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
405 /* DST(5, b4 - b3); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
406 d5 = vec_sub(b4v, b3v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
407 /* DST(6, b2 - b5); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
408 d6 = vec_sub(b2v, b5v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
409 /* DST(7, b0 - b7); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
410 d7 = vec_sub(b0v, b7v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
411 }
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
412
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
/*
 * Add one row of 16-bit IDCT output to the 8 pixels at dest and write the
 * clipped result back.
 *
 *   dest      address of the row of pixels (evaluated several times)
 *   idctv     vec_s16 holding the row of IDCT output, before the final >>6
 *   perm_ldv  permute vector used to realign the two loaded vectors
 *   perm_stv  permute vector used to rotate the result and the masks for
 *             the store
 *   sel       byte mask passed through perm_stv to select the bytes that
 *             are overwritten
 *
 * The expansion also uses sixv, zero_u8v and zero_s16v, which must exist
 * in the scope where the macro is expanded.
 */
#define ALTIVEC_STORE_SUM_CLIP(dest, idctv, perm_ldv, perm_stv, sel) {            \
    /* unaligned load */                                                          \
    vec_u8 ssc_hi   = vec_ld(0, dest);                                            \
    vec_u8 ssc_lo   = vec_ld(7, dest);                                            \
    vec_u8 ssc_pix  = vec_perm(ssc_hi, ssc_lo, (vec_u8)perm_ldv);                 \
    /* widen the pixels to 16 bit by interleaving with zero bytes */              \
    vec_u16 ssc_pix16 = (vec_u16)vec_mergeh(zero_u8v, ssc_pix);                   \
    /* (idctv >> 6) + pixels with saturation, then pack back to bytes */          \
    vec_s16 ssc_sum   = vec_adds(vec_sra(idctv, sixv), (vec_s16)ssc_pix16);       \
    vec_u8  ssc_sum8  = vec_packsu(zero_s16v, ssc_sum);                           \
    /* unaligned store: rotate the data and both halves of the mask */            \
    vec_u8 ssc_body    = vec_perm(ssc_sum8, ssc_sum8, perm_stv);                  \
    vec_u8 ssc_mask_lo = vec_perm(sel, zero_u8v, perm_stv);                       \
    vec_u8 ssc_mask_hi = vec_perm(zero_u8v, sel, perm_stv);                       \
    ssc_lo = vec_sel(ssc_lo, ssc_body, ssc_mask_lo);                              \
    vec_st(ssc_lo, 7, dest);                                                      \
    /* re-read offset 0 only after the store above; presumably this keeps */      \
    /* the bytes just written when both accesses hit the same vector      */      \
    ssc_hi = vec_ld(0, dest);                                                     \
    ssc_hi = vec_sel(ssc_hi, ssc_body, ssc_mask_hi);                              \
    vec_st(ssc_hi, 0, dest);                                                      \
 }
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
433
11382
50415a8f1451 PPC: move prototypes to headers and make some functions static
mru
parents: 11369
diff changeset
434 static void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
435 vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
436 vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
437 vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
438
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
439 vec_u8 perm_ldv = vec_lvsl(0, dst);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
440 vec_u8 perm_stv = vec_lvsr(8, dst);
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
441
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
442 const vec_u16 onev = vec_splat_u16(1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
443 const vec_u16 twov = vec_splat_u16(2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
444 const vec_u16 sixv = vec_splat_u16(6);
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
445
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
446 const vec_u8 sel = (vec_u8) {0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1};
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
447 LOAD_ZERO;
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
448
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
449 dct[0] += 32; // rounding for the >>6 at the end
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
450
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
451 s0 = vec_ld(0x00, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
452 s1 = vec_ld(0x10, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
453 s2 = vec_ld(0x20, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
454 s3 = vec_ld(0x30, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
455 s4 = vec_ld(0x40, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
456 s5 = vec_ld(0x50, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
457 s6 = vec_ld(0x60, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
458 s7 = vec_ld(0x70, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
459
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
460 IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7,
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
461 d0, d1, d2, d3, d4, d5, d6, d7);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
462
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
463 TRANSPOSE8( d0, d1, d2, d3, d4, d5, d6, d7 );
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
464
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
465 IDCT8_1D_ALTIVEC(d0, d1, d2, d3, d4, d5, d6, d7,
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
466 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
467
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
468 ALTIVEC_STORE_SUM_CLIP(&dst[0*stride], idct0, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
469 ALTIVEC_STORE_SUM_CLIP(&dst[1*stride], idct1, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
470 ALTIVEC_STORE_SUM_CLIP(&dst[2*stride], idct2, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
471 ALTIVEC_STORE_SUM_CLIP(&dst[3*stride], idct3, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
472 ALTIVEC_STORE_SUM_CLIP(&dst[4*stride], idct4, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
473 ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
474 ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
475 ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
476 }
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
477
8530
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
478 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size)
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
479 {
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
480 vec_s16 dc16;
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
481 vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
482 LOAD_ZERO;
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
483 DECLARE_ALIGNED(16, int, dc);
8530
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
484 int i;
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
485
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
486 dc = (block[0] + 32) >> 6;
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
487 dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
488
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
489 if (size == 4)
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
490 dc16 = vec_sld(dc16, zero_s16v, 8);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
491 dcplus = vec_packsu(dc16, zero_s16v);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
492 dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
493
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
494 aligner = vec_lvsr(0, dst);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
495 dcplus = vec_perm(dcplus, dcplus, aligner);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
496 dcminus = vec_perm(dcminus, dcminus, aligner);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
497
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
498 for (i = 0; i < size; i += 4) {
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
499 v0 = vec_ld(0, dst+0*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
500 v1 = vec_ld(0, dst+1*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
501 v2 = vec_ld(0, dst+2*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
502 v3 = vec_ld(0, dst+3*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
503
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
504 v0 = vec_adds(v0, dcplus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
505 v1 = vec_adds(v1, dcplus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
506 v2 = vec_adds(v2, dcplus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
507 v3 = vec_adds(v3, dcplus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
508
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
509 v0 = vec_subs(v0, dcminus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
510 v1 = vec_subs(v1, dcminus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
511 v2 = vec_subs(v2, dcminus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
512 v3 = vec_subs(v3, dcminus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
513
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
514 vec_st(v0, 0, dst+0*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
515 vec_st(v1, 0, dst+1*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
516 vec_st(v2, 0, dst+2*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
517 vec_st(v3, 0, dst+3*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
518
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
519 dst += 4*stride;
8461
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
520 }
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
521 }
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
522
8530
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
523 static void h264_idct_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
524 {
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
525 h264_idct_dc_add_internal(dst, block, stride, 4);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
526 }
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
527
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
528 static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
529 {
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
530 h264_idct_dc_add_internal(dst, block, stride, 8);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
531 }
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
532
8544
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
533 static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
534 int i;
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
535 for(i=0; i<16; i++){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
536 int nnz = nnzc[ scan8[i] ];
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
537 if(nnz){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
538 if(nnz==1 && block[i*16]) h264_idct_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
539 else ff_h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
540 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
541 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
542 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
543
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
544 static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
545 int i;
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
546 for(i=0; i<16; i++){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
547 if(nnzc[ scan8[i] ]) ff_h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
548 else if(block[i*16]) h264_idct_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
549 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
550 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
551
8461
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
552 static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
553 int i;
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
554 for(i=0; i<16; i+=4){
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
555 int nnz = nnzc[ scan8[i] ];
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
556 if(nnz){
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
557 if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
558 else ff_h264_idct8_add_altivec (dst + block_offset[i], block + i*16, stride);
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
559 }
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
560 }
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
561 }
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
562
8544
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
563 static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
564 int i;
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
565 for(i=16; i<16+8; i++){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
566 if(nnzc[ scan8[i] ])
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
567 ff_h264_idct_add_altivec(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
568 else if(block[i*16])
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
569 h264_idct_dc_add_altivec(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
570 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
571 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
572
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
/*
 * In-place byte transpose of four vec_u8 registers, done as two rounds of
 * vec_mergeh/vec_mergel.
 *
 * Afterwards r0 holds byte 0 of the original r0, r1, r2, r3, then byte 1
 * of each, and so on; r1, r2 and r3 continue the same sequence.
 * All four arguments are both read and overwritten, so they must be
 * plain vec_u8 variables.
 */
#define transpose4x16(r0, r1, r2, r3) {                                     \
    /* first round: interleave r0 with r2 and r1 with r3 */                 \
    register vec_u8 tp_02_hi = vec_mergeh(r0, r2);                          \
    register vec_u8 tp_02_lo = vec_mergel(r0, r2);                          \
    register vec_u8 tp_13_hi = vec_mergeh(r1, r3);                          \
    register vec_u8 tp_13_lo = vec_mergel(r1, r3);                          \
                                                                            \
    /* second round: interleave the pairs to get groups of four bytes */    \
    r0 = vec_mergeh(tp_02_hi, tp_13_hi);                                    \
    r1 = vec_mergel(tp_02_hi, tp_13_hi);                                    \
    r2 = vec_mergeh(tp_02_lo, tp_13_lo);                                    \
    r3 = vec_mergel(tp_02_lo, tp_13_lo);                                    \
}
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
589
5135
724e7fad19d9 cosmetics
gpoirier
parents: 5134
diff changeset
590 static inline void write16x4(uint8_t *dst, int dst_stride,
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
591 register vec_u8 r0, register vec_u8 r1,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
592 register vec_u8 r2, register vec_u8 r3) {
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
593 DECLARE_ALIGNED(16, unsigned char, result)[64];
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
594 uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
595 int int_dst_stride = dst_stride/4;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
596
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
597 vec_st(r0, 0, result);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
598 vec_st(r1, 16, result);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
599 vec_st(r2, 32, result);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
600 vec_st(r3, 48, result);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
601 /* FIXME: there has to be a better way!!!! */
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
602 *dst_int = *src_int;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
603 *(dst_int+ int_dst_stride) = *(src_int + 1);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
604 *(dst_int+ 2*int_dst_stride) = *(src_int + 2);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
605 *(dst_int+ 3*int_dst_stride) = *(src_int + 3);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
606 *(dst_int+ 4*int_dst_stride) = *(src_int + 4);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
607 *(dst_int+ 5*int_dst_stride) = *(src_int + 5);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
608 *(dst_int+ 6*int_dst_stride) = *(src_int + 6);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
609 *(dst_int+ 7*int_dst_stride) = *(src_int + 7);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
610 *(dst_int+ 8*int_dst_stride) = *(src_int + 8);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
611 *(dst_int+ 9*int_dst_stride) = *(src_int + 9);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
612 *(dst_int+10*int_dst_stride) = *(src_int + 10);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
613 *(dst_int+11*int_dst_stride) = *(src_int + 11);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
614 *(dst_int+12*int_dst_stride) = *(src_int + 12);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
615 *(dst_int+13*int_dst_stride) = *(src_int + 13);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
616 *(dst_int+14*int_dst_stride) = *(src_int + 14);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
617 *(dst_int+15*int_dst_stride) = *(src_int + 15);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
618 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
619
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
/** \brief reads 16 rows starting at src and transposes their first six
 *  byte columns into r8..r13
 *
 *  Row i is fetched with unaligned_load(i*src_stride, src). On exit r8 holds
 *  byte 0 of rows 0..15 (in row order), r9 byte 1, and so on up to r13 which
 *  holds byte 5. Only the high halves of the loaded rows take part in the
 *  merges, and the vectors for columns 6 and 7 are never assembled.
 *
 *  r8..r13 must be vec_u8 lvalues. The names r0..r7, r14 and r15 are declared
 *  inside the macro body and therefore must not be passed as arguments.
 *
 *  \todo FIXME: see if we can't spare some vec_lvsl() by factorizing them
 *  out of unaligned_load() */
#define readAndTranspose16x6(src, src_stride, r8, r9, r10, r11, r12, r13) {\
    register vec_u8 r0  = unaligned_load(0,                 (src));        \
    register vec_u8 r1  = unaligned_load(    (src_stride),  (src));        \
    register vec_u8 r2  = unaligned_load( 2*(src_stride),   (src));        \
    register vec_u8 r3  = unaligned_load( 3*(src_stride),   (src));        \
    register vec_u8 r4  = unaligned_load( 4*(src_stride),   (src));        \
    register vec_u8 r5  = unaligned_load( 5*(src_stride),   (src));        \
    register vec_u8 r6  = unaligned_load( 6*(src_stride),   (src));        \
    register vec_u8 r7  = unaligned_load( 7*(src_stride),   (src));        \
    register vec_u8 r14 = unaligned_load(14*(src_stride),   (src));        \
    register vec_u8 r15 = unaligned_load(15*(src_stride),   (src));        \
                                                                           \
    r8  = unaligned_load( 8*(src_stride), (src));                          \
    r9  = unaligned_load( 9*(src_stride), (src));                          \
    r10 = unaligned_load(10*(src_stride), (src));                          \
    r11 = unaligned_load(11*(src_stride), (src));                          \
    r12 = unaligned_load(12*(src_stride), (src));                          \
    r13 = unaligned_load(13*(src_stride), (src));                          \
                                                                           \
    /* Stage 1: interleave row i with row i+8 (first 8 bytes of each) */   \
    r0 = vec_mergeh(r0, r8);   /* rows 0, 8 */                             \
    r1 = vec_mergeh(r1, r9);   /* rows 1, 9 */                             \
    r2 = vec_mergeh(r2, r10);  /* rows 2,10 */                             \
    r3 = vec_mergeh(r3, r11);  /* rows 3,11 */                             \
    r4 = vec_mergeh(r4, r12);  /* rows 4,12 */                             \
    r5 = vec_mergeh(r5, r13);  /* rows 5,13 */                             \
    r6 = vec_mergeh(r6, r14);  /* rows 6,14 */                             \
    r7 = vec_mergeh(r7, r15);  /* rows 7,15 */                             \
                                                                           \
    /* Stage 2: interleave pairs that are 4 rows apart */                  \
    r8  = vec_mergeh(r0, r4);  /* rows 0,4, 8,12, columns 0-3 */           \
    r9  = vec_mergel(r0, r4);  /* rows 0,4, 8,12, columns 4-7 */           \
    r10 = vec_mergeh(r1, r5);  /* rows 1,5, 9,13, columns 0-3 */           \
    r11 = vec_mergel(r1, r5);  /* rows 1,5, 9,13, columns 4-7 */           \
    r12 = vec_mergeh(r2, r6);  /* rows 2,6,10,14, columns 0-3 */           \
    r13 = vec_mergel(r2, r6);  /* rows 2,6,10,14, columns 4-7 */           \
    r14 = vec_mergeh(r3, r7);  /* rows 3,7,11,15, columns 0-3 */           \
    r15 = vec_mergel(r3, r7);  /* rows 3,7,11,15, columns 4-7 */           \
                                                                           \
    /* Stage 3: gather all even rows, then all odd rows */                 \
    r0 = vec_mergeh(r8,  r12); /* even rows, columns 0-1 */                \
    r1 = vec_mergel(r8,  r12); /* even rows, columns 2-3 */                \
    r2 = vec_mergeh(r9,  r13); /* even rows, columns 4-5 */                \
    r4 = vec_mergeh(r10, r14); /* odd rows,  columns 0-1 */                \
    r5 = vec_mergel(r10, r14); /* odd rows,  columns 2-3 */                \
    r6 = vec_mergeh(r11, r15); /* odd rows,  columns 4-5 */                \
    /* r3 and r7 (columns 6-7) are not needed */                           \
                                                                           \
    /* Stage 4: interleave even and odd rows, one column per vector */     \
    r8  = vec_mergeh(r0, r4);  /* column 0 */                              \
    r9  = vec_mergel(r0, r4);  /* column 1 */                              \
    r10 = vec_mergeh(r1, r5);  /* column 2 */                              \
    r11 = vec_mergel(r1, r5);  /* column 3 */                              \
    r12 = vec_mergeh(r2, r6);  /* column 4 */                              \
    r13 = vec_mergel(r2, r6);  /* column 5 */                              \
    /* r14 and r15 (columns 6-7) are not needed */                         \
                                                                           \
}
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
681
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
682 // out: o = |x-y| < a
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
683 static inline vec_u8 diff_lt_altivec ( register vec_u8 x,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
684 register vec_u8 y,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
685 register vec_u8 a) {
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
686
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
687 register vec_u8 diff = vec_subs(x, y);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
688 register vec_u8 diffneg = vec_subs(y, x);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
689 register vec_u8 o = vec_or(diff, diffneg); /* |x-y| */
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
690 o = (vec_u8)vec_cmplt(o, a);
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
691 return o;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
692 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
693
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
694 static inline vec_u8 h264_deblock_mask ( register vec_u8 p0,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
695 register vec_u8 p1,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
696 register vec_u8 q0,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
697 register vec_u8 q1,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
698 register vec_u8 alpha,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
699 register vec_u8 beta) {
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
700
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
701 register vec_u8 mask;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
702 register vec_u8 tempmask;
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
703
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
704 mask = diff_lt_altivec(p0, q0, alpha);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
705 tempmask = diff_lt_altivec(p1, p0, beta);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
706 mask = vec_and(mask, tempmask);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
707 tempmask = diff_lt_altivec(q1, q0, beta);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
708 mask = vec_and(mask, tempmask);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
709
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
710 return mask;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
711 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
712
5165
c99fa49eaa80 part 2/2 of fixing Altivec-accelerated H264 luma inloop filter
gpoirier
parents: 5164
diff changeset
713 // out: newp1 = clip((p2 + ((p0 + q0 + 1) >> 1)) >> 1, p1-tc0, p1+tc0)
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
714 static inline vec_u8 h264_deblock_q1(register vec_u8 p0,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
715 register vec_u8 p1,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
716 register vec_u8 p2,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
717 register vec_u8 q0,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
718 register vec_u8 tc0) {
5164
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
719
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
720 register vec_u8 average = vec_avg(p0, q0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
721 register vec_u8 temp;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
722 register vec_u8 uncliped;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
723 register vec_u8 ones;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
724 register vec_u8 max;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
725 register vec_u8 min;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
726 register vec_u8 newp1;
5164
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
727
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
728 temp = vec_xor(average, p2);
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
729 average = vec_avg(average, p2); /*avg(p2, avg(p0, q0)) */
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
730 ones = vec_splat_u8(1);
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
731 temp = vec_and(temp, ones); /*(p2^avg(p0, q0)) & 1 */
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
732 uncliped = vec_subs(average, temp); /*(p2+((p0+q0+1)>>1))>>1 */
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
733 max = vec_adds(p1, tc0);
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
734 min = vec_subs(p1, tc0);
5165
c99fa49eaa80 part 2/2 of fixing Altivec-accelerated H264 luma inloop filter
gpoirier
parents: 5164
diff changeset
735 newp1 = vec_max(min, uncliped);
c99fa49eaa80 part 2/2 of fixing Altivec-accelerated H264 luma inloop filter
gpoirier
parents: 5164
diff changeset
736 newp1 = vec_min(max, newp1);
c99fa49eaa80 part 2/2 of fixing Altivec-accelerated H264 luma inloop filter
gpoirier
parents: 5164
diff changeset
737 return newp1;
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
738 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
739
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
/**
 * Filters the two samples adjacent to the edge, in place.
 *
 * With d = ((q0 - p0) + ((p1 - q1) >> 2) + 1) >> 1, the macro clamps d to
 * [-tc0masked, tc0masked] and applies p0 += d, q0 -= d with saturation.
 * Everything is done with unsigned byte operations: d is carried with a bias
 * of 160 and split into its positive and negative parts by two saturating
 * subtractions.
 *
 * p0 and q0 must be vec_u8 lvalues; p1, q1 and tc0masked are only read
 * (tc0masked is evaluated twice).
 */
#define h264_deblock_p0_q0(p0, p1, q0, q1, tc0masked) {                           \
                                                                                  \
    const vec_u8 bias160 = vec_sl(vec_splat_u8(10), vec_splat_u8(4)); /* 10<<4 */ \
                                                                                  \
    /* (q0 - p0) & 1 */                                                           \
    register vec_u8 lsb    = vec_and(vec_xor(p0, q0), vec_splat_u8(1));           \
    /* avg(p1, 255 - q1) >> 1 = (p1 - q1 + 256) >> 2 = 64 + ((p1 - q1) >> 2) */   \
    register vec_u8 outer  = vec_sr(vec_avg(p1, vec_nor(q1, q1)),                 \
                                    vec_splat_u8(1));                             \
    /* avg(q0, 255 - p0) = (q0 - p0 + 256) >> 1 = 128 + ((q0 - p0) >> 1) */       \
    register vec_u8 inner  = vec_avg(q0, vec_nor(p0, p0));                        \
    /* 32 + (((q0 - p0) & 1) + ((p1 - q1) >> 2) + 1) >> 1, plus inner:            \
     * 160 + (((q0 - p0) + ((p1 - q1) >> 2) + 1) >> 1) = 160 + d */               \
    register vec_u8 biased = vec_adds(vec_avg(outer, lsb), inner);                \
    /* max(-d, 0) and max(d, 0), each limited to tc0masked */                     \
    register vec_u8 dneg   = vec_min(tc0masked, vec_subs(bias160, biased));       \
    register vec_u8 dpos   = vec_min(tc0masked, vec_subs(biased, bias160));       \
                                                                                  \
    p0 = vec_subs(p0, dneg);                                                      \
    q0 = vec_subs(q0, dpos);                                                      \
    p0 = vec_adds(p0, dpos);                                                      \
    q0 = vec_adds(q0, dneg);                                                      \
}
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
771
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
/**
 * Luma loop filter for one 16-sample edge held in six vectors.
 *
 * p2..p0 are the three samples on one side of the edge and q0..q2 the three
 * on the other side, one edge position per byte. p1, p0, q0 and q1 are
 * updated in place and must be vec_u8 lvalues; p2 and q2 are only read.
 *
 * alpha and beta are stored as single bytes and splatted across a vector.
 * tc0 must point to four signed bytes; each is replicated four times, so
 * tc0[i] applies to edge positions 4*i .. 4*i+3. Positions whose tc0 is
 * negative, or which fail the alpha/beta tests, are left unfiltered.
 *
 * The masks are byte vectors of 0x00/0xFF, so "finaltc0 - mask" adds 1 to
 * the lanes selected by the mask.
 */
#define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) {            \
    DECLARE_ALIGNED(16, unsigned char, temp)[16];                                             \
    register vec_u8 alphavec;                                                                 \
    register vec_u8 betavec;                                                                  \
    register vec_u8 mask;                                                                     \
    register vec_u8 p1mask;                                                                   \
    register vec_u8 q1mask;                                                                   \
    register vector signed char tc0vec;                                                       \
    register vec_u8 finaltc0;                                                                 \
    register vec_u8 tc0masked;                                                                \
    register vec_u8 newp1;                                                                    \
    register vec_u8 newq1;                                                                    \
                                                                                              \
    temp[0] = (alpha);                                                                        \
    temp[1] = (beta);                                                                         \
    alphavec = vec_ld(0, temp);                                                               \
    betavec  = vec_splat(alphavec, 0x1);                                                      \
    alphavec = vec_splat(alphavec, 0x0);                                                      \
    mask     = h264_deblock_mask(p0, p1, q0, q1, alphavec, betavec); /* if in block */        \
                                                                                              \
    /* Copy the four tc0 values bytewise: tc0 is a byte array, so reading it                  \
     * through an int pointer would need 4-byte alignment it does not promise. */             \
    temp[0] = (tc0)[0];                                                                       \
    temp[1] = (tc0)[1];                                                                       \
    temp[2] = (tc0)[2];                                                                       \
    temp[3] = (tc0)[3];                                                                       \
    tc0vec = vec_ld(0, (signed char*)temp);                                                   \
    tc0vec = vec_mergeh(tc0vec, tc0vec);   /* each value twice */                             \
    tc0vec = vec_mergeh(tc0vec, tc0vec);   /* each value four times */                        \
    mask = vec_and(mask, vec_cmpgt(tc0vec, vec_splat_s8(-1)));  /* if tc0[i] >= 0 */          \
    finaltc0 = vec_and((vec_u8)tc0vec, mask);     /* tc = tc0 */                              \
                                                                                              \
    p1mask = diff_lt_altivec(p2, p0, betavec);                                                \
    p1mask = vec_and(p1mask, mask);                             /* if ( |p2 - p0| < beta) */  \
    tc0masked = vec_and(p1mask, (vec_u8)tc0vec);                                              \
    finaltc0 = vec_sub(finaltc0, p1mask);                       /* tc++ */                    \
    newp1 = h264_deblock_q1(p0, p1, p2, q0, tc0masked);                                       \
    /*end if*/                                                                                \
                                                                                              \
    q1mask = diff_lt_altivec(q2, q0, betavec);                                                \
    q1mask = vec_and(q1mask, mask);                             /* if ( |q2 - q0| < beta ) */ \
    tc0masked = vec_and(q1mask, (vec_u8)tc0vec);                                              \
    finaltc0 = vec_sub(finaltc0, q1mask);                       /* tc++ */                    \
    newq1 = h264_deblock_q1(p0, q1, q2, q0, tc0masked);                                       \
    /*end if*/                                                                                \
                                                                                              \
    /* p0/q0 are filtered last: newp1/newq1 above need their unfiltered values */             \
    h264_deblock_p0_q0(p0, p1, q0, q1, finaltc0);                                             \
    p1 = newp1;                                                                               \
    q1 = newq1;                                                                               \
}
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
817
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
818 static void h264_v_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) {
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
819
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
820 if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) >= 0) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
821 register vec_u8 p2 = vec_ld(-3*stride, pix);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
822 register vec_u8 p1 = vec_ld(-2*stride, pix);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
823 register vec_u8 p0 = vec_ld(-1*stride, pix);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
824 register vec_u8 q0 = vec_ld(0, pix);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
825 register vec_u8 q1 = vec_ld(stride, pix);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
826 register vec_u8 q2 = vec_ld(2*stride, pix);
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
827 h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
828 vec_st(p1, -2*stride, pix);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
829 vec_st(p0, -1*stride, pix);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
830 vec_st(q0, 0, pix);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
831 vec_st(q1, stride, pix);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
832 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
833 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
834
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
835 static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) {
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
836
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
837 register vec_u8 line0, line1, line2, line3, line4, line5;
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
838 if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) < 0)
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
839 return;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
840 readAndTranspose16x6(pix-3, stride, line0, line1, line2, line3, line4, line5);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
841 h264_loop_filter_luma_altivec(line0, line1, line2, line3, line4, line5, alpha, beta, tc0);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
842 transpose4x16(line1, line2, line3, line4);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
843 write16x4(pix-2, stride, line1, line2, line3, line4);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
844 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
845
8541
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
846 static av_always_inline
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
847 void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h)
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
848 {
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
849 int y, aligned;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
850 vec_u8 vblock;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
851 vec_s16 vtemp, vweight, voffset, v0, v1;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
852 vec_u16 vlog2_denom;
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
853 DECLARE_ALIGNED(16, int32_t, temp)[4];
8541
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
854 LOAD_ZERO;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
855
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
856 offset <<= log2_denom;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
857 if(log2_denom) offset += 1<<(log2_denom-1);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
858 temp[0] = log2_denom;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
859 temp[1] = weight;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
860 temp[2] = offset;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
861
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
862 vtemp = (vec_s16)vec_ld(0, temp);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
863 vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
864 vweight = vec_splat(vtemp, 3);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
865 voffset = vec_splat(vtemp, 5);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
866 aligned = !((unsigned long)block & 0xf);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
867
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
868 for (y=0; y<h; y++) {
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
869 vblock = vec_ld(0, block);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
870
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
871 v0 = (vec_s16)vec_mergeh(zero_u8v, vblock);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
872 v1 = (vec_s16)vec_mergel(zero_u8v, vblock);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
873
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
874 if (w == 16 || aligned) {
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
875 v0 = vec_mladd(v0, vweight, zero_s16v);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
876 v0 = vec_adds(v0, voffset);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
877 v0 = vec_sra(v0, vlog2_denom);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
878 }
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
879 if (w == 16 || !aligned) {
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
880 v1 = vec_mladd(v1, vweight, zero_s16v);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
881 v1 = vec_adds(v1, voffset);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
882 v1 = vec_sra(v1, vlog2_denom);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
883 }
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
884 vblock = vec_packsu(v0, v1);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
885 vec_st(vblock, 0, block);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
886
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
887 block += stride;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
888 }
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
889 }
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
890
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
891 static av_always_inline
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
892 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
893 int weightd, int weights, int offset, int w, int h)
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
894 {
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
895 int y, dst_aligned, src_aligned;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
896 vec_u8 vsrc, vdst;
8535
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
897 vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
898 vec_u16 vlog2_denom;
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
899 DECLARE_ALIGNED(16, int32_t, temp)[4];
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
900 LOAD_ZERO;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
901
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
902 offset = ((offset + 1) | 1) << log2_denom;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
903 temp[0] = log2_denom+1;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
904 temp[1] = weights;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
905 temp[2] = weightd;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
906 temp[3] = offset;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
907
8536
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
908 vtemp = (vec_s16)vec_ld(0, temp);
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
909 vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
910 vweights = vec_splat(vtemp, 3);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
911 vweightd = vec_splat(vtemp, 5);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
912 voffset = vec_splat(vtemp, 7);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
913 dst_aligned = !((unsigned long)dst & 0xf);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
914 src_aligned = !((unsigned long)src & 0xf);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
915
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
916 for (y=0; y<h; y++) {
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
917 vdst = vec_ld(0, dst);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
918 vsrc = vec_ld(0, src);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
919
8536
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
920 v0 = (vec_s16)vec_mergeh(zero_u8v, vdst);
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
921 v1 = (vec_s16)vec_mergel(zero_u8v, vdst);
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
922 v2 = (vec_s16)vec_mergeh(zero_u8v, vsrc);
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
923 v3 = (vec_s16)vec_mergel(zero_u8v, vsrc);
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
924
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
925 if (w == 8) {
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
926 if (src_aligned)
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
927 v3 = v2;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
928 else
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
929 v2 = v3;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
930 }
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
931
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
932 if (w == 16 || dst_aligned) {
8535
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
933 v0 = vec_mladd(v0, vweightd, zero_s16v);
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
934 v2 = vec_mladd(v2, vweights, zero_s16v);
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
935
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
936 v0 = vec_adds(v0, voffset);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
937 v0 = vec_adds(v0, v2);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
938 v0 = vec_sra(v0, vlog2_denom);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
939 }
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
940 if (w == 16 || !dst_aligned) {
8535
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
941 v1 = vec_mladd(v1, vweightd, zero_s16v);
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
942 v3 = vec_mladd(v3, vweights, zero_s16v);
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
943
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
944 v1 = vec_adds(v1, voffset);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
945 v1 = vec_adds(v1, v3);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
946 v1 = vec_sra(v1, vlog2_denom);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
947 }
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
948 vdst = vec_packsu(v0, v1);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
949 vec_st(vdst, 0, dst);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
950
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
951 dst += stride;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
952 src += stride;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
953 }
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
954 }
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
955
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
/*
 * Generate the fixed-size entry points for one W x H block size:
 *   ff_weight_h264_pixelsWxH_altivec   -> weight_h264_WxH_altivec
 *   ff_biweight_h264_pixelsWxH_altivec -> biweight_h264_WxH_altivec
 * Each wrapper forwards its arguments and appends W and H as constants.
 */
#define H264_WEIGHT(W,H) \
static void ff_weight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *block, int stride, \
                                                             int log2_denom, int weight, \
                                                             int offset) \
{ \
    weight_h264_WxH_altivec(block, stride, log2_denom, weight, offset, W, H); \
} \
static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *dst, uint8_t *src, \
                                                               int stride, int log2_denom, \
                                                               int weightd, int weights, \
                                                               int offset) \
{ \
    biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
}

/* Block sizes that get a wrapper pair. */
H264_WEIGHT(16,16)
H264_WEIGHT(16, 8)
H264_WEIGHT( 8,16)
H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
969
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
970 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
971
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
972 if (has_altivec()) {
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
973 c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
974 c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
9439
ef3a7b711cc0 Rename put_no_rnd_h264_chroma* to reflect its usage in VC1 only
conrad
parents: 9421
diff changeset
975 c->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_altivec;
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9439
diff changeset
976 c->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_altivec;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
977
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
978 #define dspfunc(PFX, IDX, NUM) \
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
979 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
980 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
981 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
982 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
983 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
984 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
985 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
986 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
987 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
988 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
989 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
990 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
991 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
992 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
993 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
994 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
995
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
996 dspfunc(put_h264_qpel, 0, 16);
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
997 dspfunc(avg_h264_qpel, 0, 16);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
998 #undef dspfunc
11499
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
999 }
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1000 }
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1001
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1002 void ff_h264dsp_init_ppc(H264DSPContext *c)
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1003 {
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1004 if (has_altivec()) {
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1005 c->h264_idct_add = ff_h264_idct_add_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1006 c->h264_idct_add8 = ff_h264_idct_add8_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1007 c->h264_idct_add16 = ff_h264_idct_add16_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1008 c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1009 c->h264_idct_dc_add= h264_idct_dc_add_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1010 c->h264_idct8_dc_add = ff_h264_idct8_dc_add_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1011 c->h264_idct8_add = ff_h264_idct8_add_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1012 c->h264_idct8_add4 = ff_h264_idct8_add4_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1013 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1014 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec;
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1015
8541
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
1016 c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
1017 c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
1018 c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
1019 c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
1020 c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1021 c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1022 c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1023 c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1024 c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1025 c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
1026 }
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
1027 }