annotate ppc/h264_altivec.c @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 9fef0a8ddd63
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
1 /*
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
3 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
4 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
5 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
10 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
14 * Lesser General Public License for more details.
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
15 *
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3667
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
19 */
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
20
12475
9fef0a8ddd63 Move mm_support() from libavcodec to libavutil, make it a public
stefano
parents: 12473
diff changeset
21 #include "libavutil/cpu.h"
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 6077
diff changeset
22 #include "libavcodec/dsputil.h"
8461
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
23 #include "libavcodec/h264data.h"
11499
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
24 #include "libavcodec/h264dsp.h"
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
25
6077
8baa533764d4 Add necessary #include, fixes the warnings:
diego
parents: 5750
diff changeset
26 #include "dsputil_altivec.h"
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents: 5586
diff changeset
27 #include "util_altivec.h"
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
28 #include "types_altivec.h"
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
29
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
30 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
31 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
32
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
33 #define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
34 #define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9439
diff changeset
35 #define PREFIX_no_rnd_vc1_chroma_mc8_altivec put_no_rnd_vc1_chroma_mc8_altivec
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
36 #define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
37 #define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
38 #define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
39 #define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
40 #define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
41 #define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
42 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
3577
5be5a936c8a9 Clean up:make dsputil subfile names consistent
lu_zero
parents: 3544
diff changeset
43 #include "h264_template_altivec.c"
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
44 #undef OP_U8_ALTIVEC
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
45 #undef PREFIX_h264_chroma_mc8_altivec
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9439
diff changeset
46 #undef PREFIX_no_rnd_vc1_chroma_mc8_altivec
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
47 #undef PREFIX_h264_chroma_mc8_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
48 #undef PREFIX_h264_qpel16_h_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
49 #undef PREFIX_h264_qpel16_h_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
50 #undef PREFIX_h264_qpel16_v_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
51 #undef PREFIX_h264_qpel16_v_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
52 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
53 #undef PREFIX_h264_qpel16_hv_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
54
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
55 #define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
56 #define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9439
diff changeset
57 #define PREFIX_no_rnd_vc1_chroma_mc8_altivec avg_no_rnd_vc1_chroma_mc8_altivec
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
58 #define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
59 #define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
60 #define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
61 #define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
62 #define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
63 #define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
64 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
3577
5be5a936c8a9 Clean up:make dsputil subfile names consistent
lu_zero
parents: 3544
diff changeset
65 #include "h264_template_altivec.c"
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
66 #undef OP_U8_ALTIVEC
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
67 #undef PREFIX_h264_chroma_mc8_altivec
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9439
diff changeset
68 #undef PREFIX_no_rnd_vc1_chroma_mc8_altivec
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
69 #undef PREFIX_h264_chroma_mc8_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
70 #undef PREFIX_h264_qpel16_h_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
71 #undef PREFIX_h264_qpel16_h_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
72 #undef PREFIX_h264_qpel16_v_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
73 #undef PREFIX_h264_qpel16_v_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
74 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
75 #undef PREFIX_h264_qpel16_hv_lowpass_num
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
76
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
77 #define H264_MC(OPNAME, SIZE, CODETYPE) \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
78 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
79 OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
80 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
81 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
82 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
83 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
84 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
85 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
86 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
87 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
88 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
89 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
90 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
91 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
92 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
93 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
94 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
95 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
96 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
97 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
98 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
99 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
100 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
101 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
102 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
103 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
104 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
105 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
106 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
107 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
108 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
109 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
110 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
111 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
112 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
113 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
114 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
115 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
116 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
117 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
118 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
119 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
120 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
121 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
122 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
123 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
124 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
125 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
126 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
127 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
128 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
129 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
130 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
131 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
132 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
133 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
134 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
135 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
136 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
137 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
138 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
139 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
140 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
141 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
142 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
143 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
144 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
145 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
146 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
147 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
148 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
149 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
150 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
151 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
152 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
153 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
154 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
155 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
156 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
157 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
158 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
159 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
160 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
161 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
162 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
163 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
164 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
165 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
166 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
167 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
168 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
169 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
170 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
171 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
172 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
173 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
174 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
175 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
176 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
177 \
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
178 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
179 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
180 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
181 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
182 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
183 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
184 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
185 }\
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
186
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/*
 * Store into dst the per-byte vec_avg() of two 16-byte-wide sources, row by row.
 *
 * dst         destination; advanced by dst_stride bytes after each row. It may be
 *             unaligned: each row is written by read-modify-write of the two
 *             aligned vectors that cover dst[0..15].
 * src1        first source; row i is read at byte offset i * src_stride1 and may
 *             be unaligned.
 * src2        second source; row i is read at byte offset i * 16, i.e. its rows
 *             are packed 16 bytes apart.
 * dst_stride  byte step between destination rows.
 * src_stride1 byte step between src1 rows.
 * h           number of rows to process.
 */
187 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
188 const uint8_t * src2, int dst_stride,
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
189 int src_stride1, int h)
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
190 {
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
191 int i;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
192 vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
193
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* src2 rows are 16 bytes apart, so its misalignment is identical for every
 * row: the permute mask is computed once, outside the loop. */
194 mask_ = vec_lvsl(0, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
195
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
196 for (i = 0; i < h; i++) {
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
197
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* a = 16 bytes of src1 row i; two aligned loads plus a permute handle any
 * misalignment, and the mask is recomputed per row. */
198 tmp1 = vec_ld(i * src_stride1, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
199 mask = vec_lvsl(i * src_stride1, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
200 tmp2 = vec_ld(i * src_stride1 + 15, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
201
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
202 a = vec_perm(tmp1, tmp2, mask);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
203
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* b = 16 bytes of src2 row i, realigned with the precomputed mask_. */
204 tmp1 = vec_ld(i * 16, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
205 tmp2 = vec_ld(i * 16 + 15, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
206
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
207 b = vec_perm(tmp1, tmp2, mask_);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
208
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* Fetch the two aligned vectors that cover dst[0..15]. */
209 tmp1 = vec_ld(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
210 mask = vec_lvsl(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
211 tmp2 = vec_ld(15, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
212
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
213 d = vec_avg(a, b);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
214
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* edges = the bytes of those two vectors that lie outside dst[0..15]; they
 * have to be written back unchanged. */
215 edges = vec_perm(tmp2, tmp1, mask);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
216
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
217 align = vec_lvsr(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
218
3659
dd55fb216497 Proper fix for the corner case that would have been corrected before, praise&blame to me and exg in equal shares
lu_zero
parents: 3658
diff changeset
/* Rebuild the two aligned vectors: result bytes in the window, preserved
 * edge bytes around it. */
219 tmp2 = vec_perm(d, edges, align);
3658
2a113750d778 Revert previous commit
lu_zero
parents: 3583
diff changeset
220 tmp1 = vec_perm(edges, d, align);
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
221
3659
dd55fb216497 Proper fix for the corner case that would have been corrected before, praise&blame to me and exg in equal shares
lu_zero
parents: 3658
diff changeset
/* Store order matters: when dst is 16-byte aligned both stores address the
 * same vector (vec_st ignores the low address bits) and tmp1 alone carries
 * d, so the store at offset 0 must be the last one. */
222 vec_st(tmp2, 15, dst);
3583
562758eaf7bf 10l, thanks to Emanuele Giaquinta <exg@gentoo.org> for testing and finding the issue
lu_zero
parents: 3577
diff changeset
223 vec_st(tmp1, 0 , dst);
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
224
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
225 dst += dst_stride;
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
226 }
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
227 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
228
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/*
 * Blend into dst the per-byte vec_avg() of two 16-byte-wide sources, row by row:
 * each output row is vec_avg(current dst row, vec_avg(src1 row, src2 row)).
 *
 * dst         destination, read and written; advanced by dst_stride bytes after
 *             each row. It may be unaligned: each row is written by
 *             read-modify-write of the two aligned vectors covering dst[0..15].
 * src1        first source; row i is read at byte offset i * src_stride1 and may
 *             be unaligned.
 * src2        second source; row i is read at byte offset i * 16, i.e. its rows
 *             are packed 16 bytes apart.
 * dst_stride  byte step between destination rows.
 * src_stride1 byte step between src1 rows.
 * h           number of rows to process.
 */
229 static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
230 const uint8_t * src2, int dst_stride,
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
231 int src_stride1, int h)
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
232 {
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
233 int i;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
234 vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
235
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* src2 rows are 16 bytes apart, so its misalignment is identical for every
 * row: the permute mask is computed once, outside the loop. */
236 mask_ = vec_lvsl(0, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
237
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
238 for (i = 0; i < h; i++) {
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
239
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* a = 16 bytes of src1 row i; two aligned loads plus a permute handle any
 * misalignment, and the mask is recomputed per row. */
240 tmp1 = vec_ld(i * src_stride1, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
241 mask = vec_lvsl(i * src_stride1, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
242 tmp2 = vec_ld(i * src_stride1 + 15, src1);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
243
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
244 a = vec_perm(tmp1, tmp2, mask);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
245
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* b = 16 bytes of src2 row i, realigned with the precomputed mask_. */
246 tmp1 = vec_ld(i * 16, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
247 tmp2 = vec_ld(i * 16 + 15, src2);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
248
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
249 b = vec_perm(tmp1, tmp2, mask_);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
250
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* Fetch the two aligned vectors that cover dst[0..15]. */
251 tmp1 = vec_ld(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
252 mask = vec_lvsl(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
253 tmp2 = vec_ld(15, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
254
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* Extract the current dst[0..15] and average it with the averaged sources. */
255 d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
256
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
/* edges = the bytes of those two vectors that lie outside dst[0..15]; they
 * have to be written back unchanged. */
257 edges = vec_perm(tmp2, tmp1, mask);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
258
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
259 align = vec_lvsr(0, dst);
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
260
3659
dd55fb216497 Proper fix for the corner case that would have been corrected before, praise&blame to me and exg in equal shares
lu_zero
parents: 3658
diff changeset
/* Rebuild the two aligned vectors: result bytes in the window, preserved
 * edge bytes around it. */
261 tmp2 = vec_perm(d, edges, align);
3658
2a113750d778 Revert previous commit
lu_zero
parents: 3583
diff changeset
262 tmp1 = vec_perm(edges, d, align);
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
263
3659
dd55fb216497 Proper fix for the corner case that would have been corrected before, praise&blame to me and exg in equal shares
lu_zero
parents: 3658
diff changeset
/* Store order matters: when dst is 16-byte aligned both stores address the
 * same vector (vec_st ignores the low address bits) and tmp1 alone carries
 * d, so the store at offset 0 must be the last one. */
264 vec_st(tmp2, 15, dst);
3583
562758eaf7bf 10l, thanks to Emanuele Giaquinta <exg@gentoo.org> for testing and finding the issue
lu_zero
parents: 3577
diff changeset
265 vec_st(tmp1, 0 , dst);
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
266
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
267 dst += dst_stride;
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
268 }
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
269 }
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
270
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
271 /* Implemented but could be faster
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
272 #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
273 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
3337
bec1eb6d3746 put_pixels16_l2_altivec and avg_pixels16_l2_altivec
lu_zero
parents: 3089
diff changeset
274 */
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
275
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
/* Expand the H264_MC macro twice, for SIZE 16 and CODETYPE altivec: once with
 * the put_ prefix and once with the avg_ prefix. The macro body defines static
 * functions named OPNAME##h264_qpel##SIZE##_mcXY_##CODETYPE, so these two lines
 * generate the put_h264_qpel16_*_altivec and avg_h264_qpel16_*_altivec
 * families. */
276 H264_MC(put_, 16, altivec)
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
277 H264_MC(avg_, 16, altivec)
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
278
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
279
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
280 /****************************************************************************
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
281 * IDCT transform:
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
282 ****************************************************************************/
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
283
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
/*
 * One 1-D pass of the 4-point transform, applied lane-wise to whole vectors.
 *
 * vb0..vb3 are the inputs and va0..va3 receive the outputs. The macro uses
 * vz0..vz3 as scratch; they are not parameters and must be declared in the
 * scope where the macro is expanded. Each vbN is evaluated more than once, so
 * pass plain variables only. The expansion is a sequence of statements with no
 * trailing semicolon: the caller supplies the final ';'.
 */
284 #define VEC_1D_DCT(vb0,vb1,vb2,vb3,va0,va1,va2,va3) \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
285 /* 1st stage */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
286 vz0 = vec_add(vb0,vb2); /* temp[0] = Y[0] + Y[2] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
287 vz1 = vec_sub(vb0,vb2); /* temp[1] = Y[0] - Y[2] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
288 vz2 = vec_sra(vb1,vec_splat_u16(1)); \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
289 vz2 = vec_sub(vz2,vb3); /* temp[2] = Y[1].1/2 - Y[3] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
290 vz3 = vec_sra(vb3,vec_splat_u16(1)); \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
291 vz3 = vec_add(vb1,vz3); /* temp[3] = Y[1] + Y[3].1/2 */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
292 /* 2nd stage: output */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
293 va0 = vec_add(vz0,vz3); /* x[0] = temp[0] + temp[3] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
294 va1 = vec_add(vz1,vz2); /* x[1] = temp[1] + temp[2] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
295 va2 = vec_sub(vz1,vz2); /* x[2] = temp[1] - temp[2] */ \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
296 va3 = vec_sub(vz0,vz3) /* x[3] = temp[0] - temp[3] */
5094
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
297
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/*
 * Transpose the 4x4 matrix held in the first four elements of a0..a3 and
 * leave the result in the first four elements of b0..b3 (bN receives column N).
 *
 * With the 8-element vec_s16 operands this file passes in, the remaining four
 * elements of every bN end up as copies of a0's elements and carry no
 * information. a0..a3 are used as scratch and are overwritten. The expansion
 * has no trailing semicolon: the caller supplies the final ';'.
 */
298 #define VEC_TRANSPOSE_4(a0,a1,a2,a3,b0,b1,b2,b3) \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
299 b0 = vec_mergeh( a0, a0 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
300 b1 = vec_mergeh( a1, a0 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
301 b2 = vec_mergeh( a2, a0 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
302 b3 = vec_mergeh( a3, a0 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
303 a0 = vec_mergeh( b0, b2 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
304 a1 = vec_mergel( b0, b2 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
305 a2 = vec_mergeh( b1, b3 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
306 a3 = vec_mergel( b1, b3 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
307 b0 = vec_mergeh( a0, a2 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
308 b1 = vec_mergel( a0, a2 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
309 b2 = vec_mergeh( a1, a3 ); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
310 b3 = vec_mergel( a1, a3 )
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
311
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/*
 * Add the leading 16-bit values of va to the pixels at dst and store four
 * resulting bytes back at dst.
 *
 * Steps: load the aligned vector containing dst, move the bytes starting at
 * dst to the front (vdst_mask), widen them to 16 bits by interleaving with
 * zero bytes, add them into va (va is modified), pack with unsigned
 * saturation, replicate the first 32-bit word and store one 32-bit element
 * with vec_ste.
 *
 * Not self-contained: dst, vdst_mask, element, zero_u8v, zero_s16v and the
 * temporaries vdst_orig, vdst, vdst_ss, va_u8, va_u32 must all exist at the
 * expansion site. Unlike the neighbouring macros, the expansion already ends
 * in ';'.
 *
 * NOTE(review): element is passed as the offset argument of vec_ste; because
 * va_u32 is a splat, every lane holds the same word. Presumably dst is
 * expected to be 4-byte aligned - confirm against callers.
 */
312 #define VEC_LOAD_U8_ADD_S16_STORE_U8(va) \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
313 vdst_orig = vec_ld(0, dst); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
314 vdst = vec_perm(vdst_orig, zero_u8v, vdst_mask); \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
315 vdst_ss = (vec_s16) vec_mergeh(zero_u8v, vdst); \
5094
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
316 va = vec_add(va, vdst_ss); \
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
317 va_u8 = vec_packsu(va, zero_s16v); \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
318 va_u32 = vec_splat((vec_u32)va_u8, 0); \
5094
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
319 vec_ste(va_u32, element, (uint32_t*)dst);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
320
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/*
 * 4x4 inverse transform of block, added to the 4x4 pixel area at dst.
 *
 * dst    top-left destination pixel; four rows are updated, stride bytes apart.
 * block  16 coefficients, read with two 16-byte vector loads. block[0] is
 *        modified in place (+32 rounding bias).
 *        NOTE(review): vec_ld needs block to be 16-byte aligned and this code
 *        treats DCTELEM as a 16-bit type - confirm both.
 * stride byte distance between destination rows.
 */
321 static void ff_h264_idct_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
322 {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
323 vec_s16 va0, va1, va2, va3;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
/* vz0..vz3 are the scratch vectors that VEC_1D_DCT refers to by name. */
324 vec_s16 vz0, vz1, vz2, vz3;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
325 vec_s16 vtmp0, vtmp1, vtmp2, vtmp3;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
326 vec_u8 va_u8;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
327 vec_u32 va_u32;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
328 vec_s16 vdst_ss;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
329 const vec_u16 v6us = vec_splat_u16(6);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
330 vec_u8 vdst, vdst_orig;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
/* The alignment mask and the 32-bit lane index are derived from the initial
 * dst only and reused for all four rows.
 * NOTE(review): this presumes stride keeps dst's offset within a 16-byte
 * line unchanged - confirm against callers. */
331 vec_u8 vdst_mask = vec_lvsl(0, dst);
5094
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
332 int element = ((unsigned long)dst & 0xf) >> 2;
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/* NOTE(review): LOAD_ZERO is defined outside this view; presumably it
 * provides the zero_u8v / zero_s16v vectors used by
 * VEC_LOAD_U8_ADD_S16_STORE_U8 - confirm. */
333 LOAD_ZERO;
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
334
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
335 block[0] += 32; /* add 32 as a DC-level for rounding */
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
336
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/* Each load brings in eight coefficients; vec_sld by 8 bytes rotates the
 * vector so that its second group of four comes first. From here on only the
 * first four elements of each vector are meaningful. */
337 vtmp0 = vec_ld(0,block);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
338 vtmp1 = vec_sld(vtmp0, vtmp0, 8);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
339 vtmp2 = vec_ld(16,block);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
340 vtmp3 = vec_sld(vtmp2, vtmp2, 8);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
341
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/* 1-D transform, transpose, then the 1-D transform again for the other
 * dimension. */
342 VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
343 VEC_TRANSPOSE_4(va0,va1,va2,va3,vtmp0,vtmp1,vtmp2,vtmp3);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
344 VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
345
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/* Arithmetic shift right by 6; the +32 added to block[0] earlier supplies
 * the rounding. */
346 va0 = vec_sra(va0,v6us);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
347 va1 = vec_sra(va1,v6us);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
348 va2 = vec_sra(va2,v6us);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
349 va3 = vec_sra(va3,v6us);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
350
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
/* Add one result vector per destination row (unsigned-saturating pack),
 * stepping dst by stride between rows. */
351 VEC_LOAD_U8_ADD_S16_STORE_U8(va0);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
352 dst += stride;
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
353 VEC_LOAD_U8_ADD_S16_STORE_U8(va1);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
354 dst += stride;
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
355 VEC_LOAD_U8_ADD_S16_STORE_U8(va2);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
356 dst += stride;
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
357 VEC_LOAD_U8_ADD_S16_STORE_U8(va3);
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
358 }
ce57e3f2b2a7 h264_idct_add_altivec, based on an old patch from Mauricio Alvarez <alvarezATac.upc.edu>, polished by David Conrad <umovimusATgmail.com>
lu_zero
parents: 5019
diff changeset
359
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
360 #define IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7) {\
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
361 /* a0 = SRC(0) + SRC(4); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
362 vec_s16 a0v = vec_add(s0, s4); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
363 /* a2 = SRC(0) - SRC(4); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
364 vec_s16 a2v = vec_sub(s0, s4); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
365 /* a4 = (SRC(2)>>1) - SRC(6); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
366 vec_s16 a4v = vec_sub(vec_sra(s2, onev), s6); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
367 /* a6 = (SRC(6)>>1) + SRC(2); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
368 vec_s16 a6v = vec_add(vec_sra(s6, onev), s2); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
369 /* b0 = a0 + a6; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
370 vec_s16 b0v = vec_add(a0v, a6v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
371 /* b2 = a2 + a4; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
372 vec_s16 b2v = vec_add(a2v, a4v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
373 /* b4 = a2 - a4; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
374 vec_s16 b4v = vec_sub(a2v, a4v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
375 /* b6 = a0 - a6; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
376 vec_s16 b6v = vec_sub(a0v, a6v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
377 /* a1 = SRC(5) - SRC(3) - SRC(7) - (SRC(7)>>1); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
378 /* a1 = (SRC(5)-SRC(3)) - (SRC(7) + (SRC(7)>>1)); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
379 vec_s16 a1v = vec_sub( vec_sub(s5, s3), vec_add(s7, vec_sra(s7, onev)) ); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
380 /* a3 = SRC(7) + SRC(1) - SRC(3) - (SRC(3)>>1); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
381 /* a3 = (SRC(7)+SRC(1)) - (SRC(3) + (SRC(3)>>1)); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
382 vec_s16 a3v = vec_sub( vec_add(s7, s1), vec_add(s3, vec_sra(s3, onev)) );\
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
383 /* a5 = SRC(7) - SRC(1) + SRC(5) + (SRC(5)>>1); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
384 /* a5 = (SRC(7)-SRC(1)) + SRC(5) + (SRC(5)>>1); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
385 vec_s16 a5v = vec_add( vec_sub(s7, s1), vec_add(s5, vec_sra(s5, onev)) );\
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
386 /* a7 = SRC(5)+SRC(3) + SRC(1) + (SRC(1)>>1); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
387 vec_s16 a7v = vec_add( vec_add(s5, s3), vec_add(s1, vec_sra(s1, onev)) );\
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
388 /* b1 = (a7>>2) + a1; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
389 vec_s16 b1v = vec_add( vec_sra(a7v, twov), a1v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
390 /* b3 = a3 + (a5>>2); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
391 vec_s16 b3v = vec_add(a3v, vec_sra(a5v, twov)); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
392 /* b5 = (a3>>2) - a5; */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
393 vec_s16 b5v = vec_sub( vec_sra(a3v, twov), a5v); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
394 /* b7 = a7 - (a1>>2); */ \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
395 vec_s16 b7v = vec_sub( a7v, vec_sra(a1v, twov)); \
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
396 /* DST(0, b0 + b7); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
397 d0 = vec_add(b0v, b7v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
398 /* DST(1, b2 + b5); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
399 d1 = vec_add(b2v, b5v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
400 /* DST(2, b4 + b3); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
401 d2 = vec_add(b4v, b3v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
402 /* DST(3, b6 + b1); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
403 d3 = vec_add(b6v, b1v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
404 /* DST(4, b6 - b1); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
405 d4 = vec_sub(b6v, b1v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
406 /* DST(5, b4 - b3); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
407 d5 = vec_sub(b4v, b3v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
408 /* DST(6, b2 - b5); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
409 d6 = vec_sub(b2v, b5v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
410 /* DST(7, b0 - b7); */ \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
411 d7 = vec_sub(b0v, b7v); \
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
412 }
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
413
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
/*
 * Add one row of eight residuals to eight destination pixels and write the
 * clipped result back.
 *
 *   dest      address of the eight pixels to update (need not be aligned)
 *   idctv     vec_s16 with the eight residuals, shifted right by sixv here
 *   perm_ldv  permute vector that realigns the two loaded vectors
 *   perm_stv  permute vector that rotates the result into store position
 *   sel       byte mask; rotated with perm_stv to select the bytes written
 *
 * sixv, zero_u8v and zero_s16v must be in scope where the macro is expanded.
 * dest is expanded several times, so it must not have side effects.
 */
#define ALTIVEC_STORE_SUM_CLIP(dest, idctv, perm_ldv, perm_stv, sel) {          \
    /* unaligned load: fetch both vectors the 8 pixels may touch */             \
    vec_u8  hi_blk    = vec_ld(0, dest);                                        \
    vec_u8  lo_blk    = vec_ld(7, dest);                                        \
    vec_u8  pix_u8    = vec_perm(hi_blk, lo_blk, (vec_u8)perm_ldv);             \
    /* drop the fixed-point scaling of the residual */                          \
    vec_s16 resid     = vec_sra(idctv, sixv);                                   \
    /* widen the first 8 pixels to 16 bit by interleaving with zero */          \
    vec_u16 pix_u16   = (vec_u16)vec_mergeh(zero_u8v, pix_u8);                  \
    /* saturating add, then pack back to unsigned bytes (clips to 0..255) */    \
    vec_s16 sum_s16   = vec_adds(resid, (vec_s16)pix_u16);                      \
    vec_u8  sum_u8    = vec_packsu(zero_s16v, sum_s16);                         \
    vec_u8  mask_hi;                                                            \
    /* unaligned store: rotate result and masks, then merge into memory */      \
    vec_u8  rotated   = vec_perm(sum_u8, sum_u8, perm_stv);                     \
    vec_u8  mask_lo   = vec_perm(sel, zero_u8v, perm_stv);                      \
    lo_blk  = vec_sel(lo_blk, rotated, mask_lo);                                \
    vec_st(lo_blk, 7, dest);                                                    \
    /* reload after the store above: both accesses may hit the same vector */   \
    hi_blk  = vec_ld(0, dest);                                                  \
    mask_hi = vec_perm(zero_u8v, sel, perm_stv);                                \
    hi_blk  = vec_sel(hi_blk, rotated, mask_hi);                                \
    vec_st(hi_blk, 0, dest);                                                    \
}
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
434
11382
50415a8f1451 PPC: move prototypes to headers and make some functions static
mru
parents: 11369
diff changeset
435 static void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
436 vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
437 vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
438 vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
439
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
440 vec_u8 perm_ldv = vec_lvsl(0, dst);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
441 vec_u8 perm_stv = vec_lvsr(8, dst);
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
442
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
443 const vec_u16 onev = vec_splat_u16(1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
444 const vec_u16 twov = vec_splat_u16(2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
445 const vec_u16 sixv = vec_splat_u16(6);
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
446
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
447 const vec_u8 sel = (vec_u8) {0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1};
4260
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
448 LOAD_ZERO;
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
449
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
450 dct[0] += 32; // rounding for the >>6 at the end
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
451
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
452 s0 = vec_ld(0x00, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
453 s1 = vec_ld(0x10, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
454 s2 = vec_ld(0x20, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
455 s3 = vec_ld(0x30, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
456 s4 = vec_ld(0x40, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
457 s5 = vec_ld(0x50, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
458 s6 = vec_ld(0x60, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
459 s7 = vec_ld(0x70, (int16_t*)dct);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
460
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
461 IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7,
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
462 d0, d1, d2, d3, d4, d5, d6, d7);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
463
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
464 TRANSPOSE8( d0, d1, d2, d3, d4, d5, d6, d7 );
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
465
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
466 IDCT8_1D_ALTIVEC(d0, d1, d2, d3, d4, d5, d6, d7,
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
467 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
468
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
469 ALTIVEC_STORE_SUM_CLIP(&dst[0*stride], idct0, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
470 ALTIVEC_STORE_SUM_CLIP(&dst[1*stride], idct1, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
471 ALTIVEC_STORE_SUM_CLIP(&dst[2*stride], idct2, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
472 ALTIVEC_STORE_SUM_CLIP(&dst[3*stride], idct3, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
473 ALTIVEC_STORE_SUM_CLIP(&dst[4*stride], idct4, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
474 ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
475 ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
476 ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
477 }
0407913ac6c6 Add IDCT8 routine in Altivec. Patch by yours truely with Linux fixes by Luca Barbato
gpoirier
parents: 4254
diff changeset
478
8530
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
479 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size)
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
480 {
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
481 vec_s16 dc16;
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
482 vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
483 LOAD_ZERO;
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
484 DECLARE_ALIGNED(16, int, dc);
8530
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
485 int i;
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
486
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
487 dc = (block[0] + 32) >> 6;
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
488 dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
489
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
490 if (size == 4)
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
491 dc16 = vec_sld(dc16, zero_s16v, 8);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
492 dcplus = vec_packsu(dc16, zero_s16v);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
493 dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
494
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
495 aligner = vec_lvsr(0, dst);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
496 dcplus = vec_perm(dcplus, dcplus, aligner);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
497 dcminus = vec_perm(dcminus, dcminus, aligner);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
498
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
499 for (i = 0; i < size; i += 4) {
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
500 v0 = vec_ld(0, dst+0*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
501 v1 = vec_ld(0, dst+1*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
502 v2 = vec_ld(0, dst+2*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
503 v3 = vec_ld(0, dst+3*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
504
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
505 v0 = vec_adds(v0, dcplus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
506 v1 = vec_adds(v1, dcplus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
507 v2 = vec_adds(v2, dcplus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
508 v3 = vec_adds(v3, dcplus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
509
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
510 v0 = vec_subs(v0, dcminus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
511 v1 = vec_subs(v1, dcminus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
512 v2 = vec_subs(v2, dcminus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
513 v3 = vec_subs(v3, dcminus);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
514
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
515 vec_st(v0, 0, dst+0*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
516 vec_st(v1, 0, dst+1*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
517 vec_st(v2, 0, dst+2*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
518 vec_st(v3, 0, dst+3*stride);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
519
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
520 dst += 4*stride;
8461
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
521 }
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
522 }
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
523
8530
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
524 static void h264_idct_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
525 {
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
526 h264_idct_dc_add_internal(dst, block, stride, 4);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
527 }
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
528
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
529 static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
530 {
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
531 h264_idct_dc_add_internal(dst, block, stride, 8);
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
532 }
992e2f8bfba7 AltiVec version of h264_idct(8)_dc_add
gpoirier
parents: 8494
diff changeset
533
8544
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
534 static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
535 int i;
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
536 for(i=0; i<16; i++){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
537 int nnz = nnzc[ scan8[i] ];
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
538 if(nnz){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
539 if(nnz==1 && block[i*16]) h264_idct_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
540 else ff_h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
541 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
542 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
543 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
544
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
545 static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
546 int i;
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
547 for(i=0; i<16; i++){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
548 if(nnzc[ scan8[i] ]) ff_h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
549 else if(block[i*16]) h264_idct_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
550 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
551 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
552
8461
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
553 static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
554 int i;
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
555 for(i=0; i<16; i+=4){
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
556 int nnz = nnzc[ scan8[i] ];
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
557 if(nnz){
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
558 if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
559 else ff_h264_idct8_add_altivec (dst + block_offset[i], block + i*16, stride);
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
560 }
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
561 }
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
562 }
11307ea31e57 Disable usage of ff_h264_idct_add_altivec since AltiVec versions of h264_idct_add16,
gpoirier
parents: 7376
diff changeset
563
8544
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
564 static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
565 int i;
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
566 for(i=16; i<16+8; i++){
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
567 if(nnzc[ scan8[i] ])
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
568 ff_h264_idct_add_altivec(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
569 else if(block[i*16])
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
570 h264_idct_dc_add_altivec(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
571 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
572 }
0ae8629baf6f Add AltiVec versions of h264_idct_add(8|16|16intra),
gpoirier
parents: 8541
diff changeset
573
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
/*
 * In-place transpose of four 16-byte rows held in r0..r3.
 *
 * Two rounds of byte interleaving regroup the data so that each output
 * vector holds four consecutive columns, every column stored as the four
 * bytes taken from r0, r1, r2, r3 in that order:
 *   r0 <- columns  0..3,   r1 <- columns  4..7,
 *   r2 <- columns 8..11,   r3 <- columns 12..15.
 * r0..r3 must be modifiable vec_u8 lvalues.
 */
#define transpose4x16(r0, r1, r2, r3) {                              \
    /* round 1: interleave rows 0/2 and rows 1/3 */                  \
    register vec_u8 r4 = vec_mergeh(r0, r2); /*0, 2 set 0*/          \
    register vec_u8 r5 = vec_mergel(r0, r2); /*0, 2 set 1*/          \
    register vec_u8 r6 = vec_mergeh(r1, r3); /*1, 3 set 0*/          \
    register vec_u8 r7 = vec_mergel(r1, r3); /*1, 3 set 1*/          \
                                                                     \
    /* round 2: interleave the pairs so all four rows alternate */   \
    r0 = vec_mergeh(r4, r6);                 /*all set 0*/           \
    r1 = vec_mergel(r4, r6);                 /*all set 1*/           \
    r2 = vec_mergeh(r5, r7);                 /*all set 2*/           \
    r3 = vec_mergel(r5, r7);                 /*all set 3*/           \
}
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
590
5135
724e7fad19d9 cosmetics
gpoirier
parents: 5134
diff changeset
591 static inline void write16x4(uint8_t *dst, int dst_stride,
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
592 register vec_u8 r0, register vec_u8 r1,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
593 register vec_u8 r2, register vec_u8 r3) {
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
594 DECLARE_ALIGNED(16, unsigned char, result)[64];
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
595 uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
596 int int_dst_stride = dst_stride/4;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
597
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
598 vec_st(r0, 0, result);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
599 vec_st(r1, 16, result);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
600 vec_st(r2, 32, result);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
601 vec_st(r3, 48, result);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
602 /* FIXME: there has to be a better way!!!! */
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
603 *dst_int = *src_int;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
604 *(dst_int+ int_dst_stride) = *(src_int + 1);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
605 *(dst_int+ 2*int_dst_stride) = *(src_int + 2);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
606 *(dst_int+ 3*int_dst_stride) = *(src_int + 3);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
607 *(dst_int+ 4*int_dst_stride) = *(src_int + 4);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
608 *(dst_int+ 5*int_dst_stride) = *(src_int + 5);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
609 *(dst_int+ 6*int_dst_stride) = *(src_int + 6);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
610 *(dst_int+ 7*int_dst_stride) = *(src_int + 7);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
611 *(dst_int+ 8*int_dst_stride) = *(src_int + 8);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
612 *(dst_int+ 9*int_dst_stride) = *(src_int + 9);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
613 *(dst_int+10*int_dst_stride) = *(src_int + 10);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
614 *(dst_int+11*int_dst_stride) = *(src_int + 11);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
615 *(dst_int+12*int_dst_stride) = *(src_int + 12);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
616 *(dst_int+13*int_dst_stride) = *(src_int + 13);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
617 *(dst_int+14*int_dst_stride) = *(src_int + 14);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
618 *(dst_int+15*int_dst_stride) = *(src_int + 15);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
619 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
620
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
/** \brief Loads 16 rows with unaligned_load() and transposes them, keeping
 *  only the first six columns.
 *
 *  Row i is read from src + i*src_stride. On exit r8..r13 hold columns
 *  0..5 respectively: byte j of each output vector comes from row j.
 *
 *  The macro declares block-local temporaries named r0..r7, r14 and r15,
 *  so the six output arguments must not use any of those names.
 *
 *  \todo FIXME: see whether some vec_lvsl() calls can be spared by
 *  factoring them out of unaligned_load()
 */
#define readAndTranspose16x6(src, src_stride, r8, r9, r10, r11, r12, r13) {    \
    register vec_u8 r0  = unaligned_load(0,                 (src));            \
    register vec_u8 r1  = unaligned_load(     (src_stride), (src));            \
    register vec_u8 r2  = unaligned_load( 2 * (src_stride), (src));            \
    register vec_u8 r3  = unaligned_load( 3 * (src_stride), (src));            \
    register vec_u8 r4  = unaligned_load( 4 * (src_stride), (src));            \
    register vec_u8 r5  = unaligned_load( 5 * (src_stride), (src));            \
    register vec_u8 r6  = unaligned_load( 6 * (src_stride), (src));            \
    register vec_u8 r7  = unaligned_load( 7 * (src_stride), (src));            \
    register vec_u8 r14 = unaligned_load(14 * (src_stride), (src));            \
    register vec_u8 r15 = unaligned_load(15 * (src_stride), (src));            \
                                                                               \
    r8  = unaligned_load( 8 * (src_stride), (src));                            \
    r9  = unaligned_load( 9 * (src_stride), (src));                            \
    r10 = unaligned_load(10 * (src_stride), (src));                            \
    r11 = unaligned_load(11 * (src_stride), (src));                            \
    r12 = unaligned_load(12 * (src_stride), (src));                            \
    r13 = unaligned_load(13 * (src_stride), (src));                            \
                                                                               \
    /* Stage 1: interleave row i with row i+8 (first 8 columns only) */        \
    r0 = vec_mergeh(r0, r8);   /* rows 0, 8 */                                 \
    r1 = vec_mergeh(r1, r9);   /* rows 1, 9 */                                 \
    r2 = vec_mergeh(r2, r10);  /* rows 2,10 */                                 \
    r3 = vec_mergeh(r3, r11);  /* rows 3,11 */                                 \
    r4 = vec_mergeh(r4, r12);  /* rows 4,12 */                                 \
    r5 = vec_mergeh(r5, r13);  /* rows 5,13 */                                 \
    r6 = vec_mergeh(r6, r14);  /* rows 6,14 */                                 \
    r7 = vec_mergeh(r7, r15);  /* rows 7,15 */                                 \
                                                                               \
    /* Stage 2: groups of four rows; "h" = columns 0..3, "l" = columns 4..7 */ \
    r8  = vec_mergeh(r0, r4);  /* rows 0,4, 8,12 cols 0..3 */                  \
    r9  = vec_mergel(r0, r4);  /* rows 0,4, 8,12 cols 4..7 */                  \
    r10 = vec_mergeh(r1, r5);  /* rows 1,5, 9,13 cols 0..3 */                  \
    r11 = vec_mergel(r1, r5);  /* rows 1,5, 9,13 cols 4..7 */                  \
    r12 = vec_mergeh(r2, r6);  /* rows 2,6,10,14 cols 0..3 */                  \
    r13 = vec_mergel(r2, r6);  /* rows 2,6,10,14 cols 4..7 */                  \
    r14 = vec_mergeh(r3, r7);  /* rows 3,7,11,15 cols 0..3 */                  \
    r15 = vec_mergel(r3, r7);  /* rows 3,7,11,15 cols 4..7 */                  \
                                                                               \
    /* Stage 3: all even rows / all odd rows, two columns per vector */        \
    r0 = vec_mergeh(r8,  r12); /* even rows, cols 0,1 */                       \
    r1 = vec_mergel(r8,  r12); /* even rows, cols 2,3 */                       \
    r2 = vec_mergeh(r9,  r13); /* even rows, cols 4,5 */                       \
    r4 = vec_mergeh(r10, r14); /* odd rows,  cols 0,1 */                       \
    r5 = vec_mergel(r10, r14); /* odd rows,  cols 2,3 */                       \
    r6 = vec_mergeh(r11, r15); /* odd rows,  cols 4,5 */                       \
    /* cols 6,7 (the would-be r3 and r7) are not needed */                     \
                                                                               \
    /* Stage 4: one full column of 16 rows per vector */                       \
    r8  = vec_mergeh(r0, r4);  /* column 0 */                                  \
    r9  = vec_mergel(r0, r4);  /* column 1 */                                  \
    r10 = vec_mergeh(r1, r5);  /* column 2 */                                  \
    r11 = vec_mergel(r1, r5);  /* column 3 */                                  \
    r12 = vec_mergeh(r2, r6);  /* column 4 */                                  \
    r13 = vec_mergel(r2, r6);  /* column 5 */                                  \
    /* columns 6 and 7 are not computed */                                     \
}
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
682
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
683 // out: o = |x-y| < a
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
684 static inline vec_u8 diff_lt_altivec ( register vec_u8 x,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
685 register vec_u8 y,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
686 register vec_u8 a) {
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
687
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
688 register vec_u8 diff = vec_subs(x, y);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
689 register vec_u8 diffneg = vec_subs(y, x);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
690 register vec_u8 o = vec_or(diff, diffneg); /* |x-y| */
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
691 o = (vec_u8)vec_cmplt(o, a);
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
692 return o;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
693 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
694
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
695 static inline vec_u8 h264_deblock_mask ( register vec_u8 p0,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
696 register vec_u8 p1,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
697 register vec_u8 q0,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
698 register vec_u8 q1,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
699 register vec_u8 alpha,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
700 register vec_u8 beta) {
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
701
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
702 register vec_u8 mask;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
703 register vec_u8 tempmask;
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
704
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
705 mask = diff_lt_altivec(p0, q0, alpha);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
706 tempmask = diff_lt_altivec(p1, p0, beta);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
707 mask = vec_and(mask, tempmask);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
708 tempmask = diff_lt_altivec(q1, q0, beta);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
709 mask = vec_and(mask, tempmask);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
710
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
711 return mask;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
712 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
713
5165
c99fa49eaa80 part 2/2 of fixing Altivec-accelerated H264 luma inloop filter
gpoirier
parents: 5164
diff changeset
714 // out: newp1 = clip((p2 + ((p0 + q0 + 1) >> 1)) >> 1, p1-tc0, p1+tc0)
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
715 static inline vec_u8 h264_deblock_q1(register vec_u8 p0,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
716 register vec_u8 p1,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
717 register vec_u8 p2,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
718 register vec_u8 q0,
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
719 register vec_u8 tc0) {
5164
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
720
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
721 register vec_u8 average = vec_avg(p0, q0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
722 register vec_u8 temp;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
723 register vec_u8 uncliped;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
724 register vec_u8 ones;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
725 register vec_u8 max;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
726 register vec_u8 min;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
727 register vec_u8 newp1;
5164
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
728
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
729 temp = vec_xor(average, p2);
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
730 average = vec_avg(average, p2); /*avg(p2, avg(p0, q0)) */
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
731 ones = vec_splat_u8(1);
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
732 temp = vec_and(temp, ones); /*(p2^avg(p0, q0)) & 1 */
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
733 uncliped = vec_subs(average, temp); /*(p2+((p0+q0+1)>>1))>>1 */
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
734 max = vec_adds(p1, tc0);
830b9dd36fef convert h264_deblock_q1 to an inline function.
gpoirier
parents: 5159
diff changeset
735 min = vec_subs(p1, tc0);
5165
c99fa49eaa80 part 2/2 of fixing Altivec-accelerated H264 luma inloop filter
gpoirier
parents: 5164
diff changeset
736 newp1 = vec_max(min, uncliped);
c99fa49eaa80 part 2/2 of fixing Altivec-accelerated H264 luma inloop filter
gpoirier
parents: 5164
diff changeset
737 newp1 = vec_min(max, newp1);
c99fa49eaa80 part 2/2 of fixing Altivec-accelerated H264 luma inloop filter
gpoirier
parents: 5164
diff changeset
738 return newp1;
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
739 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
740
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
/*
 * Updates p0 and q0 in place (both must be lvalues) by a correction that is
 * limited to +/- tc0masked per byte.
 *
 * Everything is done in unsigned bytes: the signed correction d is carried
 * with a bias of 160, and its positive and negative parts are recovered by
 * two saturating subtractions against that bias. p1, q1 and tc0masked are
 * only read.
 *
 * The macro declares block-local temporaries (vec160, pq0bit, stage1,
 * stage2, delta, deltaneg); arguments must not use those names.
 */
#define h264_deblock_p0_q0(p0, p1, q0, q1, tc0masked) {                                   \
    /* 10 << 4 = 160 in every byte; used directly, no round trip via memory */            \
    const vec_u8 vec160 = vec_sl(vec_splat_u8(10), vec_splat_u8(4));                      \
    /* low bit of p0 ^ q0, taken before p0/q0 are modified */                             \
    register vec_u8 pq0bit = vec_and(vec_xor(p0, q0), vec_splat_u8(1));                   \
    register vec_u8 stage1;                                                               \
    register vec_u8 stage2;                                                               \
    register vec_u8 delta;                                                                \
    register vec_u8 deltaneg;                                                             \
                                                                                          \
    stage1   = vec_avg(p1, vec_nor(q1, q1));    /* (p1 - q1 + 256) >> 1 */                \
    stage2   = vec_sr(stage1, vec_splat_u8(1)); /* (p1 - q1 + 256) >> 2 */                \
    stage1   = vec_avg(q0, vec_nor(p0, p0));    /* (q0 - p0 + 256) >> 1 */                \
    stage2   = vec_avg(stage2, pq0bit);         /* halve again, pq0bit as rounding */     \
    stage2   = vec_adds(stage2, stage1);        /* biased correction: 160 + d */          \
                                                                                          \
    deltaneg = vec_subs(vec160, stage2);        /* max(-d, 0) */                          \
    delta    = vec_subs(stage2, vec160);        /* max( d, 0) */                          \
    deltaneg = vec_min(tc0masked, deltaneg);    /* clip both parts to tc */               \
    delta    = vec_min(tc0masked, delta);                                                 \
                                                                                          \
    /* p0 += d, q0 -= d; at most one of delta/deltaneg is non-zero per lane */            \
    p0 = vec_subs(p0, deltaneg);                                                          \
    q0 = vec_subs(q0, delta);                                                             \
    p0 = vec_adds(p0, delta);                                                             \
    q0 = vec_adds(q0, deltaneg);                                                          \
}
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
772
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
/*
 * tc0-clipped luma deblocking of 16 edge positions held in six vectors.
 *
 * p2..q2  vectors of samples on both sides of the edge; p1, p0, q0 and q1
 *         are overwritten with the filtered values (they must be lvalues),
 *         p2 and q2 are only read.
 * alpha,
 * beta    scalar thresholds; each is stored into one byte and splatted.
 * tc0     pointer to four signed bytes; each one is replicated over four
 *         consecutive lanes. Lanes whose tc0 is negative are left
 *         unfiltered.
 *
 * The macro declares block-local temporaries (temp, alphavec, betavec, mask,
 * p1mask, q1mask, tc0vec, finaltc0, tc0masked, newp1, newq1); arguments must
 * not use those names.
 */
#define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) {         \
    DECLARE_ALIGNED(16, unsigned char, temp)[16];                                         \
    register vec_u8 alphavec;                                                             \
    register vec_u8 betavec;                                                              \
    register vec_u8 mask;                                                                 \
    register vec_u8 p1mask;                                                               \
    register vec_u8 q1mask;                                                               \
    register vector signed char tc0vec;                                                   \
    register vec_u8 finaltc0;                                                             \
    register vec_u8 tc0masked;                                                            \
    register vec_u8 newp1;                                                                \
    register vec_u8 newq1;                                                                \
                                                                                          \
    /* Splat alpha and beta: only bytes 0 and 1 of the loaded vector are used */          \
    temp[0]  = alpha;                                                                     \
    temp[1]  = beta;                                                                      \
    alphavec = vec_ld(0, temp);                                                           \
    betavec  = vec_splat(alphavec, 0x1);                                                  \
    alphavec = vec_splat(alphavec, 0x0);                                                  \
    mask     = h264_deblock_mask(p0, p1, q0, q1, alphavec, betavec); /* edge test */      \
                                                                                          \
    /* Bring in the four tc0 bytes one at a time: reading them through an  */             \
    /* int pointer would break the aliasing rules and could be misaligned. */             \
    temp[0] = (tc0)[0];                                                                   \
    temp[1] = (tc0)[1];                                                                   \
    temp[2] = (tc0)[2];                                                                   \
    temp[3] = (tc0)[3];                                                                   \
    tc0vec  = vec_ld(0, (signed char*)temp);                                              \
    /* Two self-merges expand bytes 0..3 to four lanes each */                            \
    tc0vec  = vec_mergeh(tc0vec, tc0vec);                                                 \
    tc0vec  = vec_mergeh(tc0vec, tc0vec);                                                 \
    mask    = vec_and(mask, vec_cmpgt(tc0vec, vec_splat_s8(-1)));  /* if tc0[i] >= 0 */   \
    finaltc0 = vec_and((vec_u8)tc0vec, mask);     /* tc = tc0 */                          \
                                                                                          \
    p1mask    = diff_lt_altivec(p2, p0, betavec);                                         \
    p1mask    = vec_and(p1mask, mask);            /* if ( |p2 - p0| < beta ) */           \
    tc0masked = vec_and(p1mask, (vec_u8)tc0vec);                                          \
    finaltc0  = vec_sub(finaltc0, p1mask);        /* tc++ (minus 0xFF is plus 1) */       \
    newp1     = h264_deblock_q1(p0, p1, p2, q0, tc0masked);                               \
    /* end if */                                                                          \
                                                                                          \
    q1mask    = diff_lt_altivec(q2, q0, betavec);                                         \
    q1mask    = vec_and(q1mask, mask);            /* if ( |q2 - q0| < beta ) */           \
    tc0masked = vec_and(q1mask, (vec_u8)tc0vec);                                          \
    finaltc0  = vec_sub(finaltc0, q1mask);        /* tc++ */                              \
    newq1     = h264_deblock_q1(p0, q1, q2, q0, tc0masked);                               \
    /* end if */                                                                          \
                                                                                          \
    /* p0/q0 are filtered from the original p1/q1, which are replaced afterwards */       \
    h264_deblock_p0_q0(p0, p1, q0, q1, finaltc0);                                         \
    p1 = newp1;                                                                           \
    q1 = newq1;                                                                           \
}
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
818
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
819 static void h264_v_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) {
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
820
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
821 if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) >= 0) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
822 register vec_u8 p2 = vec_ld(-3*stride, pix);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
823 register vec_u8 p1 = vec_ld(-2*stride, pix);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
824 register vec_u8 p0 = vec_ld(-1*stride, pix);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
825 register vec_u8 q0 = vec_ld(0, pix);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
826 register vec_u8 q1 = vec_ld(stride, pix);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
827 register vec_u8 q2 = vec_ld(2*stride, pix);
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
828 h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
829 vec_st(p1, -2*stride, pix);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
830 vec_st(p0, -1*stride, pix);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
831 vec_st(q0, 0, pix);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
832 vec_st(q1, stride, pix);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
833 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
834 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
835
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
836 static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) {
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
837
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 8461
diff changeset
838 register vec_u8 line0, line1, line2, line3, line4, line5;
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
839 if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) < 0)
5119
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
840 return;
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
841 readAndTranspose16x6(pix-3, stride, line0, line1, line2, line3, line4, line5);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
842 h264_loop_filter_luma_altivec(line0, line1, line2, line3, line4, line5, alpha, beta, tc0);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
843 transpose4x16(line1, line2, line3, line4);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
844 write16x4(pix-2, stride, line1, line2, line3, line4);
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
845 }
ad0c45e0008c Altivec version of h264_(h|v)_loop_filter_luma
gpoirier
parents: 5094
diff changeset
846
8541
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
847 static av_always_inline
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
848 void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h)
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
849 {
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
850 int y, aligned;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
851 vec_u8 vblock;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
852 vec_s16 vtemp, vweight, voffset, v0, v1;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
853 vec_u16 vlog2_denom;
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
854 DECLARE_ALIGNED(16, int32_t, temp)[4];
8541
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
855 LOAD_ZERO;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
856
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
857 offset <<= log2_denom;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
858 if(log2_denom) offset += 1<<(log2_denom-1);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
859 temp[0] = log2_denom;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
860 temp[1] = weight;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
861 temp[2] = offset;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
862
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
863 vtemp = (vec_s16)vec_ld(0, temp);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
864 vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
865 vweight = vec_splat(vtemp, 3);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
866 voffset = vec_splat(vtemp, 5);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
867 aligned = !((unsigned long)block & 0xf);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
868
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
869 for (y=0; y<h; y++) {
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
870 vblock = vec_ld(0, block);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
871
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
872 v0 = (vec_s16)vec_mergeh(zero_u8v, vblock);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
873 v1 = (vec_s16)vec_mergel(zero_u8v, vblock);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
874
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
875 if (w == 16 || aligned) {
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
876 v0 = vec_mladd(v0, vweight, zero_s16v);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
877 v0 = vec_adds(v0, voffset);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
878 v0 = vec_sra(v0, vlog2_denom);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
879 }
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
880 if (w == 16 || !aligned) {
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
881 v1 = vec_mladd(v1, vweight, zero_s16v);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
882 v1 = vec_adds(v1, voffset);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
883 v1 = vec_sra(v1, vlog2_denom);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
884 }
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
885 vblock = vec_packsu(v0, v1);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
886 vec_st(vblock, 0, block);
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
887
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
888 block += stride;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
889 }
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
890 }
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
891
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
892 static av_always_inline
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
893 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
894 int weightd, int weights, int offset, int w, int h)
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
895 {
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
896 int y, dst_aligned, src_aligned;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
897 vec_u8 vsrc, vdst;
8535
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
898 vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
899 vec_u16 vlog2_denom;
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
900 DECLARE_ALIGNED(16, int32_t, temp)[4];
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
901 LOAD_ZERO;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
902
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
903 offset = ((offset + 1) | 1) << log2_denom;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
904 temp[0] = log2_denom+1;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
905 temp[1] = weights;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
906 temp[2] = weightd;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
907 temp[3] = offset;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
908
8536
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
909 vtemp = (vec_s16)vec_ld(0, temp);
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
910 vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
911 vweights = vec_splat(vtemp, 3);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
912 vweightd = vec_splat(vtemp, 5);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
913 voffset = vec_splat(vtemp, 7);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
914 dst_aligned = !((unsigned long)dst & 0xf);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
915 src_aligned = !((unsigned long)src & 0xf);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
916
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
917 for (y=0; y<h; y++) {
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
918 vdst = vec_ld(0, dst);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
919 vsrc = vec_ld(0, src);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
920
8536
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
921 v0 = (vec_s16)vec_mergeh(zero_u8v, vdst);
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
922 v1 = (vec_s16)vec_mergel(zero_u8v, vdst);
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
923 v2 = (vec_s16)vec_mergeh(zero_u8v, vsrc);
87450160a913 fix compilation with GCC-4.3+
gpoirier
parents: 8535
diff changeset
924 v3 = (vec_s16)vec_mergel(zero_u8v, vsrc);
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
925
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
926 if (w == 8) {
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
927 if (src_aligned)
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
928 v3 = v2;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
929 else
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
930 v2 = v3;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
931 }
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
932
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
933 if (w == 16 || dst_aligned) {
8535
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
934 v0 = vec_mladd(v0, vweightd, zero_s16v);
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
935 v2 = vec_mladd(v2, vweights, zero_s16v);
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
936
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
937 v0 = vec_adds(v0, voffset);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
938 v0 = vec_adds(v0, v2);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
939 v0 = vec_sra(v0, vlog2_denom);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
940 }
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
941 if (w == 16 || !dst_aligned) {
8535
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
942 v1 = vec_mladd(v1, vweightd, zero_s16v);
8f3e20061aff offset and weights are signed, fixes some non-bitexact issues.
gpoirier
parents: 8531
diff changeset
943 v3 = vec_mladd(v3, vweights, zero_s16v);
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
944
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
945 v1 = vec_adds(v1, voffset);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
946 v1 = vec_adds(v1, v3);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
947 v1 = vec_sra(v1, vlog2_denom);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
948 }
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
949 vdst = vec_packsu(v0, v1);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
950 vec_st(vdst, 0, dst);
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
951
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
952 dst += stride;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
953 src += stride;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
954 }
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
955 }
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
956
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
/*
 * H264_WEIGHT(W,H) defines two static entry points with the block size
 * baked in:
 *   ff_weight_h264_pixels<W>x<H>_altivec   -> weight_h264_WxH_altivec
 *   ff_biweight_h264_pixels<W>x<H>_altivec -> biweight_h264_WxH_altivec
 * Both simply forward their arguments plus the constants W and H.
 */
#define H264_WEIGHT(W,H) \
static void ff_weight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *block, int stride, \
                                                             int log2_denom, int weight, \
                                                             int offset) \
{ \
    weight_h264_WxH_altivec(block, stride, log2_denom, weight, offset, W, H); \
} \
static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *dst, uint8_t *src, \
                                                               int stride, int log2_denom, \
                                                               int weightd, int weights, \
                                                               int offset) \
{ \
    biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
}

/* Instantiate the five block sizes. */
H264_WEIGHT(16,16)
H264_WEIGHT(16, 8)
H264_WEIGHT( 8,16)
H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
970
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
971 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
972
12475
9fef0a8ddd63 Move mm_support() from libavcodec to libavutil, make it a public
stefano
parents: 12473
diff changeset
973 if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
974 c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
975 c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
9439
ef3a7b711cc0 Rename put_no_rnd_h264_chroma* to reflect its usage in VC1 only
conrad
parents: 9421
diff changeset
976 c->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_altivec;
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9439
diff changeset
977 c->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_altivec;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
978
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
979 #define dspfunc(PFX, IDX, NUM) \
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
980 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
981 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
982 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
983 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
984 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
985 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
986 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
987 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
988 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
989 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
990 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
991 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
992 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
993 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
994 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
995 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
996
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
997 dspfunc(put_h264_qpel, 0, 16);
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
998 dspfunc(avg_h264_qpel, 0, 16);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
999 #undef dspfunc
11499
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1000 }
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1001 }
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1002
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1003 void ff_h264dsp_init_ppc(H264DSPContext *c)
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1004 {
12475
9fef0a8ddd63 Move mm_support() from libavcodec to libavutil, make it a public
stefano
parents: 12473
diff changeset
1005 if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
11499
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1006 c->h264_idct_add = ff_h264_idct_add_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1007 c->h264_idct_add8 = ff_h264_idct_add8_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1008 c->h264_idct_add16 = ff_h264_idct_add16_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1009 c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1010 c->h264_idct_dc_add= h264_idct_dc_add_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1011 c->h264_idct8_dc_add = ff_h264_idct8_dc_add_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1012 c->h264_idct8_add = ff_h264_idct8_add_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1013 c->h264_idct8_add4 = ff_h264_idct8_add4_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1014 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11382
diff changeset
1015 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec;
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1016
8541
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
1017 c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
1018 c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
1019 c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
1020 c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec;
16a315fdad0b add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8536
diff changeset
1021 c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;
8531
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1022 c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1023 c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1024 c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1025 c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
961e40a13102 add AltiVec implementation of biweight_h264_pixels(16|8)x(16|8|4)
gpoirier
parents: 8530
diff changeset
1026 c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
5586
f065fc609145 whitespace/indentation cosmetics
diego
parents: 5585
diff changeset
1027 }
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
1028 }