annotate arm/dsputil_neon.c @ 8790:1045a26cb90d libavcodec

Fix crash when encoding using libschroedinger. Currently only pixel and half-pixel motion vector precisions are supported in libschroedinger. Setting the mv_precision field to 2 (i.e. quarter pixel) causes a crash in the libschroedinger encoder calls. By not setting this parameter, we fall back to the default value used in libschroedinger. patch by Anuradha Suraparaju, anuradha rd.bbc.co uk
author diego
date Tue, 10 Feb 2009 14:27:16 +0000
parents 24a7b5d0eb27
children 9ea1ea6db616
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8334
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
1 /*
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
2 * ARM NEON optimised DSP functions
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
3 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
4 *
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
5 * This file is part of FFmpeg.
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
6 *
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
11 *
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
15 * Lesser General Public License for more details.
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
16 *
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
20 */
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
21
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
22 #include <stdint.h>
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
23
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
24 #include "libavcodec/avcodec.h"
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
25 #include "libavcodec/dsputil.h"
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
26
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
27 void ff_put_pixels16_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
28 void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
29 void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
30 void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
31 void ff_put_pixels8_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
32 void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
33 void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
34 void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
35 void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
36 void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
37 void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
38 void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
39 void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
40 void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
41
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
42 void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, int, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
43
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
44 void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
8338
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
45 void ff_put_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
46 void ff_put_h264_qpel16_mc20_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
47 void ff_put_h264_qpel16_mc30_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
48 void ff_put_h264_qpel16_mc01_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
49 void ff_put_h264_qpel16_mc11_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
50 void ff_put_h264_qpel16_mc21_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
51 void ff_put_h264_qpel16_mc31_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
52 void ff_put_h264_qpel16_mc02_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
53 void ff_put_h264_qpel16_mc12_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
54 void ff_put_h264_qpel16_mc22_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
55 void ff_put_h264_qpel16_mc32_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
56 void ff_put_h264_qpel16_mc03_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
57 void ff_put_h264_qpel16_mc13_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
58 void ff_put_h264_qpel16_mc23_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
59 void ff_put_h264_qpel16_mc33_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
60
8334
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
61 void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
8338
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
62 void ff_put_h264_qpel8_mc10_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
63 void ff_put_h264_qpel8_mc20_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
64 void ff_put_h264_qpel8_mc30_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
65 void ff_put_h264_qpel8_mc01_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
66 void ff_put_h264_qpel8_mc11_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
67 void ff_put_h264_qpel8_mc21_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
68 void ff_put_h264_qpel8_mc31_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
69 void ff_put_h264_qpel8_mc02_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
70 void ff_put_h264_qpel8_mc12_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
71 void ff_put_h264_qpel8_mc22_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
72 void ff_put_h264_qpel8_mc32_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
73 void ff_put_h264_qpel8_mc03_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
74 void ff_put_h264_qpel8_mc13_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
75 void ff_put_h264_qpel8_mc23_neon(uint8_t *, uint8_t *, int);
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
76 void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
8334
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
77
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
78 void ff_avg_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
79
8336
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
80 void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
81 void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
82
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
83 void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
84 void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
85
8337
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
86 void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
87 int beta, int8_t *tc0);
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
88 void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
89 int beta, int8_t *tc0);
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
90 void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
91 int beta, int8_t *tc0);
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
92 void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
93 int beta, int8_t *tc0);
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
94
8664
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
95 void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride, int log2_den,
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
96 int weight, int offset);
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
97 void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride, int log2_den,
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
98 int weight, int offset);
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
99 void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride, int log2_den,
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
100 int weight, int offset);
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
101 void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride, int log2_den,
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
102 int weight, int offset);
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
103 void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride, int log2_den,
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
104 int weight, int offset);
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
105 void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride, int log2_den,
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
106 int weight, int offset);
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
107 void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride, int log2_den,
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
108 int weight, int offset);
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
109 void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride, int log2_den,
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
110 int weight, int offset);
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
111
8663
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
112 void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src, int stride,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
113 int log2_den, int weightd, int weights,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
114 int offset);
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
115 void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src, int stride,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
116 int log2_den, int weightd, int weights,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
117 int offset);
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
118 void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src, int stride,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
119 int log2_den, int weightd, int weights,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
120 int offset);
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
121 void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src, int stride,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
122 int log2_den, int weightd, int weights,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
123 int offset);
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
124 void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src, int stride,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
125 int log2_den, int weightd, int weights,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
126 int offset);
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
127 void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src, int stride,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
128 int log2_den, int weightd, int weights,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
129 int offset);
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
130 void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src, int stride,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
131 int log2_den, int weightd, int weights,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
132 int offset);
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
133 void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src, int stride,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
134 int log2_den, int weightd, int weights,
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
135 int offset);
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
136
8339
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents: 8338
diff changeset
137 void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
8340
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
138 void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
139 void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
140 DCTELEM *block, int stride,
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
141 const uint8_t nnzc[6*8]);
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
142 void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
143 DCTELEM *block, int stride,
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
144 const uint8_t nnzc[6*8]);
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
145 void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
146 DCTELEM *block, int stride,
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
147 const uint8_t nnzc[6*8]);
8339
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents: 8338
diff changeset
148
8697
307b176f91e7 ARM: NEON optimised vector_fmul
mru
parents: 8664
diff changeset
149 void ff_vector_fmul_neon(float *dst, const float *src, int len);
8698
24a7b5d0eb27 ARM: NEON optimised vector_fmul_window
mru
parents: 8697
diff changeset
150 void ff_vector_fmul_window_neon(float *dst, const float *src0,
24a7b5d0eb27 ARM: NEON optimised vector_fmul_window
mru
parents: 8697
diff changeset
151 const float *src1, const float *win,
24a7b5d0eb27 ARM: NEON optimised vector_fmul_window
mru
parents: 8697
diff changeset
152 float add_bias, int len);
8697
307b176f91e7 ARM: NEON optimised vector_fmul
mru
parents: 8664
diff changeset
153
8492
639169d7fad5 ARM: NEON optimised float_to_int16
mru
parents: 8462
diff changeset
154 void ff_float_to_int16_neon(int16_t *, const float *, long);
639169d7fad5 ARM: NEON optimised float_to_int16
mru
parents: 8462
diff changeset
155 void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
639169d7fad5 ARM: NEON optimised float_to_int16
mru
parents: 8462
diff changeset
156
8334
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
157 void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
158 {
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
159 c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
160 c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
161 c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
162 c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
163 c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
164 c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
165 c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
166 c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
167
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
168 c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
169 c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
170 c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
171 c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
172 c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
173 c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
174 c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
175 c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
176
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
177 c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
178
8336
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
179 c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
180 c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
181
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
182 c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
183 c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
c8401acb05d1 ARM: NEON optimised {put,avg}_h264_chroma_mc[48]
mru
parents: 8334
diff changeset
184
8338
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
185 c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
186 c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
187 c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
188 c->put_h264_qpel_pixels_tab[0][ 3] = ff_put_h264_qpel16_mc30_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
189 c->put_h264_qpel_pixels_tab[0][ 4] = ff_put_h264_qpel16_mc01_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
190 c->put_h264_qpel_pixels_tab[0][ 5] = ff_put_h264_qpel16_mc11_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
191 c->put_h264_qpel_pixels_tab[0][ 6] = ff_put_h264_qpel16_mc21_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
192 c->put_h264_qpel_pixels_tab[0][ 7] = ff_put_h264_qpel16_mc31_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
193 c->put_h264_qpel_pixels_tab[0][ 8] = ff_put_h264_qpel16_mc02_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
194 c->put_h264_qpel_pixels_tab[0][ 9] = ff_put_h264_qpel16_mc12_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
195 c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
196 c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
197 c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
198 c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
199 c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
200 c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
201
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
202 c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
203 c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
204 c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
205 c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
206 c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
207 c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
208 c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
209 c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
210 c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
211 c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
212 c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
213 c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
214 c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
215 c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
216 c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_neon;
b294a0d5bc50 ARM: NEON optimised H.264 8x8 and 16x16 qpel MC
mru
parents: 8337
diff changeset
217 c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_neon;
8334
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
218
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
219 c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon;
8337
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
220
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
221 c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
222 c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
223 c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
d43b7f4c5c1c ARM: NEON optimised H.264 loop filter
mru
parents: 8336
diff changeset
224 c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
8339
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents: 8338
diff changeset
225
8664
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
226 c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon;
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
227 c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon;
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
228 c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon;
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
229 c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon;
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
230 c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon;
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
231 c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon;
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
232 c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon;
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
233 c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon;
882c351e69c2 ARM: NEON optimised H.264 weighted prediction
mru
parents: 8663
diff changeset
234
8663
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
235 c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon;
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
236 c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon;
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
237 c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon;
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
238 c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon;
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
239 c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon;
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
240 c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon;
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
241 c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon;
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
242 c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon;
23f7711e777e ARM: NEON optimised H.264 biweighted prediction
mru
parents: 8492
diff changeset
243
8339
a561ec6d1bf6 ARM: NEON optimised h264_idct_add
mru
parents: 8338
diff changeset
244 c->h264_idct_add = ff_h264_idct_add_neon;
8340
834a77844ba3 ARM: NEON optimised h264_idct_dc_add
mru
parents: 8339
diff changeset
245 c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
8462
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
246 c->h264_idct_add16 = ff_h264_idct_add16_neon;
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
247 c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
0ca0e3c98ed5 ARM: add new h264 idct functions
mru
parents: 8359
diff changeset
248 c->h264_idct_add8 = ff_h264_idct_add8_neon;
8492
639169d7fad5 ARM: NEON optimised float_to_int16
mru
parents: 8462
diff changeset
249
8697
307b176f91e7 ARM: NEON optimised vector_fmul
mru
parents: 8664
diff changeset
250 c->vector_fmul = ff_vector_fmul_neon;
8698
24a7b5d0eb27 ARM: NEON optimised vector_fmul_window
mru
parents: 8697
diff changeset
251 c->vector_fmul_window = ff_vector_fmul_window_neon;
8697
307b176f91e7 ARM: NEON optimised vector_fmul
mru
parents: 8664
diff changeset
252
8492
639169d7fad5 ARM: NEON optimised float_to_int16
mru
parents: 8462
diff changeset
253 if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
639169d7fad5 ARM: NEON optimised float_to_int16
mru
parents: 8462
diff changeset
254 c->float_to_int16 = ff_float_to_int16_neon;
639169d7fad5 ARM: NEON optimised float_to_int16
mru
parents: 8462
diff changeset
255 c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
639169d7fad5 ARM: NEON optimised float_to_int16
mru
parents: 8462
diff changeset
256 }
8334
6bdd6dfc3574 ARM: NEON optimised put_pixels functions
mru
parents:
diff changeset
257 }