Mercurial > libavcodec.hg
annotate x86/h264_qpel_mmx.c @ 12483:0159a19bfff7 libavcodec
aacdec: Rework channel mapping compatibility hacks.
For a PCE based configuration map the channels solely based on tags.
For an indexed configuration map the channels solely based on position.
This works with all known exotic samples including al17, elem_id0, bad_concat,
and lfe_is_sce.
author | alexc |
---|---|
date | Fri, 10 Sep 2010 18:01:48 +0000 |
parents | 3941687b4fa9 |
children |
rev | line source |
---|---|
12450
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1 /* |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
2 * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
3 * |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
4 * This file is part of FFmpeg. |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
5 * |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
7 * modify it under the terms of the GNU Lesser General Public |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
8 * License as published by the Free Software Foundation; either |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
10 * |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
14 * Lesser General Public License for more details. |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
15 * |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
16 * You should have received a copy of the GNU Lesser General Public |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
19 */ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
20 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
21 #include "dsputil_mmx.h" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
22 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
23 /***********************************/ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
24 /* motion compensation */ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
25 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * QPEL_H264V_MM - emit the inline-asm text for one output row of the
 * vertical lowpass filter.
 *
 * Macro arguments:
 *   A..E  registers holding five consecutive source rows as 16-bit words
 *         (the visible callers widen them with punpcklbw before the first use)
 *   F     register that receives the next source row, loaded from (%0)
 *   OP    store macro, invoked as OP(T, (%1), A, d); its definition is not
 *         visible here - NOTE(review): A looks like it is handed over as a
 *         scratch register, confirm against the OP definitions
 *   T     temporary register that accumulates the result
 *   Z     register interleaved into F by punpcklbw; assumed to be all zero
 *         (the visible callers clear it with pxor) so bytes widen to words
 *   d, q  mnemonic suffixes pasted after mov: d for the row load,
 *         q for the register-to-register copy
 *
 * Arithmetic performed on each 16-bit lane:
 *   T = ((4*(C + D) - B - E) * [%4] + A + F + [%5]) >> 5   (arithmetic shift)
 * after which packuswb saturates T to unsigned bytes.
 * A is overwritten in the process (it becomes A + [%5] + F).
 *
 * Asm operands, as bound by the visible h264_qpel4_v_lowpass caller:
 *   %0 source pointer        advanced by %2 after the load
 *   %1 destination pointer   advanced by %3 after OP
 *   %2 source stride, %3 destination stride
 *   %4 ff_pw_5, %5 ff_pw_16  (presumably packed words of 5 and 16, judging
 *                             by the names; the values are defined elsewhere)
 */
26 #define QPEL_H264V_MM(A,B,C,D,E,F,OP,T,Z,d,q)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
27 "mov"#q" "#C", "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
28 "mov"#d" (%0), "#F" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
29 "paddw "#D", "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
30 "psllw $2, "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
31 "psubw "#B", "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
32 "psubw "#E", "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
33 "punpcklbw "#Z", "#F" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
34 "pmullw %4, "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
35 "paddw %5, "#A" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
36 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
37 "paddw "#F", "#A" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
38 "paddw "#A", "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
39 "psraw $5, "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
40 "packuswb "#T", "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
41 OP(T, (%1), A, d)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
42 "add %3, %1 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
43 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * QPEL_H264HV_MM - emit the inline-asm text for one row of the vertical
 * pass of the combined horizontal+vertical lowpass.
 *
 * Unlike QPEL_H264V_MM there is no shift, no pack and no OP store: the
 * 16-bit sums are written out unshifted with mov#q at displacement OF
 * from %1, and %1 itself is not advanced by this macro.
 *
 * Macro arguments:
 *   A..E  registers holding five consecutive source rows as 16-bit words
 *         (the visible caller widens them with punpcklbw before the first use)
 *   F     register that receives the next source row, loaded from (%0)
 *   OF    byte displacement pasted in front of (%1) for the store
 *   T     temporary register that accumulates and then holds the result
 *   Z     register interleaved into F by punpcklbw; assumed to be all zero
 *         (the visible caller clears it with pxor)
 *   d, q  mnemonic suffixes pasted after mov: d for the row load,
 *         q for the register copy and for the store of T
 *
 * Arithmetic performed on each 16-bit lane:
 *   T = (4*(C + D) - B - E) * [%3] + A + F + [%4]
 * A is overwritten in the process (it becomes A + [%4] + F).
 *
 * Asm operands, as bound by the visible h264_qpel4_hv_lowpass caller
 * (note the numbering differs from QPEL_H264V_MM):
 *   %0 source pointer        advanced by %2 after the load
 *   %1 tmp buffer pointer    base address for the OF(%1) store
 *   %2 source stride
 *   %3 ff_pw_5, %4 ff_pw_16  (presumably packed words of 5 and 16, judging
 *                             by the names; the values are defined elsewhere)
 */
44 #define QPEL_H264HV_MM(A,B,C,D,E,F,OF,T,Z,d,q)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
45 "mov"#q" "#C", "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
46 "mov"#d" (%0), "#F" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
47 "paddw "#D", "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
48 "psllw $2, "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
49 "paddw %4, "#A" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
50 "psubw "#B", "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
51 "psubw "#E", "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
52 "punpcklbw "#Z", "#F" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
53 "pmullw %3, "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
54 "paddw "#F", "#A" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
55 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
56 "paddw "#A", "#T" \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
57 "mov"#q" "#T", "#OF"(%1) \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
58 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * Convenience wrappers that bind the temporary register, the zero register
 * and the two mov suffixes of QPEL_H264V_MM / QPEL_H264HV_MM.
 *
 *   QPEL_H264V,     QPEL_H264HV      T = mm6,  Z = mm7,  d = d, q = q
 *                                    (row load is movd, copy/store is movq)
 *   QPEL_H264V_XMM, QPEL_H264HV_XMM  T = xmm6, Z = xmm7, d = q, q = dqa
 *                                    (row load is movq, copy/store is movdqa)
 *
 * The register names carry a doubled percent sign because they are pasted
 * into an extended-asm template, where a single percent introduces an
 * operand reference.
 *
 * NOTE(review): with q = dqa the store emitted by QPEL_H264HV_XMM is a
 * movdqa to memory, which needs a 16-byte aligned address - confirm that
 * the callers (not visible here) guarantee this for the tmp buffer.
 */
59 #define QPEL_H264V(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%mm6,%%mm7,d,q) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
60 #define QPEL_H264HV(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%mm6,%%mm7,d,q) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
61 #define QPEL_H264V_XMM(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%xmm6,%%xmm7,q,dqa) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
62 #define QPEL_H264HV_XMM(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%xmm6,%%xmm7,q,dqa) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
63 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
64 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
65 #define QPEL_H264(OPNAME, OP, MMX)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
66 static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
67 int h=4;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
68 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
69 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
70 "pxor %%mm7, %%mm7 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
71 "movq "MANGLE(ff_pw_5) ", %%mm4\n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
72 "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
73 "1: \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
74 "movd -1(%0), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
75 "movd (%0), %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
76 "movd 1(%0), %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
77 "movd 2(%0), %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
78 "punpcklbw %%mm7, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
79 "punpcklbw %%mm7, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
80 "punpcklbw %%mm7, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
81 "punpcklbw %%mm7, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
82 "paddw %%mm0, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
83 "paddw %%mm3, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
84 "movd -2(%0), %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
85 "movd 3(%0), %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
86 "punpcklbw %%mm7, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
87 "punpcklbw %%mm7, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
88 "paddw %%mm3, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
89 "psllw $2, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
90 "psubw %%mm1, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
91 "pmullw %%mm4, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
92 "paddw %%mm5, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
93 "paddw %%mm2, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
94 "psraw $5, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
95 "packuswb %%mm0, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
96 OP(%%mm0, (%1),%%mm6, d)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
97 "add %3, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
98 "add %4, %1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
99 "decl %2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
100 " jnz 1b \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
101 : "+a"(src), "+c"(dst), "+g"(h)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
102 : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
103 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
104 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
105 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
106 static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
107 int h=4;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
108 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
109 "pxor %%mm7, %%mm7 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
110 "movq %0, %%mm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
111 "movq %1, %%mm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
112 :: "m"(ff_pw_5), "m"(ff_pw_16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
113 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
114 do{\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
115 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
116 "movd -1(%0), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
117 "movd (%0), %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
118 "movd 1(%0), %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
119 "movd 2(%0), %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
120 "punpcklbw %%mm7, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
121 "punpcklbw %%mm7, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
122 "punpcklbw %%mm7, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
123 "punpcklbw %%mm7, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
124 "paddw %%mm0, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
125 "paddw %%mm3, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
126 "movd -2(%0), %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
127 "movd 3(%0), %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
128 "punpcklbw %%mm7, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
129 "punpcklbw %%mm7, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
130 "paddw %%mm3, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
131 "psllw $2, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
132 "psubw %%mm1, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
133 "pmullw %%mm4, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
134 "paddw %%mm5, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
135 "paddw %%mm2, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
136 "movd (%2), %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
137 "psraw $5, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
138 "packuswb %%mm0, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
139 PAVGB" %%mm3, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
140 OP(%%mm0, (%1),%%mm6, d)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
141 "add %4, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
142 "add %4, %1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
143 "add %3, %2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
144 : "+a"(src), "+c"(dst), "+d"(src2)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
145 : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
146 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
147 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
148 }while(--h);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
149 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
150 static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
151 src -= 2*srcStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
152 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
153 "pxor %%mm7, %%mm7 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
154 "movd (%0), %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
155 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
156 "movd (%0), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
157 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
158 "movd (%0), %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
159 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
160 "movd (%0), %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
161 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
162 "movd (%0), %%mm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
163 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
164 "punpcklbw %%mm7, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
165 "punpcklbw %%mm7, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
166 "punpcklbw %%mm7, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
167 "punpcklbw %%mm7, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
168 "punpcklbw %%mm7, %%mm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
169 QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
170 QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
171 QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
172 QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
173 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
174 : "+a"(src), "+c"(dst)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
175 : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
176 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
177 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
178 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
179 static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
180 int h=4;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
181 int w=3;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
182 src -= 2*srcStride+2;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
183 while(w--){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
184 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
185 "pxor %%mm7, %%mm7 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
186 "movd (%0), %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
187 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
188 "movd (%0), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
189 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
190 "movd (%0), %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
191 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
192 "movd (%0), %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
193 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
194 "movd (%0), %%mm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
195 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
196 "punpcklbw %%mm7, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
197 "punpcklbw %%mm7, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
198 "punpcklbw %%mm7, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
199 "punpcklbw %%mm7, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
200 "punpcklbw %%mm7, %%mm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
201 QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
202 QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
203 QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
204 QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
205 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
206 : "+a"(src)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
207 : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
208 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
209 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
210 tmp += 4;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
211 src += 4 - 9*srcStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
212 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
213 tmp -= 3*4;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
214 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
215 "1: \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
216 "movq (%0), %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
217 "paddw 10(%0), %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
218 "movq 2(%0), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
219 "paddw 8(%0), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
220 "movq 4(%0), %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
221 "paddw 6(%0), %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
222 "psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
223 "psraw $2, %%mm0 \n\t"/*(a-b)/4 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
224 "psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
225 "paddsw %%mm2, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
226 "psraw $2, %%mm0 \n\t"/*((a-b)/4-b+c)/4 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
227 "paddw %%mm2, %%mm0 \n\t"/*(a-5*b+20*c)/16 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
228 "psraw $6, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
229 "packuswb %%mm0, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
230 OP(%%mm0, (%1),%%mm7, d)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
231 "add $24, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
232 "add %3, %1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
233 "decl %2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
234 " jnz 1b \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
235 : "+a"(tmp), "+c"(dst), "+g"(h)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
236 : "S"((x86_reg)dstStride)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
237 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
238 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
239 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
240 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
241 static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){/* Horizontal lowpass over 8 rows of 8 pixels: per row, out[x] = ((src[x]+src[x+1])*4 - (src[x-1]+src[x+2])) * ff_pw_5 + (src[x-2]+src[x+3]) + ff_pw_16, shifted right by 5, saturated to a byte and handed to OP. Presumably ff_pw_5 / ff_pw_16 are packed 16-bit 5s / 16s (defined outside this view), which would make this the 1,-5,20,20,-5,1 filter - confirm. */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
242 int h=8;/* row counter; decremented inside the asm loop */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
243 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
244 "pxor %%mm7, %%mm7 \n\t"/* mm7 = 0, used below to widen bytes to words */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
245 "movq "MANGLE(ff_pw_5)", %%mm6\n\t"/* mm6 = ff_pw_5 multiplier, loaded once before the loop */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
246 "1: \n\t"/* start of the per-row loop */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
247 "movq (%0), %%mm0 \n\t"/* bytes src[0..7] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
248 "movq 1(%0), %%mm2 \n\t"/* bytes src[1..8] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
249 "movq %%mm0, %%mm1 \n\t"/* copy so low and high halves can be widened separately */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
250 "movq %%mm2, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
251 "punpcklbw %%mm7, %%mm0 \n\t"/* mm0 = words src[0..3] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
252 "punpckhbw %%mm7, %%mm1 \n\t"/* mm1 = words src[4..7] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
253 "punpcklbw %%mm7, %%mm2 \n\t"/* mm2 = words src[1..4] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
254 "punpckhbw %%mm7, %%mm3 \n\t"/* mm3 = words src[5..8] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
255 "paddw %%mm2, %%mm0 \n\t"/* src[x]+src[x+1] for x=0..3 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
256 "paddw %%mm3, %%mm1 \n\t"/* src[x]+src[x+1] for x=4..7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
257 "psllw $2, %%mm0 \n\t"/* times 4 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
258 "psllw $2, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
259 "movq -1(%0), %%mm2 \n\t"/* bytes src[-1..6] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
260 "movq 2(%0), %%mm4 \n\t"/* bytes src[2..9] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
261 "movq %%mm2, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
262 "movq %%mm4, %%mm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
263 "punpcklbw %%mm7, %%mm2 \n\t"/* mm2 = words src[-1..2] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
264 "punpckhbw %%mm7, %%mm3 \n\t"/* mm3 = words src[3..6]; kept intact for the outer taps below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
265 "punpcklbw %%mm7, %%mm4 \n\t"/* mm4 = words src[2..5]; kept intact for the outer taps below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
266 "punpckhbw %%mm7, %%mm5 \n\t"/* mm5 = words src[6..9] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
267 "paddw %%mm4, %%mm2 \n\t"/* src[x-1]+src[x+2] for x=0..3 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
268 "paddw %%mm3, %%mm5 \n\t"/* src[x-1]+src[x+2] for x=4..7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
269 "psubw %%mm2, %%mm0 \n\t"/* 4*(inner pair) - (middle pair), low half */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
270 "psubw %%mm5, %%mm1 \n\t"/* same, high half */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
271 "pmullw %%mm6, %%mm0 \n\t"/* scale by the ff_pw_5 words held in mm6 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
272 "pmullw %%mm6, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
273 "movd -2(%0), %%mm2 \n\t"/* bytes src[-2..1] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
274 "movd 7(%0), %%mm5 \n\t"/* bytes src[7..10] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
275 "punpcklbw %%mm7, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
276 "punpcklbw %%mm7, %%mm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
277 "paddw %%mm3, %%mm2 \n\t"/* src[x-2]+src[x+3] for x=0..3 (mm3 still holds src[3..6]) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
278 "paddw %%mm5, %%mm4 \n\t"/* src[x-2]+src[x+3] for x=4..7 (mm4 still holds src[2..5]) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
279 "movq "MANGLE(ff_pw_16)", %%mm5\n\t"/* mm5 = ff_pw_16, added before the shift (presumably the rounding term) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
280 "paddw %%mm5, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
281 "paddw %%mm5, %%mm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
282 "paddw %%mm2, %%mm0 \n\t"/* full filter sum, low half */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
283 "paddw %%mm4, %%mm1 \n\t"/* full filter sum, high half */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
284 "psraw $5, %%mm0 \n\t"/* arithmetic shift right by 5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
285 "psraw $5, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
286 "packuswb %%mm1, %%mm0 \n\t"/* pack both halves into 8 bytes with unsigned saturation */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
287 OP(%%mm0, (%1),%%mm5, q)/* OP is a parameter of the enclosing macro (definition not in view); it receives the result, the dst row address and mm5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
288 "add %3, %0 \n\t"/* src += srcStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
289 "add %4, %1 \n\t"/* dst += dstStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
290 "decl %2 \n\t"/* --h; sets ZF for the branch below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
291 " jnz 1b \n\t"/* next row while h != 0 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
292 : "+a"(src), "+c"(dst), "+g"(h)/* in/out operands: %0 = src, %1 = dst, %2 = h */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
293 : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)/* inputs: %3 = srcStride, %4 = dstStride, both widened to x86_reg */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
294 : "memory"/* the asm reads src and writes dst through pointers */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
295 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
296 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
297 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
298 static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){/* Horizontal lowpass over 8 rows of 8 pixels, then combined with the matching 8 bytes of src2 via PAVGB before OP writes to dst. Per row the filter is ((src[x]+src[x+1])*4 - (src[x-1]+src[x+2])) * ff_pw_5 + (src[x-2]+src[x+3]) + ff_pw_16, shifted right by 5 and saturated to a byte. Presumably ff_pw_5 / ff_pw_16 are packed 16-bit 5s / 16s and PAVGB is a byte-average mnemonic (all defined outside this view) - confirm. */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
299 int h=8;/* row counter for the C do/while loop below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
300 __asm__ volatile(/* one-time register setup; NOTE(review): mm6/mm7 are expected to survive into the separate asm statement in the loop, and no MMX clobbers are declared */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
301 "pxor %%mm7, %%mm7 \n\t"/* mm7 = 0, used to widen bytes to words */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
302 "movq %0, %%mm6 \n\t"/* mm6 = ff_pw_5 multiplier */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
303 :: "m"(ff_pw_5)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
304 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
305 do{/* one iteration per output row */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
306 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
307 "movq (%0), %%mm0 \n\t"/* bytes src[0..7] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
308 "movq 1(%0), %%mm2 \n\t"/* bytes src[1..8] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
309 "movq %%mm0, %%mm1 \n\t"/* copy so low and high halves can be widened separately */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
310 "movq %%mm2, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
311 "punpcklbw %%mm7, %%mm0 \n\t"/* mm0 = words src[0..3] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
312 "punpckhbw %%mm7, %%mm1 \n\t"/* mm1 = words src[4..7] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
313 "punpcklbw %%mm7, %%mm2 \n\t"/* mm2 = words src[1..4] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
314 "punpckhbw %%mm7, %%mm3 \n\t"/* mm3 = words src[5..8] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
315 "paddw %%mm2, %%mm0 \n\t"/* src[x]+src[x+1] for x=0..3 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
316 "paddw %%mm3, %%mm1 \n\t"/* src[x]+src[x+1] for x=4..7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
317 "psllw $2, %%mm0 \n\t"/* times 4 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
318 "psllw $2, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
319 "movq -1(%0), %%mm2 \n\t"/* bytes src[-1..6] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
320 "movq 2(%0), %%mm4 \n\t"/* bytes src[2..9] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
321 "movq %%mm2, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
322 "movq %%mm4, %%mm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
323 "punpcklbw %%mm7, %%mm2 \n\t"/* mm2 = words src[-1..2] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
324 "punpckhbw %%mm7, %%mm3 \n\t"/* mm3 = words src[3..6]; kept intact for the outer taps below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
325 "punpcklbw %%mm7, %%mm4 \n\t"/* mm4 = words src[2..5]; kept intact for the outer taps below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
326 "punpckhbw %%mm7, %%mm5 \n\t"/* mm5 = words src[6..9] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
327 "paddw %%mm4, %%mm2 \n\t"/* src[x-1]+src[x+2] for x=0..3 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
328 "paddw %%mm3, %%mm5 \n\t"/* src[x-1]+src[x+2] for x=4..7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
329 "psubw %%mm2, %%mm0 \n\t"/* 4*(inner pair) - (middle pair), low half */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
330 "psubw %%mm5, %%mm1 \n\t"/* same, high half */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
331 "pmullw %%mm6, %%mm0 \n\t"/* scale by the ff_pw_5 words held in mm6 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
332 "pmullw %%mm6, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
333 "movd -2(%0), %%mm2 \n\t"/* bytes src[-2..1] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
334 "movd 7(%0), %%mm5 \n\t"/* bytes src[7..10] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
335 "punpcklbw %%mm7, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
336 "punpcklbw %%mm7, %%mm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
337 "paddw %%mm3, %%mm2 \n\t"/* src[x-2]+src[x+3] for x=0..3 (mm3 still holds src[3..6]) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
338 "paddw %%mm5, %%mm4 \n\t"/* src[x-2]+src[x+3] for x=4..7 (mm4 still holds src[2..5]) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
339 "movq %5, %%mm5 \n\t"/* mm5 = ff_pw_16, added before the shift (presumably the rounding term) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
340 "paddw %%mm5, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
341 "paddw %%mm5, %%mm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
342 "paddw %%mm2, %%mm0 \n\t"/* full filter sum, low half */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
343 "paddw %%mm4, %%mm1 \n\t"/* full filter sum, high half */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
344 "psraw $5, %%mm0 \n\t"/* arithmetic shift right by 5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
345 "psraw $5, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
346 "movq (%2), %%mm4 \n\t"/* 8 bytes of the second source row (src2) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
347 "packuswb %%mm1, %%mm0 \n\t"/* pack both halves into 8 bytes with unsigned saturation */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
348 PAVGB" %%mm4, %%mm0 \n\t"/* combine filtered row with src2 row; PAVGB is defined outside this view, presumably a packed byte average */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
349 OP(%%mm0, (%1),%%mm5, q)/* OP is a parameter of the enclosing macro (definition not in view); it receives the result, the dst row address and mm5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
350 "add %4, %0 \n\t"/* src += dstStride; NOTE(review): there is no srcStride parameter, so src is assumed to share dst's stride - confirm against callers */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
351 "add %4, %1 \n\t"/* dst += dstStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
352 "add %3, %2 \n\t"/* src2 += src2Stride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
353 : "+a"(src), "+c"(dst), "+d"(src2)/* in/out operands: %0 = src, %1 = dst, %2 = src2 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
354 : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),/* inputs: %3 = src2Stride, %4 = dstStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
355 "m"(ff_pw_16)/* %5 = ff_pw_16 as a memory operand */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
356 : "memory"/* the asm reads src/src2 and writes dst through pointers */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
357 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
358 }while(--h);/* 8 rows in total */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
359 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
360 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
361 static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
362 int w= 2;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
363 src -= 2*srcStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
364 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
365 while(w--){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
366 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
367 "pxor %%mm7, %%mm7 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
368 "movd (%0), %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
369 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
370 "movd (%0), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
371 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
372 "movd (%0), %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
373 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
374 "movd (%0), %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
375 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
376 "movd (%0), %%mm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
377 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
378 "punpcklbw %%mm7, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
379 "punpcklbw %%mm7, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
380 "punpcklbw %%mm7, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
381 "punpcklbw %%mm7, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
382 "punpcklbw %%mm7, %%mm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
383 QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
384 QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
385 QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
386 QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
387 QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
388 QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
389 QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
390 QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
391 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
392 : "+a"(src), "+c"(dst)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
393 : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
394 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
395 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
396 if(h==16){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
397 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
398 QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
399 QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
400 QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
401 QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
402 QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
403 QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
404 QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
405 QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
406 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
407 : "+a"(src), "+c"(dst)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
408 : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
409 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
410 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
411 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
412 src += 4-(h+5)*srcStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
413 dst += 4-h*dstStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
414 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
415 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
416 static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){ /* First stage of the HV lowpass: walks src in 4-pixel-wide column strips and hands each row to QPEL_H264HV with a last argument that grows by 48 per row. tmpStride is not referenced in this body. */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
417 int w = (size+8)>>2; /* strip count: (8+8)>>2 = 4 for size 8, (16+8)>>2 = 6 for size 16 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
418 src -= 2*srcStride+2; /* start 2 rows above and 2 pixels left of the given src */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
419 while(w--){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
420 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
421 "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0: zero source for the byte-to-word unpacks below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
422 "movd (%0), %%mm0 \n\t" /* load 4 bytes from each of 5 consecutive rows into mm0..mm4; %0 = src, %2 = srcStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
423 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
424 "movd (%0), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
425 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
426 "movd (%0), %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
427 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
428 "movd (%0), %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
429 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
430 "movd (%0), %%mm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
431 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
432 "punpcklbw %%mm7, %%mm0 \n\t" /* widen the 5 loaded rows from bytes to 16-bit words */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
433 "punpcklbw %%mm7, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
434 "punpcklbw %%mm7, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
435 "punpcklbw %%mm7, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
436 "punpcklbw %%mm7, %%mm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
437 QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*48) /* 8 filter steps: the six MMX registers rotate roles by one position per step and the last argument advances by 48 per row (presumably the byte offset into tmp - TODO confirm against QPEL_H264HV) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
438 QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
439 QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
440 QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
441 QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
442 QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
443 QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
444 QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
445 : "+a"(src) /* %0 = src, read and updated by the asm */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
446 : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16) /* %1 = tmp, %2 = srcStride, %3 = ff_pw_5, %4 = ff_pw_16 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
447 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
448 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
449 if(size==16){ /* rows 8..15: continues the register rotation where the previous asm statement stopped; nothing is reloaded here, so mm0-mm5 are expected to still hold its state */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
450 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
451 QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
452 QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
453 QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
454 QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
455 QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
456 QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
457 QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
458 QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
459 : "+a"(src)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
460 : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
461 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
462 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
463 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
464 tmp += 4; /* next strip starts 4 int16 columns to the right in tmp */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
465 src += 4 - (size+5)*srcStride; /* rewind src by (size+5) rows and step 4 pixels right for the next strip */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
466 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
467 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
468 static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){ /* Second stage of the HV lowpass: filters each row of 16-bit intermediates in tmp horizontally, scales and packs the result to 8 bytes, and hands it to OP together with the dst address. Rows of tmp are stepped by a fixed 48 bytes; tmpStride is not referenced in this body. */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
469 int w = size>>4; /* 0 for size 8, 1 for size 16, so the do/while below runs once or twice, 8 output columns per pass */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
470 do{\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
471 int h = size; /* rows left in this 8-column pass; counted down by the asm loop */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
472 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
473 "1: \n\t" /* one iteration per row; t[] below denotes the int16 values at %0 (tmp) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
474 "movq (%0), %%mm0 \n\t" /* mm0 = t[0..3] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
475 "movq 8(%0), %%mm3 \n\t" /* mm3 = t[4..7] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
476 "movq 2(%0), %%mm1 \n\t" /* mm1 = t[1..4] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
477 "movq 10(%0), %%mm4 \n\t" /* mm4 = t[5..8] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
478 "paddw %%mm4, %%mm0 \n\t" /* mm0 = a = t[i]+t[i+5] for outputs 0..3 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
479 "paddw %%mm3, %%mm1 \n\t" /* mm1 = b = t[i+1]+t[i+4] for outputs 0..3 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
480 "paddw 18(%0), %%mm3 \n\t" /* mm3 = a for outputs 4..7: t[4..7]+t[9..12] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
481 "paddw 16(%0), %%mm4 \n\t" /* mm4 = b for outputs 4..7: t[5..8]+t[8..11] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
482 "movq 4(%0), %%mm2 \n\t" /* with the paddw below: mm2 = c = t[i+2]+t[i+3] for outputs 0..3 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
483 "movq 12(%0), %%mm5 \n\t" /* with the paddw below: mm5 = c for outputs 4..7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
484 "paddw 6(%0), %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
485 "paddw 14(%0), %%mm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
486 "psubw %%mm1, %%mm0 \n\t" /* from here to the second paddw pair: ((((a-b)>>2) - b + c) >> 2) + c for both halves, i.e. (a - 5*b + 20*c)/16 up to shift truncation, done in steps so the values stay within 16 bits */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
487 "psubw %%mm4, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
488 "psraw $2, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
489 "psraw $2, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
490 "psubw %%mm1, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
491 "psubw %%mm4, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
492 "paddsw %%mm2, %%mm0 \n\t" /* saturating add, unlike the other adds in this sequence */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
493 "paddsw %%mm5, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
494 "psraw $2, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
495 "psraw $2, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
496 "paddw %%mm2, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
497 "paddw %%mm5, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
498 "psraw $6, %%mm0 \n\t" /* final arithmetic shift right by 6 on both halves */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
499 "psraw $6, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
500 "packuswb %%mm3, %%mm0 \n\t" /* pack the 8 words into 8 bytes with unsigned saturation */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
501 OP(%%mm0, (%1),%%mm7, q) /* emit the 8 result bytes at dst (%1); the exact operation is whatever the OP macro argument expands to, with mm7 passed as its third argument */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
502 "add $48, %0 \n\t" /* next tmp row: fixed pitch of 48 bytes (24 int16) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
503 "add %3, %1 \n\t" /* next dst row: dst += dstStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
504 "decl %2 \n\t" /* h--, loop until it reaches zero */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
505 " jnz 1b \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
506 : "+a"(tmp), "+c"(dst), "+g"(h) /* %0 = tmp, %1 = dst, %2 = h; all three are modified by the asm */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
507 : "S"((x86_reg)dstStride) /* %3 = dstStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
508 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
509 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
510 tmp += 8 - size*24; /* the asm advanced tmp by size rows of 24 int16 (48 bytes); rewind and step 8 columns right */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
511 dst += 8 - size*dstStride; /* likewise rewind dst by size rows and step 8 pixels right */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
512 }while(w--);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
513 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
514 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
515 static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* 8-row vertical lowpass: forwards to the shared 8or16 helper with h = 8 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
516 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
517 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
518 static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* 16-row vertical lowpass built from two calls to the shared 8or16 helper with h = 16, one per 8-pixel-wide half */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
519 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16); /* left half, starting at column 0 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
520 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16); /* right half, starting at column 8 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
521 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
522 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
523 static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* 16-wide horizontal lowpass assembled from four calls to the 8-wide routine: left and right halves, then the same again 8 rows further down */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
524 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride); /* top-left */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
525 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride); /* top-right */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
526 src += 8*srcStride; /* move both pointers down 8 rows for the lower half */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
527 dst += 8*dstStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
528 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride); /* bottom-left */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
529 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride); /* bottom-right */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
530 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
531 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
532 static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){ /* 16-wide variant of the 8-wide h_lowpass_l2 routine, which takes a second source src2: four calls cover left and right halves, then the same again 8 rows further down */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
533 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride); /* top-left */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
534 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride); /* top-right */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
535 src += 8*dstStride; /* NOTE(review): src is advanced by dstStride; this function has no separate src stride parameter, so presumably the 8-wide routine also steps src by dstStride - confirm there */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
536 dst += 8*dstStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
537 src2 += 8*src2Stride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
538 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride); /* bottom-left */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
539 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride); /* bottom-right */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
540 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
541 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
542 static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){ /* Two-stage HV lowpass: stage 1 works from src with tmp as its buffer, stage 2 filters tmp into dst */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
543 put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, srcStride, size); /* always the put_ instance, independent of OPNAME; stage 1 only receives tmp and src, never dst */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
544 OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size); /* OPNAME-specific stage that writes dst */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
545 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
546 static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){ /* size-8 entry point: forwards to the shared 8or16 HV helper with size = 8 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
547 OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 8);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
548 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
549 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
550 static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){ /* size-16 entry point: forwards to the shared 8or16 HV helper with size = 16 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
551 OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
552 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
553 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
554 static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h) /* Combines four rows of 16-bit values from src16 (shifted right by 5 and packed to bytes) with four rows of src8 via PAVGB and passes each result to OP with the dst address. Exactly four rows are processed; the h parameter is not referenced in this body. */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
555 {\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
556 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
557 "movq (%1), %%mm0 \n\t" /* %1 = src16: rows 0 and 1, four 16-bit values each, 24 bytes apart */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
558 "movq 24(%1), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
559 "psraw $5, %%mm0 \n\t" /* arithmetic shift right by 5 on every word */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
560 "psraw $5, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
561 "packuswb %%mm0, %%mm0 \n\t" /* pack words to bytes with unsigned saturation; the low 4 bytes hold the row */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
562 "packuswb %%mm1, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
563 PAVGB" (%0), %%mm0 \n\t" /* combine with src8 rows 0 and 1 (%0 = src8, %3 = src8Stride); PAVGB is a macro defined elsewhere, presumably a byte-average instruction - TODO confirm */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
564 PAVGB" (%0,%3), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
565 OP(%%mm0, (%2), %%mm4, d) /* hand rows 0 and 1 to OP with their dst addresses (%2 = dst, %4 = dstStride); size argument d, presumably a 32-bit access - TODO confirm against OP */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
566 OP(%%mm1, (%2,%4), %%mm5, d)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
567 "lea (%0,%3,2), %0 \n\t" /* src8 += 2*src8Stride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
568 "lea (%2,%4,2), %2 \n\t" /* dst += 2*dstStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
569 "movq 48(%1), %%mm0 \n\t" /* rows 2 and 3 of src16, at byte offsets 48 and 72; same sequence as above */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
570 "movq 72(%1), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
571 "psraw $5, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
572 "psraw $5, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
573 "packuswb %%mm0, %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
574 "packuswb %%mm1, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
575 PAVGB" (%0), %%mm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
576 PAVGB" (%0,%3), %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
577 OP(%%mm0, (%2), %%mm4, d)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
578 OP(%%mm1, (%2,%4), %%mm5, d)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
579 :"+a"(src8), "+c"(src16), "+d"(dst) /* %0 = src8, %1 = src16, %2 = dst, all declared read-write */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
580 :"S"((x86_reg)src8Stride), "D"((x86_reg)dstStride) /* %3 = src8Stride, %4 = dstStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
581 :"memory");\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
582 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
583 static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h) /* 8 pixels wide, h rows: 16-bit values from src16 are shifted right by 5, saturated to bytes, combined with src8 via PAVGB and written to dst via OP (PAVGB and OP are macros defined outside this view) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
584 {\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
585 do{ /* each iteration handles two rows */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
586 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
587 "movq (%1), %%mm0 \n\t" /* row 0, 16-bit values 0-3 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
588 "movq 8(%1), %%mm1 \n\t" /* row 0, 16-bit values 4-7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
589 "movq 48(%1), %%mm2 \n\t" /* row 1 starts 48 bytes (24 int16_t) after row 0 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
590 "movq 8+48(%1), %%mm3 \n\t" /* row 1, 16-bit values 4-7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
591 "psraw $5, %%mm0 \n\t" /* arithmetic shift right by 5 on every 16-bit lane (this and the next three lines) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
592 "psraw $5, %%mm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
593 "psraw $5, %%mm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
594 "psraw $5, %%mm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
595 "packuswb %%mm1, %%mm0 \n\t" /* row 0: eight words packed to eight bytes with unsigned saturation */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
596 "packuswb %%mm3, %%mm2 \n\t" /* row 1: same packing */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
597 PAVGB" (%0), %%mm0 \n\t" /* combine row 0 with 8 bytes at src8 (PAVGB presumably expands to a byte-average mnemonic; defined elsewhere) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
598 PAVGB" (%0,%3), %%mm2 \n\t" /* combine row 1 with 8 bytes at src8 + src8Stride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
599 OP(%%mm0, (%2), %%mm5, q) /* emit row 0 to dst; mm5 is presumably OP's scratch register (OP is the macro argument, not visible here) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
600 OP(%%mm2, (%2,%4), %%mm5, q) /* emit row 1 to dst + dstStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
601 ::"a"(src8), "c"(src16), "d"(dst), /* %0, %1, %2: input-only operands; the pointers are advanced in C below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
602 "r"((x86_reg)src8Stride), "r"((x86_reg)dstStride) /* %3, %4 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
603 :"memory");\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
604 src8 += 2L*src8Stride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
605 src16 += 48; /* 48 int16_t = 96 bytes = two 48-byte rows */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
606 dst += 2L*dstStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
607 }while(h-=2); /* ends only when h reaches exactly 0, so h is expected to be a positive even row count */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
608 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
609 static void OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h) /* 16-pixel-wide variant: runs the 8-wide routine once per 8-column half */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
610 {\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
611 OPNAME ## pixels8_l2_shift5_ ## MMX(dst , src16 , src8 , dstStride, src8Stride, h); /* columns 0-7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
612 OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h); /* columns 8-15; src16+8 skips eight int16_t values */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
613 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
614 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
615 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
616 #if ARCH_X86_64 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
617 #define QPEL_H264_H16_XMM(OPNAME, OP, MMX) /* x86-64 variant: one 16-wide pass per row using xmm0-xmm15 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
618 static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){ /* 16 rows of 16 pixels: horizontal 6-tap filter of src, byte-averaged with src2, written to dst through OP. With a=src[x-2]+src[x+3], b=src[x-1]+src[x+2], c=src[x]+src[x+1] the filter is sat8((a + K16 + K5*(4*c - b)) >> 5), where K5/K16 are the lanes of ff_pw_5/ff_pw_16 (presumably 5 and 16 - both are defined elsewhere) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
619 int h=16; /* row counter, decremented inside the asm loop */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
620 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
621 "pxor %%xmm15, %%xmm15 \n\t" /* xmm15 = 0, used to zero-extend bytes to words */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
622 "movdqa %6, %%xmm14 \n\t" /* xmm14 = ff_pw_5; movdqa needs the operand 16-byte aligned */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
623 "movdqa %7, %%xmm13 \n\t" /* xmm13 = ff_pw_16 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
624 "1: \n\t" /* per-row loop */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
625 "lddqu 6(%0), %%xmm1 \n\t" /* unaligned load of bytes src[6..21] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
626 "lddqu -2(%0), %%xmm7 \n\t" /* unaligned load of bytes src[-2..13] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
627 "movdqa %%xmm1, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
628 "punpckhbw %%xmm15, %%xmm1 \n\t" /* xmm1 = words src[14..21] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
629 "punpcklbw %%xmm15, %%xmm0 \n\t" /* xmm0 = words src[6..13] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
630 "punpcklbw %%xmm15, %%xmm7 \n\t" /* xmm7 = words src[-2..5] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
631 "movdqa %%xmm1, %%xmm2 \n\t" /* copies of xmm1 feed the windows for output pixels 8-15, copies of xmm0 those for pixels 0-7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
632 "movdqa %%xmm0, %%xmm6 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
633 "movdqa %%xmm1, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
634 "movdqa %%xmm0, %%xmm8 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
635 "movdqa %%xmm1, %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
636 "movdqa %%xmm0, %%xmm9 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
637 "movdqa %%xmm0, %%xmm12 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
638 "movdqa %%xmm1, %%xmm11 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
639 "palignr $10,%%xmm0, %%xmm11\n\t" /* xmm11 = words src[11..18] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
640 "palignr $10,%%xmm7, %%xmm12\n\t" /* xmm12 = words src[3..10] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
641 "palignr $2, %%xmm0, %%xmm4 \n\t" /* xmm4 = words src[7..14] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
642 "palignr $2, %%xmm7, %%xmm9 \n\t" /* xmm9 = words src[-1..6] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
643 "palignr $4, %%xmm0, %%xmm3 \n\t" /* xmm3 = words src[8..15] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
644 "palignr $4, %%xmm7, %%xmm8 \n\t" /* xmm8 = words src[0..7] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
645 "palignr $6, %%xmm0, %%xmm2 \n\t" /* xmm2 = words src[9..16] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
646 "palignr $6, %%xmm7, %%xmm6 \n\t" /* xmm6 = words src[1..8] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
647 "paddw %%xmm0 ,%%xmm11 \n\t" /* high half: a = src[6..13] + src[11..18]; must precede the overwrite of xmm0 two lines below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
648 "palignr $8, %%xmm0, %%xmm1 \n\t" /* xmm1 = words src[10..17] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
649 "palignr $8, %%xmm7, %%xmm0 \n\t" /* xmm0 = words src[2..9] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
650 "paddw %%xmm12,%%xmm7 \n\t" /* low half: a = src[-2..5] + src[3..10] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
651 "paddw %%xmm3, %%xmm2 \n\t" /* high half: c */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
652 "paddw %%xmm8, %%xmm6 \n\t" /* low half: c */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
653 "paddw %%xmm4, %%xmm1 \n\t" /* high half: b */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
654 "paddw %%xmm9, %%xmm0 \n\t" /* low half: b */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
655 "psllw $2, %%xmm2 \n\t" /* 4*c */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
656 "psllw $2, %%xmm6 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
657 "psubw %%xmm1, %%xmm2 \n\t" /* 4*c - b */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
658 "psubw %%xmm0, %%xmm6 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
659 "paddw %%xmm13,%%xmm11 \n\t" /* a + ff_pw_16 (rounding term) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
660 "paddw %%xmm13,%%xmm7 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
661 "pmullw %%xmm14,%%xmm2 \n\t" /* (4*c - b) * ff_pw_5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
662 "pmullw %%xmm14,%%xmm6 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
663 "lddqu (%2), %%xmm3 \n\t" /* 16 bytes of src2 for this row */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
664 "paddw %%xmm11,%%xmm2 \n\t" /* add the a + rounding term */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
665 "paddw %%xmm7, %%xmm6 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
666 "psraw $5, %%xmm2 \n\t" /* arithmetic >> 5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
667 "psraw $5, %%xmm6 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
668 "packuswb %%xmm2,%%xmm6 \n\t" /* xmm6 = 16 bytes: pixels 0-7 from xmm6, pixels 8-15 from xmm2, unsigned saturation */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
669 "pavgb %%xmm3, %%xmm6 \n\t" /* rounded byte average with src2 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
670 OP(%%xmm6, (%1), %%xmm4, dqa) /* emit the row to dst; the dqa suffix presumably selects an aligned 16-byte access, which would need dst 16-byte aligned - confirm against OP's definition */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
671 "add %5, %0 \n\t" /* src += dstStride: there is no separate source-stride parameter */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
672 "add %5, %1 \n\t" /* dst += dstStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
673 "add %4, %2 \n\t" /* src2 += src2Stride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
674 "decl %3 \n\t" /* --h */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
675 "jg 1b \n\t" /* loop while h > 0 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
676 : "+a"(src), "+c"(dst), "+d"(src2), "+g"(h) /* %0-%3, read-write */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
677 : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride), /* %4, %5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
678 "m"(ff_pw_5), "m"(ff_pw_16) /* %6, %7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
679 : "memory" /* NOTE(review): the xmm registers written above are not listed as clobbers */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
680 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
681 } |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
682 #else // ARCH_X86_64 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
683 #define QPEL_H264_H16_XMM(OPNAME, OP, MMX) /* non-x86-64 variant: builds the 16x16 case from four calls to the 8x8 routine */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
684 static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
685 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride); /* rows 0-7, columns 0-7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
686 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride); /* rows 0-7, columns 8-15 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
687 src += 8*dstStride; /* src is stepped with dstStride: the signature has no separate source stride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
688 dst += 8*dstStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
689 src2 += 8*src2Stride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
690 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride); /* rows 8-15, columns 0-7 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
691 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride); /* rows 8-15, columns 8-15 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
692 } |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
693 #endif // ARCH_X86_64 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
694 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
695 #define QPEL_H264_H_XMM(OPNAME, OP, MMX) /* generates the horizontal lowpass functions; OP is the macro used to emit each result row */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
696 static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){ /* 8 rows of 8 pixels: horizontal 6-tap filter of src, byte-averaged with src2, written to dst through OP. With a=src[x-2]+src[x+3], b=src[x-1]+src[x+2], c=src[x]+src[x+1] the filter is sat8((a + K16 + K5*(4*c - b)) >> 5), K5/K16 being the lanes of ff_pw_5/ff_pw_16 (presumably 5 and 16 - defined elsewhere) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
697 int h=8; /* row counter for the C loop below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
698 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
699 "pxor %%xmm7, %%xmm7 \n\t" /* xmm7 = 0, used to zero-extend bytes to words */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
700 "movdqa %0, %%xmm6 \n\t" /* xmm6 = ff_pw_5; movdqa needs the operand 16-byte aligned */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
701 :: "m"(ff_pw_5)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
702 ); /* NOTE(review): xmm6/xmm7 set here are consumed by the separate asm statement in the loop; no operand or clobber expresses that dependency */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
703 do{ /* one row per iteration */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
704 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
705 "lddqu -2(%0), %%xmm1 \n\t" /* unaligned load of bytes src[-2..13] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
706 "movdqa %%xmm1, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
707 "punpckhbw %%xmm7, %%xmm1 \n\t" /* xmm1 = words src[6..13] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
708 "punpcklbw %%xmm7, %%xmm0 \n\t" /* xmm0 = words src[-2..5] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
709 "movdqa %%xmm1, %%xmm2 \n\t" /* copies of the high words, shifted into place by palignr below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
710 "movdqa %%xmm1, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
711 "movdqa %%xmm1, %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
712 "movdqa %%xmm1, %%xmm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
713 "palignr $2, %%xmm0, %%xmm4 \n\t" /* xmm4 = words src[-1..6] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
714 "palignr $4, %%xmm0, %%xmm3 \n\t" /* xmm3 = words src[0..7] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
715 "palignr $6, %%xmm0, %%xmm2 \n\t" /* xmm2 = words src[1..8] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
716 "palignr $8, %%xmm0, %%xmm1 \n\t" /* xmm1 = words src[2..9] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
717 "palignr $10,%%xmm0, %%xmm5 \n\t" /* xmm5 = words src[3..10] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
718 "paddw %%xmm5, %%xmm0 \n\t" /* a */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
719 "paddw %%xmm3, %%xmm2 \n\t" /* c */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
720 "paddw %%xmm4, %%xmm1 \n\t" /* b */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
721 "psllw $2, %%xmm2 \n\t" /* 4*c */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
722 "movq (%2), %%xmm3 \n\t" /* 8 bytes of src2 for this row */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
723 "psubw %%xmm1, %%xmm2 \n\t" /* 4*c - b */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
724 "paddw %5, %%xmm0 \n\t" /* a + ff_pw_16 (rounding term), read straight from memory */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
725 "pmullw %%xmm6, %%xmm2 \n\t" /* (4*c - b) * ff_pw_5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
726 "paddw %%xmm0, %%xmm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
727 "psraw $5, %%xmm2 \n\t" /* arithmetic >> 5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
728 "packuswb %%xmm2, %%xmm2 \n\t" /* words to bytes with unsigned saturation */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
729 "pavgb %%xmm3, %%xmm2 \n\t" /* rounded byte average with src2 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
730 OP(%%xmm2, (%1), %%xmm4, q) /* emit 8 bytes to dst through OP */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
731 "add %4, %0 \n\t" /* src += dstStride: there is no separate source-stride parameter */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
732 "add %4, %1 \n\t" /* dst += dstStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
733 "add %3, %2 \n\t" /* src2 += src2Stride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
734 : "+a"(src), "+c"(dst), "+d"(src2) /* %0-%2, read-write */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
735 : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride), /* %3, %4 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
736 "m"(ff_pw_16) /* %5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
737 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
738 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
739 }while(--h);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
740 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
741 QPEL_H264_H16_XMM(OPNAME, OP, MMX) /* emits the matching 16-wide _l2 function (variant chosen by ARCH_X86_64) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
742 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
743 static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* 8 rows of 8 pixels: horizontal 6-tap filter of src written to dst through OP, with no second source. With a=src[x-2]+src[x+3], b=src[x-1]+src[x+2], c=src[x]+src[x+1] the filter is sat8((a + K16 + K5*(4*c - b)) >> 5), K5/K16 being the lanes of ff_pw_5/ff_pw_16 (presumably 5 and 16 - defined elsewhere) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
744 int h=8; /* row counter, decremented inside the asm loop */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
745 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
746 "pxor %%xmm7, %%xmm7 \n\t" /* xmm7 = 0, used to zero-extend bytes to words */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
747 "movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t" /* xmm6 = ff_pw_5, addressed by symbol through MANGLE rather than as an asm operand */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
748 "1: \n\t" /* per-row loop */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
749 "lddqu -2(%0), %%xmm1 \n\t" /* unaligned load of bytes src[-2..13] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
750 "movdqa %%xmm1, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
751 "punpckhbw %%xmm7, %%xmm1 \n\t" /* xmm1 = words src[6..13] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
752 "punpcklbw %%xmm7, %%xmm0 \n\t" /* xmm0 = words src[-2..5] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
753 "movdqa %%xmm1, %%xmm2 \n\t" /* copies of the high words, shifted into place by palignr below */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
754 "movdqa %%xmm1, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
755 "movdqa %%xmm1, %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
756 "movdqa %%xmm1, %%xmm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
757 "palignr $2, %%xmm0, %%xmm4 \n\t" /* xmm4 = words src[-1..6] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
758 "palignr $4, %%xmm0, %%xmm3 \n\t" /* xmm3 = words src[0..7] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
759 "palignr $6, %%xmm0, %%xmm2 \n\t" /* xmm2 = words src[1..8] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
760 "palignr $8, %%xmm0, %%xmm1 \n\t" /* xmm1 = words src[2..9] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
761 "palignr $10,%%xmm0, %%xmm5 \n\t" /* xmm5 = words src[3..10] */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
762 "paddw %%xmm5, %%xmm0 \n\t" /* a */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
763 "paddw %%xmm3, %%xmm2 \n\t" /* c */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
764 "paddw %%xmm4, %%xmm1 \n\t" /* b */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
765 "psllw $2, %%xmm2 \n\t" /* 4*c */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
766 "psubw %%xmm1, %%xmm2 \n\t" /* 4*c - b */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
767 "paddw "MANGLE(ff_pw_16)", %%xmm0\n\t" /* a + ff_pw_16 (rounding term) */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
768 "pmullw %%xmm6, %%xmm2 \n\t" /* (4*c - b) * ff_pw_5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
769 "paddw %%xmm0, %%xmm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
770 "psraw $5, %%xmm2 \n\t" /* arithmetic >> 5 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
771 "packuswb %%xmm2, %%xmm2 \n\t" /* words to bytes with unsigned saturation */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
772 OP(%%xmm2, (%1), %%xmm4, q) /* emit 8 bytes to dst through OP */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
773 "add %3, %0 \n\t" /* src += srcStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
774 "add %4, %1 \n\t" /* dst += dstStride */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
775 "decl %2 \n\t" /* --h */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
776 " jnz 1b \n\t" /* loop until h reaches 0 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
777 : "+a"(src), "+c"(dst), "+g"(h) /* %0-%2, read-write */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
778 : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride) /* %3, %4 */\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
779 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
780 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
781 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
782 static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
783 /* 16-wide horizontal lowpass built from four calls of the 8-wide routine; this call handles columns 0..7 of the upper rows */ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
784 /* columns 8..15 of the same rows */ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
785 /* move both pointers down 8 rows; presumably each 8-wide call covers 8 rows - confirm against h264_qpel8_h_lowpass */ src += 8*srcStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
786 dst += 8*dstStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
787 /* lower rows, columns 0..7 */ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
788 /* lower rows, columns 8..15 */ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
789 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
790 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * QPEL_H264_V_XMM(OPNAME, OP, MMX): expands to three vertical-lowpass
 * functions whose names are pasted together from OPNAME and MMX:
 *   OPNAME h264_qpel8or16_v_lowpass_ MMX (dst, src, dstStride, srcStride, h)
 *   OPNAME h264_qpel8_v_lowpass_ MMX     - one call of the worker with h = 8
 *   OPNAME h264_qpel16_v_lowpass_ MMX    - two calls of the worker with h = 16,
 *                                          for (dst, src) and (dst+8, src+8)
 *
 * The worker moves src back two rows, loads five consecutive rows of 8 bytes
 * with movq, widens them to 16-bit lanes by unpacking against a zeroed xmm7,
 * and then issues eight QPEL_H264V_XMM steps, rotating the six register names
 * xmm0..xmm5 by one position per step. When h == 16 a second asm statement
 * continues the same rotation for eight more steps.
 *
 * asm operands (both statements): %0 = src, %1 = dst, %2 = srcStride,
 * %3 = dstStride, %4 = ff_pw_5, %5 = ff_pw_16. Only %0 and %2 appear in the
 * instruction text written out here; the remaining operands are presumably
 * consumed inside QPEL_H264V_XMM / OP, which are defined elsewhere in the file.
 *
 * NOTE(review): neither asm statement lists any xmm register as clobbered,
 * and the second statement starts directly with a filter step without
 * reloading rows, so it appears to rely on the xmm contents (and the updated
 * src/dst) left behind by the first statement - confirm the compiler cannot
 * emit SSE code between the two.
 */
791 #define QPEL_H264_V_XMM(OPNAME, OP, MMX)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
792 static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
793 /* start reading two rows above the first output row */ src -= 2*srcStride;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
794 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
795 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
796 /* xmm7 = 0, used below as the zero half for byte-to-word unpacking */ "pxor %%xmm7, %%xmm7 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
797 /* load 8 bytes from each of five consecutive rows into xmm0..xmm4, stepping src by srcStride after each load */ "movq (%0), %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
798 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
799 "movq (%0), %%xmm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
800 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
801 "movq (%0), %%xmm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
802 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
803 "movq (%0), %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
804 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
805 "movq (%0), %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
806 "add %2, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
807 /* interleave each row with the zero register: 8 bytes become 8 zero-extended 16-bit lanes */ "punpcklbw %%xmm7, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
808 "punpcklbw %%xmm7, %%xmm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
809 "punpcklbw %%xmm7, %%xmm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
810 "punpcklbw %%xmm7, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
811 "punpcklbw %%xmm7, %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
812 /* eight filter steps; the six register arguments rotate left by one on every step */ QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
813 QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
814 QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
815 QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
816 QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
817 QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
818 QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
819 QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
820 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
821 /* src and dst are read-write operands, so the C variables keep the advanced pointers afterwards */ : "+a"(src), "+c"(dst)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
822 : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
823 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
824 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
825 /* 16-row case: eight more steps, continuing the register rotation where the first statement stopped */ if(h==16){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
826 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
827 QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
828 QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
829 QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
830 QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
831 QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
832 QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
833 QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
834 QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
835 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
836 : "+a"(src), "+c"(dst)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
837 : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
838 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
839 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
840 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
841 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
842 static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
843 /* 8-wide block: a single worker call with h = 8 */ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
844 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
845 static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
846 /* 16-wide block: the worker handles 8 columns per call, so run it for columns 0..7 and then 8..15, both with h = 16 */ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
847 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
848 } |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
849 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * First pass of the combined horizontal+vertical lowpass (SSE2).
 *
 * Walks the source in strips of 8 columns. For each strip it loads five
 * consecutive rows of 8 bytes, widens them to 16-bit lanes, and issues
 * QPEL_H264HV_XMM steps (8 of them, or 16 when size == 16) whose last
 * argument grows by 48 per step. QPEL_H264HV_XMM is defined elsewhere;
 * presumably that argument is the byte offset into tmp at which the step
 * stores its row of 16-bit results - confirm against the macro.
 *
 * tmp        16-bit intermediate buffer, passed to the asm in the "c" register
 * src        source pixels; reading starts 2 rows above and 2 bytes left of it
 * tmpStride  not referenced in this body (row spacing is the constant 48)
 * srcStride  byte distance between source rows
 * size       8 or 16 are the cases handled explicitly (see the size==16 branch)
 *
 * asm operands: %0 = src, %1 = tmp, %2 = srcStride, %3 = ff_pw_5,
 * %4 = ff_pw_16.
 *
 * NOTE(review): no xmm register is declared as clobbered, and the second asm
 * statement appears to depend on the xmm contents and src value left by the
 * first - confirm the compiler cannot emit SSE code between the two.
 */
850 static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* number of 8-column strips: 2 when size is 8, 3 when size is 16 */
851 int w = (size+8)>>3; |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* back up two rows and two bytes so the filter taps above/left are available */
852 src -= 2*srcStride+2; |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
853 while(w--){ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
854 __asm__ volatile( |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* xmm7 = 0, the zero half for the byte-to-word unpacks below */
855 "pxor %%xmm7, %%xmm7 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* load 8 bytes from each of five consecutive rows into xmm0..xmm4,
 * advancing src by srcStride after every load */
856 "movq (%0), %%xmm0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
857 "add %2, %0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
858 "movq (%0), %%xmm1 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
859 "add %2, %0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
860 "movq (%0), %%xmm2 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
861 "add %2, %0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
862 "movq (%0), %%xmm3 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
863 "add %2, %0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
864 "movq (%0), %%xmm4 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
865 "add %2, %0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* interleave with zero: each row's 8 bytes become 8 zero-extended words */
866 "punpcklbw %%xmm7, %%xmm0 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
867 "punpcklbw %%xmm7, %%xmm1 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
868 "punpcklbw %%xmm7, %%xmm2 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
869 "punpcklbw %%xmm7, %%xmm3 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
870 "punpcklbw %%xmm7, %%xmm4 \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* eight steps: register names rotate left by one per step, and the last
 * argument advances by 48 per step (0*48 .. 7*48) */
871 QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 0*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
872 QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 1*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
873 QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 2*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
874 QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 3*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
875 QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 4*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
876 QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 5*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
877 QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 6*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
878 QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 7*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* src is read-write, so the C variable keeps the advanced pointer */
879 : "+a"(src) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* tmp is an input-only pointer; any stores made through it are covered
 * by the "memory" clobber below */
880 : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
881 : "memory" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
882 ); |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* 16-high case: eight more steps (8*48 .. 15*48), continuing the register
 * rotation where the first statement stopped */
883 if(size==16){ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
884 __asm__ volatile( |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
885 QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 8*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
886 QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 9*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
887 QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
888 QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 11*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
889 QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 12*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
890 QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 13*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
891 QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 14*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
892 QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 15*48) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
893 : "+a"(src) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
894 : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
895 : "memory" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
896 ); |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
897 } |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* next strip: 8 int16_t entries further along in tmp */
898 tmp += 8; |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* 8 bytes to the right and (size+5) rows back up; 5 row advances are
 * visible above, the remaining `size` are presumably one per
 * QPEL_H264HV_XMM step - confirm against the macro */
899 src += 8 - (size+5)*srcStride; |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
900 } |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
901 } |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
902 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
903 #define QPEL_H264_HV2_XMM(OPNAME, OP, MMX)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
904 static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
905 int h = size;\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
906 if(size == 16){\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
907 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
908 "1: \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
909 "movdqa 32(%0), %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
910 "movdqa 16(%0), %%xmm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
911 "movdqa (%0), %%xmm7 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
912 "movdqa %%xmm4, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
913 "movdqa %%xmm4, %%xmm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
914 "movdqa %%xmm4, %%xmm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
915 "movdqa %%xmm4, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
916 "palignr $10, %%xmm5, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
917 "palignr $8, %%xmm5, %%xmm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
918 "palignr $6, %%xmm5, %%xmm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
919 "palignr $4, %%xmm5, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
920 "palignr $2, %%xmm5, %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
921 "paddw %%xmm5, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
922 "paddw %%xmm4, %%xmm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
923 "paddw %%xmm3, %%xmm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
924 "movdqa %%xmm5, %%xmm6 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
925 "movdqa %%xmm5, %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
926 "movdqa %%xmm5, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
927 "palignr $8, %%xmm7, %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
928 "palignr $2, %%xmm7, %%xmm6 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
929 "palignr $10, %%xmm7, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
930 "paddw %%xmm6, %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
931 "movdqa %%xmm5, %%xmm6 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
932 "palignr $6, %%xmm7, %%xmm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
933 "palignr $4, %%xmm7, %%xmm6 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
934 "paddw %%xmm7, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
935 "paddw %%xmm6, %%xmm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
936 \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
937 "psubw %%xmm1, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
938 "psubw %%xmm4, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
939 "psraw $2, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
940 "psraw $2, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
941 "psubw %%xmm1, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
942 "psubw %%xmm4, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
943 "paddw %%xmm2, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
944 "paddw %%xmm5, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
945 "psraw $2, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
946 "psraw $2, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
947 "paddw %%xmm2, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
948 "paddw %%xmm5, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
949 "psraw $6, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
950 "psraw $6, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
951 "packuswb %%xmm0, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
952 OP(%%xmm3, (%1), %%xmm7, dqa)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
953 "add $48, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
954 "add %3, %1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
955 "decl %2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
956 " jnz 1b \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
957 : "+a"(tmp), "+c"(dst), "+g"(h)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
958 : "S"((x86_reg)dstStride)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
959 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
960 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
961 }else{\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
962 __asm__ volatile(\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
963 "1: \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
964 "movdqa 16(%0), %%xmm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
965 "movdqa (%0), %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
966 "movdqa %%xmm1, %%xmm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
967 "movdqa %%xmm1, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
968 "movdqa %%xmm1, %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
969 "movdqa %%xmm1, %%xmm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
970 "palignr $10, %%xmm0, %%xmm5 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
971 "palignr $8, %%xmm0, %%xmm4 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
972 "palignr $6, %%xmm0, %%xmm3 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
973 "palignr $4, %%xmm0, %%xmm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
974 "palignr $2, %%xmm0, %%xmm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
975 "paddw %%xmm5, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
976 "paddw %%xmm4, %%xmm1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
977 "paddw %%xmm3, %%xmm2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
978 "psubw %%xmm1, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
979 "psraw $2, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
980 "psubw %%xmm1, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
981 "paddw %%xmm2, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
982 "psraw $2, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
983 "paddw %%xmm2, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
984 "psraw $6, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
985 "packuswb %%xmm0, %%xmm0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
986 OP(%%xmm0, (%1), %%xmm7, q)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
987 "add $48, %0 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
988 "add %3, %1 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
989 "decl %2 \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
990 " jnz 1b \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
991 : "+a"(tmp), "+c"(dst), "+g"(h)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
992 : "S"((x86_reg)dstStride)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
993 : "memory"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
994 );\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
995 }\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
996 } |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
997 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * QPEL_H264_HV_XMM(OPNAME, OP, MMX)
 *
 * Generates the combined "hv" lowpass entry points for one instruction-set
 * flavour (MMX is pasted into the function names):
 *
 *   OPNAME h264_qpel8or16_hv_lowpass_MMX  - shared worker, block size given
 *                                           by the trailing `size` argument
 *   OPNAME h264_qpel8_hv_lowpass_MMX      - calls the worker with size 8
 *   OPNAME h264_qpel16_hv_lowpass_MMX     - calls the worker with size 16
 *
 * The worker runs two passes through the caller-supplied `tmp` buffer:
 * first put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, ...) - always the
 * put_/sse2 variant, independent of OPNAME and MMX - and then the
 * OPNAME/MMX specific hv2 routine with (dst, tmp, ...).
 *
 * OP is accepted for signature parity with the other QPEL_* generators but
 * is not referenced by this macro.
 */
#define QPEL_H264_HV_XMM(OPNAME, OP, MMX)                                      \
static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(           \
        uint8_t *dst, int16_t *tmp, uint8_t *src,                              \
        int dstStride, int tmpStride, int srcStride, int size)                 \
{                                                                              \
    /* pass 1: src -> tmp */                                                   \
    put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size); \
    /* pass 2: tmp -> dst */                                                   \
    OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride,          \
                                                 tmpStride, size);             \
}                                                                              \
                                                                               \
static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(                           \
        uint8_t *dst, int16_t *tmp, uint8_t *src,                              \
        int dstStride, int tmpStride, int srcStride)                           \
{                                                                              \
    OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride,      \
                                                tmpStride, srcStride, 8);      \
}                                                                              \
                                                                               \
static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(                          \
        uint8_t *dst, int16_t *tmp, uint8_t *src,                              \
        int dstStride, int tmpStride, int srcStride)                           \
{                                                                              \
    OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride,      \
                                                tmpStride, srcStride, 16);     \
}
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1009 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * Name aliases for the SSE2 / SSSE3 instantiations.
 *
 * The H264_MC_* generators build callee names by token pasting
 * (OPNAME ## ... ## MMX).  For the helpers below no dedicated SSE2/SSSE3
 * function is referenced; the pasted names are redirected to an existing
 * implementation instead.
 */

/* Two-source pixel helpers: SSE2 and SSSE3 names -> MMX2 functions. */
#define put_pixels8_l2_sse2                  put_pixels8_l2_mmx2
#define avg_pixels8_l2_sse2                  avg_pixels8_l2_mmx2
#define put_pixels16_l2_sse2                 put_pixels16_l2_mmx2
#define avg_pixels16_l2_sse2                 avg_pixels16_l2_mmx2
#define put_pixels8_l2_ssse3                 put_pixels8_l2_mmx2
#define avg_pixels8_l2_ssse3                 avg_pixels8_l2_mmx2
#define put_pixels16_l2_ssse3                put_pixels16_l2_mmx2
#define avg_pixels16_l2_ssse3                avg_pixels16_l2_mmx2

/* "shift5" variants of the two-source helpers: SSE2 and SSSE3 -> MMX2. */
#define put_pixels8_l2_shift5_sse2           put_pixels8_l2_shift5_mmx2
#define avg_pixels8_l2_shift5_sse2           avg_pixels8_l2_shift5_mmx2
#define put_pixels16_l2_shift5_sse2          put_pixels16_l2_shift5_mmx2
#define avg_pixels16_l2_shift5_sse2          avg_pixels16_l2_shift5_mmx2
#define put_pixels8_l2_shift5_ssse3          put_pixels8_l2_shift5_mmx2
#define avg_pixels8_l2_shift5_ssse3          avg_pixels8_l2_shift5_mmx2
#define put_pixels16_l2_shift5_ssse3         put_pixels16_l2_shift5_mmx2
#define avg_pixels16_l2_shift5_ssse3         avg_pixels16_l2_shift5_mmx2

/* Horizontal lowpass + average ("h_lowpass_l2"): SSE2 -> MMX2. */
#define put_h264_qpel8_h_lowpass_l2_sse2     put_h264_qpel8_h_lowpass_l2_mmx2
#define avg_h264_qpel8_h_lowpass_l2_sse2     avg_h264_qpel8_h_lowpass_l2_mmx2
#define put_h264_qpel16_h_lowpass_l2_sse2    put_h264_qpel16_h_lowpass_l2_mmx2
#define avg_h264_qpel16_h_lowpass_l2_sse2    avg_h264_qpel16_h_lowpass_l2_mmx2

/* Vertical lowpass: SSSE3 -> SSE2. */
#define put_h264_qpel8_v_lowpass_ssse3       put_h264_qpel8_v_lowpass_sse2
#define avg_h264_qpel8_v_lowpass_ssse3       avg_h264_qpel8_v_lowpass_sse2
#define put_h264_qpel16_v_lowpass_ssse3      put_h264_qpel16_v_lowpass_sse2
#define avg_h264_qpel16_v_lowpass_ssse3      avg_h264_qpel16_v_lowpass_sse2

/* Second hv pass: SSE2 -> MMX2. */
#define put_h264_qpel8or16_hv2_lowpass_sse2  put_h264_qpel8or16_hv2_lowpass_mmx2
#define avg_h264_qpel8or16_hv2_lowpass_sse2  avg_h264_qpel8or16_hv2_lowpass_mmx2
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1040 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * H264_MC(OPNAME, SIZE, MMX, ALIGN)
 *
 * Umbrella generator: expands the four per-direction generators
 * (H264_MC_C, H264_MC_V, H264_MC_H, H264_MC_HV) with the same arguments,
 * producing the complete set of OPNAME h264_qpelSIZE_mcXY_MMX functions.
 */
#define H264_MC(OPNAME, SIZE, MMX, ALIGN)   \
    H264_MC_C (OPNAME, SIZE, MMX, ALIGN)    \
    H264_MC_V (OPNAME, SIZE, MMX, ALIGN)    \
    H264_MC_H (OPNAME, SIZE, MMX, ALIGN)    \
    H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1046 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/**
 * 16x16 "mc00" put for SSE2.
 *
 * Adapts put_pixels16_sse2() to the three-argument (dst, src, stride)
 * signature used by the mcXY functions, passing 16 as the final argument
 * (presumably the number of rows - confirm against put_pixels16_sse2).
 */
static void put_h264_qpel16_mc00_sse2(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_sse2(dst, src, stride, 16);
}
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/**
 * 16x16 "mc00" avg for SSE2.
 *
 * Adapts avg_pixels16_sse2() to the three-argument (dst, src, stride)
 * signature used by the mcXY functions, passing 16 as the final argument
 * (presumably the number of rows - confirm against avg_pixels16_sse2).
 */
static void avg_h264_qpel16_mc00_sse2(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_sse2(dst, src, stride, 16);
}
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* 8x8 "mc00" for SSE2: the SSE2 names are redirected to the MMX2 functions. */
#define put_h264_qpel8_mc00_sse2    put_h264_qpel8_mc00_mmx2
#define avg_h264_qpel8_mc00_sse2    avg_h264_qpel8_mc00_mmx2
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1055 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * H264_MC_C(OPNAME, SIZE, MMX, ALIGN)
 *
 * Generates OPNAME h264_qpelSIZE_mc00_MMX(dst, src, stride), which simply
 * forwards to OPNAME pixelsSIZE_MMX(dst, src, stride, SIZE).
 * ALIGN is unused here; it is accepted so that all H264_MC_* generators
 * share one parameter list.
 */
#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN)                                 \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX(uint8_t *dst,      \
                                                         uint8_t *src,      \
                                                         int stride)        \
{                                                                           \
    OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);           \
}
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1060 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * H264_MC_H(OPNAME, SIZE, MMX, ALIGN)
 *
 * Generates the three horizontal-only functions
 * OPNAME h264_qpelSIZE_mc{10,20,30}_MMX(dst, src, stride):
 *
 *   mc10 - h_lowpass_l2 with second source `src`
 *   mc20 - plain h_lowpass
 *   mc30 - h_lowpass_l2 with second source `src + 1`
 *
 * ALIGN is unused here; it is accepted so that all H264_MC_* generators
 * share one parameter list.
 */
#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN)                                    \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst,         \
                                                         uint8_t *src,         \
                                                         int stride)           \
{                                                                              \
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src,        \
                                                         stride, stride);      \
}                                                                              \
                                                                               \
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst,         \
                                                         uint8_t *src,         \
                                                         int stride)           \
{                                                                              \
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src,                \
                                                      stride, stride);         \
}                                                                              \
                                                                               \
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst,         \
                                                         uint8_t *src,         \
                                                         int stride)           \
{                                                                              \
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1,      \
                                                         stride, stride);      \
}
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1073 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * H264_MC_V(OPNAME, SIZE, MMX, ALIGN)
 *
 * Generates the three vertical-only functions
 * OPNAME h264_qpelSIZE_mc{01,02,03}_MMX(dst, src, stride):
 *
 *   mc01 - put_ v_lowpass into a local SIZE*SIZE buffer (stride SIZE),
 *          then OPNAME pixelsSIZE_l2 of `src` and that buffer
 *   mc02 - OPNAME v_lowpass straight into dst
 *   mc03 - as mc01, but the l2 step uses `src + stride`
 *
 * The local buffer is declared with DECLARE_ALIGNED(ALIGN, ...).
 */
#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN)                                    \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst,         \
                                                         uint8_t *src,         \
                                                         int stride)           \
{                                                                              \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];                          \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);      \
    OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp,                    \
                                            stride, stride, SIZE);             \
}                                                                              \
                                                                               \
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst,         \
                                                         uint8_t *src,         \
                                                         int stride)           \
{                                                                              \
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src,                \
                                                      stride, stride);         \
}                                                                              \
                                                                               \
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst,         \
                                                         uint8_t *src,         \
                                                         int stride)           \
{                                                                              \
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];                          \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);      \
    OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp,             \
                                            stride, stride, SIZE);             \
}
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1090 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/*
 * H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)
 *
 * Generates the nine functions that combine horizontal and vertical
 * filtering, OPNAME h264_qpelSIZE_mcXY_MMX(dst, src, stride):
 *
 *   mc11/mc31/mc13/mc33
 *       put_ v_lowpass of `src` (mc11, mc13) or `src+1` (mc31, mc33) into a
 *       local SIZE*SIZE byte buffer, then OPNAME h_lowpass_l2 of `src`
 *       (mc11, mc31) or `src+stride` (mc13, mc33) with that buffer.
 *   mc22
 *       OPNAME hv_lowpass straight into dst, using a local int16_t scratch
 *       buffer of SIZE*(SIZE<8?12:24) elements.
 *   mc21/mc23
 *       put_ hv_lowpass into halfHV, then OPNAME h_lowpass_l2 of `src`
 *       (mc21) or `src+stride` (mc23) with halfHV.
 *   mc12/mc32
 *       put_ hv_lowpass into halfHV, then OPNAME pixelsSIZE_l2_shift5 of
 *       `halfV+2` (mc12) or `halfV+3` (mc32) with halfHV.
 *
 * For mc21/mc23/mc12/mc32 a single byte buffer is split in two: the first
 * SIZE*SIZE bytes are halfHV, the remainder is reinterpreted as the int16_t
 * scratch area halfV that hv_lowpass uses as its `tmp` argument.
 *
 * Changes relative to the previous revision of this macro:
 *   - mc22 declares its scratch buffer as int16_t (was uint16_t) so that it
 *     matches the `int16_t *tmp` parameter of the hv_lowpass functions
 *     without a pointer-signedness mismatch.
 *   - the alignment asserts cast through uintptr_t instead of int; a
 *     pointer-to-int cast truncates on LP64 targets and triggers a
 *     compiler warning.  The low three bits tested are the same either way.
 */
#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    /* int16_t to match the `int16_t *tmp` parameter of hv_lowpass */\
    DECLARE_ALIGNED(ALIGN, int16_t, temp)[SIZE*(SIZE<8?12:24)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    /* layout: [SIZE*SIZE bytes halfHV][int16_t scratch halfV] */\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    assert(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    /* layout: [SIZE*SIZE bytes halfHV][int16_t scratch halfV] */\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    assert(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    /* layout: [SIZE*SIZE bytes halfHV][int16_t scratch halfV] */\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    assert(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    /* halfV+2: offset in int16_t elements into the hv scratch data */\
    OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    /* layout: [SIZE*SIZE bytes halfHV][int16_t scratch halfV] */\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    assert(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
    /* halfV+3: offset in int16_t elements into the hv scratch data */\
    OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
}
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1156 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* Expands H264_MC six times for the suffix given as MMX: put_ and avg_, each at sizes 4, 8 and 16.
 * Every expansion passes 8 as the last argument (presumably the alignment - confirm against H264_MC). */
1157 #define H264_MC_4816(MMX)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1158 H264_MC(put_, 4, MMX, 8)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1159 H264_MC(put_, 8, MMX, 8)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1160 H264_MC(put_, 16,MMX, 8)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1161 H264_MC(avg_, 4, MMX, 8)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1162 H264_MC(avg_, 8, MMX, 8)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1163 H264_MC(avg_, 16,MMX, 8)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1164 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* Expands the macro passed as QPEL four times for the suffix given as XMM: put_ and avg_, each at sizes 8 and 16.
 * Every expansion passes 16 as the last argument (presumably the alignment - confirm against the QPEL macros). */
1165 #define H264_MC_816(QPEL, XMM)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1166 QPEL(put_, 8, XMM, 16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1167 QPEL(put_, 16,XMM, 16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1168 QPEL(avg_, 8, XMM, 16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1169 QPEL(avg_, 16,XMM, 16)\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1170 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1171 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* Builds a three-instruction asm string from the stringified arguments:
 *   mov<size> b, temp ; pavgusb temp, a ; mov<size> a, b
 * Assuming AT&T operand order (source first), this loads b into temp, averages temp into a with the
 * 3DNow! packed byte average, and stores a back to b - TODO confirm against the enclosing asm blocks.
 * size is pasted directly after "mov" to select the move width. */
1172 #define AVG_3DNOW_OP(a,b,temp, size) \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1173 "mov" #size " " #b ", " #temp " \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1174 "pavgusb " #temp ", " #a " \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1175 "mov" #size " " #a ", " #b " \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* Same three-instruction asm string as AVG_3DNOW_OP, but using pavgb as the averaging instruction:
 *   mov<size> b, temp ; pavgb temp, a ; mov<size> a, b
 * Assuming AT&T operand order (source first), this loads b into temp, averages temp into a, and
 * stores a back to b - TODO confirm against the enclosing asm blocks.
 * size is pasted directly after "mov" to select the move width. */
1176 #define AVG_MMX2_OP(a,b,temp, size) \ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1177 "mov" #size " " #b ", " #temp " \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1178 "pavgb " #temp ", " #a " \n\t"\ |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1179 "mov" #size " " #a ", " #b " \n\t" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1180 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* 3dnow expansions: PAVGB is the string "pavgusb" while QPEL_H264 is expanded for put_ and avg_. */
1181 #define PAVGB "pavgusb" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1182 QPEL_H264(put_, PUT_OP, 3dnow) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1183 QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1184 #undef PAVGB |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* From here until the final #undef, PAVGB is the string "pavgb"; this covers the mmx2, sse2 and ssse3 expansions. */
1185 #define PAVGB "pavgb" |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1186 QPEL_H264(put_, PUT_OP, mmx2) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1187 QPEL_H264(avg_, AVG_MMX2_OP, mmx2) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* sse2 expansions use only the _V_XMM and _HV_XMM generators; their avg_ forms reuse AVG_MMX2_OP. */
1188 QPEL_H264_V_XMM(put_, PUT_OP, sse2) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1189 QPEL_H264_V_XMM(avg_, AVG_MMX2_OP, sse2) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1190 QPEL_H264_HV_XMM(put_, PUT_OP, sse2) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1191 QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, sse2) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* ssse3 expansions (_H_XMM, _HV2_XMM, _HV_XMM) are compiled only when HAVE_SSSE3 is nonzero; avg_ again reuses AVG_MMX2_OP. */
1192 #if HAVE_SSSE3 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1193 QPEL_H264_H_XMM(put_, PUT_OP, ssse3) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1194 QPEL_H264_H_XMM(avg_, AVG_MMX2_OP, ssse3) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1195 QPEL_H264_HV2_XMM(put_, PUT_OP, ssse3) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1196 QPEL_H264_HV2_XMM(avg_, AVG_MMX2_OP, ssse3) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1197 QPEL_H264_HV_XMM(put_, PUT_OP, ssse3) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1198 QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, ssse3) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1199 #endif |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* PAVGB is removed again so that it does not remain defined for the code that follows. */
1200 #undef PAVGB |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1201 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
/* Expand the H264_MC wrapper generators per instruction-set suffix:
 * 3dnow and mmx2 go through H264_MC_4816 (sizes 4, 8 and 16);
 * sse2 goes through H264_MC_816 (sizes 8 and 16) with the H264_MC_V and H264_MC_HV generators;
 * ssse3 goes through H264_MC_816 with H264_MC_H and H264_MC_HV, only when HAVE_SSSE3 is nonzero. */
1202 H264_MC_4816(3dnow) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1203 H264_MC_4816(mmx2) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1204 H264_MC_816(H264_MC_V, sse2) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1205 H264_MC_816(H264_MC_HV, sse2) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1206 #if HAVE_SSSE3 |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1207 H264_MC_816(H264_MC_H, ssse3) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1208 H264_MC_816(H264_MC_HV, ssse3) |
3941687b4fa9
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents:
diff
changeset
|
1209 #endif |