annotate i386/dsputil_mmx.h @ 7922:ed0ebbb168b6 libavcodec

Do not use the generic "alloc missing references" code for h.264 as it does not work correctly in that case. Fixes issue652.
author michael
date Thu, 25 Sep 2008 14:34:14 +0000
parents c4a4495715dd
children eebc7209c47f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5946
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
1 /*
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
2 * MMX optimized DSP utils
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
3 * Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
4 *
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
5 * This file is part of FFmpeg.
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
6 *
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
11 *
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
15 * Lesser General Public License for more details.
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
16 *
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
20 */
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
21
7760
c4a4495715dd Globally rename the header inclusion guard names.
stefano
parents: 7741
diff changeset
22 #ifndef AVCODEC_I386_DSPUTIL_MMX_H
c4a4495715dd Globally rename the header inclusion guard names.
stefano
parents: 7741
diff changeset
23 #define AVCODEC_I386_DSPUTIL_MMX_H
5946
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
24
6013
af8951e74098 add required include to make this file self-contained
aurel
parents: 5952
diff changeset
25 #include <stdint.h>
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 6557
diff changeset
26 #include "libavcodec/dsputil.h"
6013
af8951e74098 add required include to make this file self-contained
aurel
parents: 5952
diff changeset
27
6329
5969caa9190d clean up an ugliness introduced in r11826. this syntax will require fewer changes when adding future sse2 code.
lorenm
parents: 6320
diff changeset
28 typedef struct { uint64_t a, b; } xmm_t;
5969caa9190d clean up an ugliness introduced in r11826. this syntax will require fewer changes when adding future sse2 code.
lorenm
parents: 6320
diff changeset
29
5947
37a03989871b use ff_ prefix for extern vars
aurel
parents: 5946
diff changeset
30 extern const uint64_t ff_bone;
37a03989871b use ff_ prefix for extern vars
aurel
parents: 5946
diff changeset
31 extern const uint64_t ff_wtwo;
5946
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
32
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
33 extern const uint64_t ff_pdw_80000000[2];
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
34
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
35 extern const uint64_t ff_pw_3;
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
36 extern const uint64_t ff_pw_4;
6331
c57670e07668 ssse3 h264 motion compensation.
lorenm
parents: 6329
diff changeset
37 extern const xmm_t ff_pw_5;
7741
dbb5ab337349 Let ff_pw_8 be used as an SSE constant
conrad
parents: 7220
diff changeset
38 extern const xmm_t ff_pw_8;
5946
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
39 extern const uint64_t ff_pw_15;
6331
c57670e07668 ssse3 h264 motion compensation.
lorenm
parents: 6329
diff changeset
40 extern const xmm_t ff_pw_16;
5946
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
41 extern const uint64_t ff_pw_20;
6557
e1208c4f8898 h264 chroma mc ssse3
lorenm
parents: 6403
diff changeset
42 extern const xmm_t ff_pw_28;
6329
5969caa9190d clean up an ugliness introduced in r11826. this syntax will require fewer changes when adding future sse2 code.
lorenm
parents: 6320
diff changeset
43 extern const xmm_t ff_pw_32;
5946
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
44 extern const uint64_t ff_pw_42;
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
45 extern const uint64_t ff_pw_64;
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
46 extern const uint64_t ff_pw_96;
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
47 extern const uint64_t ff_pw_128;
6403
9a736918fd90 split encoding part of dsputil_mmx into its own file
aurel
parents: 6331
diff changeset
48 extern const uint64_t ff_pw_255;
5946
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
49
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
50 extern const uint64_t ff_pb_1;
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
51 extern const uint64_t ff_pb_3;
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
52 extern const uint64_t ff_pb_7;
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
53 extern const uint64_t ff_pb_3F;
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
54 extern const uint64_t ff_pb_A1;
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
55 extern const uint64_t ff_pb_FC;
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
56
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
57 extern const double ff_pd_1[2];
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
58 extern const double ff_pd_2[2];
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents:
diff changeset
59
7220
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
60 #define LOAD4(stride,in,a,b,c,d)\
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
61 "movq 0*"#stride"+"#in", "#a"\n\t"\
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
62 "movq 1*"#stride"+"#in", "#b"\n\t"\
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
63 "movq 2*"#stride"+"#in", "#c"\n\t"\
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
64 "movq 3*"#stride"+"#in", "#d"\n\t"
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
65
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
66 #define STORE4(stride,out,a,b,c,d)\
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
67 "movq "#a", 0*"#stride"+"#out"\n\t"\
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
68 "movq "#b", 1*"#stride"+"#out"\n\t"\
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
69 "movq "#c", 2*"#stride"+"#out"\n\t"\
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
70 "movq "#d", 3*"#stride"+"#out"\n\t"
a94b2cf78a2e Make LOAD4/STORE4 macros more generic.
benoit
parents: 6763
diff changeset
71
6135
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
72 /* in/out: mma=mma+mmb, mmb=mmb-mma (both right-hand sides use the input values of mma and mmb) */
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
73 #define SUMSUB_BA( a, b ) \
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
74 "paddw "#b", "#a" \n\t"\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
75 "paddw "#b", "#b" \n\t"\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
76 "psubw "#a", "#b" \n\t"
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
77
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
78 #define SBUTTERFLY(a,b,t,n,m)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
79 "mov" #m " " #a ", " #t " \n\t" /* abcd */\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
80 "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
81 "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
82
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
83 #define TRANSPOSE4(a,b,c,d,t)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
84 SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
85 SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
86 SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
87 SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
88
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
89 #ifdef ARCH_X86_64
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
90 // permutes 01234567 -> 05736421: transposed rows 0..7 end up in registers a,f,h,d,g,e,c,b (true for both variants below)
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
91 #define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
92 SBUTTERFLY(a,b,%%xmm8,wd,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
93 SBUTTERFLY(c,d,b,wd,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
94 SBUTTERFLY(e,f,d,wd,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
95 SBUTTERFLY(g,h,f,wd,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
96 SBUTTERFLY(a,c,h,dq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
97 SBUTTERFLY(%%xmm8,b,c,dq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
98 SBUTTERFLY(e,g,b,dq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
99 SBUTTERFLY(d,f,g,dq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
100 SBUTTERFLY(a,e,f,qdq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
101 SBUTTERFLY(%%xmm8,d,e,qdq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
102 SBUTTERFLY(h,b,d,qdq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
103 SBUTTERFLY(c,g,b,qdq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
104 "movdqa %%xmm8, "#g" \n\t"
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
105 #else
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
106 #define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
107 "movdqa "#h", "#t" \n\t"\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
108 SBUTTERFLY(a,b,h,wd,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
109 "movdqa "#h", 16"#t" \n\t"\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
110 "movdqa "#t", "#h" \n\t"\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
111 SBUTTERFLY(c,d,b,wd,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
112 SBUTTERFLY(e,f,d,wd,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
113 SBUTTERFLY(g,h,f,wd,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
114 SBUTTERFLY(a,c,h,dq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
115 "movdqa "#h", "#t" \n\t"\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
116 "movdqa 16"#t", "#h" \n\t"\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
117 SBUTTERFLY(h,b,c,dq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
118 SBUTTERFLY(e,g,b,dq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
119 SBUTTERFLY(d,f,g,dq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
120 SBUTTERFLY(a,e,f,qdq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
121 SBUTTERFLY(h,d,e,qdq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
122 "movdqa "#h", 16"#t" \n\t"\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
123 "movdqa "#t", "#h" \n\t"\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
124 SBUTTERFLY(h,b,d,qdq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
125 SBUTTERFLY(c,g,b,qdq,dqa)\
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
126 "movdqa 16"#t", "#g" \n\t"
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
127 #endif
1c67967d893b Factorize some duplicated code from CAVS and H.264 into a common file.
diego
parents: 6013
diff changeset
128
6403
9a736918fd90 split encoding part of dsputil_mmx into its own file
aurel
parents: 6331
diff changeset
129 #define MOVQ_WONE(regd) \
9a736918fd90 split encoding part of dsputil_mmx into its own file
aurel
parents: 6331
diff changeset
130 asm volatile ( \
9a736918fd90 split encoding part of dsputil_mmx into its own file
aurel
parents: 6331
diff changeset
131 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
9a736918fd90 split encoding part of dsputil_mmx into its own file
aurel
parents: 6331
diff changeset
132 "psrlw $15, %%" #regd ::)
9a736918fd90 split encoding part of dsputil_mmx into its own file
aurel
parents: 6331
diff changeset
133
9a736918fd90 split encoding part of dsputil_mmx into its own file
aurel
parents: 6331
diff changeset
134 void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
9a736918fd90 split encoding part of dsputil_mmx into its own file
aurel
parents: 6331
diff changeset
135
7760
c4a4495715dd Globally rename the header inclusion guard names.
stefano
parents: 7741
diff changeset
136 #endif /* AVCODEC_I386_DSPUTIL_MMX_H */