Mercurial > libavcodec.hg
comparison x86/h264_weight_sse2.asm @ 12367:06bdd447f4f7 libavcodec
Add file missing in r24702
author | darkshikari |
---|---|
date | Thu, 05 Aug 2010 00:49:48 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
12366:09a31ef6ed58 | 12367:06bdd447f4f7 |
---|---|
1 ;***************************************************************************** | |
2 ;* SSE2-optimized weighted prediction code | |
3 ;***************************************************************************** | |
4 ;* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt | |
5 ;* Copyright (C) 2010 Eli Friedman <eli.friedman@gmail.com> | |
6 ;* | |
7 ;* This file is part of FFmpeg. | |
8 ;* | |
9 ;* FFmpeg is free software; you can redistribute it and/or | |
10 ;* modify it under the terms of the GNU Lesser General Public | |
11 ;* License as published by the Free Software Foundation; either | |
12 ;* version 2.1 of the License, or (at your option) any later version. | |
13 ;* | |
14 ;* FFmpeg is distributed in the hope that it will be useful, | |
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 ;* Lesser General Public License for more details. | |
18 ;* | |
19 ;* You should have received a copy of the GNU Lesser General Public | |
20 ;* License along with FFmpeg; if not, write to the Free Software | |
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 ;****************************************************************************** | |
23 | |
24 %include "x86inc.asm" | |
25 | |
26 SECTION .text | |
27 INIT_XMM | |
28 | |
29 ;----------------------------------------------------------------------------- | |
30 ; biweight pred: | |
31 ; | |
32 ; void h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride, | |
33 ; int log2_denom, int weightd, int weights, | |
34 ; int offset); | |
35 ;----------------------------------------------------------------------------- | |
36 | |
; BIWEIGHT_SSE2_SETUP: build the per-call constants used by the SSE2 loops.
; Presumably r0..r6 hold the seven prototype arguments in order (dst, src,
; stride, log2_denom, weightd, weights, offset) via x86inc -- TODO confirm.
; On exit:
;   m3 = low 16 bits of r4, replicated into all 8 word lanes
;   m4 = low 16 bits of r5, replicated into all 8 word lanes
;   m5 = low 16 bits of ((((r6 + 1) | 1) << (r3 + 1)) >> 1), in all 8 word lanes
;   m6 = r3 + 1, used later as the shift count
;   m7 = 0
; Modifies r3 and r6 in place.
37 %macro BIWEIGHT_SSE2_SETUP 0 | |
; r6 = (r6 + 1) | 1
38 add r6, 1 | |
39 or r6, 1 | |
; r3 = r3 + 1; this becomes the shift count held in m6
40 add r3, 1 | |
41 movd m3, r4 | |
42 movd m4, r5 | |
43 movd m5, r6 | |
44 movd m6, r3 | |
; m5 = (r6 << r3) >> 1, logical shifts on dword lanes
45 pslld m5, m6 | |
46 psrld m5, 1 | |
; broadcast word 0 over the low four words ...
47 pshuflw m3, m3, 0 | |
48 pshuflw m4, m4, 0 | |
49 pshuflw m5, m5, 0 | |
; ... then copy the low quadword into the high quadword
50 punpcklqdq m3, m3 | |
51 punpcklqdq m4, m4 | |
52 punpcklqdq m5, m5 | |
; m7 = 0, the zero source for byte->word unpacking in BIWEIGHT_SSE2_STEPA
53 pxor m7, m7 | |
54 %endmacro | |
55 | |
; BIWEIGHT_SSE2_STEPA result, scratch, offset
;   %1 = number of the register that receives the result
;   %2 = number of a scratch register (clobbered)
;   %3 = offset (immediate or register) added to both r0 and r1
; Result: m%1 = sat16(lo16(bytes[r0+%3] * m3) + lo16(bytes[r1+%3] * m4)),
; one value per word lane.
; Requires m7 == 0 and m3/m4 as set up by BIWEIGHT_SSE2_SETUP.
56 %macro BIWEIGHT_SSE2_STEPA 3 | |
; movh is an x86inc alias; presumably an 8-byte load under INIT_XMM -- confirm
57 movh m%1, [r0+%3] | |
58 movh m%2, [r1+%3] | |
; interleave with zero (m7): widen the low 8 bytes to 8 words
59 punpcklbw m%1, m7 | |
60 punpcklbw m%2, m7 | |
; multiply by the broadcast factors, keeping the low 16 bits per lane
61 pmullw m%1, m3 | |
62 pmullw m%2, m4 | |
; signed saturating add of the two products
63 paddsw m%1, m%2 | |
64 %endmacro | |
65 | |
; BIWEIGHT_SSE2_STEPB: finish two word vectors and pack them to bytes.
; In:  m0, m1 = word sums from BIWEIGHT_SSE2_STEPA; m5 = additive term;
;      m6 = shift count.
; Out: m0 = 16 bytes; low 8 come from m0, high 8 from m1.
66 %macro BIWEIGHT_SSE2_STEPB 0 | |
; add the m5 term with signed saturation
67 paddsw m0, m5 | |
68 paddsw m1, m5 | |
; arithmetic shift right by the count in m6
69 psraw m0, m6 | |
70 psraw m1, m6 | |
; pack words to bytes with unsigned saturation (clamps to 0..255)
71 packuswb m0, m1 | |
72 %endmacro | |
73 | |
; h264_biweight_16x16_sse2: runs 16 iterations; each one reads 16 bytes at r0
; and at r1, combines them with the constants from BIWEIGHT_SSE2_SETUP, and
; writes the 16 result bytes back to [r0].
; NOTE(review): the prototype comment declares stride as int, yet r2 is added
; to the pointers at full register width -- confirm the upper bits are valid
; on x86-64.
74 cglobal h264_biweight_16x16_sse2, 7, 7, 8 | |
75 BIWEIGHT_SSE2_SETUP | |
; r3 was already copied into m6 by the setup, so it is reused as the row counter
76 mov r3, 16 | |
77 | |
78 .nextrow | |
; bytes 0..7 -> m0 (m1 is scratch here)
79 BIWEIGHT_SSE2_STEPA 0, 1, 0 | |
; bytes 8..15 -> m1 (m2 is scratch); must follow the line above since it overwrites m1
80 BIWEIGHT_SSE2_STEPA 1, 2, 8 | |
81 BIWEIGHT_SSE2_STEPB | |
; mova is an x86inc alias; presumably an aligned 16-byte store, so r0 must be 16-byte aligned -- confirm
82 mova [r0], m0 | |
; advance both pointers by r2
83 add r0, r2 | |
84 add r1, r2 | |
85 dec r3 | |
86 jnz .nextrow | |
87 REP_RET | |
88 | |
; h264_biweight_8x8_sse2: runs 4 iterations, each handling two rows (at offset
; 0 and at offset r2). Every row is combined from [r0] and [r1] with the
; constants from BIWEIGHT_SSE2_SETUP and written back through r0.
; NOTE(review): the prototype comment declares stride as int, yet r2 is used in
; addressing at full register width -- confirm the upper bits are valid on
; x86-64.
89 cglobal h264_biweight_8x8_sse2, 7, 7, 8 | |
90 BIWEIGHT_SSE2_SETUP | |
; r3 and r4 were consumed by the setup (into m6 and m3), so both are reused:
; r3 = iteration counter, r4 = 2 * r2 (pointer step for two rows)
91 mov r3, 4 | |
92 lea r4, [r2*2] | |
93 | |
94 .nextrow | |
; first row -> m0 (m1 is scratch)
95 BIWEIGHT_SSE2_STEPA 0, 1, 0 | |
; second row (offset r2) -> m1 (m2 is scratch)
96 BIWEIGHT_SSE2_STEPA 1, 2, r2 | |
97 BIWEIGHT_SSE2_STEPB | |
; low 8 bytes of m0 -> first row, high 8 bytes -> second row
98 movh [r0], m0 | |
99 movhps [r0+r2], m0 | |
; advance both pointers by two rows
100 add r0, r4 | |
101 add r1, r4 | |
102 dec r3 | |
103 jnz .nextrow | |
104 REP_RET | |
105 | |
; BIWEIGHT_SSSE3_SETUP: build the per-call constants used by the SSSE3 loops.
; Presumably r0..r6 hold the seven prototype arguments in order (dst, src,
; stride, log2_denom, weightd, weights, offset) via x86inc -- TODO confirm.
; On exit:
;   m4 = byte pair (low byte of r4, low byte of r5) repeated in all 8 word lanes
;   m5 = low 16 bits of ((((r6 + 1) | 1) << (r3 + 1)) >> 1), in all 8 word lanes
;   m6 = r3 + 1, used later as the shift count
; Modifies r3 and r6 in place; m0 is clobbered.
; NOTE(review): only the low byte of r4 and r5 survives, and pmaddubsw treats
; those bytes as signed -- assumes both factors fit in a signed byte; confirm.
106 %macro BIWEIGHT_SSSE3_SETUP 0 | |
; r6 = (r6 + 1) | 1
107 add r6, 1 | |
108 or r6, 1 | |
; r3 = r3 + 1; this becomes the shift count held in m6
109 add r3, 1 | |
110 movd m4, r4 | |
111 movd m0, r5 | |
112 movd m5, r6 | |
113 movd m6, r3 | |
; m5 = (r6 << r3) >> 1, logical shifts on dword lanes
114 pslld m5, m6 | |
115 psrld m5, 1 | |
; interleave bytes: word 0 of m4 becomes (low byte of r4, low byte of r5)
116 punpcklbw m4, m0 | |
; broadcast word 0 over the low four words ...
117 pshuflw m4, m4, 0 | |
118 pshuflw m5, m5, 0 | |
; ... then copy the low quadword into the high quadword
119 punpcklqdq m4, m4 | |
120 punpcklqdq m5, m5 | |
121 %endmacro | |
122 | |
; BIWEIGHT_SSSE3_OP: combine and pack two vectors of interleaved byte pairs.
; In:  m0, m2 = bytes interleaved as (byte from r0, byte from r1) pairs;
;      m4/m5/m6 as produced by BIWEIGHT_SSSE3_SETUP.
; Out: m0 = 16 bytes; low 8 come from m0, high 8 from m2. m2 is clobbered.
123 %macro BIWEIGHT_SSSE3_OP 0 | |
; per word lane: unsigned byte pair * signed byte pair in m4, summed with saturation
124 pmaddubsw m0, m4 | |
125 pmaddubsw m2, m4 | |
; add the m5 term with signed saturation
126 paddsw m0, m5 | |
127 paddsw m2, m5 | |
; arithmetic shift right by the count in m6
128 psraw m0, m6 | |
129 psraw m2, m6 | |
; pack words to bytes with unsigned saturation (clamps to 0..255)
130 packuswb m0, m2 | |
131 %endmacro | |
132 | |
; h264_biweight_16x16_ssse3: runs 16 iterations; each one interleaves 16 bytes
; from r0 with 16 bytes from r1, applies BIWEIGHT_SSSE3_OP, and writes the 16
; result bytes back to [r0].
; NOTE(review): the prototype comment declares stride as int, yet r2 is added
; to the pointers at full register width -- confirm the upper bits are valid
; on x86-64.
133 cglobal h264_biweight_16x16_ssse3, 7, 7, 8 | |
134 BIWEIGHT_SSSE3_SETUP | |
; r3 was already copied into m6 by the setup, so it is reused as the row counter
135 mov r3, 16 | |
136 | |
137 .nextrow | |
138 movh m0, [r0] | |
139 movh m2, [r0+8] | |
140 movh m3, [r1+8] | |
; memory-operand form reads 16 bytes at [r1]; legacy SSE encoding needs r1 16-byte aligned
141 punpcklbw m0, [r1] | |
142 punpcklbw m2, m3 | |
143 BIWEIGHT_SSSE3_OP | |
; mova is an x86inc alias; presumably an aligned 16-byte store -- confirm
144 mova [r0], m0 | |
; advance both pointers by r2
145 add r0, r2 | |
146 add r1, r2 | |
147 dec r3 | |
148 jnz .nextrow | |
149 REP_RET | |
150 | |
; h264_biweight_8x8_ssse3: runs 4 iterations, each handling two rows (at offset
; 0 and at offset r2). Bytes from r0 and r1 are interleaved, passed through
; BIWEIGHT_SSSE3_OP, and written back through r0.
; NOTE(review): the prototype comment declares stride as int, yet r2 is used in
; addressing at full register width -- confirm the upper bits are valid on
; x86-64.
151 cglobal h264_biweight_8x8_ssse3, 7, 7, 8 | |
152 BIWEIGHT_SSSE3_SETUP | |
; r3 and r4 were consumed by the setup (into m6 and m4), so both are reused:
; r3 = iteration counter, r4 = 2 * r2 (pointer step for two rows)
153 mov r3, 4 | |
154 lea r4, [r2*2] | |
155 | |
156 .nextrow | |
; first row: m0 <- [r0], m1 <- [r1]
157 movh m0, [r0] | |
158 movh m1, [r1] | |
; second row: m2 <- [r0+r2], m3 <- [r1+r2]
159 movh m2, [r0+r2] | |
160 movh m3, [r1+r2] | |
; interleave into (r0 byte, r1 byte) pairs, the layout BIWEIGHT_SSSE3_OP expects
161 punpcklbw m0, m1 | |
162 punpcklbw m2, m3 | |
163 BIWEIGHT_SSSE3_OP | |
; low 8 bytes of m0 -> first row, high 8 bytes -> second row
164 movh [r0], m0 | |
165 movhps [r0+r2], m0 | |
; advance both pointers by two rows
166 add r0, r4 | |
167 add r1, r4 | |
168 dec r3 | |
169 jnz .nextrow | |
170 REP_RET |