Mercurial > libavcodec.hg
annotate x86/h264_weight.asm @ 12485:b42e02e9bf2b libavcodec
Move AMR-NB frame unpacking code to a common file so it can be reused in
the AMR-WB decoder.
Patch by Marcelo Galvão Póvoa.
author | vitor |
---|---|
date | Fri, 10 Sep 2010 19:51:08 +0000 |
parents | 2982071047a2 |
children |
rev | line source |
---|---|
12451
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
1 ;***************************************************************************** |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
2 ;* SSE2-optimized weighted prediction code |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
3 ;***************************************************************************** |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
4 ;* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
5 ;* Copyright (C) 2010 Eli Friedman <eli.friedman@gmail.com> |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
6 ;* |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
7 ;* This file is part of FFmpeg. |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
8 ;* |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
9 ;* FFmpeg is free software; you can redistribute it and/or |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
10 ;* modify it under the terms of the GNU Lesser General Public |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
11 ;* License as published by the Free Software Foundation; either |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
12 ;* version 2.1 of the License, or (at your option) any later version. |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
13 ;* |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
14 ;* FFmpeg is distributed in the hope that it will be useful, |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
17 ;* Lesser General Public License for more details. |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
18 ;* |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
19 ;* You should have received a copy of the GNU Lesser General Public |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
20 ;* License along with FFmpeg; if not, write to the Free Software |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
22 ;****************************************************************************** |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
23 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
24 %include "x86inc.asm" |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
25 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
26 SECTION .text |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
27 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
28 ;----------------------------------------------------------------------------- |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
29 ; biweight pred: |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
30 ; |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
31 ; void h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride, |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
32 ; int log2_denom, int weightd, int weights, |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
33 ; int offset); |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
34 ; and |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
35 ; void h264_weight_16x16_sse2(uint8_t *dst, int stride, |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
36 ; int log2_denom, int weight, |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
37 ; int offset); |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
38 ;----------------------------------------------------------------------------- |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
39 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
40 %macro WEIGHT_SETUP 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
41 add r4, r4 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
42 inc r4 |
12457
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12453
diff
changeset
|
43 movd m3, r3d |
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12453
diff
changeset
|
44 movd m5, r4d |
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12453
diff
changeset
|
45 movd m6, r2d |
12451
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
46 pslld m5, m6 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
47 psrld m5, 1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
48 %if mmsize == 16 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
49 pshuflw m3, m3, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
50 pshuflw m5, m5, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
51 punpcklqdq m3, m3 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
52 punpcklqdq m5, m5 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
53 %else |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
54 pshufw m3, m3, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
55 pshufw m5, m5, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
56 %endif |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
57 pxor m7, m7 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
58 %endmacro |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
59 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
60 %macro WEIGHT_OP 2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
61 movh m0, [r0+%1] |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
62 movh m1, [r0+%2] |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
63 punpcklbw m0, m7 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
64 punpcklbw m1, m7 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
65 pmullw m0, m3 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
66 pmullw m1, m3 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
67 paddsw m0, m5 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
68 paddsw m1, m5 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
69 psraw m0, m6 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
70 psraw m1, m6 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
71 packuswb m0, m1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
72 %endmacro |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
73 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
74 %macro WEIGHT_FUNC_DBL_MM 1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
75 cglobal h264_weight_16x%1_mmx2, 5, 5, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
76 WEIGHT_SETUP |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
77 mov r2, %1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
78 %if %1 == 16 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
79 .nextrow |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
80 WEIGHT_OP 0, 4 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
81 mova [r0 ], m0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
82 WEIGHT_OP 8, 12 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
83 mova [r0+8], m0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
84 add r0, r1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
85 dec r2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
86 jnz .nextrow |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
87 REP_RET |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
88 %else |
12452
2c28e7d75e9c
Unscrew breakage after my last commit because of symbol prefixes.
rbultje
parents:
12451
diff
changeset
|
89 jmp mangle(ff_h264_weight_16x16_mmx2.nextrow) |
12451
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
90 %endif |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
91 %endmacro |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
92 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
93 INIT_MMX |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
94 WEIGHT_FUNC_DBL_MM 16 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
95 WEIGHT_FUNC_DBL_MM 8 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
96 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
97 %macro WEIGHT_FUNC_MM 4 |
12453 | 98 cglobal h264_weight_%1x%2_%4, 7, 7, %3 |
12451
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
99 WEIGHT_SETUP |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
100 mov r2, %2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
101 %if %2 == 16 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
102 .nextrow |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
103 WEIGHT_OP 0, mmsize/2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
104 mova [r0], m0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
105 add r0, r1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
106 dec r2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
107 jnz .nextrow |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
108 REP_RET |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
109 %else |
12452
2c28e7d75e9c
Unscrew breakage after my last commit because of symbol prefixes.
rbultje
parents:
12451
diff
changeset
|
110 jmp mangle(ff_h264_weight_%1x16_%4.nextrow) |
12451
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
111 %endif |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
112 %endmacro |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
113 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
114 INIT_MMX |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
115 WEIGHT_FUNC_MM 8, 16, 0, mmx2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
116 WEIGHT_FUNC_MM 8, 8, 0, mmx2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
117 WEIGHT_FUNC_MM 8, 4, 0, mmx2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
118 INIT_XMM |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
119 WEIGHT_FUNC_MM 16, 16, 8, sse2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
120 WEIGHT_FUNC_MM 16, 8, 8, sse2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
121 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
122 %macro WEIGHT_FUNC_HALF_MM 5 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
123 cglobal h264_weight_%1x%2_%5, 5, 5, %4 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
124 WEIGHT_SETUP |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
125 mov r2, %2/2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
126 lea r3, [r1*2] |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
127 %if %2 == mmsize |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
128 .nextrow |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
129 WEIGHT_OP 0, r1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
130 movh [r0], m0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
131 %if mmsize == 16 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
132 movhps [r0+r1], m0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
133 %else |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
134 psrlq m0, 32 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
135 movh [r0+r1], m0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
136 %endif |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
137 add r0, r3 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
138 dec r2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
139 jnz .nextrow |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
140 REP_RET |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
141 %else |
12452
2c28e7d75e9c
Unscrew breakage after my last commit because of symbol prefixes.
rbultje
parents:
12451
diff
changeset
|
142 jmp mangle(ff_h264_weight_%1x%3_%5.nextrow) |
12451
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
143 %endif |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
144 %endmacro |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
145 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
146 INIT_MMX |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
147 WEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
148 WEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
149 WEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
150 INIT_XMM |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
151 WEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
152 WEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
153 WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
154 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
155 %macro BIWEIGHT_SETUP 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
156 add r6, 1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
157 or r6, 1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
158 add r3, 1 |
12457
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12453
diff
changeset
|
159 movd m3, r4d |
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12453
diff
changeset
|
160 movd m4, r5d |
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12453
diff
changeset
|
161 movd m5, r6d |
2982071047a2
Use "d" suffix for general-purpose registers used with movd.
reimar
parents:
12453
diff
changeset
|
162 movd m6, r3d |
12451
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
163 pslld m5, m6 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
164 psrld m5, 1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
165 %if mmsize == 16 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
166 pshuflw m3, m3, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
167 pshuflw m4, m4, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
168 pshuflw m5, m5, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
169 punpcklqdq m3, m3 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
170 punpcklqdq m4, m4 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
171 punpcklqdq m5, m5 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
172 %else |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
173 pshufw m3, m3, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
174 pshufw m4, m4, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
175 pshufw m5, m5, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
176 %endif |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
177 pxor m7, m7 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
178 %endmacro |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
179 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
180 %macro BIWEIGHT_STEPA 3 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
181 movh m%1, [r0+%3] |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
182 movh m%2, [r1+%3] |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
183 punpcklbw m%1, m7 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
184 punpcklbw m%2, m7 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
185 pmullw m%1, m3 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
186 pmullw m%2, m4 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
187 paddsw m%1, m%2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
188 %endmacro |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
189 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
190 %macro BIWEIGHT_STEPB 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
191 paddsw m0, m5 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
192 paddsw m1, m5 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
193 psraw m0, m6 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
194 psraw m1, m6 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
195 packuswb m0, m1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
196 %endmacro |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
197 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
198 %macro BIWEIGHT_FUNC_DBL_MM 1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
199 cglobal h264_biweight_16x%1_mmx2, 7, 7, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
200 BIWEIGHT_SETUP |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
201 mov r3, %1 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
202 %if %1 == 16 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
203 .nextrow |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
204 BIWEIGHT_STEPA 0, 1, 0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
205 BIWEIGHT_STEPA 1, 2, 4 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
206 BIWEIGHT_STEPB |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
207 mova [r0], m0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
208 BIWEIGHT_STEPA 0, 1, 8 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
209 BIWEIGHT_STEPA 1, 2, 12 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
210 BIWEIGHT_STEPB |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
211 mova [r0+8], m0 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
212 add r0, r2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
213 add r1, r2 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
214 dec r3 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
215 jnz .nextrow |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
216 REP_RET |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
217 %else |
12452
2c28e7d75e9c
Unscrew breakage after my last commit because of symbol prefixes.
rbultje
parents:
12451
diff
changeset
|
218 jmp mangle(ff_h264_biweight_16x16_mmx2.nextrow) |
12451
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
219 %endif |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
220 %endmacro |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
221 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
222 INIT_MMX |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
223 BIWEIGHT_FUNC_DBL_MM 16 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
224 BIWEIGHT_FUNC_DBL_MM 8 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
225 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; BIWEIGHT_FUNC_MM width, height, xmm_regs, cpu
;
; Defines h264_biweight_WIDTHxHEIGHT_CPU for block widths that are written
; with a single full-register store (mova of m0 to [r0]) per row.
;   %1  block width; only used to build the symbol name
;   %2  block height; loaded into r3 as the row counter
;   %3  forwarded as the last cglobal argument (the XMM register count in the
;       x86inc convention)
;   %4  CPU suffix of the symbol name (mmx2, sse2)
;
; Only the height-16 instance contains the row loop.  Every other height runs
; the same setup, loads its own row count into r3 and jumps into that loop, so
; the WIDTHx16 instance has to be instantiated for every width/CPU pair used.
; All instances of one width/CPU pair pass the same cglobal arguments, so the
; epilogue reached through the jump matches the prologue that was executed.
;
; Per row: BIWEIGHT_STEPA / BIWEIGHT_STEPB (defined elsewhere in this file)
; leave the result in m0, which is stored to [r0]; r0 and r1 then both
; advance by r2.
;
; NOTE(review): r2 is added to the pointers at full register width; confirm
; that callers pass the stride sign-extended on x86-64.
;-----------------------------------------------------------------------------
%macro BIWEIGHT_FUNC_MM 4
cglobal h264_biweight_%1x%2_%4, 7, 7, %3
    BIWEIGHT_SETUP
    mov        r3, %2                   ; r3 = rows left to process
%if %2 == 16
.nextrow:                               ; shared loop entry for all heights
    BIWEIGHT_STEPA 0, 1, 0
    BIWEIGHT_STEPA 1, 2, mmsize/2       ; presumably the second half of the row
    BIWEIGHT_STEPB
    mova       [r0], m0
    add        r0, r2
    add        r1, r2
    dec        r3
    jnz .nextrow
    REP_RET
%else
    ; Reuse the loop body emitted by the height-16 instance.
    jmp mangle(ff_h264_biweight_%1x16_%4.nextrow)
%endif
%endmacro

; 8-pixel-wide blocks: one MMX register per row.
INIT_MMX
BIWEIGHT_FUNC_MM  8, 16,  0, mmx2
BIWEIGHT_FUNC_MM  8,  8,  0, mmx2
BIWEIGHT_FUNC_MM  8,  4,  0, mmx2
; 16-pixel-wide blocks: one XMM register per row.
INIT_XMM
BIWEIGHT_FUNC_MM 16, 16,  8, sse2
BIWEIGHT_FUNC_MM 16,  8,  8, sse2
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
253 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; BIWEIGHT_FUNC_HALF_MM width, height, loop_height, xmm_regs, cpu
;
; Defines h264_biweight_WIDTHxHEIGHT_CPU for block widths that fill only half
; a vector register, so two rows are produced per loop iteration.
;   %1  block width; only used to build the symbol name
;   %2  block height; r3 is loaded with %2/2 (iterations, two rows each)
;   %3  height of the instance that owns the loop (the jump target)
;   %4  forwarded as the last cglobal argument (the XMM register count in the
;       x86inc convention)
;   %5  CPU suffix of the symbol name (mmx2, sse2)
;
; The loop is emitted by the instance whose height equals mmsize, while the
; other instances jump to the WIDTHx%3 symbol.  %3 therefore has to equal
; mmsize, which holds for every instantiation below.
;
; Per iteration: BIWEIGHT_STEPA / BIWEIGHT_STEPB (defined elsewhere in this
; file) leave two rows packed in m0; the low half goes to [r0], the high half
; to [r0+r2], and r0/r1 advance by r4 = 2*r2.
;
; NOTE(review): r2 is used at full register width in address arithmetic;
; confirm that callers pass the stride sign-extended on x86-64.
;-----------------------------------------------------------------------------
%macro BIWEIGHT_FUNC_HALF_MM 5
cglobal h264_biweight_%1x%2_%5, 7, 7, %4
    BIWEIGHT_SETUP
    mov        r3, %2/2                 ; r3 = row pairs left to process
    lea        r4, [r2*2]               ; r4 = distance between row pairs
%if %2 == mmsize
.nextrow:                               ; shared loop entry for all heights
    BIWEIGHT_STEPA 0, 1, 0
    BIWEIGHT_STEPA 1, 2, r2             ; second operand set comes from row +r2
    BIWEIGHT_STEPB
    movh       [r0], m0                 ; first row = low half of m0
%if mmsize == 16
    movhps     [r0+r2], m0              ; second row = high half of m0
%else
    psrlq      m0, 32                   ; bring the high half down first
    movh       [r0+r2], m0
%endif
    add        r0, r4
    add        r1, r4
    dec        r3
    jnz .nextrow
    REP_RET
%else
    ; Reuse the loop body emitted by the height-%3 instance.
    jmp mangle(ff_h264_biweight_%1x%3_%5.nextrow)
%endif
%endmacro

; 4-pixel-wide blocks: two rows per MMX register.
INIT_MMX
BIWEIGHT_FUNC_HALF_MM 4,  8,  8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4,  4,  8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4,  2,  8, 0, mmx2
; 8-pixel-wide blocks: two rows per XMM register.
INIT_XMM
BIWEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8,  8, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8,  4, 16, 8, sse2
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
289 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; BIWEIGHT_SSSE3_SETUP
;
; Builds the loop-invariant vectors consumed by BIWEIGHT_SSSE3_OP:
;   m4  byte pair (low byte of r4, low byte of r5) repeated in all 8 words
;   m5  low word of ((((r6 + 1) | 1) << (r3 + 1)) >> 1), repeated in all 8 words
;   m6  r3 + 1 in the low dword, used as the shift count
; Side effects: r6 = (r6 + 1) | 1, r3 = r3 + 1, m0 is used as scratch.
; presumably r4/r5 are the two weights, r6 the offset and r3 log2_denom of
; the H.264 bi-weighting formula - verify against the C prototype.
;-----------------------------------------------------------------------------
%macro BIWEIGHT_SSSE3_SETUP 0
    ; rounding seed
    add        r6, 1
    or         r6, 1
    movd       m5, r6d

    ; shift count
    add        r3, 1
    movd       m6, r3d

    ; finish the rounding term and broadcast its low word
    pslld      m5, m6
    psrld      m5, 1
    pshuflw    m5, m5, 0
    punpcklqdq m5, m5

    ; interleave the two multipliers bytewise and broadcast the pair
    movd       m4, r4d
    movd       m0, r5d
    punpcklbw  m4, m0
    pshuflw    m4, m4, 0
    punpcklqdq m4, m4
%endmacro
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
306 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
; Private helper: apply instruction %1 with source operand %2 to both
; accumulators, m0 first and m2 second.
%macro BIWEIGHT_SSSE3_OP_BOTH 2
    %1         m0, %2
    %1         m2, %2
%endmacro

;-----------------------------------------------------------------------------
; BIWEIGHT_SSSE3_OP
;
; In:  m0, m2  bytes interleaved by the caller (punpcklbw of two sources)
;      m4, m5, m6 as prepared by BIWEIGHT_SSSE3_SETUP
; Out: m0      results of m0 in the low half and of m2 in the high half,
;              packed to unsigned bytes with saturation
; Clobbers m2.
;-----------------------------------------------------------------------------
%macro BIWEIGHT_SSSE3_OP 0
    BIWEIGHT_SSSE3_OP_BOTH pmaddubsw, m4 ; unsigned byte * signed byte, pairs summed
    BIWEIGHT_SSSE3_OP_BOTH paddsw,    m5 ; add the rounding term (saturating)
    BIWEIGHT_SSSE3_OP_BOTH psraw,     m6 ; arithmetic shift by the m6 count
    packuswb   m0, m2
%endmacro
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
316 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; BIWEIGHT_SSSE3_16 height
;
; Defines h264_biweight_16xHEIGHT_ssse3.
;   %1  block height; loaded into r3 as the row counter
;
; Only the height-16 instance contains the row loop; other heights run the
; same setup, load their own row count and jump into that loop, so the 16x16
; instance has to be instantiated as well.
;
; Per row: 16 bytes at [r0] and 16 bytes at [r1] are interleaved bytewise
; (low 8 into m0, high 8 into m2), combined by BIWEIGHT_SSSE3_OP, and the 16
; result bytes are stored back to [r0] with mova.  r0 and r1 then advance by
; r2.
;
; NOTE(review): r2 is added to the pointers at full register width; confirm
; that callers pass the stride sign-extended on x86-64.
;-----------------------------------------------------------------------------
%macro BIWEIGHT_SSSE3_16 1
cglobal h264_biweight_16x%1_ssse3, 7, 7, 8
    BIWEIGHT_SSSE3_SETUP
    mov        r3, %1                   ; r3 = rows left to process

%if %1 == 16
.nextrow:                               ; shared loop entry for all heights
    movh       m0, [r0]
    movh       m2, [r0+8]
    movh       m3, [r1+8]
    ; The memory-operand form reads a full 16 bytes at [r1]; with the legacy
    ; SSE encoding that address has to be 16-byte aligned.
    punpcklbw  m0, [r1]
    punpcklbw  m2, m3
    BIWEIGHT_SSSE3_OP
    mova       [r0], m0
    add        r0, r2
    add        r1, r2
    dec        r3
    jnz .nextrow
    REP_RET
%else
    ; Reuse the loop body emitted by the height-16 instance.
    jmp mangle(ff_h264_biweight_16x16_ssse3.nextrow)
%endif
%endmacro

INIT_XMM
BIWEIGHT_SSSE3_16 16
BIWEIGHT_SSSE3_16  8
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
344 |
4c3e6ff1237e
Rename h264_weight_sse2.asm to h264_weight.asm; add 16x8/8x16/8x4 non-square
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; BIWEIGHT_SSSE3_8 height
;
; Defines h264_biweight_8xHEIGHT_ssse3, producing two 8-byte rows per loop
; iteration.
;   %1  block height; r3 is loaded with %1/2 (iterations, two rows each)
;
; Only the height-16 instance contains the loop; other heights run the same
; setup, load their own iteration count and jump into that loop, so the 8x16
; instance has to be instantiated as well.
;
; Per iteration: row 0 ([r0] with [r1]) is interleaved into m0 and row 1
; ([r0+r2] with [r1+r2]) into m2, BIWEIGHT_SSSE3_OP packs both results into
; m0, and the low/high halves are stored to [r0] and [r0+r2].  r0 and r1 then
; advance by r4 = 2*r2.
;
; NOTE(review): r2 is used at full register width in address arithmetic;
; confirm that callers pass the stride sign-extended on x86-64.
;-----------------------------------------------------------------------------
%macro BIWEIGHT_SSSE3_8 1
cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
    BIWEIGHT_SSSE3_SETUP
    mov        r3, %1/2                 ; r3 = row pairs left to process
    lea        r4, [r2*2]               ; r4 = distance between row pairs

%if %1 == 16
.nextrow:                               ; shared loop entry for all heights
    movh       m0, [r0]
    movh       m1, [r1]
    movh       m2, [r0+r2]
    movh       m3, [r1+r2]
    punpcklbw  m0, m1
    punpcklbw  m2, m3
    BIWEIGHT_SSSE3_OP
    movh       [r0], m0                 ; first row = low half of m0
    movhps     [r0+r2], m0              ; second row = high half of m0
    add        r0, r4
    add        r1, r4
    dec        r3
    jnz .nextrow
    REP_RET
%else
    ; Reuse the loop body emitted by the height-16 instance.
    jmp mangle(ff_h264_biweight_8x16_ssse3.nextrow)
%endif
%endmacro

INIT_XMM
BIWEIGHT_SSSE3_8 16
BIWEIGHT_SSSE3_8  8
BIWEIGHT_SSSE3_8  4