annotate i386/mpegvideo_mmx.c @ 1903:c94b19667d8b libavcodec

1000l for iive
author michael
date Mon, 22 Mar 2004 20:54:04 +0000
parents e31754bc5b65
children f65d87bfdd5a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
1 /*
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
2 * The simplest mpeg encoder (well, it was the simplest!)
429
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
3 * Copyright (c) 2000,2001 Fabrice Bellard.
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
4 *
429
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
5 * This library is free software; you can redistribute it and/or
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
6 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
7 * License as published by the Free Software Foundation; either
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
8 * version 2 of the License, or (at your option) any later version.
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
9 *
429
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
10 * This library is distributed in the hope that it will be useful,
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
13 * Lesser General Public License for more details.
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
14 *
429
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
15 * You should have received a copy of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
16 * License along with this library; if not, write to the Free Software
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
18 *
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
19 * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
20 * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
21 */
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
22
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
23 #include "../dsputil.h"
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
24 #include "../mpegvideo.h"
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
25 #include "../avcodec.h"
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
26
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 949
diff changeset
27 extern uint8_t zigzag_direct_noperm[64];
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 949
diff changeset
28 extern uint16_t inv_zigzag_direct16[64];
200
6ab301aaa652 (commit by michael)
arpi_esp
parents: 153
diff changeset
29
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
/* 64-bit constant with every bit set, aligned to 8 bytes. */
30 static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
/* 64-bit constant holding the 16-bit pattern 0x0001 repeated four times, aligned to 8 bytes. */
31 static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
32
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
33
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
34 static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
35 DCTELEM *block, int n, int qscale)
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
36 {
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
37 int level, qmul, qadd, nCoeffs;
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
38
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
39 qmul = qscale << 1;
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
40
1661
4c9fd29f1606 h263 slice structured mode
michael
parents: 1597
diff changeset
41 assert(s->block_last_index[n]>=0 || s->h263_aic);
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
42
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
43 if (!s->h263_aic) {
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
44 if (n < 4)
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
45 level = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
46 else
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
47 level = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
48 qadd = (qscale - 1) | 1;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
49 }else{
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
50 qadd = 0;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
51 level= block[0];
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
52 }
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
53 if(s->ac_pred)
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
54 nCoeffs=63;
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
55 else
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
56 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
200
6ab301aaa652 (commit by michael)
arpi_esp
parents: 153
diff changeset
57 //printf("%d %d ", qmul, qadd);
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
58 asm volatile(
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
59 "movd %1, %%mm6 \n\t" //qmul
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
60 "packssdw %%mm6, %%mm6 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
61 "packssdw %%mm6, %%mm6 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
62 "movd %2, %%mm5 \n\t" //qadd
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
63 "pxor %%mm7, %%mm7 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
64 "packssdw %%mm5, %%mm5 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
65 "packssdw %%mm5, %%mm5 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
66 "psubw %%mm5, %%mm7 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
67 "pxor %%mm4, %%mm4 \n\t"
153
acbd3bc999b3 Let loops will be aligned
nickols_k
parents: 145
diff changeset
68 ".balign 16\n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
69 "1: \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
70 "movq (%0, %3), %%mm0 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
71 "movq 8(%0, %3), %%mm1 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
72
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
73 "pmullw %%mm6, %%mm0 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
74 "pmullw %%mm6, %%mm1 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
75
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
76 "movq (%0, %3), %%mm2 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
77 "movq 8(%0, %3), %%mm3 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
78
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
79 "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
80 "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
81
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
82 "pxor %%mm2, %%mm0 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
83 "pxor %%mm3, %%mm1 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
84
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
85 "paddw %%mm7, %%mm0 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
86 "paddw %%mm7, %%mm1 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
87
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
88 "pxor %%mm0, %%mm2 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
89 "pxor %%mm1, %%mm3 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
90
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
91 "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
92 "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
93
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
94 "pandn %%mm2, %%mm0 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
95 "pandn %%mm3, %%mm1 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
96
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
97 "movq %%mm0, (%0, %3) \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
98 "movq %%mm1, 8(%0, %3) \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
99
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
100 "addl $16, %3 \n\t"
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
101 "jng 1b \n\t"
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
102 ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
103 : "memory"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
104 );
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
105 block[0]= level;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
106 }
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
107
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
108
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
109 static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
110 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
111 {
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
112 int level, qmul, qadd, nCoeffs;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
113
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
114 qmul = qscale << 1;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
115 qadd = (qscale - 1) | 1;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
116
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
117 assert(s->block_last_index[n]>=0 || s->h263_aic);
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
118
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
119 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
120 //printf("%d %d ", qmul, qadd);
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
121 asm volatile(
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
122 "movd %1, %%mm6 \n\t" //qmul
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
123 "packssdw %%mm6, %%mm6 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
124 "packssdw %%mm6, %%mm6 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
125 "movd %2, %%mm5 \n\t" //qadd
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
126 "pxor %%mm7, %%mm7 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
127 "packssdw %%mm5, %%mm5 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
128 "packssdw %%mm5, %%mm5 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
129 "psubw %%mm5, %%mm7 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
130 "pxor %%mm4, %%mm4 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
131 ".balign 16\n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
132 "1: \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
133 "movq (%0, %3), %%mm0 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
134 "movq 8(%0, %3), %%mm1 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
135
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
136 "pmullw %%mm6, %%mm0 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
137 "pmullw %%mm6, %%mm1 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
138
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
139 "movq (%0, %3), %%mm2 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
140 "movq 8(%0, %3), %%mm3 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
141
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
142 "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
143 "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
144
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
145 "pxor %%mm2, %%mm0 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
146 "pxor %%mm3, %%mm1 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
147
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
148 "paddw %%mm7, %%mm0 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
149 "paddw %%mm7, %%mm1 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
150
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
151 "pxor %%mm0, %%mm2 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
152 "pxor %%mm1, %%mm3 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
153
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
154 "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
155 "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
156
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
157 "pandn %%mm2, %%mm0 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
158 "pandn %%mm3, %%mm1 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
159
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
160 "movq %%mm0, (%0, %3) \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
161 "movq %%mm1, 8(%0, %3) \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
162
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
163 "addl $16, %3 \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
164 "jng 1b \n\t"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
165 ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
166 : "memory"
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
167 );
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
168 }
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
169
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
170
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
171 /*
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
172 NK:
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
173 Note: looking at PARANOID:
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
174 "enable all paranoid tests for rounding, overflows, etc..."
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
175
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
176 #ifdef PARANOID
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
177 if (level < -2048 || level > 2047)
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
178 fprintf(stderr, "unquant error %d %d\n", i, level);
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
179 #endif
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
180 We can suppose that the result of the two multiplications can't be greater than 0xFFFF
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
181 i.e. it fits in 16 bits, so we use only the PMULLW instruction here and can avoid
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
182 a complex multiplication.
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
183 =====================================================
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
184 Full formula for multiplication of 2 integer numbers
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
185 which are represented as high:low words:
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
186 input: value1 = high1:low1
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
187 value2 = high2:low2
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
188 output: value3 = value1*value2
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
189 value3=high3:low3 (on overflow: modulus 2^32 wrap-around)
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
190 this means that for 0x123456 * 0x123456 the correct result is 0x14b66cb0ce4
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
191 but this algorithm will compute only 0x66cb0ce4
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
192 this is limited by the 16-bit size of the operands
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
193 ---------------------------------
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
194 tlow1 = high1*low2
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
195 tlow2 = high2*low1
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
196 tlow1 = tlow1 + tlow2
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
197 high3:low3 = low1*low2
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
198 high3 += tlow1
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
199 */
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
200 static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
201 DCTELEM *block, int n, int qscale)
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
202 {
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
203 int nCoeffs;
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 949
diff changeset
204 const uint16_t *quant_matrix;
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
205 int block0;
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
206
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
207 assert(s->block_last_index[n]>=0);
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
208
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
209 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
200
6ab301aaa652 (commit by michael)
arpi_esp
parents: 153
diff changeset
210
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
211 if (n < 4)
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
212 block0 = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
213 else
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
214 block0 = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
215 /* XXX: only mpeg1 */
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
216 quant_matrix = s->intra_matrix;
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
217 asm volatile(
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
218 "pcmpeqw %%mm7, %%mm7 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
219 "psrlw $15, %%mm7 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
220 "movd %2, %%mm6 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
221 "packssdw %%mm6, %%mm6 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
222 "packssdw %%mm6, %%mm6 \n\t"
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
223 "movl %3, %%eax \n\t"
153
acbd3bc999b3 Let loops will be aligned
nickols_k
parents: 145
diff changeset
224 ".balign 16\n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
225 "1: \n\t"
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
226 "movq (%0, %%eax), %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
227 "movq 8(%0, %%eax), %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
228 "movq (%1, %%eax), %%mm4 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
229 "movq 8(%1, %%eax), %%mm5 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
230 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
231 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
232 "pxor %%mm2, %%mm2 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
233 "pxor %%mm3, %%mm3 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
234 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
235 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
236 "pxor %%mm2, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
237 "pxor %%mm3, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
238 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
239 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
240 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
241 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
242 "pxor %%mm4, %%mm4 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
243 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
244 "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
245 "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
246 "psraw $3, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
247 "psraw $3, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
248 "psubw %%mm7, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
249 "psubw %%mm7, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
250 "por %%mm7, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
251 "por %%mm7, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
252 "pxor %%mm2, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
253 "pxor %%mm3, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
254 "psubw %%mm2, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
255 "psubw %%mm3, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
256 "pandn %%mm0, %%mm4 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
257 "pandn %%mm1, %%mm5 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
258 "movq %%mm4, (%0, %%eax) \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
259 "movq %%mm5, 8(%0, %%eax) \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
260
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
261 "addl $16, %%eax \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
262 "js 1b \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
263 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
264 : "%eax", "memory"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
265 );
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
266 block[0]= block0;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
267 }
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
268
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
269 static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
270 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
271 {
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
272 int nCoeffs;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
273 const uint16_t *quant_matrix;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
274
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
275 assert(s->block_last_index[n]>=0);
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
276
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
277 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
278
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 325
diff changeset
279 quant_matrix = s->inter_matrix;
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
280 asm volatile(
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
281 "pcmpeqw %%mm7, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
282 "psrlw $15, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
283 "movd %2, %%mm6 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
284 "packssdw %%mm6, %%mm6 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
285 "packssdw %%mm6, %%mm6 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
286 "movl %3, %%eax \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
287 ".balign 16\n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
288 "1: \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
289 "movq (%0, %%eax), %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
290 "movq 8(%0, %%eax), %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
291 "movq (%1, %%eax), %%mm4 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
292 "movq 8(%1, %%eax), %%mm5 \n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
293 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
294 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
295 "pxor %%mm2, %%mm2 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
296 "pxor %%mm3, %%mm3 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
297 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
298 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
299 "pxor %%mm2, %%mm0 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
300 "pxor %%mm3, %%mm1 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
301 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
302 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
303 "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
304 "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
305 "paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
306 "paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
307 "pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
308 "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
309 "pxor %%mm4, %%mm4 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
310 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
311 "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
312 "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
313 "psraw $4, %%mm0 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
314 "psraw $4, %%mm1 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
315 "psubw %%mm7, %%mm0 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
316 "psubw %%mm7, %%mm1 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
317 "por %%mm7, %%mm0 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
318 "por %%mm7, %%mm1 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
319 "pxor %%mm2, %%mm0 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
320 "pxor %%mm3, %%mm1 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
321 "psubw %%mm2, %%mm0 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
322 "psubw %%mm3, %%mm1 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
323 "pandn %%mm0, %%mm4 \n\t"
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
324 "pandn %%mm1, %%mm5 \n\t"
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
325 "movq %%mm4, (%0, %%eax) \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
326 "movq %%mm5, 8(%0, %%eax) \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
327
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
328 "addl $16, %%eax \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
329 "js 1b \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
330 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
331 : "%eax", "memory"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
332 );
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
333 }
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
334
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
335 static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
336 DCTELEM *block, int n, int qscale)
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
337 {
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
338 int nCoeffs;
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 949
diff changeset
339 const uint16_t *quant_matrix;
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
340 int block0;
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
341
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
342 assert(s->block_last_index[n]>=0);
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
343
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
344 if(s->alternate_scan) nCoeffs= 63; //FIXME
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
345 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
346
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
347 if (n < 4)
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
348 block0 = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
349 else
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
350 block0 = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
351 quant_matrix = s->intra_matrix;
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
352 asm volatile(
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
353 "pcmpeqw %%mm7, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
354 "psrlw $15, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
355 "movd %2, %%mm6 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
356 "packssdw %%mm6, %%mm6 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
357 "packssdw %%mm6, %%mm6 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
358 "movl %3, %%eax \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
359 ".balign 16\n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
360 "1: \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
361 "movq (%0, %%eax), %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
362 "movq 8(%0, %%eax), %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
363 "movq (%1, %%eax), %%mm4 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
364 "movq 8(%1, %%eax), %%mm5 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
365 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
366 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
367 "pxor %%mm2, %%mm2 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
368 "pxor %%mm3, %%mm3 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
369 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
370 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
371 "pxor %%mm2, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
372 "pxor %%mm3, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
373 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
374 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
375 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
376 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
377 "pxor %%mm4, %%mm4 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
378 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
379 "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
380 "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
381 "psraw $3, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
382 "psraw $3, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
383 "pxor %%mm2, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
384 "pxor %%mm3, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
385 "psubw %%mm2, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
386 "psubw %%mm3, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
387 "pandn %%mm0, %%mm4 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
388 "pandn %%mm1, %%mm5 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
389 "movq %%mm4, (%0, %%eax) \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
390 "movq %%mm5, 8(%0, %%eax) \n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
391
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
392 "addl $16, %%eax \n\t"
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
393 "jng 1b \n\t"
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
394 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
395 : "%eax", "memory"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
396 );
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
397 block[0]= block0;
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
398 //Note, we dont do mismatch control for intra as errors cannot accumulate
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
399 }
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
400
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
401 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
402 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
403 {
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
404 int nCoeffs;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
405 const uint16_t *quant_matrix;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
406
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
407 assert(s->block_last_index[n]>=0);
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
408
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
409 if(s->alternate_scan) nCoeffs= 63; //FIXME
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
410 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
411
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 325
diff changeset
412 quant_matrix = s->inter_matrix;
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
413 asm volatile(
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
414 "pcmpeqw %%mm7, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
415 "psrlq $48, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
416 "movd %2, %%mm6 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
417 "packssdw %%mm6, %%mm6 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
418 "packssdw %%mm6, %%mm6 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
419 "movl %3, %%eax \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
420 ".balign 16\n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
421 "1: \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
422 "movq (%0, %%eax), %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
423 "movq 8(%0, %%eax), %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
424 "movq (%1, %%eax), %%mm4 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
425 "movq 8(%1, %%eax), %%mm5 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
426 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
427 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
428 "pxor %%mm2, %%mm2 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
429 "pxor %%mm3, %%mm3 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
430 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
431 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
432 "pxor %%mm2, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
433 "pxor %%mm3, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
434 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
435 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
436 "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
437 "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
438 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
439 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
440 "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
441 "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
442 "pxor %%mm4, %%mm4 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
443 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
444 "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
445 "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
446 "psrlw $4, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
447 "psrlw $4, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
448 "pxor %%mm2, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
449 "pxor %%mm3, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
450 "psubw %%mm2, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
451 "psubw %%mm3, %%mm1 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
452 "pandn %%mm0, %%mm4 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
453 "pandn %%mm1, %%mm5 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
454 "pxor %%mm4, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
455 "pxor %%mm5, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
456 "movq %%mm4, (%0, %%eax) \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
457 "movq %%mm5, 8(%0, %%eax) \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
458
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
459 "addl $16, %%eax \n\t"
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
460 "jng 1b \n\t"
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
461 "movd 124(%0, %3), %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
462 "movq %%mm7, %%mm6 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
463 "psrlq $32, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
464 "pxor %%mm6, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
465 "movq %%mm7, %%mm6 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
466 "psrlq $16, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
467 "pxor %%mm6, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
468 "pslld $31, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
469 "psrlq $15, %%mm7 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
470 "pxor %%mm7, %%mm0 \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
471 "movd %%mm0, 124(%0, %3) \n\t"
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
472
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
473 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
474 : "%eax", "memory"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
475 );
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
476 }
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
477
206
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
478 /* draw the edges of width 'w' of an image of size width, height
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
479 this mmx version can only handle w==8 || w==16 */
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 949
diff changeset
480 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
206
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
481 {
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 949
diff changeset
482 uint8_t *ptr, *last_line;
206
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
483 int i;
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
484
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
485 last_line = buf + (height - 1) * wrap;
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
486 /* left and right */
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
487 ptr = buf;
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
488 if(w==8)
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
489 {
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
490 asm volatile(
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
491 "1: \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
492 "movd (%0), %%mm0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
493 "punpcklbw %%mm0, %%mm0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
494 "punpcklwd %%mm0, %%mm0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
495 "punpckldq %%mm0, %%mm0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
496 "movq %%mm0, -8(%0) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
497 "movq -8(%0, %2), %%mm1 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
498 "punpckhbw %%mm1, %%mm1 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
499 "punpckhwd %%mm1, %%mm1 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
500 "punpckhdq %%mm1, %%mm1 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
501 "movq %%mm1, (%0, %2) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
502 "addl %1, %0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
503 "cmpl %3, %0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
504 " jb 1b \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
505 : "+r" (ptr)
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
506 : "r" (wrap), "r" (width), "r" (ptr + wrap*height)
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
507 );
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
508 }
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
509 else
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
510 {
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
511 asm volatile(
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
512 "1: \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
513 "movd (%0), %%mm0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
514 "punpcklbw %%mm0, %%mm0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
515 "punpcklwd %%mm0, %%mm0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
516 "punpckldq %%mm0, %%mm0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
517 "movq %%mm0, -8(%0) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
518 "movq %%mm0, -16(%0) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
519 "movq -8(%0, %2), %%mm1 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
520 "punpckhbw %%mm1, %%mm1 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
521 "punpckhwd %%mm1, %%mm1 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
522 "punpckhdq %%mm1, %%mm1 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
523 "movq %%mm1, (%0, %2) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
524 "movq %%mm1, 8(%0, %2) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
525 "addl %1, %0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
526 "cmpl %3, %0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
527 " jb 1b \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
528 : "+r" (ptr)
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
529 : "r" (wrap), "r" (width), "r" (ptr + wrap*height)
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
530 );
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
531 }
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
532
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
533 for(i=0;i<w;i+=4) {
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
534 /* top and bottom (and hopefully also the corners) */
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
535 ptr= buf - (i + 1) * wrap - w;
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
536 asm volatile(
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
537 "1: \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
538 "movq (%1, %0), %%mm0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
539 "movq %%mm0, (%0) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
540 "movq %%mm0, (%0, %2) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
541 "movq %%mm0, (%0, %2, 2) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
542 "movq %%mm0, (%0, %3) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
543 "addl $8, %0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
544 "cmpl %4, %0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
545 " jb 1b \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
546 : "+r" (ptr)
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
547 : "r" ((int)buf - (int)ptr - w), "r" (-wrap), "r" (-wrap*3), "r" (ptr+width+2*w)
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
548 );
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
549 ptr= last_line + (i + 1) * wrap - w;
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
550 asm volatile(
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
551 "1: \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
552 "movq (%1, %0), %%mm0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
553 "movq %%mm0, (%0) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
554 "movq %%mm0, (%0, %2) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
555 "movq %%mm0, (%0, %2, 2) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
556 "movq %%mm0, (%0, %3) \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
557 "addl $8, %0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
558 "cmpl %4, %0 \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
559 " jb 1b \n\t"
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
560 : "+r" (ptr)
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
561 : "r" ((int)last_line - (int)ptr - w), "r" (wrap), "r" (wrap*3), "r" (ptr+width+2*w)
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
562 );
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
563 }
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
564 }
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
565
1719
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/**
 * MMX noise reduction of one block of DCT coefficients.
 *
 * For every coefficient c = block[i] (i = 0..63) the loop below does
 *   sum[i]  += |c|
 *   block[i] = sign(c) * max(|c| - offset[i], 0)
 * where sum = s->dct_error_sum[intra] and offset = s->dct_offset[intra],
 * with intra = s->mb_intra. s->dct_count[intra] is incremented once per call.
 *
 * The asm handles block as packed 16-bit words
 * (assumes DCTELEM is a 16-bit type -- TODO confirm).
 *
 * @param s     encoder context; reads mb_intra, updates dct_error_sum / dct_count
 * @param block 64 coefficients, modified in place
 */
566 static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
567 const int intra= s->mb_intra;
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
568 int *sum= s->dct_error_sum[intra];
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
569 uint16_t *offset= s->dct_offset[intra];
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
570
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* one more block seen for this intra/inter class */
571 s->dct_count[intra]++;
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
572
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
573 asm volatile(
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* mm7 = 0 for the whole loop; used to zero-extend words to dwords */
574 "pxor %%mm7, %%mm7 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
575 "1: \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* load 8 coefficients into mm2/mm3 and build their sign masks in
 * mm0/mm1 (0 > c gives all-ones for every negative word) */
576 "pxor %%mm0, %%mm0 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
577 "pxor %%mm1, %%mm1 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
578 "movq (%0), %%mm2 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
579 "movq 8(%0), %%mm3 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
580 "pcmpgtw %%mm2, %%mm0 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
581 "pcmpgtw %%mm3, %%mm1 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* absolute value: (c ^ mask) - mask */
582 "pxor %%mm0, %%mm2 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
583 "pxor %%mm1, %%mm3 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
584 "psubw %%mm0, %%mm2 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
585 "psubw %%mm1, %%mm3 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* keep copies of |c| in mm4/mm5 for the error sums further down */
586 "movq %%mm2, %%mm4 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
587 "movq %%mm3, %%mm5 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* |c| - offset with unsigned saturation, i.e. clamped at 0 */
588 "psubusw (%2), %%mm2 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
589 "psubusw 8(%2), %%mm3 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* put the original signs back and store the result into block */
590 "pxor %%mm0, %%mm2 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
591 "pxor %%mm1, %%mm3 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
592 "psubw %%mm0, %%mm2 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
593 "psubw %%mm1, %%mm3 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
594 "movq %%mm2, (%0) \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
595 "movq %%mm3, 8(%0) \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* widen the saved |c| values to 32 bit (interleave with zero in mm7):
 * mm4 / mm2 = low / high pair of the first four,
 * mm5 / mm3 = low / high pair of the second four */
596 "movq %%mm4, %%mm2 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
597 "movq %%mm5, %%mm3 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
598 "punpcklwd %%mm7, %%mm4 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
599 "punpckhwd %%mm7, %%mm2 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
600 "punpcklwd %%mm7, %%mm5 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
601 "punpckhwd %%mm7, %%mm3 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* sum[0..7] += |c[0..7]| */
602 "paddd (%1), %%mm4 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
603 "paddd 8(%1), %%mm2 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
604 "paddd 16(%1), %%mm5 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
605 "paddd 24(%1), %%mm3 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
606 "movq %%mm4, (%1) \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
607 "movq %%mm2, 8(%1) \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
608 "movq %%mm5, 16(%1) \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
609 "movq %%mm3, 24(%1) \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* advance by 8 entries: 16 bytes of block, 32 bytes of sum,
 * 16 bytes of offset */
610 "addl $16, %0 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
611 "addl $32, %1 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
612 "addl $16, %2 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* repeat while block is (unsigned) below the end pointer in %3 */
613 "cmpl %3, %0 \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
614 " jb 1b \n\t"
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
/* NOTE(review): no clobber list is given -- neither "memory" (the asm
 * reads and writes through all three pointers) nor the mm registers.
 * addl/cmpl are 32-bit operations, so pointers are treated as 32-bit
 * values (i386 only). */
615 : "+r" (block), "+r" (sum), "+r" (offset)
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
616 : "r"(block+64)
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
617 );
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
618 }
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
619
1720
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/**
 * SSE2 noise reduction of one block of DCT coefficients.
 *
 * Same per-coefficient operation as the MMX variant, 16 coefficients per
 * loop iteration:
 *   sum[i]  += |block[i]|
 *   block[i] = sign(block[i]) * max(|block[i]| - offset[i], 0)
 * with sum = s->dct_error_sum[intra], offset = s->dct_offset[intra] and
 * intra = s->mb_intra. s->dct_count[intra] is incremented once per call.
 *
 * All memory accesses use movdqa or 128-bit memory operands, so block,
 * sum and offset have to be 16-byte aligned.
 * The asm handles block as packed 16-bit words
 * (assumes DCTELEM is a 16-bit type -- TODO confirm).
 *
 * @param s     encoder context; reads mb_intra, updates dct_error_sum / dct_count
 * @param block 64 coefficients, modified in place
 */
620 static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
621 const int intra= s->mb_intra;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
622 int *sum= s->dct_error_sum[intra];
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
623 uint16_t *offset= s->dct_offset[intra];
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
624
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* one more block seen for this intra/inter class */
625 s->dct_count[intra]++;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
626
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
627 asm volatile(
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* xmm7 = 0 for the whole loop; used to zero-extend words to dwords */
628 "pxor %%xmm7, %%xmm7 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
629 "1: \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* load 16 coefficients into xmm2/xmm3 and build their sign masks in
 * xmm0/xmm1 (0 > c gives all-ones for every negative word) */
630 "pxor %%xmm0, %%xmm0 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
631 "pxor %%xmm1, %%xmm1 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
632 "movdqa (%0), %%xmm2 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
633 "movdqa 16(%0), %%xmm3 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
634 "pcmpgtw %%xmm2, %%xmm0 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
635 "pcmpgtw %%xmm3, %%xmm1 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* absolute value: (c ^ mask) - mask */
636 "pxor %%xmm0, %%xmm2 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
637 "pxor %%xmm1, %%xmm3 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
638 "psubw %%xmm0, %%xmm2 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
639 "psubw %%xmm1, %%xmm3 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* keep copies of |c| in xmm4/xmm5 for the error sums further down */
640 "movdqa %%xmm2, %%xmm4 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
641 "movdqa %%xmm3, %%xmm5 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* |c| - offset with unsigned saturation, i.e. clamped at 0 */
642 "psubusw (%2), %%xmm2 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
643 "psubusw 16(%2), %%xmm3 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* put the original signs back and store the result into block */
644 "pxor %%xmm0, %%xmm2 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
645 "pxor %%xmm1, %%xmm3 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
646 "psubw %%xmm0, %%xmm2 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
647 "psubw %%xmm1, %%xmm3 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
648 "movdqa %%xmm2, (%0) \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
649 "movdqa %%xmm3, 16(%0) \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* widen the saved |c| values to 32 bit (interleave with zero in xmm7).
 * xmm6 and xmm0 take the copies for the high halves; the sign mask in
 * xmm0 is no longer needed at this point. */
650 "movdqa %%xmm4, %%xmm6 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
651 "movdqa %%xmm5, %%xmm0 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
652 "punpcklwd %%xmm7, %%xmm4 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
653 "punpckhwd %%xmm7, %%xmm6 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
654 "punpcklwd %%xmm7, %%xmm5 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
655 "punpckhwd %%xmm7, %%xmm0 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* sum[0..15] += |c[0..15]| */
656 "paddd (%1), %%xmm4 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
657 "paddd 16(%1), %%xmm6 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
658 "paddd 32(%1), %%xmm5 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
659 "paddd 48(%1), %%xmm0 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
660 "movdqa %%xmm4, (%1) \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
661 "movdqa %%xmm6, 16(%1) \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
662 "movdqa %%xmm5, 32(%1) \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
663 "movdqa %%xmm0, 48(%1) \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* advance by 16 entries: 32 bytes of block, 64 bytes of sum,
 * 32 bytes of offset */
664 "addl $32, %0 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
665 "addl $64, %1 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
666 "addl $32, %2 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* repeat while block is (unsigned) below the end pointer in %3 */
667 "cmpl %3, %0 \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
668 " jb 1b \n\t"
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* NOTE(review): no clobber list is given -- neither "memory" (the asm
 * reads and writes through all three pointers) nor the xmm registers.
 * addl/cmpl are 32-bit operations, so pointers are treated as 32-bit
 * values (i386 only). */
669 : "+r" (block), "+r" (sum), "+r" (offset)
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
670 : "r"(block+64)
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
671 );
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
672 }
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
673
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
/*
 * mpegvideo_mmx_template.c is included three times below. Before each
 * inclusion RENAME(a) / RENAMEl(a) are redefined to paste a different
 * suffix onto a: _MMX / _mmx, then _MMX2 / _mmx2, then _SSE2 / _sse2.
 * The template itself is not part of this file; presumably it wraps its
 * function names in these macros so each inclusion yields its own set of
 * symbols (e.g. dct_quantize_MMX) -- verify against the template.
 */
/* first inclusion: HAVE_MMX2 undefined */
674 #undef HAVE_MMX2
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
675 #define RENAME(a) a ## _MMX
1565
1a9a63f59849 minor mmx2 optimization if the dct
michael
parents: 1261
diff changeset
676 #define RENAMEl(a) a ## _mmx
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
677 #include "mpegvideo_mmx_template.c"
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
678
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
/* second inclusion: HAVE_MMX2 defined */
679 #define HAVE_MMX2
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
680 #undef RENAME
1597
4c9165372ab3 noise reduction of dct coefficients
michael
parents: 1565
diff changeset
681 #undef RENAMEl
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
682 #define RENAME(a) a ## _MMX2
1565
1a9a63f59849 minor mmx2 optimization if the dct
michael
parents: 1261
diff changeset
683 #define RENAMEl(a) a ## _mmx2
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
684 #include "mpegvideo_mmx_template.c"
206
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
685
1765
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
/* third inclusion: HAVE_MMX2 is still defined from the step above */
686 #undef RENAME
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
687 #undef RENAMEl
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
688 #define RENAME(a) a ## _SSE2
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
689 #define RENAMEl(a) a ## _sse2
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
690 #include "mpegvideo_mmx_template.c"
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
691
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
/**
 * Install the MMX / MMX2 / SSE2 implementations into an encoder context.
 *
 * Does nothing unless mm_flags has MM_MMX set. Otherwise it
 *  - points the six dct_unquantize_* hooks of s at their _mmx versions,
 *  - sets draw_edges to draw_edges_mmx,
 *  - sets s->denoise_dct to the SSE2 version if MM_SSE2 is set, else MMX,
 *  - sets s->dct_quantize only when s->avctx->dct_algo is FF_DCT_AUTO or
 *    FF_DCT_MMX, preferring SSE2, then MMXEXT, then plain MMX.
 *
 * @param s context whose function pointers are updated; s->avctx is read
 */
692 void MPV_common_init_mmx(MpegEncContext *s)
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
693 {
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
/* mm_flags is declared outside this function (presumably the detected
 * CPU capability flags -- confirm at its definition) */
694 if (mm_flags & MM_MMX) {
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
695 const int dct_algo = s->avctx->dct_algo;
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
696
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
/* dequantizers: separate intra / inter hooks for h263, mpeg1, mpeg2 */
697 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
698 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
699 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
700 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
701 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
702 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
312
8cf5507e6ca5 mpeg4 mpeg quantizer support
michaelni
parents: 252
diff changeset
703
350
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 344
diff changeset
/* draw_edges is not a member of s: it is a variable declared outside
 * this function, so this assignment is not per-context */
704 draw_edges = draw_edges_mmx;
1719
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
705
1720
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
/* coefficient denoiser: SSE2 version when available, MMX otherwise */
706 if (mm_flags & MM_SSE2) {
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
707 s->denoise_dct= denoise_dct_sse2;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
708 } else {
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
709 s->denoise_dct= denoise_dct_mmx;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
710 }
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
711
625
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 620
diff changeset
/* dct_quantize is left untouched for any other dct_algo value;
 * otherwise pick SSE2 first, then MMXEXT, then plain MMX */
712 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
1765
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
713 if(mm_flags & MM_SSE2){
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
714 s->dct_quantize= dct_quantize_SSE2;
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
715 } else if(mm_flags & MM_MMXEXT){
625
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 620
diff changeset
716 s->dct_quantize= dct_quantize_MMX2;
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 620
diff changeset
717 } else {
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 620
diff changeset
718 s->dct_quantize= dct_quantize_MMX;
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 620
diff changeset
719 }
350
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 344
diff changeset
720 }
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
721 }
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
722 }