annotate i386/mpegvideo_mmx.c @ 4091:3c00eb82db0d libavcodec

Rename dvdsub.c to dvdsubdec.c.
author diego
date Fri, 27 Oct 2006 23:37:35 +0000
parents c8c591fe26f8
children 580d2c397251
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
1 /*
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
2 * The simplest mpeg encoder (well, it was the simplest!)
429
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
3 * Copyright (c) 2000,2001 Fabrice Bellard.
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
429
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
15 * Lesser General Public License for more details.
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
16 *
429
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3576
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
20 *
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
21 * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
22 * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
23 */
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
24
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
25 #include "../dsputil.h"
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
26 #include "../mpegvideo.h"
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
27 #include "../avcodec.h"
3398
e0927bc44a10 Move REG_* macros from libavcodec/i386/mmx.h to libavutil/x86_cpu.h
lucabe
parents: 3281
diff changeset
28 #include "x86_cpu.h"
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
29
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 949
diff changeset
30 extern uint8_t zigzag_direct_noperm[64];
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 949
diff changeset
31 extern uint16_t inv_zigzag_direct16[64];
200
6ab301aaa652 (commit by michael)
arpi_esp
parents: 153
diff changeset
32
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
33 static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
34 static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
35
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
36
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
37 static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
38 DCTELEM *block, int n, int qscale)
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
39 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 2024
diff changeset
40 long level, qmul, qadd, nCoeffs;
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
41
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
42 qmul = qscale << 1;
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
43
1661
4c9fd29f1606 h263 slice structured mode
michael
parents: 1597
diff changeset
44 assert(s->block_last_index[n]>=0 || s->h263_aic);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
45
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
46 if (!s->h263_aic) {
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
47 if (n < 4)
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
48 level = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
49 else
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
50 level = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
51 qadd = (qscale - 1) | 1;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
52 }else{
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
53 qadd = 0;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
54 level= block[0];
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
55 }
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
56 if(s->ac_pred)
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
57 nCoeffs=63;
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
58 else
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
59 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
200
6ab301aaa652 (commit by michael)
arpi_esp
parents: 153
diff changeset
60 //printf("%d %d ", qmul, qadd);
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
61 asm volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
62 "movd %1, %%mm6 \n\t" //qmul
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
63 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
64 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
65 "movd %2, %%mm5 \n\t" //qadd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
66 "pxor %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
67 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
68 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
69 "psubw %%mm5, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
70 "pxor %%mm4, %%mm4 \n\t"
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
71 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
72 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
73 "movq (%0, %3), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
74 "movq 8(%0, %3), %%mm1 \n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
75
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
76 "pmullw %%mm6, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
77 "pmullw %%mm6, %%mm1 \n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
78
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
79 "movq (%0, %3), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
80 "movq 8(%0, %3), %%mm3 \n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
81
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
82 "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
83 "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
84
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
85 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
86 "pxor %%mm3, %%mm1 \n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
87
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
88 "paddw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
89 "paddw %%mm7, %%mm1 \n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
90
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
91 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
92 "pxor %%mm1, %%mm3 \n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
93
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
94 "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
95 "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
96
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
97 "pandn %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
98 "pandn %%mm3, %%mm1 \n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
99
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
100 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
101 "movq %%mm1, 8(%0, %3) \n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
102
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
103 "add $16, %3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
104 "jng 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
105 ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
106 : "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
107 );
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
108 block[0]= level;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
109 }
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
110
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
111
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
112 static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
113 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
114 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 2024
diff changeset
115 long qmul, qadd, nCoeffs;
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
116
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
117 qmul = qscale << 1;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
118 qadd = (qscale - 1) | 1;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
119
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
120 assert(s->block_last_index[n]>=0 || s->h263_aic);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
121
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
122 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
123 //printf("%d %d ", qmul, qadd);
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
124 asm volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
125 "movd %1, %%mm6 \n\t" //qmul
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
126 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
127 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
128 "movd %2, %%mm5 \n\t" //qadd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
129 "pxor %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
130 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
131 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
132 "psubw %%mm5, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
133 "pxor %%mm4, %%mm4 \n\t"
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
134 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
135 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
136 "movq (%0, %3), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
137 "movq 8(%0, %3), %%mm1 \n\t"
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
138
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
139 "pmullw %%mm6, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
140 "pmullw %%mm6, %%mm1 \n\t"
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
141
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
142 "movq (%0, %3), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
143 "movq 8(%0, %3), %%mm3 \n\t"
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
144
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
145 "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
146 "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
147
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
148 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
149 "pxor %%mm3, %%mm1 \n\t"
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
150
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
151 "paddw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
152 "paddw %%mm7, %%mm1 \n\t"
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
153
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
154 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
155 "pxor %%mm1, %%mm3 \n\t"
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
156
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
157 "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
158 "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
159
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
160 "pandn %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
161 "pandn %%mm3, %%mm1 \n\t"
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
162
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
163 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
164 "movq %%mm1, 8(%0, %3) \n\t"
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
165
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
166 "add $16, %3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
167 "jng 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
168 ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
169 : "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
170 );
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
171 }
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
172
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
173
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
174 /*
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
175 NK:
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
176 Note: looking at PARANOID:
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
177 "enable all paranoid tests for rounding, overflows, etc..."
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
178
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
179 #ifdef PARANOID
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
180 if (level < -2048 || level > 2047)
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
181 fprintf(stderr, "unquant error %d %d\n", i, level);
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
182 #endif
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
183 We can suppose that the result of two multiplications can't be greater than 0xFFFF
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
184 i.e. it is 16-bit, so we use only the PMULLW instruction here and can avoid
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
185 a complex multiplication.
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
186 =====================================================
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
187 Full formula for multiplication of 2 integer numbers
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
188 which are represented as high:low words:
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
189 input: value1 = high1:low1
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
190 value2 = high2:low2
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
191 output: value3 = value1*value2
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
192 value3=high3:low3 (on overflow: modulus 2^32 wrap-around)
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
193 this means that for 0x123456 * 0x123456 the correct result is 0x14b66cb0ce4
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
194 but this algorithm will compute only 0x66cb0ce4
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
195 this is limited by the 16-bit size of the operands
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
196 ---------------------------------
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
197 tlow1 = high1*low2
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
198 tlow2 = high2*low1
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
199 tlow1 = tlow1 + tlow2
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
200 high3:low3 = low1*low2
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
201 high3 += tlow1
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
202 */
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
203 static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
204 DCTELEM *block, int n, int qscale)
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
205 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 2024
diff changeset
206 long nCoeffs;
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 949
diff changeset
207 const uint16_t *quant_matrix;
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
208 int block0;
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
209
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
210 assert(s->block_last_index[n]>=0);
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
211
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
212 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
200
6ab301aaa652 (commit by michael)
arpi_esp
parents: 153
diff changeset
213
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
214 if (n < 4)
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
215 block0 = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
216 else
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
217 block0 = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
218 /* XXX: only mpeg1 */
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
219 quant_matrix = s->intra_matrix;
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
220 asm volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
221 "pcmpeqw %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
222 "psrlw $15, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
223 "movd %2, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
224 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
225 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
226 "mov %3, %%"REG_a" \n\t"
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
227 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
228 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
229 "movq (%0, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
230 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
231 "movq (%1, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
232 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
233 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
234 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
235 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
236 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
237 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
238 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
239 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
240 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
241 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
242 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
243 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
244 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
245 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
246 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
247 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
248 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
249 "psraw $3, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
250 "psraw $3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
251 "psubw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
252 "psubw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
253 "por %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
254 "por %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
255 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
256 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
257 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
258 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
259 "pandn %%mm0, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
260 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
261 "movq %%mm4, (%0, %%"REG_a") \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
262 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
263
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
264 "add $16, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
265 "js 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
266 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
267 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
268 );
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
269 block[0]= block0;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
270 }
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
271
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
272 static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
273 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
274 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 2024
diff changeset
275 long nCoeffs;
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
276 const uint16_t *quant_matrix;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
277
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
278 assert(s->block_last_index[n]>=0);
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
279
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
280 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
281
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 325
diff changeset
282 quant_matrix = s->inter_matrix;
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
283 asm volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
284 "pcmpeqw %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
285 "psrlw $15, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
286 "movd %2, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
287 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
288 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
289 "mov %3, %%"REG_a" \n\t"
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
290 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
291 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
292 "movq (%0, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
293 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
294 "movq (%1, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
295 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
296 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
297 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
298 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
299 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
300 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
301 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
302 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
303 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
304 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
305 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
306 "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
307 "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
308 "paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
309 "paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
310 "pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
311 "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
312 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
313 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
314 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
315 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
316 "psraw $4, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
317 "psraw $4, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
318 "psubw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
319 "psubw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
320 "por %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
321 "por %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
322 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
323 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
324 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
325 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
326 "pandn %%mm0, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
327 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
328 "movq %%mm4, (%0, %%"REG_a") \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
329 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
330
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
331 "add $16, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
332 "js 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
333 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
334 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
335 );
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
336 }
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
337
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
338 static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
339 DCTELEM *block, int n, int qscale)
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
340 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 2024
diff changeset
341 long nCoeffs;
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 949
diff changeset
342 const uint16_t *quant_matrix;
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
343 int block0;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
344
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
345 assert(s->block_last_index[n]>=0);
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
346
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
347 if(s->alternate_scan) nCoeffs= 63; //FIXME
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
348 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
349
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
350 if (n < 4)
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
351 block0 = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
352 else
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
353 block0 = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
354 quant_matrix = s->intra_matrix;
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
355 asm volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
356 "pcmpeqw %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
357 "psrlw $15, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
358 "movd %2, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
359 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
360 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
361 "mov %3, %%"REG_a" \n\t"
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
362 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
363 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
364 "movq (%0, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
365 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
366 "movq (%1, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
367 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
368 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
369 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
370 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
371 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
372 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
373 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
374 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
375 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
376 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
377 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
378 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
379 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
380 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
381 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
382 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
383 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
384 "psraw $3, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
385 "psraw $3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
386 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
387 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
388 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
389 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
390 "pandn %%mm0, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
391 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
392 "movq %%mm4, (%0, %%"REG_a") \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
393 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
145
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at>
arpi_esp
parents: 14
diff changeset
394
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
395 "add $16, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
396 "jng 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
397 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
398 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
399 );
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
400 block[0]= block0;
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
401 //Note, we don't do mismatch control for intra as errors cannot accumulate
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
402 }
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
403
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
404 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
405 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
406 {
2293
15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>)
michael
parents: 2024
diff changeset
407 long nCoeffs;
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
408 const uint16_t *quant_matrix;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
409
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
410 assert(s->block_last_index[n]>=0);
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
411
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
412 if(s->alternate_scan) nCoeffs= 63; //FIXME
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
413 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
414
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 325
diff changeset
415 quant_matrix = s->inter_matrix;
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
416 asm volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
417 "pcmpeqw %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
418 "psrlq $48, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
419 "movd %2, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
420 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
421 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
422 "mov %3, %%"REG_a" \n\t"
3576
f7125bf10892 Support for MacIntel, last part: balign directives
gpoirier
parents: 3398
diff changeset
423 ASMALIGN(4)
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
424 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
425 "movq (%0, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
426 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
427 "movq (%1, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
428 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
429 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
430 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
431 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
432 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
433 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
434 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
435 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
436 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
437 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
438 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
439 "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
440 "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
441 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
442 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
443 "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
444 "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
445 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
446 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
447 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
448 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
449 "psrlw $4, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
450 "psrlw $4, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
451 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
452 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
453 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
454 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
455 "pandn %%mm0, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
456 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
457 "pxor %%mm4, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
458 "pxor %%mm5, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
459 "movq %%mm4, (%0, %%"REG_a") \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
460 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
325
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx)
michaelni
parents: 312
diff changeset
461
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
462 "add $16, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
463 "jng 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
464 "movd 124(%0, %3), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
465 "movq %%mm7, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
466 "psrlq $32, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
467 "pxor %%mm6, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
468 "movq %%mm7, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
469 "psrlq $16, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
470 "pxor %%mm6, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
471 "pslld $31, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
472 "psrlq $15, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
473 "pxor %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
474 "movd %%mm0, 124(%0, %3) \n\t"
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
475
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
476 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
477 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
478 );
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
479 }
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
480
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
481 /* draw edges of width 'w' around an image of size width x height;
206
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
482 this MMX version can only handle w==8 || w==16 */
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 949
diff changeset
/**
 * Pad a picture by replicating its border pixels outwards (MMX version).
 *
 * @param buf    top-left pixel of the picture area
 * @param wrap   distance in bytes between the starts of two consecutive rows
 * @param width  width of the picture area in bytes
 * @param height number of rows in the picture area; the row loop is
 *               bottom-tested, so at least one row is always processed
 * @param w      edge size; 8 selects the single-store path, every other
 *               value is treated as 16 (two 8-byte stores per side and row)
 *
 * Side borders are written first: 8 (or 16) bytes left of every row get the
 * row's first pixel, 8 (or 16) bytes right of it get the row's last pixel.
 * Afterwards the first and the last row, including their freshly written
 * side borders, are copied into the w rows above and below the picture,
 * which also fills the corners. The rows are copied in chunks of 8 bytes and
 * in groups of 4 rows, so the caller has to provide writable memory for
 * that. MMX state is left as is; no emms is issued here.
 */
static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *ptr, *last_line;
    /* pointer-sized copy of the stride, it is used as an index register */
    const intptr_t stride = wrap;
    int i;

    last_line = buf + (height - 1) * wrap;

    /* left and right */
    ptr = buf;
    if (w == 8) {
        __asm__ volatile(
            "1:                             \n\t"
            /* broadcast the first pixel of the row into all 8 bytes of mm0 */
            "movd        (%0), %%mm0        \n\t"
            "punpcklbw  %%mm0, %%mm0        \n\t"
            "punpcklwd  %%mm0, %%mm0        \n\t"
            "punpckldq  %%mm0, %%mm0        \n\t"
            "movq       %%mm0, -8(%0)       \n\t"
            /* broadcast the last pixel of the row into all 8 bytes of mm1 */
            "movq  -8(%0, %2), %%mm1        \n\t"
            "punpckhbw  %%mm1, %%mm1        \n\t"
            "punpckhwd  %%mm1, %%mm1        \n\t"
            "punpckhdq  %%mm1, %%mm1        \n\t"
            "movq       %%mm1, (%0, %2)     \n\t"
            /* next row, until the end of the picture is reached */
            "add           %1, %0           \n\t"
            "cmp           %3, %0           \n\t"
            " jb           1b               \n\t"
            : "+r" (ptr)
            : "r" (stride), "r" ((intptr_t)width), "r" (ptr + wrap * height)
            : "memory" /* the stores above are not visible through operands */
        );
    } else {
        __asm__ volatile(
            "1:                             \n\t"
            "movd        (%0), %%mm0        \n\t"
            "punpcklbw  %%mm0, %%mm0        \n\t"
            "punpcklwd  %%mm0, %%mm0        \n\t"
            "punpckldq  %%mm0, %%mm0        \n\t"
            "movq       %%mm0, -8(%0)       \n\t"
            "movq       %%mm0, -16(%0)      \n\t"
            "movq  -8(%0, %2), %%mm1        \n\t"
            "punpckhbw  %%mm1, %%mm1        \n\t"
            "punpckhwd  %%mm1, %%mm1        \n\t"
            "punpckhdq  %%mm1, %%mm1        \n\t"
            "movq       %%mm1, (%0, %2)     \n\t"
            "movq       %%mm1, 8(%0, %2)    \n\t"
            "add           %1, %0           \n\t"
            "cmp           %3, %0           \n\t"
            " jb           1b               \n\t"
            : "+r" (ptr)
            : "r" (stride), "r" ((intptr_t)width), "r" (ptr + wrap * height)
            : "memory"
        );
    }

    /* top and bottom, four rows per iteration; the copies start w bytes left
     * of the picture so the corners are filled as well */
    for (i = 0; i < w; i += 4) {
        /* %1 is the distance from the destination back to row 0 (at buf - w),
         * the three other stores go to the rows above the first one */
        ptr = buf - (i + 1) * wrap - w;
        __asm__ volatile(
            "1:                             \n\t"
            "movq    (%1, %0), %%mm0        \n\t"
            "movq       %%mm0, (%0)         \n\t"
            "movq       %%mm0, (%0, %2)     \n\t"
            "movq       %%mm0, (%0, %2, 2)  \n\t"
            "movq       %%mm0, (%0, %3)     \n\t"
            "add           $8, %0           \n\t"
            "cmp           %4, %0           \n\t"
            " jb           1b               \n\t"
            : "+r" (ptr)
            : "r" ((intptr_t)buf - (intptr_t)ptr - w), "r" (-stride),
              "r" (-stride * 3), "r" (ptr + width + 2 * w)
            : "memory"
        );

        /* same for the bottom: source is the last row, stores go downwards */
        ptr = last_line + (i + 1) * wrap - w;
        __asm__ volatile(
            "1:                             \n\t"
            "movq    (%1, %0), %%mm0        \n\t"
            "movq       %%mm0, (%0)         \n\t"
            "movq       %%mm0, (%0, %2)     \n\t"
            "movq       %%mm0, (%0, %2, 2)  \n\t"
            "movq       %%mm0, (%0, %3)     \n\t"
            "add           $8, %0           \n\t"
            "cmp           %4, %0           \n\t"
            " jb           1b               \n\t"
            : "+r" (ptr)
            : "r" ((intptr_t)last_line - (intptr_t)ptr - w), "r" (stride),
              "r" (stride * 3), "r" (ptr + width + 2 * w)
            : "memory"
        );
    }
}
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
568
1719
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
569 static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
570 const int intra= s->mb_intra;
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
571 int *sum= s->dct_error_sum[intra];
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
572 uint16_t *offset= s->dct_offset[intra];
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
573
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
574 s->dct_count[intra]++;
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
575
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
576 asm volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
577 "pxor %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
578 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
579 "pxor %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
580 "pxor %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
581 "movq (%0), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
582 "movq 8(%0), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
583 "pcmpgtw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
584 "pcmpgtw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
585 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
586 "pxor %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
587 "psubw %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
588 "psubw %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
589 "movq %%mm2, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
590 "movq %%mm3, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
591 "psubusw (%2), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
592 "psubusw 8(%2), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
593 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
594 "pxor %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
595 "psubw %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
596 "psubw %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
597 "movq %%mm2, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
598 "movq %%mm3, 8(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
599 "movq %%mm4, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
600 "movq %%mm5, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
601 "punpcklwd %%mm7, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
602 "punpckhwd %%mm7, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
603 "punpcklwd %%mm7, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
604 "punpckhwd %%mm7, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
605 "paddd (%1), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
606 "paddd 8(%1), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
607 "paddd 16(%1), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
608 "paddd 24(%1), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
609 "movq %%mm4, (%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
610 "movq %%mm2, 8(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
611 "movq %%mm5, 16(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
612 "movq %%mm3, 24(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
613 "add $16, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
614 "add $32, %1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
615 "add $16, %2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
616 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
617 " jb 1b \n\t"
1719
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
618 : "+r" (block), "+r" (sum), "+r" (offset)
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
619 : "r"(block+64)
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
620 );
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
621 }
4e72fb256b25 denoise_dct_mmx()
michael
parents: 1689
diff changeset
622
1720
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
623 static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
624 const int intra= s->mb_intra;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
625 int *sum= s->dct_error_sum[intra];
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
626 uint16_t *offset= s->dct_offset[intra];
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
627
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
628 s->dct_count[intra]++;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
629
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
630 asm volatile(
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
631 "pxor %%xmm7, %%xmm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
632 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
633 "pxor %%xmm0, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
634 "pxor %%xmm1, %%xmm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
635 "movdqa (%0), %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
636 "movdqa 16(%0), %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
637 "pcmpgtw %%xmm2, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
638 "pcmpgtw %%xmm3, %%xmm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
639 "pxor %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
640 "pxor %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
641 "psubw %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
642 "psubw %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
643 "movdqa %%xmm2, %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
644 "movdqa %%xmm3, %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
645 "psubusw (%2), %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
646 "psubusw 16(%2), %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
647 "pxor %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
648 "pxor %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
649 "psubw %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
650 "psubw %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
651 "movdqa %%xmm2, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
652 "movdqa %%xmm3, 16(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
653 "movdqa %%xmm4, %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
654 "movdqa %%xmm5, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
655 "punpcklwd %%xmm7, %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
656 "punpckhwd %%xmm7, %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
657 "punpcklwd %%xmm7, %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
658 "punpckhwd %%xmm7, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
659 "paddd (%1), %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
660 "paddd 16(%1), %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
661 "paddd 32(%1), %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
662 "paddd 48(%1), %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
663 "movdqa %%xmm4, (%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
664 "movdqa %%xmm6, 16(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
665 "movdqa %%xmm5, 32(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
666 "movdqa %%xmm0, 48(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
667 "add $32, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
668 "add $64, %1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
669 "add $32, %2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
670 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
671 " jb 1b \n\t"
1720
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
672 : "+r" (block), "+r" (sum), "+r" (offset)
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
673 : "r"(block+64)
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
674 );
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
675 }
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
676
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
677 #undef HAVE_MMX2
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
678 #define RENAME(a) a ## _MMX
1565
1a9a63f59849 minor mmx2 optimization if the dct
michael
parents: 1261
diff changeset
679 #define RENAMEl(a) a ## _mmx
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
680 #include "mpegvideo_mmx_template.c"
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
681
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
682 #define HAVE_MMX2
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
683 #undef RENAME
1597
4c9165372ab3 noise reduction of dct coefficients
michael
parents: 1565
diff changeset
684 #undef RENAMEl
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
685 #define RENAME(a) a ## _MMX2
1565
1a9a63f59849 minor mmx2 optimization if the dct
michael
parents: 1261
diff changeset
686 #define RENAMEl(a) a ## _mmx2
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
687 #include "mpegvideo_mmx_template.c"
206
994aa8623443 (commit by michael)
arpi_esp
parents: 200
diff changeset
688
1765
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
689 #undef RENAME
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
690 #undef RENAMEl
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
691 #define RENAME(a) a ## _SSE2
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
692 #define RENAMEl(a) a ## _sse2
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
693 #include "mpegvideo_mmx_template.c"
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
694
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
695 void MPV_common_init_mmx(MpegEncContext *s)
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
696 {
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
697 if (mm_flags & MM_MMX) {
706
e65798d228ea idct permutation cleanup, idct can be selected per context now
michaelni
parents: 687
diff changeset
698 const int dct_algo = s->avctx->dct_algo;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
699
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
700 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
701 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
702 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
703 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
3281
7fac25904a8b missmatch control for mpeg2 intra dequantization if bitexact=1
michael
parents: 3036
diff changeset
704 if(!(s->flags & CODEC_FLAG_BITEXACT))
7fac25904a8b missmatch control for mpeg2 intra dequantization if bitexact=1
michael
parents: 3036
diff changeset
705 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
1689
1a2db2073848 split intra / inter dequantization
michael
parents: 1661
diff changeset
706 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
312
8cf5507e6ca5 mpeg4 mpeg quantizer support
michaelni
parents: 252
diff changeset
707
350
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 344
diff changeset
708 draw_edges = draw_edges_mmx;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2293
diff changeset
709
1720
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1719
diff changeset
710 if (mm_flags & MM_SSE2) {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
711 s->denoise_dct= denoise_dct_sse2;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
712 } else {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
713 s->denoise_dct= denoise_dct_mmx;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
714 }
220
0b234715e205 (commit by michael)
arpi_esp
parents: 206
diff changeset
715
625
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 620
diff changeset
716 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
1765
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
717 if(mm_flags & MM_SSE2){
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
718 s->dct_quantize= dct_quantize_SSE2;
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
michael
parents: 1720
diff changeset
719 } else if(mm_flags & MM_MMXEXT){
625
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 620
diff changeset
720 s->dct_quantize= dct_quantize_MMX2;
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 620
diff changeset
721 } else {
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 620
diff changeset
722 s->dct_quantize= dct_quantize_MMX;
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 620
diff changeset
723 }
350
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 344
diff changeset
724 }
14
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff
glantau
parents: 8
diff changeset
725 }
8
1b4461b5a7fb Sync with mplayer's stuff
nickols_k
parents:
diff changeset
726 }