libavcodec.hg: i386/mpegvideo

annotate i386/mpegvideo_mmx.c @ 3980:5afe4253a220 libavcodec

Replace a few and/sub/... by cmov. This is faster on P3, should be faster on AMD, and should be slower on P4. It's disabled by default (benchmarks welcome so we know when to enable it).

author	michael
date	Tue, 10 Oct 2006 01:08:39 +0000
parents	c8c591fe26f8
children	580d2c397251

rev	line source
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	1 /*
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	2 * The simplest mpeg encoder (well, it was the simplest!)
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	3 * Copyright (c) 2000,2001 Fabrice Bellard.
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	4 *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	7 * FFmpeg is free software; you can redistribute it and/or
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	8 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change glantau parents: 350 diff changeset	9 * License as published by the Free Software Foundation; either
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	10 * version 2.1 of the License, or (at your option) any later version.
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	11 *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	12 * FFmpeg is distributed in the hope that it will be useful,
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change glantau parents: 350 diff changeset	15 * Lesser General Public License for more details.
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	16 *
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	17 * You should have received a copy of the GNU Lesser General Public
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	18 * License along with FFmpeg; if not, write to the Free Software
3036 0b546eab515d Update licensing information: The FSF changed postal address. diego parents: 2979 diff changeset	19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	20 *
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	21 * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	22 * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	23 */
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	24
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	25 #include "../dsputil.h"
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	26 #include "../mpegvideo.h"
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	27 #include "../avcodec.h"
3398 e0927bc44a10 Move REG_* macros from libavcodec/i386/mmx.h to libavutil/x86_cpu.h lucabe parents: 3281 diff changeset	28 #include "x86_cpu.h"
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	29
/*
 * Tables declared here are defined in another translation unit.
 * mm_wabs is a 64-bit constant with every bit set; mm_wone holds the value 1
 * in each of its four 16-bit words. Both are 8-byte aligned.
 * NOTE(review): none of these four symbols is referenced in the part of the
 * file visible here -- confirm they are still used further down.
 */
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	30 extern uint8_t zigzag_direct_noperm[64];
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	31 extern uint16_t inv_zigzag_direct16[64];
200 6ab301aaa652 (commit by michael) arpi_esp parents: 153 diff changeset	32
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	33 static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	34 static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	35
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	36
/*
 * H.263-style dequantizer for intra blocks (MMX).
 *
 * Rescales block[] in place, 8 coefficients per loop iteration, starting at
 * block[0] and continuing until the group containing block[nCoeffs] has been
 * processed. Per 16-bit coefficient:
 *     block[i] > 0  ->  block[i]*qmul + qadd
 *     block[i] < 0  ->  block[i]*qmul - qadd
 *     block[i] == 0 ->  0
 * with qmul = 2*qscale. qadd is (qscale - 1) | 1, or 0 when s->h263_aic is set.
 *
 * The loop also overwrites block[0]; the DC value is therefore computed in C
 * beforehand (block[0] times y_dc_scale for n < 4, times c_dc_scale otherwise,
 * or the unmodified block[0] when s->h263_aic is set) and stored back after
 * the asm statement.
 *
 * s       context; reads h263_aic, ac_pred, y_dc_scale, c_dc_scale,
 *         block_last_index[n] and inter_scantable.raster_end[]
 * block   coefficients, modified in place
 * n       block number; n < 4 selects y_dc_scale, otherwise c_dc_scale
 * qscale  quantizer scale
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	37 static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	38 DCTELEM *block, int n, int qscale)
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	39 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	40 long level, qmul, qadd, nCoeffs;
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	41
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	42 qmul = qscale << 1;
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	43
1661 4c9fd29f1606 h263 slice structured mode michael parents: 1597 diff changeset	44 assert(s->block_last_index[n]>=0 \|\| s->h263_aic);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	45
/* DC value is computed here and written back after the loop */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	46 if (!s->h263_aic) {
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	47 if (n < 4)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	48 level = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	49 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	50 level = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	51 qadd = (qscale - 1) \| 1;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	52 }else{
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	53 qadd = 0;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	54 level= block[0];
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	55 }
/* with ac_pred the whole block is processed, otherwise only up to the last coded coefficient */
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	56 if(s->ac_pred)
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	57 nCoeffs=63;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	58 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	59 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
200 6ab301aaa652 (commit by michael) arpi_esp parents: 153 diff changeset	60 //printf("%d %d ", qmul, qadd);
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	61 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	62 "movd %1, %%mm6 \n\t" // mm6 = qmul
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	63 "packssdw %%mm6, %%mm6 \n\t" // the two packs replicate qmul into
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	64 "packssdw %%mm6, %%mm6 \n\t" // all four 16-bit lanes (signed saturation)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	65 "movd %2, %%mm5 \n\t" // mm5 = qadd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	66 "pxor %%mm7, %%mm7 \n\t" // mm7 = 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	67 "packssdw %%mm5, %%mm5 \n\t" // replicate qadd into all four lanes
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	68 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	69 "psubw %%mm5, %%mm7 \n\t" // mm7 = -qadd in every lane
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	70 "pxor %%mm4, %%mm4 \n\t" // mm4 = 0, reference for the sign test
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	71 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	72 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	73 "movq (%0, %3), %%mm0 \n\t" // load 8 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	74 "movq 8(%0, %3), %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	75
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	76 "pmullw %%mm6, %%mm0 \n\t" // block[i]*qmul (low 16 bits)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	77 "pmullw %%mm6, %%mm1 \n\t" // block[i]*qmul (low 16 bits)
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	78
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	79 "movq (%0, %3), %%mm2 \n\t" // reload the unscaled coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	80 "movq 8(%0, %3), %%mm3 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	81
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	82 "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] > 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	83 "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] > 0 ? -1 : 0
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	84
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	85 "pxor %%mm2, %%mm0 \n\t" // bitwise NOT of the product in positive lanes
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	86 "pxor %%mm3, %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	87
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	88 "paddw %%mm7, %%mm0 \n\t" // add -qadd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	89 "paddw %%mm7, %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	90
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	91 "pxor %%mm0, %%mm2 \n\t" // undo the NOT: product+qadd (positive), product-qadd (other lanes)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	92 "pxor %%mm1, %%mm3 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	93
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	94 "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 (lane still equals -qadd)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	95 "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 (lane still equals -qadd)
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	96
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	97 "pandn %%mm2, %%mm0 \n\t" // force zero coefficients back to 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	98 "pandn %%mm3, %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	99
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	100 "movq %%mm0, (%0, %3) \n\t" // store the 8 results in place
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	101 "movq %%mm1, 8(%0, %3) \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	102
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	103 "add $16, %3 \n\t" // advance the byte offset by 8 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	104 "jng 1b \n\t" // loop while the offset is <= 0
/* %0 = one past... no: %0 = &block[nCoeffs], %3 = byte offset starting at -2*nCoeffs, so %0+%3 starts at block[0].
   NOTE(review): %3 is declared as an input-only operand but is modified by "add $16, %3";
   GCC's extended-asm rules say input operands must not be changed -- consider an in/out operand. */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	105 ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	106 : "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	107 );
/* the loop above also rescaled block[0]; restore the separately computed DC value */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	108 block[0]= level;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	109 }
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	110
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	111
/*
 * H.263-style dequantizer for inter blocks (MMX).
 *
 * Rescales block[] in place, 8 coefficients per loop iteration, starting at
 * block[0] and continuing until the group containing block[nCoeffs] has been
 * processed, where nCoeffs = s->inter_scantable.raster_end[block_last_index[n]].
 * Per 16-bit coefficient:
 *     block[i] > 0  ->  block[i]*qmul + qadd
 *     block[i] < 0  ->  block[i]*qmul - qadd
 *     block[i] == 0 ->  0
 * with qmul = 2*qscale and qadd = (qscale - 1) | 1.
 * Unlike the intra variant, block[0] gets no special treatment.
 *
 * s       context; reads h263_aic (assert only), block_last_index[n] and
 *         inter_scantable.raster_end[]
 * block   coefficients, modified in place
 * n       block number, used to index block_last_index[]
 * qscale  quantizer scale
 */
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	112 static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	113 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	114 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	115 long qmul, qadd, nCoeffs;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	116
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	117 qmul = qscale << 1;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	118 qadd = (qscale - 1) \| 1;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	119
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	120 assert(s->block_last_index[n]>=0 \|\| s->h263_aic);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	121
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	122 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	123 //printf("%d %d ", qmul, qadd);
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	124 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	125 "movd %1, %%mm6 \n\t" // mm6 = qmul
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	126 "packssdw %%mm6, %%mm6 \n\t" // the two packs replicate qmul into
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	127 "packssdw %%mm6, %%mm6 \n\t" // all four 16-bit lanes (signed saturation)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	128 "movd %2, %%mm5 \n\t" // mm5 = qadd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	129 "pxor %%mm7, %%mm7 \n\t" // mm7 = 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	130 "packssdw %%mm5, %%mm5 \n\t" // replicate qadd into all four lanes
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	131 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	132 "psubw %%mm5, %%mm7 \n\t" // mm7 = -qadd in every lane
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	133 "pxor %%mm4, %%mm4 \n\t" // mm4 = 0, reference for the sign test
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	134 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	135 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	136 "movq (%0, %3), %%mm0 \n\t" // load 8 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	137 "movq 8(%0, %3), %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	138
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	139 "pmullw %%mm6, %%mm0 \n\t" // block[i]*qmul (low 16 bits)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	140 "pmullw %%mm6, %%mm1 \n\t" // block[i]*qmul (low 16 bits)
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	141
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	142 "movq (%0, %3), %%mm2 \n\t" // reload the unscaled coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	143 "movq 8(%0, %3), %%mm3 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	144
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	145 "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] > 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	146 "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] > 0 ? -1 : 0
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	147
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	148 "pxor %%mm2, %%mm0 \n\t" // bitwise NOT of the product in positive lanes
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	149 "pxor %%mm3, %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	150
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	151 "paddw %%mm7, %%mm0 \n\t" // add -qadd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	152 "paddw %%mm7, %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	153
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	154 "pxor %%mm0, %%mm2 \n\t" // undo the NOT: product+qadd (positive), product-qadd (other lanes)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	155 "pxor %%mm1, %%mm3 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	156
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	157 "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 (lane still equals -qadd)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	158 "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 (lane still equals -qadd)
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	159
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	160 "pandn %%mm2, %%mm0 \n\t" // force zero coefficients back to 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	161 "pandn %%mm3, %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	162
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	163 "movq %%mm0, (%0, %3) \n\t" // store the 8 results in place
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	164 "movq %%mm1, 8(%0, %3) \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	165
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	166 "add $16, %3 \n\t" // advance the byte offset by 8 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	167 "jng 1b \n\t" // loop while the offset is <= 0
/* %0 = &block[nCoeffs], %3 = byte offset starting at -2*nCoeffs, so %0+%3 starts at block[0].
   NOTE(review): %3 is declared as an input-only operand but is modified by "add $16, %3";
   GCC's extended-asm rules say input operands must not be changed -- consider an in/out operand. */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	168 ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	169 : "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	170 );
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	171 }
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	172
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	173
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	174 /*
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	175 NK:
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	176 Note: looking at PARANOID:
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	177 "enable all paranoid tests for rounding, overflows, etc..."
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	178
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	179 #ifdef PARANOID
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	180 if (level < -2048 \|\| level > 2047)
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	181 fprintf(stderr, "unquant error %d %d\n", i, level);
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	182 #endif
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	183 We can suppose that the result of the two multiplications can't be greater than 0xFFFF
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	184 i.e. is 16-bit, so we use here only PMULLW instruction and can avoid
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	185 a complex multiplication.
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	186 =====================================================
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	187 Full formula for multiplication of 2 integer numbers
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	188 which are represented as high:low words:
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	189 input: value1 = high1:low1
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	190 value2 = high2:low2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	191 output: value3 = value1*value2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	192 value3=high3:low3 (on overflow: modulus 2^32 wrap-around)
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	193 this means that for 0x123456 * 0x123456 the correct result is 0x14b66cb0ce4
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	194 but this algorithm will compute only 0x66cb0ce4
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	195 this is limited by the 16-bit size of the operands
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	196 ---------------------------------
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	197 tlow1 = high1*low2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	198 tlow2 = high2*low1
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	199 tlow1 = tlow1 + tlow2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	200 high3:low3 = low1*low2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	201 high3 += tlow1
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	202 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	203 static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	204 DCTELEM *block, int n, int qscale)
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	205 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	206 long nCoeffs;
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	207 const uint16_t *quant_matrix;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	208 int block0;
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	209
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	210 assert(s->block_last_index[n]>=0);
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	211
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	212 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
200 6ab301aaa652 (commit by michael) arpi_esp parents: 153 diff changeset	213
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	214 if (n < 4)
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	215 block0 = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	216 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	217 block0 = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	218 /* XXX: only mpeg1 */
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	219 quant_matrix = s->intra_matrix;
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	220 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	221 "pcmpeqw %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	222 "psrlw $15, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	223 "movd %2, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	224 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	225 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	226 "mov %3, %%"REG_a" \n\t"
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	227 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	228 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	229 "movq (%0, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	230 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	231 "movq (%1, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	232 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	233 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	234 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	235 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	236 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	237 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	238 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	239 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	240 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	241 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	242 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	243 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	244 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	245 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	246 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	247 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	248 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	249 "psraw $3, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	250 "psraw $3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	251 "psubw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	252 "psubw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	253 "por %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	254 "por %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	255 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	256 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	257 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	258 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	259 "pandn %%mm0, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	260 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	261 "movq %%mm4, (%0, %%"REG_a") \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	262 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	263
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	264 "add $16, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	265 "js 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	266 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	267 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	268 );
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	269 block[0]= block0;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	270 }
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	271
/*
 * MPEG-1 inter-block dequantizer, MMX version. Works on block[] in place.
 *
 * The loop starts at block[0] and handles 8 coefficients (16 bytes) per
 * iteration until the running byte offset, which starts at -2*nCoeffs, is no
 * longer negative. For every coefficient it computes
 *     ((abs(block[i])*2 + 1) * (qscale*quant_matrix[i])) >> 4
 * then forces the value odd with (x - 1) | 1, puts the original sign back and
 * leaves coefficients that were zero at zero. Products come from pmullw, so
 * only the low 16 bits of each multiplication are kept.
 *
 * s      - context; block_last_index[n], intra_scantable.raster_end[] and
 *          inter_matrix are read
 * block  - coefficients, overwritten with the dequantized values
 * n      - index into s->block_last_index[]
 * qscale - quantizer scale, broadcast to the four words of mm6
 *
 * The byte offsets assume 16-bit elements: quant_matrix is uint16_t, and
 * DCTELEM is presumably 16 bits wide as well - TODO confirm.
 * NOTE(review): operand %2 (qscale) uses the g constraint, which also admits
 * an immediate, while movd needs a register or memory source - confirm
 * whether rm would be the safer constraint.
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	272 static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	273 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	274 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	275 long nCoeffs;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	276 const uint16_t *quant_matrix;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	277
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	278 assert(s->block_last_index[n]>=0);
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	279
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	280 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; // raster_end[] entry of the last coded index, plus one
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	281
344 9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream michaelni parents: 325 diff changeset	282 quant_matrix = s->inter_matrix;
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	283 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	284 "pcmpeqw %%mm7, %%mm7 \n\t" // mm7 = all ones
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	285 "psrlw $15, %%mm7 \n\t" // mm7 = 1 in each 16-bit word
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	286 "movd %2, %%mm6 \n\t" // mm6 = qscale in the low dword
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	287 "packssdw %%mm6, %%mm6 \n\t" // two packs with signed saturation ...
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	288 "packssdw %%mm6, %%mm6 \n\t" // ... leave qscale in all four words
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	289 "mov %3, %%"REG_a" \n\t" // byte offset = -2*nCoeffs, counts up towards 0
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	290 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	291 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	292 "movq (%0, %%"REG_a"), %%mm0 \n\t" // 4 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	293 "movq 8(%0, %%"REG_a"), %%mm1 \n\t" // next 4 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	294 "movq (%1, %%"REG_a"), %%mm4 \n\t" // matching quant_matrix entries
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	295 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	296 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	297 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	298 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	299 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	300 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	301 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	302 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	303 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	304 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	305 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	306 "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	307 "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	308 "paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	309 "paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	310 "pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	311 "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	312 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	313 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	314 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	315 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	316 "psraw $4, %%mm0 \n\t" // >> 4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	317 "psraw $4, %%mm1 \n\t" // >> 4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	318 "psubw %%mm7, %%mm0 \n\t" // x - 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	319 "psubw %%mm7, %%mm1 \n\t" // x - 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	320 "por %%mm7, %%mm0 \n\t" // (x - 1) | 1: force the magnitude odd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	321 "por %%mm7, %%mm1 \n\t" // (x - 1) | 1: force the magnitude odd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	322 "pxor %%mm2, %%mm0 \n\t" // xor + subtract of the sign mask ...
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	323 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	324 "psubw %%mm2, %%mm0 \n\t" // ... restores the sign of block[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	325 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	326 "pandn %%mm0, %%mm4 \n\t" // zero where block[i] was 0, result elsewhere
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	327 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	328 "movq %%mm4, (%0, %%"REG_a") \n\t" // store back in place
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	329 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	330
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	331 "add $16, %%"REG_a" \n\t" // advance 16 bytes (8 coefficients)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	332 "js 1b \n\t" // loop while the offset is still negative
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	333 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	334 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	335 );
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	336 }
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	337
/*
 * MPEG-2 intra-block dequantizer, MMX version. Works on block[] in place.
 *
 * The DC value is computed in C as block[0] times y_dc_scale (n < 4) or
 * c_dc_scale (otherwise) and written to block[0] after the asm loop, replacing
 * whatever the loop produced for index 0. The loop itself handles 8
 * coefficients (16 bytes) per iteration and computes
 *     (abs(block[i]) * (qscale*quant_matrix[i])) >> 3
 * with the original sign restored and zero coefficients kept at zero.
 * Products come from pmullw, so only the low 16 bits are kept.
 *
 * s      - context; alternate_scan, block_last_index[n],
 *          intra_scantable.raster_end[], y_dc_scale, c_dc_scale and
 *          intra_matrix are read
 * block  - coefficients, overwritten with the dequantized values
 * n      - index into s->block_last_index[]; also selects the DC scale
 * qscale - quantizer scale, broadcast to the four words of mm6
 *
 * The byte offsets assume 16-bit elements: quant_matrix is uint16_t, and
 * DCTELEM is presumably 16 bits wide as well - TODO confirm.
 * NOTE(review): operand %2 (qscale) uses the g constraint, which also admits
 * an immediate, while movd needs a register or memory source - confirm
 * whether rm would be the safer constraint.
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	338 static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	339 DCTELEM *block, int n, int qscale)
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	340 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	341 long nCoeffs;
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	342 const uint16_t *quant_matrix;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	343 int block0;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	344
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	345 assert(s->block_last_index[n]>=0);
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	346
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	347 if(s->alternate_scan) nCoeffs= 63; //FIXME
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	348 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; // no +1: the jng loop below still runs at offset 0, so index nCoeffs is covered
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	349
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	350 if (n < 4)
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	351 block0 = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	352 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	353 block0 = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	354 quant_matrix = s->intra_matrix;
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	355 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	356 "pcmpeqw %%mm7, %%mm7 \n\t" // mm7 = all ones
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	357 "psrlw $15, %%mm7 \n\t" // mm7 = 1 in each word; mm7 is not referenced again in this asm block
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	358 "movd %2, %%mm6 \n\t" // mm6 = qscale in the low dword
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	359 "packssdw %%mm6, %%mm6 \n\t" // two packs with signed saturation ...
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	360 "packssdw %%mm6, %%mm6 \n\t" // ... leave qscale in all four words
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	361 "mov %3, %%"REG_a" \n\t" // byte offset = -2*nCoeffs, counts up towards 0
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	362 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	363 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	364 "movq (%0, %%"REG_a"), %%mm0 \n\t" // 4 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	365 "movq 8(%0, %%"REG_a"), %%mm1 \n\t" // next 4 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	366 "movq (%1, %%"REG_a"), %%mm4 \n\t" // matching quant_matrix entries
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	367 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	368 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	369 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	370 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	371 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	372 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	373 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	374 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	375 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	376 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	377 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	378 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	379 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	380 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	381 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	382 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	383 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	384 "psraw $3, %%mm0 \n\t" // >> 3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	385 "psraw $3, %%mm1 \n\t" // >> 3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	386 "pxor %%mm2, %%mm0 \n\t" // xor + subtract of the sign mask ...
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	387 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	388 "psubw %%mm2, %%mm0 \n\t" // ... restores the sign of block[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	389 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	390 "pandn %%mm0, %%mm4 \n\t" // zero where block[i] was 0, result elsewhere
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	391 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	392 "movq %%mm4, (%0, %%"REG_a") \n\t" // store back in place
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	393 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	394
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	395 "add $16, %%"REG_a" \n\t" // advance 16 bytes (8 coefficients)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	396 "jng 1b \n\t" // loop while the offset is <= 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	397 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	398 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	399 );
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	400 block[0]= block0; // DC: overwrite the loop result with the dc_scale product computed before the asm
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	401 //Note: we do not do mismatch control for intra, as errors cannot accumulate
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	402 }
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	403
/*
 * MPEG-2 inter-block dequantizer, MMX version. Works on block[] in place.
 *
 * The loop handles 8 coefficients (16 bytes) per iteration and computes
 *     ((abs(block[i])*2 + 1) * (qscale*quant_matrix[i])) >> 4
 * with the original sign restored and zero coefficients kept at zero.
 * Products come from pmullw, so only the low 16 bits are kept.
 *
 * While looping, every stored output word is XORed into mm7, whose low word
 * starts as 0xFFFF. After the loop the four words of mm7 are folded together
 * and bit 0 of the fold is XORed into the least significant bit of the 16-bit
 * word at byte offset 126 from %0 + %3 (block[63] if DCTELEM is 16 bits wide -
 * TODO confirm). This looks like the mismatch control that the intra variant
 * skips - confirm against the MPEG-2 specification.
 *
 * s      - context; alternate_scan, block_last_index[n],
 *          intra_scantable.raster_end[] and inter_matrix are read
 * block  - coefficients, overwritten with the dequantized values
 * n      - index into s->block_last_index[]
 * qscale - quantizer scale, broadcast to the four words of mm6
 *
 * NOTE(review): operand %2 (qscale) uses the g constraint, which also admits
 * an immediate, while movd needs a register or memory source - confirm
 * whether rm would be the safer constraint.
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	404 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	405 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	406 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	407 long nCoeffs;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	408 const uint16_t *quant_matrix;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	409
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	410 assert(s->block_last_index[n]>=0);
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	411
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	412 if(s->alternate_scan) nCoeffs= 63; //FIXME
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	413 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; // no +1: the jng loop below still runs at offset 0, so index nCoeffs is covered
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	414
344 9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream michaelni parents: 325 diff changeset	415 quant_matrix = s->inter_matrix;
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	416 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	417 "pcmpeqw %%mm7, %%mm7 \n\t" // mm7 = all ones
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	418 "psrlq $48, %%mm7 \n\t" // mm7 = 0x000000000000FFFF, seed of the XOR accumulator
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	419 "movd %2, %%mm6 \n\t" // mm6 = qscale in the low dword
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	420 "packssdw %%mm6, %%mm6 \n\t" // two packs with signed saturation ...
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	421 "packssdw %%mm6, %%mm6 \n\t" // ... leave qscale in all four words
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	422 "mov %3, %%"REG_a" \n\t" // byte offset = -2*nCoeffs, counts up towards 0
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	423 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	424 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	425 "movq (%0, %%"REG_a"), %%mm0 \n\t" // 4 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	426 "movq 8(%0, %%"REG_a"), %%mm1 \n\t" // next 4 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	427 "movq (%1, %%"REG_a"), %%mm4 \n\t" // matching quant_matrix entries
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	428 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	429 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	430 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	431 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	432 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	433 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	434 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	435 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	436 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	437 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	438 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	439 "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	440 "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	441 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	442 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	443 "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	444 "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	445 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	446 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	447 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	448 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	449 "psrlw $4, %%mm0 \n\t" // logical >> 4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	450 "psrlw $4, %%mm1 \n\t" // logical >> 4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	451 "pxor %%mm2, %%mm0 \n\t" // xor + subtract of the sign mask ...
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	452 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	453 "psubw %%mm2, %%mm0 \n\t" // ... restores the sign of block[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	454 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	455 "pandn %%mm0, %%mm4 \n\t" // zero where block[i] was 0, result elsewhere
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	456 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	457 "pxor %%mm4, %%mm7 \n\t" // XOR every output word into the accumulator
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	458 "pxor %%mm5, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	459 "movq %%mm4, (%0, %%"REG_a") \n\t" // store back in place
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	460 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	461
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	462 "add $16, %%"REG_a" \n\t" // advance 16 bytes (8 coefficients)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	463 "jng 1b \n\t" // loop while the offset is <= 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	464 "movd 124(%0, %3), %%mm0 \n\t" // load the dword at byte offset 124 from %0 + %3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	465 "movq %%mm7, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	466 "psrlq $32, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	467 "pxor %%mm6, %%mm7 \n\t" // low dword = high dword ^ low dword
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	468 "movq %%mm7, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	469 "psrlq $16, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	470 "pxor %%mm6, %%mm7 \n\t" // low word = XOR of all four accumulator words
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	471 "pslld $31, %%mm7 \n\t" // keep only bit 0 of each dword, moved up to bit 31
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	472 "psrlq $15, %%mm7 \n\t" // bring it down to bit 16 of the low dword
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	473 "pxor %%mm7, %%mm0 \n\t" // flip bit 16 of the loaded dword when the folded bit is 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	474 "movd %%mm0, 124(%0, %3) \n\t" // write the dword back
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	475
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	476 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) // %3 must be a register: it is used as an index in 124(%0, %3)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	477 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	478 );
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	479 }
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	480
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	481 /* draw the edges of width 'w' of an image of size width, height
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	482 this mmx version can only handle w==8 || w==16 */
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	483 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	484 {
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	485 uint8_t ptr, last_line;
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	486 int i;
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	487
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	488 last_line = buf + (height - 1) * wrap;
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	489 /* left and right */
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	490 ptr = buf;
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	491 if(w==8)
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	492 {
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	493 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	494 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	495 "movd (%0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	496 "punpcklbw %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	497 "punpcklwd %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	498 "punpckldq %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	499 "movq %%mm0, -8(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	500 "movq -8(%0, %2), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	501 "punpckhbw %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	502 "punpckhwd %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	503 "punpckhdq %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	504 "movq %%mm1, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	505 "add %1, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	506 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	507 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	508 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	509 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	510 );
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	511 }
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	512 else
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	513 {
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	514 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	515 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	516 "movd (%0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	517 "punpcklbw %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	518 "punpcklwd %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	519 "punpckldq %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	520 "movq %%mm0, -8(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	521 "movq %%mm0, -16(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	522 "movq -8(%0, %2), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	523 "punpckhbw %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	524 "punpckhwd %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	525 "punpckhdq %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	526 "movq %%mm1, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	527 "movq %%mm1, 8(%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	528 "add %1, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	529 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	530 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	531 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	532 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	533 );
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	534 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	535
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	536 for(i=0;i<w;i+=4) {
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	537 /* top and bottom (and hopefully also the corners) */
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	538 ptr= buf - (i + 1) * wrap - w;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	539 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	540 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	541 "movq (%1, %0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	542 "movq %%mm0, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	543 "movq %%mm0, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	544 "movq %%mm0, (%0, %2, 2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	545 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	546 "add $8, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	547 "cmp %4, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	548 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	549 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	550 : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	551 );
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	552 ptr= last_line + (i + 1) * wrap - w;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	553 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	554 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	555 "movq (%1, %0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	556 "movq %%mm0, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	557 "movq %%mm0, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	558 "movq %%mm0, (%0, %2, 2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	559 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	560 "add $8, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	561 "cmp %4, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	562 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	563 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	564 : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	565 );
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	566 }
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	567 }
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	568
1719 4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	569 static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	570 const int intra= s->mb_intra;
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	571 int *sum= s->dct_error_sum[intra];
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	572 uint16_t *offset= s->dct_offset[intra];
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	573
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	574 s->dct_count[intra]++;
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	575
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	576 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	577 "pxor %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	578 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	579 "pxor %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	580 "pxor %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	581 "movq (%0), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	582 "movq 8(%0), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	583 "pcmpgtw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	584 "pcmpgtw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	585 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	586 "pxor %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	587 "psubw %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	588 "psubw %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	589 "movq %%mm2, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	590 "movq %%mm3, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	591 "psubusw (%2), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	592 "psubusw 8(%2), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	593 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	594 "pxor %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	595 "psubw %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	596 "psubw %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	597 "movq %%mm2, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	598 "movq %%mm3, 8(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	599 "movq %%mm4, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	600 "movq %%mm5, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	601 "punpcklwd %%mm7, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	602 "punpckhwd %%mm7, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	603 "punpcklwd %%mm7, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	604 "punpckhwd %%mm7, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	605 "paddd (%1), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	606 "paddd 8(%1), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	607 "paddd 16(%1), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	608 "paddd 24(%1), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	609 "movq %%mm4, (%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	610 "movq %%mm2, 8(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	611 "movq %%mm5, 16(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	612 "movq %%mm3, 24(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	613 "add $16, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	614 "add $32, %1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	615 "add $16, %2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	616 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	617 " jb 1b \n\t"
1719 4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	618 : "+r" (block), "+r" (sum), "+r" (offset)
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	619 : "r"(block+64)
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	620 );
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	621 }
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	622
1720 96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	623 static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	624 const int intra= s->mb_intra;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	625 int *sum= s->dct_error_sum[intra];
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	626 uint16_t *offset= s->dct_offset[intra];
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	627
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	628 s->dct_count[intra]++;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	629
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	630 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	631 "pxor %%xmm7, %%xmm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	632 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	633 "pxor %%xmm0, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	634 "pxor %%xmm1, %%xmm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	635 "movdqa (%0), %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	636 "movdqa 16(%0), %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	637 "pcmpgtw %%xmm2, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	638 "pcmpgtw %%xmm3, %%xmm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	639 "pxor %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	640 "pxor %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	641 "psubw %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	642 "psubw %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	643 "movdqa %%xmm2, %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	644 "movdqa %%xmm3, %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	645 "psubusw (%2), %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	646 "psubusw 16(%2), %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	647 "pxor %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	648 "pxor %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	649 "psubw %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	650 "psubw %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	651 "movdqa %%xmm2, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	652 "movdqa %%xmm3, 16(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	653 "movdqa %%xmm4, %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	654 "movdqa %%xmm5, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	655 "punpcklwd %%xmm7, %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	656 "punpckhwd %%xmm7, %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	657 "punpcklwd %%xmm7, %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	658 "punpckhwd %%xmm7, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	659 "paddd (%1), %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	660 "paddd 16(%1), %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	661 "paddd 32(%1), %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	662 "paddd 48(%1), %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	663 "movdqa %%xmm4, (%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	664 "movdqa %%xmm6, 16(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	665 "movdqa %%xmm5, 32(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	666 "movdqa %%xmm0, 48(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	667 "add $32, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	668 "add $64, %1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	669 "add $32, %2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	670 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	671 " jb 1b \n\t"
1720 96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	672 : "+r" (block), "+r" (sum), "+r" (offset)
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	673 : "r"(block+64)
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	674 );
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	675 }
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	676
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	677 #undef HAVE_MMX2
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	678 #define RENAME(a) a ## _MMX
1565 1a9a63f59849 minor mmx2 optimization if the dct michael parents: 1261 diff changeset	679 #define RENAMEl(a) a ## _mmx
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	680 #include "mpegvideo_mmx_template.c"
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	681
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	682 #define HAVE_MMX2
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	683 #undef RENAME
1597 4c9165372ab3 noise reduction of dct coefficients michael parents: 1565 diff changeset	684 #undef RENAMEl
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	685 #define RENAME(a) a ## _MMX2
1565 1a9a63f59849 minor mmx2 optimization if the dct michael parents: 1261 diff changeset	686 #define RENAMEl(a) a ## _mmx2
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	687 #include "mpegvideo_mmx_template.c"
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	688
1765 e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	689 #undef RENAME
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	690 #undef RENAMEl
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	691 #define RENAME(a) a ## _SSE2
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	692 #define RENAMEl(a) a ## _sse2
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	693 #include "mpegvideo_mmx_template.c"
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	694
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	695 void MPV_common_init_mmx(MpegEncContext *s)
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	696 {
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	697 if (mm_flags & MM_MMX) {
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	698 const int dct_algo = s->avctx->dct_algo;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	699
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	700 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	701 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	702 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	703 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
3281 7fac25904a8b missmatch control for mpeg2 intra dequantization if bitexact=1 michael parents: 3036 diff changeset	704 if(!(s->flags & CODEC_FLAG_BITEXACT))
7fac25904a8b missmatch control for mpeg2 intra dequantization if bitexact=1 michael parents: 3036 diff changeset	705 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	706 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
312 8cf5507e6ca5 mpeg4 mpeg quantizer support michaelni parents: 252 diff changeset	707
350 6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now. pulento parents: 344 diff changeset	708 draw_edges = draw_edges_mmx;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	709
1720 96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	710 if (mm_flags & MM_SSE2) {
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	711 s->denoise_dct= denoise_dct_sse2;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	712 } else {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	713 s->denoise_dct= denoise_dct_mmx;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	714 }
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	715
625 bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	716 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
1765 e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	717 if(mm_flags & MM_SSE2){
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	718 s->dct_quantize= dct_quantize_SSE2;
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	719 } else if(mm_flags & MM_MMXEXT){
625 bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	720 s->dct_quantize= dct_quantize_MMX2;
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	721 } else {
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	722 s->dct_quantize= dct_quantize_MMX;
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	723 }
350 6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now. pulento parents: 344 diff changeset	724 }
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	725 }
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	726 }

Mercurial > libavcodec.hg

annotate i386/mpegvideo_mmx.c @ 3980:5afe4253a220 libavcodec