libavcodec.hg: i386/mpegvideo

annotate i386/mpegvideo_mmx.c @ 3176:babf844e1308 libavcodec

Init simplification and 2% faster wma_decode_block on amd64 with tables use instead of pow().

author	banan
date	Wed, 08 Mar 2006 09:26:57 +0000
parents	0b546eab515d
children	7fac25904a8b

rev	line source
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	1 /*
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	2 * The simplest mpeg encoder (well, it was the simplest!)
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	3 * Copyright (c) 2000,2001 Fabrice Bellard.
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	4 *
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	5 * This library is free software; you can redistribute it and/or
718a22dc121f license/copyright change glantau parents: 350 diff changeset	6 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change glantau parents: 350 diff changeset	7 * License as published by the Free Software Foundation; either
718a22dc121f license/copyright change glantau parents: 350 diff changeset	8 * version 2 of the License, or (at your option) any later version.
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	9 *
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	10 * This library is distributed in the hope that it will be useful,
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change glantau parents: 350 diff changeset	13 * Lesser General Public License for more details.
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	14 *
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	15 * You should have received a copy of the GNU Lesser General Public
718a22dc121f license/copyright change glantau parents: 350 diff changeset	16 * License along with this library; if not, write to the Free Software
3036 0b546eab515d Update licensing information: The FSF changed postal address. diego parents: 2979 diff changeset	17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	18 *
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	19 * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	20 * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	21 */
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	22
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	23 #include "../dsputil.h"
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	24 #include "../mpegvideo.h"
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	25 #include "../avcodec.h"
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	26 #include "mmx.h"
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	27
/* Two 64-entry tables declared extern here.
 * NOTE(review): presumably defined in the generic mpegvideo code -- confirm. */
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	28 extern uint8_t zigzag_direct_noperm[64];
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	29 extern uint16_t inv_zigzag_direct16[64];
200 6ab301aaa652 (commit by michael) arpi_esp parents: 153 diff changeset	30
/* 64-bit constants aligned to 8 bytes: mm_wabs has every bit set,
 * mm_wone holds the value 1 in each of its four 16-bit words. */
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	31 static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	32 static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	33
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	34
/*
 * MMX H.263-style dequantizer for intra blocks.
 *
 * The vector loop rewrites the coefficients in place, 8 at a time, as
 *   block[i] >  0:  block[i] * qmul + qadd
 *   block[i] <  0:  block[i] * qmul - qadd
 *   block[i] == 0:  0
 * with qmul = 2 * qscale. All arithmetic is done on 16-bit words (pmullw
 * keeps only the low 16 bits of each product). The loop always handles whole
 * groups of 8 words, so words past nCoeffs in the last group are processed
 * as well. The DC term block[0] is computed in C before the loop and stored
 * after it, replacing whatever the loop wrote there.
 *
 * s       context; h263_aic, ac_pred, y_dc_scale, c_dc_scale,
 *         block_last_index[] and inter_scantable.raster_end[] are read
 * block   coefficients, modified in place
 * n       block number; n < 4 selects y_dc_scale, otherwise c_dc_scale
 * qscale  quantizer scale
 *
 * NOTE(review): the backslash-escaped bars in this listing look like
 * table-escaping artifacts for plain | and || -- confirm against the
 * real source file.
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	35 static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	36 DCTELEM *block, int n, int qscale)
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	37 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	38 long level, qmul, qadd, nCoeffs;
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	39
// Multiplier applied to every coefficient: twice the quantizer scale.
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	40 qmul = qscale << 1;
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	41
1661 4c9fd29f1606 h263 slice structured mode michael parents: 1597 diff changeset	42 assert(s->block_last_index[n]>=0 \|\| s->h263_aic);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	43
// When s->h263_aic is clear, the DC term is scaled by y_dc_scale (n < 4) or
// c_dc_scale and qadd is (qscale - 1) with its low bit forced to 1.
// When it is set, the DC term is kept unchanged and qadd is 0.
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	44 if (!s->h263_aic) {
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	45 if (n < 4)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	46 level = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	47 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	48 level = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	49 qadd = (qscale - 1) \| 1;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	50 }else{
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	51 qadd = 0;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	52 level= block[0];
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	53 }
// When s->ac_pred is set, all 64 coefficients (indices 0..63) are processed;
// otherwise the last index comes from inter_scantable.raster_end[] looked up
// with block_last_index[n].
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	54 if(s->ac_pred)
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	55 nCoeffs=63;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	56 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	57 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
200 6ab301aaa652 (commit by michael) arpi_esp parents: 153 diff changeset	58 //printf("%d %d ", qmul, qadd);
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	59 asm volatile(
// Setup: broadcast qmul into the four words of mm6 and qadd into mm5,
// then mm7 = -qadd in every word and mm4 = 0.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	60 "movd %1, %%mm6 \n\t" //qmul
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	61 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	62 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	63 "movd %2, %%mm5 \n\t" //qadd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	64 "pxor %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	65 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	66 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	67 "psubw %%mm5, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	68 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	69 ".balign 16 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	70 "1: \n\t"
// Load 8 coefficients and multiply each by qmul.
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	71 "movq (%0, %3), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	72 "movq 8(%0, %3), %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	73
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	74 "pmullw %%mm6, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	75 "pmullw %%mm6, %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	76
// Reload the same 8 coefficients to build a mask of the positive ones.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	77 "movq (%0, %3), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	78 "movq 8(%0, %3), %%mm3 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	79
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	80 "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] > 0 ? -1 : 0 (mm2 > mm4, and mm4 is 0)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	81 "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] > 0 ? -1 : 0 (mm3 > mm4, and mm4 is 0)
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	82
// Positive lanes are complemented, get -qadd added, and are complemented
// again below, which yields x*qmul + qadd. Other lanes become x*qmul - qadd.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	83 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	84 "pxor %%mm3, %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	85
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	86 "paddw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	87 "paddw %%mm7, %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	88
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	89 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	90 "pxor %%mm1, %%mm3 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	91
// A lane of mm0/mm1 that still equals -qadd came from a zero coefficient.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	92 "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	93 "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	94
// Force those lanes to 0 and keep the dequantized value everywhere else.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	95 "pandn %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	96 "pandn %%mm3, %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	97
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	98 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	99 "movq %%mm1, 8(%0, %3) \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	100
// Advance by 16 bytes (8 words); keep looping while the byte offset is <= 0.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	101 "add $16, %3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	102 "jng 1b \n\t"
// %0 = block + nCoeffs, %1 = qmul, %2 = qadd, %3 = -2*nCoeffs (byte offset,
// so %0 + %3 starts at block[0]).
// NOTE(review): %3 is listed as an input operand but is modified by the add.
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	103 ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	104 : "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	105 );
// Replace the DC term with the value computed in C before the loop.
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	106 block[0]= level;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	107 }
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	108
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	109
/*
 * MMX H.263-style dequantizer for inter blocks.
 *
 * The vector loop rewrites the coefficients in place, 8 at a time, as
 *   block[i] >  0:  block[i] * qmul + qadd
 *   block[i] <  0:  block[i] * qmul - qadd
 *   block[i] == 0:  0
 * with qmul = 2 * qscale and qadd = (qscale - 1) with its low bit forced
 * to 1. All arithmetic is done on 16-bit words (pmullw keeps only the low
 * 16 bits of each product). The loop always handles whole groups of 8 words,
 * so words past nCoeffs in the last group are processed as well. Unlike the
 * intra variant, block[0] gets no separate treatment.
 *
 * s       context; block_last_index[], h263_aic (assert only) and
 *         inter_scantable.raster_end[] are read
 * block   coefficients, modified in place
 * n       block number, used to index block_last_index[]
 * qscale  quantizer scale
 *
 * NOTE(review): the backslash-escaped bars in this listing look like
 * table-escaping artifacts for plain | and || -- confirm against the
 * real source file.
 */
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	110 static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	111 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	112 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	113 long qmul, qadd, nCoeffs;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	114
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	115 qmul = qscale << 1;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	116 qadd = (qscale - 1) \| 1;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	117
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	118 assert(s->block_last_index[n]>=0 \|\| s->h263_aic);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	119
// Index of the last word the loop must reach, looked up in
// inter_scantable.raster_end[] with block_last_index[n].
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	120 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	121 //printf("%d %d ", qmul, qadd);
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	122 asm volatile(
// Setup: broadcast qmul into the four words of mm6 and qadd into mm5,
// then mm7 = -qadd in every word and mm4 = 0.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	123 "movd %1, %%mm6 \n\t" //qmul
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	124 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	125 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	126 "movd %2, %%mm5 \n\t" //qadd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	127 "pxor %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	128 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	129 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	130 "psubw %%mm5, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	131 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	132 ".balign 16 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	133 "1: \n\t"
// Load 8 coefficients and multiply each by qmul.
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	134 "movq (%0, %3), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	135 "movq 8(%0, %3), %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	136
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	137 "pmullw %%mm6, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	138 "pmullw %%mm6, %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	139
// Reload the same 8 coefficients to build a mask of the positive ones.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	140 "movq (%0, %3), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	141 "movq 8(%0, %3), %%mm3 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	142
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	143 "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] > 0 ? -1 : 0 (mm2 > mm4, and mm4 is 0)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	144 "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] > 0 ? -1 : 0 (mm3 > mm4, and mm4 is 0)
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	145
// Positive lanes are complemented, get -qadd added, and are complemented
// again below, which yields x*qmul + qadd. Other lanes become x*qmul - qadd.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	146 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	147 "pxor %%mm3, %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	148
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	149 "paddw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	150 "paddw %%mm7, %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	151
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	152 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	153 "pxor %%mm1, %%mm3 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	154
// A lane of mm0/mm1 that still equals -qadd came from a zero coefficient.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	155 "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	156 "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	157
// Force those lanes to 0 and keep the dequantized value everywhere else.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	158 "pandn %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	159 "pandn %%mm3, %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	160
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	161 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	162 "movq %%mm1, 8(%0, %3) \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	163
// Advance by 16 bytes (8 words); keep looping while the byte offset is <= 0.
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	164 "add $16, %3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	165 "jng 1b \n\t"
// %0 = block + nCoeffs, %1 = qmul, %2 = qadd, %3 = -2*nCoeffs (byte offset,
// so %0 + %3 starts at block[0]).
// NOTE(review): %3 is listed as an input operand but is modified by the add.
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	166 ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	167 : "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	168 );
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	169 }
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	170
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	171
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	172 /*
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	173 NK:
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	174 Note: looking at PARANOID:
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	175 "enable all paranoid tests for rounding, overflows, etc..."
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	176
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	177 #ifdef PARANOID
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	178 if (level < -2048 \|\| level > 2047)
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	179 fprintf(stderr, "unquant error %d %d\n", i, level);
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	180 #endif
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	181 We can suppose that the result of two multiplications can't be greater than 0xFFFF
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	182 i.e. is 16-bit, so we use here only PMULLW instruction and can avoid
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	183 a complex multiplication.
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	184 =====================================================
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	185 Full formula for multiplication of 2 integer numbers
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	186 which are represented as high:low words:
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	187 input: value1 = high1:low1
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	188 value2 = high2:low2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	189 output: value3 = value1*value2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	190 value3=high3:low3 (on overflow: modulus 2^32 wrap-around)
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	191 this means that for 0x123456 * 0x123456 the correct result is 0x14b66cb0ce4
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	192 but this algorithm will compute only 0x66cb0ce4
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	193 this is limited by the 16-bit size of the operands
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	194 ---------------------------------
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	195 tlow1 = high1*low2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	196 tlow2 = high2*low1
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	197 tlow1 = tlow1 + tlow2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	198 high3:low3 = low1*low2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	199 high3 += tlow1
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	200 */
/*
 * MMX MPEG-1 dequantizer for intra blocks.
 *
 * For each coefficient the loop computes, on 16-bit words,
 *   q      = qscale * quant_matrix[i]
 *   v      = (((abs(block[i]) * q) >> 3) - 1) | 1
 *   result = block[i] == 0 ? 0 : (block[i] < 0 ? -v : v)
 * and stores result back into block[i]. pmullw keeps only the low 16 bits
 * of each product and the shift (psraw) is arithmetic.
 *
 * Register use inside the asm: mm7 holds 1 in every word (all-ones shifted
 * right by 15), mm6 holds qscale broadcast to four words, mm2/mm3 hold the
 * mask of negative inputs used to take the absolute value and to restore the
 * sign, mm4/mm5 first hold q and are then reused for the mask of zero inputs.
 *
 * nCoeffs is inter... no: it is intra_scantable.raster_end[] looked up with
 * block_last_index[n], plus 1. The loop index starts at -2*nCoeffs bytes
 * relative to block + nCoeffs (and quant_matrix + nCoeffs), advances by
 * 16 bytes (8 words) per iteration and continues while it is negative, so
 * whole groups of 8 words are always processed.
 *
 * The DC term is computed in C before the loop as block[0] times
 * y_dc_scale (n < 4) or c_dc_scale, and stored after the loop, replacing
 * whatever the loop wrote to block[0].
 *
 * s       context; block_last_index[], intra_scantable.raster_end[],
 *         y_dc_scale, c_dc_scale and intra_matrix are read
 * block   coefficients, modified in place
 * n       block number; n < 4 selects y_dc_scale, otherwise c_dc_scale
 * qscale  quantizer scale
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	201 static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	202 DCTELEM *block, int n, int qscale)
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	203 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	204 long nCoeffs;
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	205 const uint16_t *quant_matrix;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	206 int block0;
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	207
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	208 assert(s->block_last_index[n]>=0);
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	209
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	210 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
200 6ab301aaa652 (commit by michael) arpi_esp parents: 153 diff changeset	211
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	212 if (n < 4)
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	213 block0 = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	214 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	215 block0 = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	216 /* XXX: only mpeg1 */
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	217 quant_matrix = s->intra_matrix;
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	218 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	219 "pcmpeqw %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	220 "psrlw $15, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	221 "movd %2, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	222 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	223 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	224 "mov %3, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	225 ".balign 16 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	226 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	227 "movq (%0, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	228 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	229 "movq (%1, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	230 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	231 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	232 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	233 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	234 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	235 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	236 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	237 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	238 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	239 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	240 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	241 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	242 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	243 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	244 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	245 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	246 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	247 "psraw $3, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	248 "psraw $3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	249 "psubw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	250 "psubw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	251 "por %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	252 "por %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	253 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	254 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	255 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	256 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	257 "pandn %%mm0, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	258 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	259 "movq %%mm4, (%0, %%"REG_a") \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	260 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	261
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	262 "add $16, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	263 "js 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	264 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	265 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	266 );
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	267 block[0]= block0;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	268 }
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	269
/*
 * MPEG-1 inter-block dequantizer (MMX), operating in place on block[].
 *
 * Eight coefficients are handled per loop iteration (two MMX registers of
 * four 16-bit words each). For every coefficient the asm computes, in
 * 16-bit lanes (pmullw keeps only the low 16 bits of each product):
 *
 *     q     = qscale * quant_matrix[i]
 *     level = ((2*abs(block[i]) + 1) * q) >> 4
 *     level = (level - 1) | 1
 *
 * after which the original sign is restored and coefficients that were
 * zero on input are forced back to zero.
 *
 * The pointers passed to the asm are block+nCoeffs and quant_matrix+nCoeffs;
 * REG_a starts at -2*nCoeffs (a byte offset, quant_matrix entries being
 * uint16_t; assumes DCTELEM is also 16 bits wide -- TODO confirm) and is
 * advanced by 16 until it is no longer negative.
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	270 static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	271 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	272 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	273 long nCoeffs;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	274 const uint16_t *quant_matrix;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	275
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	276 assert(s->block_last_index[n]>=0);
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	277
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	278 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; // raster_end[] value + 1, used as the coefficient count
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	279
344 9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream michaelni parents: 325 diff changeset	280 quant_matrix = s->inter_matrix;
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	281 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	282 "pcmpeqw %%mm7, %%mm7 \n\t" // mm7 = all bits set
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	283 "psrlw $15, %%mm7 \n\t" // mm7 = 1 in each 16-bit word
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	284 "movd %2, %%mm6 \n\t" // mm6 = qscale
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	285 "packssdw %%mm6, %%mm6 \n\t" // the two packs replicate qscale
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	286 "packssdw %%mm6, %%mm6 \n\t" // into all four words of mm6
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	287 "mov %3, %%"REG_a" \n\t" // REG_a = -2*nCoeffs (negative byte offset)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	288 ".balign 16 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	289 "1: \n\t" // loop: 8 coefficients per iteration
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	290 "movq (%0, %%"REG_a"), %%mm0 \n\t" // mm0 = block[i..i+3]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	291 "movq 8(%0, %%"REG_a"), %%mm1 \n\t" // mm1 = block[i+4..i+7]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	292 "movq (%1, %%"REG_a"), %%mm4 \n\t" // mm4 = quant_matrix[i..i+3]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	293 "movq 8(%1, %%"REG_a"), %%mm5 \n\t" // mm5 = quant_matrix[i+4..i+7]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	294 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	295 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	296 "pxor %%mm2, %%mm2 \n\t" // mm2 = 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	297 "pxor %%mm3, %%mm3 \n\t" // mm3 = 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	298 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	299 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	300 "pxor %%mm2, %%mm0 \n\t" // xor with sign mask, then subtract it:
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	301 "pxor %%mm3, %%mm1 \n\t" // negates only the negative lanes
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	302 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	303 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	304 "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	305 "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	306 "paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	307 "paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	308 "pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	309 "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	310 "pxor %%mm4, %%mm4 \n\t" // q no longer needed; reuse mm4/mm5 as zero
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	311 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	312 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	313 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	314 "psraw $4, %%mm0 \n\t" // >> 4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	315 "psraw $4, %%mm1 \n\t" // >> 4
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	316 "psubw %%mm7, %%mm0 \n\t" // level - 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	317 "psubw %%mm7, %%mm1 \n\t" // level - 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	318 "por %%mm7, %%mm0 \n\t" // (level - 1) | 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	319 "por %%mm7, %%mm1 \n\t" // (level - 1) | 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	320 "pxor %%mm2, %%mm0 \n\t" // restore the original sign using the
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	321 "pxor %%mm3, %%mm1 \n\t" // masks saved in mm2/mm3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	322 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	323 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	324 "pandn %%mm0, %%mm4 \n\t" // keep result only where block[i] != 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	325 "pandn %%mm1, %%mm5 \n\t" // keep result only where block[i] != 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	326 "movq %%mm4, (%0, %%"REG_a") \n\t" // store back in place
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	327 "movq %%mm5, 8(%0, %%"REG_a") \n\t" // store back in place
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	328
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	329 "add $16, %%"REG_a" \n\t" // advance 16 bytes = 8 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	330 "js 1b \n\t" // repeat while the offset is still negative
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	331 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) // %0, %1: end pointers; %2: qscale; %3: start offset
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	332 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	333 );
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	334 }
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	335
/*
 * MPEG-2 intra-block dequantizer (MMX), operating in place on block[].
 *
 * The DC coefficient is computed in C before the asm runs: block[0] times
 * s->y_dc_scale for n < 4, otherwise times s->c_dc_scale. It is written
 * back after the asm, overriding whatever the loop stored in block[0].
 *
 * The loop handles eight coefficients per iteration and computes, in
 * 16-bit lanes (pmullw keeps only the low 16 bits of each product):
 *
 *     q     = qscale * quant_matrix[i]      (quant_matrix = s->intra_matrix)
 *     level = (abs(block[i]) * q) >> 3
 *
 * with the original sign restored and zero inputs forced back to zero.
 *
 * Here nCoeffs is the raster_end[] value without +1 (or 63 when
 * s->alternate_scan is set), so the loop is closed with jng: it keeps
 * running while the byte offset in REG_a is <= 0 after the increment.
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	336 static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	337 DCTELEM *block, int n, int qscale)
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	338 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	339 long nCoeffs;
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	340 const uint16_t *quant_matrix;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	341 int block0;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	342
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	343 assert(s->block_last_index[n]>=0);
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	344
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	345 if(s->alternate_scan) nCoeffs= 63; //FIXME
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	346 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	347
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	348 if (n < 4)
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	349 block0 = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	350 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	351 block0 = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	352 quant_matrix = s->intra_matrix;
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	353 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	354 "pcmpeqw %%mm7, %%mm7 \n\t" // mm7 = all bits set
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	355 "psrlw $15, %%mm7 \n\t" // mm7 = 1 in each word (not read again in this asm block)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	356 "movd %2, %%mm6 \n\t" // mm6 = qscale
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	357 "packssdw %%mm6, %%mm6 \n\t" // the two packs replicate qscale
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	358 "packssdw %%mm6, %%mm6 \n\t" // into all four words of mm6
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	359 "mov %3, %%"REG_a" \n\t" // REG_a = -2*nCoeffs (negative byte offset)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	360 ".balign 16 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	361 "1: \n\t" // loop: 8 coefficients per iteration
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	362 "movq (%0, %%"REG_a"), %%mm0 \n\t" // mm0 = four coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	363 "movq 8(%0, %%"REG_a"), %%mm1 \n\t" // mm1 = next four coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	364 "movq (%1, %%"REG_a"), %%mm4 \n\t" // mm4 = four quant_matrix entries
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	365 "movq 8(%1, %%"REG_a"), %%mm5 \n\t" // mm5 = next four quant_matrix entries
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	366 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	367 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	368 "pxor %%mm2, %%mm2 \n\t" // mm2 = 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	369 "pxor %%mm3, %%mm3 \n\t" // mm3 = 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	370 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	371 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	372 "pxor %%mm2, %%mm0 \n\t" // xor with sign mask, then subtract it:
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	373 "pxor %%mm3, %%mm1 \n\t" // negates only the negative lanes
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	374 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	375 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	376 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	377 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	378 "pxor %%mm4, %%mm4 \n\t" // q no longer needed; reuse mm4/mm5 as zero
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	379 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	380 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	381 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	382 "psraw $3, %%mm0 \n\t" // >> 3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	383 "psraw $3, %%mm1 \n\t" // >> 3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	384 "pxor %%mm2, %%mm0 \n\t" // restore the original sign using the
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	385 "pxor %%mm3, %%mm1 \n\t" // masks saved in mm2/mm3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	386 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	387 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	388 "pandn %%mm0, %%mm4 \n\t" // keep result only where block[i] != 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	389 "pandn %%mm1, %%mm5 \n\t" // keep result only where block[i] != 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	390 "movq %%mm4, (%0, %%"REG_a") \n\t" // store back in place
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	391 "movq %%mm5, 8(%0, %%"REG_a") \n\t" // store back in place
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	392
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	393 "add $16, %%"REG_a" \n\t" // advance 16 bytes = 8 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	394 "jng 1b \n\t" // repeat while the offset is <= 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	395 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) // %0, %1: biased pointers; %2: qscale; %3: start offset
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	396 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	397 );
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	398 block[0]= block0; // DC value computed above replaces the loop result
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	399 //Note, we do not do mismatch control for intra as errors cannot accumulate
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	400 }
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	401
/*
 * MPEG-2 inter-block dequantizer (MMX), operating in place on block[].
 *
 * The loop handles eight coefficients per iteration and computes, in
 * 16-bit lanes (pmullw keeps only the low 16 bits of each product):
 *
 *     q     = qscale * quant_matrix[i]      (quant_matrix = s->inter_matrix)
 *     level = (abs(block[i])*2*q + q) >> 4  (logical shift, psrlw)
 *
 * with the original sign restored and zero inputs forced back to zero.
 *
 * mm7 starts with only its low word set to 0xFFFF and has every stored
 * result XORed into it. After the loop its four words are folded together
 * and bit 0 of the fold is XORed into the least significant bit of the
 * second word of the 32-bit value at byte offset 124 from the start of
 * block[] (block[63], assuming 16-bit DCTELEM -- TODO confirm). This looks
 * like the MPEG-2 mismatch control that the intra variant skips.
 *
 * nCoeffs is the raster_end[] value without +1 (or 63 when
 * s->alternate_scan is set), so the loop is closed with jng.
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	402 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	403 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	404 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	405 long nCoeffs;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	406 const uint16_t *quant_matrix;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	407
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	408 assert(s->block_last_index[n]>=0);
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	409
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	410 if(s->alternate_scan) nCoeffs= 63; //FIXME
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	411 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	412
344 9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream michaelni parents: 325 diff changeset	413 quant_matrix = s->inter_matrix;
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	414 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	415 "pcmpeqw %%mm7, %%mm7 \n\t" // mm7 = all bits set
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	416 "psrlq $48, %%mm7 \n\t" // mm7 = 0xFFFF in the low word only (XOR accumulator)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	417 "movd %2, %%mm6 \n\t" // mm6 = qscale
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	418 "packssdw %%mm6, %%mm6 \n\t" // the two packs replicate qscale
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	419 "packssdw %%mm6, %%mm6 \n\t" // into all four words of mm6
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	420 "mov %3, %%"REG_a" \n\t" // REG_a = -2*nCoeffs (negative byte offset)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	421 ".balign 16 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	422 "1: \n\t" // loop: 8 coefficients per iteration
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	423 "movq (%0, %%"REG_a"), %%mm0 \n\t" // mm0 = four coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	424 "movq 8(%0, %%"REG_a"), %%mm1 \n\t" // mm1 = next four coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	425 "movq (%1, %%"REG_a"), %%mm4 \n\t" // mm4 = four quant_matrix entries
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	426 "movq 8(%1, %%"REG_a"), %%mm5 \n\t" // mm5 = next four quant_matrix entries
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	427 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	428 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	429 "pxor %%mm2, %%mm2 \n\t" // mm2 = 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	430 "pxor %%mm3, %%mm3 \n\t" // mm3 = 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	431 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	432 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	433 "pxor %%mm2, %%mm0 \n\t" // xor with sign mask, then subtract it:
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	434 "pxor %%mm3, %%mm1 \n\t" // negates only the negative lanes
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	435 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	436 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	437 "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	438 "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	439 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	440 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	441 "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	442 "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	443 "pxor %%mm4, %%mm4 \n\t" // q no longer needed; reuse mm4/mm5 as zero
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	444 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	445 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	446 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	447 "psrlw $4, %%mm0 \n\t" // >> 4 (logical)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	448 "psrlw $4, %%mm1 \n\t" // >> 4 (logical)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	449 "pxor %%mm2, %%mm0 \n\t" // restore the original sign using the
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	450 "pxor %%mm3, %%mm1 \n\t" // masks saved in mm2/mm3
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	451 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	452 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	453 "pandn %%mm0, %%mm4 \n\t" // keep result only where block[i] != 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	454 "pandn %%mm1, %%mm5 \n\t" // keep result only where block[i] != 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	455 "pxor %%mm4, %%mm7 \n\t" // accumulate XOR of all results
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	456 "pxor %%mm5, %%mm7 \n\t" // accumulate XOR of all results
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	457 "movq %%mm4, (%0, %%"REG_a") \n\t" // store back in place
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	458 "movq %%mm5, 8(%0, %%"REG_a") \n\t" // store back in place
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	459
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	460 "add $16, %%"REG_a" \n\t" // advance 16 bytes = 8 coefficients
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	461 "jng 1b \n\t" // repeat while the offset is <= 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	462 "movd 124(%0, %3), %%mm0 \n\t" // %0 + %3 is the start of block[]; load the 32 bits at byte 124
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	463 "movq %%mm7, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	464 "psrlq $32, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	465 "pxor %%mm6, %%mm7 \n\t" // fold high dword into low dword
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	466 "movq %%mm7, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	467 "psrlq $16, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	468 "pxor %%mm6, %%mm7 \n\t" // low word = XOR of all four words
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	469 "pslld $31, %%mm7 \n\t" // isolate bit 0 of the fold (moved to bit 31)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	470 "psrlq $15, %%mm7 \n\t" // bring it down to bit 16 = LSB of the second word
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	471 "pxor %%mm7, %%mm0 \n\t" // toggle that LSB when the folded bit is set
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	472 "movd %%mm0, 124(%0, %3) \n\t" // write the adjusted pair back
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	473
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	474 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) // %3 is "r" here because it is used as an index register above
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	475 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	476 );
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	477 }
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	478
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	479 /* draw the edges of width 'w' of an image of size width, height
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	480 this mmx version can only handle w==8 || w==16 */
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	481 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	482 {
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	483 uint8_t ptr, last_line;
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	484 int i;
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	485
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	486 last_line = buf + (height - 1) * wrap;
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	487 /* left and right */
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	488 ptr = buf;
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	489 if(w==8)
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	490 {
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	491 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	492 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	493 "movd (%0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	494 "punpcklbw %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	495 "punpcklwd %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	496 "punpckldq %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	497 "movq %%mm0, -8(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	498 "movq -8(%0, %2), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	499 "punpckhbw %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	500 "punpckhwd %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	501 "punpckhdq %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	502 "movq %%mm1, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	503 "add %1, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	504 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	505 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	506 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	507 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	508 );
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	509 }
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	510 else
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	511 {
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	512 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	513 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	514 "movd (%0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	515 "punpcklbw %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	516 "punpcklwd %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	517 "punpckldq %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	518 "movq %%mm0, -8(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	519 "movq %%mm0, -16(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	520 "movq -8(%0, %2), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	521 "punpckhbw %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	522 "punpckhwd %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	523 "punpckhdq %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	524 "movq %%mm1, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	525 "movq %%mm1, 8(%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	526 "add %1, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	527 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	528 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	529 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	530 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	531 );
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	532 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	533
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	534 for(i=0;i<w;i+=4) {
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	535 /* top and bottom (and hopefully also the corners) */
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	536 ptr= buf - (i + 1) * wrap - w;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	537 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	538 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	539 "movq (%1, %0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	540 "movq %%mm0, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	541 "movq %%mm0, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	542 "movq %%mm0, (%0, %2, 2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	543 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	544 "add $8, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	545 "cmp %4, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	546 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	547 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	548 : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap3), "r" (ptr+width+2w)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	549 );
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	550 ptr= last_line + (i + 1) * wrap - w;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	551 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	552 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	553 "movq (%1, %0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	554 "movq %%mm0, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	555 "movq %%mm0, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	556 "movq %%mm0, (%0, %2, 2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	557 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	558 "add $8, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	559 "cmp %4, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	560 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	561 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	562 : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap3), "r" (ptr+width+2w)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	563 );
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	564 }
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	565 }
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	566
1719 4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	567 static void denoise_dct_mmx(MpegEncContext s, DCTELEM block){
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	568 const int intra= s->mb_intra;
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	569 int *sum= s->dct_error_sum[intra];
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	570 uint16_t *offset= s->dct_offset[intra];
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	571
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	572 s->dct_count[intra]++;
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	573
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	574 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	575 "pxor %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	576 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	577 "pxor %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	578 "pxor %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	579 "movq (%0), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	580 "movq 8(%0), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	581 "pcmpgtw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	582 "pcmpgtw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	583 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	584 "pxor %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	585 "psubw %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	586 "psubw %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	587 "movq %%mm2, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	588 "movq %%mm3, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	589 "psubusw (%2), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	590 "psubusw 8(%2), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	591 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	592 "pxor %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	593 "psubw %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	594 "psubw %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	595 "movq %%mm2, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	596 "movq %%mm3, 8(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	597 "movq %%mm4, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	598 "movq %%mm5, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	599 "punpcklwd %%mm7, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	600 "punpckhwd %%mm7, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	601 "punpcklwd %%mm7, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	602 "punpckhwd %%mm7, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	603 "paddd (%1), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	604 "paddd 8(%1), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	605 "paddd 16(%1), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	606 "paddd 24(%1), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	607 "movq %%mm4, (%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	608 "movq %%mm2, 8(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	609 "movq %%mm5, 16(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	610 "movq %%mm3, 24(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	611 "add $16, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	612 "add $32, %1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	613 "add $16, %2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	614 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	615 " jb 1b \n\t"
1719 4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	616 : "+r" (block), "+r" (sum), "+r" (offset)
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	617 : "r"(block+64)
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	618 );
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	619 }
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	620
1720 96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	621 static void denoise_dct_sse2(MpegEncContext s, DCTELEM block){
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	622 const int intra= s->mb_intra;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	623 int *sum= s->dct_error_sum[intra];
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	624 uint16_t *offset= s->dct_offset[intra];
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	625
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	626 s->dct_count[intra]++;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	627
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	628 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	629 "pxor %%xmm7, %%xmm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	630 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	631 "pxor %%xmm0, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	632 "pxor %%xmm1, %%xmm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	633 "movdqa (%0), %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	634 "movdqa 16(%0), %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	635 "pcmpgtw %%xmm2, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	636 "pcmpgtw %%xmm3, %%xmm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	637 "pxor %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	638 "pxor %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	639 "psubw %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	640 "psubw %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	641 "movdqa %%xmm2, %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	642 "movdqa %%xmm3, %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	643 "psubusw (%2), %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	644 "psubusw 16(%2), %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	645 "pxor %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	646 "pxor %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	647 "psubw %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	648 "psubw %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	649 "movdqa %%xmm2, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	650 "movdqa %%xmm3, 16(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	651 "movdqa %%xmm4, %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	652 "movdqa %%xmm5, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	653 "punpcklwd %%xmm7, %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	654 "punpckhwd %%xmm7, %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	655 "punpcklwd %%xmm7, %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	656 "punpckhwd %%xmm7, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	657 "paddd (%1), %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	658 "paddd 16(%1), %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	659 "paddd 32(%1), %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	660 "paddd 48(%1), %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	661 "movdqa %%xmm4, (%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	662 "movdqa %%xmm6, 16(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	663 "movdqa %%xmm5, 32(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	664 "movdqa %%xmm0, 48(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	665 "add $32, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	666 "add $64, %1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	667 "add $32, %2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	668 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	669 " jb 1b \n\t"
1720 96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	670 : "+r" (block), "+r" (sum), "+r" (offset)
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	671 : "r"(block+64)
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	672 );
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	673 }
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	674
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	675 #undef HAVE_MMX2
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	676 #define RENAME(a) a ## _MMX
1565 1a9a63f59849 minor mmx2 optimization if the dct michael parents: 1261 diff changeset	677 #define RENAMEl(a) a ## _mmx
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	678 #include "mpegvideo_mmx_template.c"
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	679
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	680 #define HAVE_MMX2
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	681 #undef RENAME
1597 4c9165372ab3 noise reduction of dct coefficients michael parents: 1565 diff changeset	682 #undef RENAMEl
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	683 #define RENAME(a) a ## _MMX2
1565 1a9a63f59849 minor mmx2 optimization if the dct michael parents: 1261 diff changeset	684 #define RENAMEl(a) a ## _mmx2
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	685 #include "mpegvideo_mmx_template.c"
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	686
1765 e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	687 #undef RENAME
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	688 #undef RENAMEl
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	689 #define RENAME(a) a ## _SSE2
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	690 #define RENAMEl(a) a ## _sse2
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	691 #include "mpegvideo_mmx_template.c"
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	692
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	693 void MPV_common_init_mmx(MpegEncContext *s)
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	694 {
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	695 if (mm_flags & MM_MMX) {
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	696 const int dct_algo = s->avctx->dct_algo;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	697
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	698 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	699 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	700 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	701 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	702 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	703 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
312 8cf5507e6ca5 mpeg4 mpeg quantizer support michaelni parents: 252 diff changeset	704
350 6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now. pulento parents: 344 diff changeset	705 draw_edges = draw_edges_mmx;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	706
1720 96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	707 if (mm_flags & MM_SSE2) {
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	708 s->denoise_dct= denoise_dct_sse2;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	709 } else {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	710 s->denoise_dct= denoise_dct_mmx;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	711 }
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	712
625 bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	713 if(dct_algo==FF_DCT_AUTO \|\| dct_algo==FF_DCT_MMX){
1765 e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	714 if(mm_flags & MM_SSE2){
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	715 s->dct_quantize= dct_quantize_SSE2;
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	716 } else if(mm_flags & MM_MMXEXT){
625 bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	717 s->dct_quantize= dct_quantize_MMX2;
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	718 } else {
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	719 s->dct_quantize= dct_quantize_MMX;
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	720 }
350 6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now. pulento parents: 344 diff changeset	721 }
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	722 }
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	723 }

Mercurial > libavcodec.hg

annotate i386/mpegvideo_mmx.c @ 3176:babf844e1308 libavcodec