libavcodec.hg: i386/mpegvideo

annotate i386/mpegvideo_mmx.c @ 4449:cb49f6384eb5 libavcodec

Simplify checks, use that we know that cnt will not be < 0

author	reimar
date	Wed, 31 Jan 2007 21:00:48 +0000
parents	bbe0bc387a19
children	0b1e761135cd

rev	line source
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	1 /*
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	2 * The simplest mpeg encoder (well, it was the simplest!)
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	3 * Copyright (c) 2000,2001 Fabrice Bellard.
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	4 *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	7 * FFmpeg is free software; you can redistribute it and/or
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	8 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change glantau parents: 350 diff changeset	9 * License as published by the Free Software Foundation; either
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	10 * version 2.1 of the License, or (at your option) any later version.
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	11 *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	12 * FFmpeg is distributed in the hope that it will be useful,
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change glantau parents: 350 diff changeset	15 * Lesser General Public License for more details.
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	16 *
429 718a22dc121f license/copyright change glantau parents: 350 diff changeset	17 * You should have received a copy of the GNU Lesser General Public
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3576 diff changeset	18 * License along with FFmpeg; if not, write to the Free Software
3036 0b546eab515d Update licensing information: The FSF changed postal address. diego parents: 2979 diff changeset	19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	20 *
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	21 * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	22 * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	23 */
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	24
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	25 #include "../dsputil.h"
8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	26 #include "../mpegvideo.h"
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	27 #include "../avcodec.h"
3398 e0927bc44a10 Move REG_* macros from libavcodec/i386/mmx.h to libavutil/x86_cpu.h lucabe parents: 3281 diff changeset	28 #include "x86_cpu.h"
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	29
4197 bbe0bc387a19 revert bad checkin mru parents: 4196 diff changeset	30 extern uint16_t inv_zigzag_direct16[64];
200 6ab301aaa652 (commit by michael) arpi_esp parents: 153 diff changeset	31
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	32 static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	33 static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	34
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	35
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	36 static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	37 DCTELEM *block, int n, int qscale)
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	38 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	39 long level, qmul, qadd, nCoeffs;
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	40
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	41 qmul = qscale << 1;
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	42
1661 4c9fd29f1606 h263 slice structured mode michael parents: 1597 diff changeset	43 assert(s->block_last_index[n]>=0 || s->h263_aic);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	44
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	45 if (!s->h263_aic) {
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	46 if (n < 4)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	47 level = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	48 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	49 level = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	50 qadd = (qscale - 1) | 1;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	51 }else{
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	52 qadd = 0;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	53 level= block[0];
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	54 }
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	55 if(s->ac_pred)
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	56 nCoeffs=63;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	57 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	58 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
200 6ab301aaa652 (commit by michael) arpi_esp parents: 153 diff changeset	59 //printf("%d %d ", qmul, qadd);
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	60 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	61 "movd %1, %%mm6 \n\t" //qmul
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	62 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	63 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	64 "movd %2, %%mm5 \n\t" //qadd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	65 "pxor %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	66 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	67 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	68 "psubw %%mm5, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	69 "pxor %%mm4, %%mm4 \n\t"
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	70 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	71 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	72 "movq (%0, %3), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	73 "movq 8(%0, %3), %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	74
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	75 "pmullw %%mm6, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	76 "pmullw %%mm6, %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	77
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	78 "movq (%0, %3), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	79 "movq 8(%0, %3), %%mm3 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	80
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	81 "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	82 "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	83
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	84 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	85 "pxor %%mm3, %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	86
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	87 "paddw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	88 "paddw %%mm7, %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	89
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	90 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	91 "pxor %%mm1, %%mm3 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	92
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	93 "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	94 "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	95
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	96 "pandn %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	97 "pandn %%mm3, %%mm1 \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	98
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	99 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	100 "movq %%mm1, 8(%0, %3) \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	101
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	102 "add $16, %3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	103 "jng 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	104 ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	105 : "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	106 );
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	107 block[0]= level;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	108 }
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	109
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	110
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	111 static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	112 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	113 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	114 long qmul, qadd, nCoeffs;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	115
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	116 qmul = qscale << 1;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	117 qadd = (qscale - 1) | 1;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	118
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	119 assert(s->block_last_index[n]>=0 || s->h263_aic);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	120
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	121 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	122 //printf("%d %d ", qmul, qadd);
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	123 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	124 "movd %1, %%mm6 \n\t" //qmul
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	125 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	126 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	127 "movd %2, %%mm5 \n\t" //qadd
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	128 "pxor %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	129 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	130 "packssdw %%mm5, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	131 "psubw %%mm5, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	132 "pxor %%mm4, %%mm4 \n\t"
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	133 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	134 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	135 "movq (%0, %3), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	136 "movq 8(%0, %3), %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	137
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	138 "pmullw %%mm6, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	139 "pmullw %%mm6, %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	140
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	141 "movq (%0, %3), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	142 "movq 8(%0, %3), %%mm3 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	143
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	144 "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	145 "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	146
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	147 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	148 "pxor %%mm3, %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	149
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	150 "paddw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	151 "paddw %%mm7, %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	152
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	153 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	154 "pxor %%mm1, %%mm3 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	155
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	156 "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	157 "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	158
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	159 "pandn %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	160 "pandn %%mm3, %%mm1 \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	161
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	162 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	163 "movq %%mm1, 8(%0, %3) \n\t"
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	164
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	165 "add $16, %3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	166 "jng 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	167 ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	168 : "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	169 );
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	170 }
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	171
bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	172
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	173 /*
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	174 NK:
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	175 Note: looking at PARANOID:
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	176 "enable all paranoid tests for rounding, overflows, etc..."
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	177
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	178 #ifdef PARANOID
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	179 if (level < -2048 || level > 2047)
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	180 fprintf(stderr, "unquant error %d %d\n", i, level);
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	181 #endif
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	182 We can suppose that the result of two multiplications can't be greater than 0xFFFF,
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	183 i.e. it fits in 16 bits, so we use only the PMULLW instruction here and can avoid
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	184 a complex multiplication.
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	185 =====================================================
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	186 Full formula for multiplication of 2 integer numbers
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	187 which are represented as high:low words:
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	188 input: value1 = high1:low1
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	189 value2 = high2:low2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	190 output: value3 = value1*value2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	191 value3=high3:low3 (on overflow: modulus 2^32 wrap-around)
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	192 this means that for 0x123456 * 0x123456 the correct result is 0x14b66cb0ce4
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	193 but this algorithm will compute only 0x66cb0ce4
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	194 this is limited by the 16-bit size of the operands
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	195 ---------------------------------
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	196 tlow1 = high1*low2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	197 tlow2 = high2*low1
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	198 tlow1 = tlow1 + tlow2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	199 high3:low3 = low1*low2
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	200 high3 += tlow1
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	201 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	202 static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	203 DCTELEM *block, int n, int qscale)
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	204 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	205 long nCoeffs;
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	206 const uint16_t *quant_matrix;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	207 int block0;
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	208
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	209 assert(s->block_last_index[n]>=0);
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	210
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	211 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
200 6ab301aaa652 (commit by michael) arpi_esp parents: 153 diff changeset	212
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	213 if (n < 4)
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	214 block0 = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	215 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	216 block0 = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	217 /* XXX: only mpeg1 */
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	218 quant_matrix = s->intra_matrix;
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	219 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	220 "pcmpeqw %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	221 "psrlw $15, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	222 "movd %2, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	223 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	224 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	225 "mov %3, %%"REG_a" \n\t"
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	226 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	227 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	228 "movq (%0, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	229 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	230 "movq (%1, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	231 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	232 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	233 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	234 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	235 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	236 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	237 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	238 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	239 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	240 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	241 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	242 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	243 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	244 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	245 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	246 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	247 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	248 "psraw $3, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	249 "psraw $3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	250 "psubw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	251 "psubw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	252 "por %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	253 "por %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	254 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	255 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	256 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	257 "psubw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	258 "pandn %%mm0, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	259 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	260 "movq %%mm4, (%0, %%"REG_a") \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	261 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	262
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	263 "add $16, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	264 "js 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	265 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	266 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	267 );
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	268 block[0]= block0;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	269 }
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	270
/**
 * Dequantize one inter-coded block in place using MMX and s->inter_matrix.
 *
 * Every nonzero coefficient c at position i is replaced by
 *     sign(c) * (((((2*|c| + 1) * qscale * quant_matrix[i]) >> 4) - 1) | 1)
 * evaluated in 16-bit lanes (pmullw keeps only the low 16 bits of each
 * product). Coefficients that are zero on input stay zero.
 *
 * s       context; block_last_index[n], intra_scantable.raster_end and
 *         inter_matrix are read
 * block   coefficients, rewritten in place
 * n       block number, only used to index s->block_last_index
 * qscale  quantizer scale that multiplies every matrix entry
 *
 * The loop works on 8 coefficients (16 bytes) per iteration starting at
 * block[0], so it can read and write up to 7 entries beyond nCoeffs.
 * NOTE(review): the -2*nCoeffs byte offset assumes DCTELEM is 2 bytes wide
 * - confirm against the typedef.
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	271 static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	272 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	273 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	274 long nCoeffs;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	275 const uint16_t *quant_matrix;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	276
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	277 assert(s->block_last_index[n]>=0);
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	278
/* number of coefficients to process: raster position of the last one, plus one */
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	279 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	280
344 9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream michaelni parents: 325 diff changeset	281 quant_matrix = s->inter_matrix;
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	282 asm volatile(
/* mm7 = 1 in each of the four 16-bit lanes */
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	283 "pcmpeqw %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	284 "psrlw $15, %%mm7 \n\t"
/* mm6 = qscale replicated into all four 16-bit lanes */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	285 "movd %2, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	286 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	287 "packssdw %%mm6, %%mm6 \n\t"
/* REG_a = negative byte offset; operands 0 and 1 point at the END of the
   data, so (%0, REG_a) starts at block[0] and walks upward */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	288 "mov %3, %%"REG_a" \n\t"
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	289 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	290 "1: \n\t"
/* load 8 coefficients (mm0, mm1) and the 8 matching matrix entries (mm4, mm5) */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	291 "movq (%0, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	292 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	293 "movq (%1, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	294 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	295 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	296 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
/* mm2/mm3 = per-lane sign masks; xor + subtract with the mask gives abs() */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	297 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	298 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	299 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	300 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	301 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	302 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	303 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	304 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	305 "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	306 "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	307 "paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	308 "paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	309 "pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	310 "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
/* mm4/mm5 are free again: rebuild them as masks of the lanes whose input
   coefficient is zero (the input is re-read from memory for this) */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	311 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	312 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	313 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	314 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	315 "psraw $4, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	316 "psraw $4, %%mm1 \n\t"
/* (x - 1) | 1: leaves odd magnitudes unchanged and lowers even ones by one */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	317 "psubw %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	318 "psubw %%mm7, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	319 "por %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	320 "por %%mm7, %%mm1 \n\t"
/* put the original sign back using the masks in mm2/mm3 */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	321 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	322 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	323 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	324 "psubw %%mm3, %%mm1 \n\t"
/* result = ~zero_mask & value, so lanes that were zero on input stay zero */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	325 "pandn %%mm0, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	326 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	327 "movq %%mm4, (%0, %%"REG_a") \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	328 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	329
/* advance 16 bytes; keep looping while the offset is still negative */
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	330 "add $16, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	331 "js 1b \n\t"
/* operands: %0 = end of coefficients, %1 = end of matrix, %2 = qscale,
   %3 = starting byte offset (negative) */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	332 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	333 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	334 );
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	335 }
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	336
/**
 * Dequantize one intra-coded block in place using MMX and s->intra_matrix.
 *
 * block[0] is handled separately: its scaled value (block[0] times
 * s->y_dc_scale when n < 4, otherwise times s->c_dc_scale) is computed
 * before the asm loop and stored after it, replacing whatever the loop
 * wrote to that slot.
 *
 * Every other nonzero coefficient c at position i is replaced by
 *     sign(c) * ((|c| * qscale * quant_matrix[i]) >> 3)
 * evaluated in 16-bit lanes (pmullw keeps only the low 16 bits of each
 * product). Coefficients that are zero on input stay zero.
 *
 * s       context; block_last_index[n], alternate_scan,
 *         intra_scantable.raster_end, y_dc_scale, c_dc_scale and
 *         intra_matrix are read
 * block   coefficients, rewritten in place
 * n       block number; selects the DC scale and indexes block_last_index
 * qscale  quantizer scale that multiplies every matrix entry
 *
 * NOTE(review): the -2*nCoeffs byte offset assumes DCTELEM is 2 bytes wide
 * - confirm against the typedef.
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	337 static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	338 DCTELEM *block, int n, int qscale)
15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	339 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	340 long nCoeffs;
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	341 const uint16_t *quant_matrix;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	342 int block0;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	343
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	344 assert(s->block_last_index[n]>=0);
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	345
/* nCoeffs is the index of the LAST coefficient here (no +1); the loop
   below ends on "jng", so that index is still processed */
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	346 if(s->alternate_scan) nCoeffs= 63; //FIXME
e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	347 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	348
/* compute the scaled DC value now; the asm loop overwrites block[0] */
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	349 if (n < 4)
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	350 block0 = block[0] * s->y_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	351 else
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	352 block0 = block[0] * s->c_dc_scale;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	353 quant_matrix = s->intra_matrix;
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	354 asm volatile(
/* mm7 = 1 in each 16-bit lane (not referenced again inside this loop) */
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	355 "pcmpeqw %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	356 "psrlw $15, %%mm7 \n\t"
/* mm6 = qscale replicated into all four 16-bit lanes */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	357 "movd %2, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	358 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	359 "packssdw %%mm6, %%mm6 \n\t"
/* REG_a = negative byte offset relative to the pointers in %0 and %1 */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	360 "mov %3, %%"REG_a" \n\t"
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	361 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	362 "1: \n\t"
/* load 8 coefficients (mm0, mm1) and the 8 matching matrix entries (mm4, mm5) */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	363 "movq (%0, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	364 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	365 "movq (%1, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	366 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	367 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	368 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
/* mm2/mm3 = per-lane sign masks; xor + subtract with the mask gives abs() */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	369 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	370 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	371 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	372 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	373 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	374 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	375 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	376 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	377 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	378 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
/* mm4/mm5 are free again: rebuild them as masks of the lanes whose input
   coefficient is zero (the input is re-read from memory for this) */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	379 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	380 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	381 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	382 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	383 "psraw $3, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	384 "psraw $3, %%mm1 \n\t"
/* put the original sign back using the masks in mm2/mm3 */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	385 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	386 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	387 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	388 "psubw %%mm3, %%mm1 \n\t"
/* result = ~zero_mask & value, so lanes that were zero on input stay zero */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	389 "pandn %%mm0, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	390 "pandn %%mm1, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	391 "movq %%mm4, (%0, %%"REG_a") \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	392 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
145 bd1adece8280 dct_unquantize_h263_mmx() by Michael Niedermayer <michaelni@gmx.at> arpi_esp parents: 14 diff changeset	393
/* advance 16 bytes; keep looping while the offset is still <= 0 */
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	394 "add $16, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	395 "jng 1b \n\t"
/* operands: %0 = &block[nCoeffs], %1 = &quant_matrix[nCoeffs], %2 = qscale,
   %3 = starting byte offset (negative or zero) */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	396 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	397 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	398 );
/* store the separately scaled DC value over the loop's result for slot 0 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	399 block[0]= block0;
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	400 //Note: we don't do mismatch control for intra, as errors cannot accumulate
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	401 }
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	402
/**
 * Dequantize one inter-coded block in place using MMX and s->inter_matrix,
 * then conditionally flip one bit near the end of the block based on the
 * XOR of all values written.
 *
 * Every nonzero coefficient c at position i is replaced by
 *     sign(c) * (((2*|c| + 1) * qscale * quant_matrix[i]) >> 4)
 * evaluated in 16-bit lanes (pmullw/paddw keep only the low 16 bits; the
 * shift is the logical psrlw). Coefficients that are zero on input stay
 * zero.
 *
 * While storing, every output word is XORed into mm7, which starts with
 * 0xFFFF in its lowest lane. After the loop the four lanes are folded
 * together and bit 0 of the fold is XORed into bit 16 of the 32-bit word
 * at byte offset 124 from the start of block.
 * NOTE(review): with 2-byte DCTELEM that bit is the LSB of block[63]; this
 * looks like the mismatch control that the intra variant says it skips
 * - confirm.
 *
 * s       context; block_last_index[n], alternate_scan,
 *         intra_scantable.raster_end and inter_matrix are read
 * block   coefficients, rewritten in place
 * n       block number, only used to index s->block_last_index
 * qscale  quantizer scale that multiplies every matrix entry
 */
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	403 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	404 DCTELEM *block, int n, int qscale)
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	405 {
2293 15cfba1b97b5 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64 patch by (Aurelien Jacobs <aurel at gnuage dot org>) michael parents: 2024 diff changeset	406 long nCoeffs;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	407 const uint16_t *quant_matrix;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	408
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	409 assert(s->block_last_index[n]>=0);
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	410
/* nCoeffs is the index of the LAST coefficient here (no +1); the loop
   below ends on "jng", so that index is still processed */
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	411 if(s->alternate_scan) nCoeffs= 63; //FIXME
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	412 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	413
344 9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream michaelni parents: 325 diff changeset	414 quant_matrix = s->inter_matrix;
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	415 asm volatile(
/* mm7 = 0x000000000000FFFF: XOR accumulator, lowest lane preset to all ones */
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	416 "pcmpeqw %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	417 "psrlq $48, %%mm7 \n\t"
/* mm6 = qscale replicated into all four 16-bit lanes */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	418 "movd %2, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	419 "packssdw %%mm6, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	420 "packssdw %%mm6, %%mm6 \n\t"
/* REG_a = negative byte offset relative to the pointers in %0 and %1 */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	421 "mov %3, %%"REG_a" \n\t"
3576 f7125bf10892 Support for MacIntel, last part: balign directives gpoirier parents: 3398 diff changeset	422 ASMALIGN(4)
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	423 "1: \n\t"
/* load 8 coefficients (mm0, mm1) and the 8 matching matrix entries (mm4, mm5) */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	424 "movq (%0, %%"REG_a"), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	425 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	426 "movq (%1, %%"REG_a"), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	427 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	428 "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	429 "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
/* mm2/mm3 = per-lane sign masks; xor + subtract with the mask gives abs() */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	430 "pxor %%mm2, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	431 "pxor %%mm3, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	432 "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	433 "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	434 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	435 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	436 "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	437 "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	438 "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	439 "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	440 "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	441 "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	442 "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	443 "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
/* mm4/mm5 are free again: rebuild them as masks of the lanes whose input
   coefficient is zero (the input is re-read from memory for this) */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	444 "pxor %%mm4, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	445 "pxor %%mm5, %%mm5 \n\t" // FIXME slow
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	446 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	447 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	448 "psrlw $4, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	449 "psrlw $4, %%mm1 \n\t"
/* put the original sign back using the masks in mm2/mm3 */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	450 "pxor %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	451 "pxor %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	452 "psubw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	453 "psubw %%mm3, %%mm1 \n\t"
/* result = ~zero_mask & value, so lanes that were zero on input stay zero */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	454 "pandn %%mm0, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	455 "pandn %%mm1, %%mm5 \n\t"
/* fold the 8 output words into the running XOR kept in mm7 */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	456 "pxor %%mm4, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	457 "pxor %%mm5, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	458 "movq %%mm4, (%0, %%"REG_a") \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	459 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
325 15efd80cf51e mpeg2/mpeg4 dequantizer support (c & mmx) michaelni parents: 312 diff changeset	460
/* advance 16 bytes; keep looping while the offset is still <= 0 */
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	461 "add $16, %%"REG_a" \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	462 "jng 1b \n\t"
/* %0 + %3 cancels the nCoeffs offset, so this reads the 4 bytes at byte
   offset 124 from the start of block */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	463 "movd 124(%0, %3), %%mm0 \n\t"
/* XOR the four accumulator lanes together: first 64 -> 32 bits, then
   32 -> 16 bits; bit 0 of mm7 ends up as the XOR of bit 0 of all lanes */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	464 "movq %%mm7, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	465 "psrlq $32, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	466 "pxor %%mm6, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	467 "movq %%mm7, %%mm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	468 "psrlq $16, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	469 "pxor %%mm6, %%mm7 \n\t"
/* isolate bit 0 (shift it up to bit 31), then move it down to bit 16 */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	470 "pslld $31, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	471 "psrlq $15, %%mm7 \n\t"
/* flip bit 16 of the loaded dword when that bit is set, and write it back */
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	472 "pxor %%mm7, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	473 "movd %%mm0, 124(%0, %3) \n\t"
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	474
/* operands: %0 = &block[nCoeffs], %1 = &quant_matrix[nCoeffs], %2 = qscale,
   %3 = starting byte offset; %3 uses an "r" constraint here because it is
   also an index register in the 124(%0, %3) addresses above */
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	475 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	476 : "%"REG_a, "memory"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	477 );
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	478 }
1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	479
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	480 /* draw the edges of width 'w' of an image of size width, height
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	481 this mmx version can only handle w==8 || w==16 */
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	482 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	483 {
1064 b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t kabi parents: 949 diff changeset	484 uint8_t ptr, last_line;
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	485 int i;
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	486
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	487 last_line = buf + (height - 1) * wrap;
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	488 /* left and right */
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	489 ptr = buf;
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	490 if(w==8)
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	491 {
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	492 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	493 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	494 "movd (%0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	495 "punpcklbw %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	496 "punpcklwd %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	497 "punpckldq %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	498 "movq %%mm0, -8(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	499 "movq -8(%0, %2), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	500 "punpckhbw %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	501 "punpckhwd %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	502 "punpckhdq %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	503 "movq %%mm1, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	504 "add %1, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	505 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	506 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	507 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	508 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	509 );
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	510 }
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	511 else
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	512 {
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	513 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	514 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	515 "movd (%0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	516 "punpcklbw %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	517 "punpcklwd %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	518 "punpckldq %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	519 "movq %%mm0, -8(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	520 "movq %%mm0, -16(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	521 "movq -8(%0, %2), %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	522 "punpckhbw %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	523 "punpckhwd %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	524 "punpckhdq %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	525 "movq %%mm1, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	526 "movq %%mm1, 8(%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	527 "add %1, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	528 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	529 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	530 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	531 : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	532 );
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	533 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	534
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	535 for(i=0;i<w;i+=4) {
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	536 /* top and bottom (and hopefully also the corners) */
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	537 ptr= buf - (i + 1) * wrap - w;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	538 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	539 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	540 "movq (%1, %0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	541 "movq %%mm0, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	542 "movq %%mm0, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	543 "movq %%mm0, (%0, %2, 2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	544 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	545 "add $8, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	546 "cmp %4, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	547 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	548 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	549 : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	550 );
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	551 ptr= last_line + (i + 1) * wrap - w;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	552 asm volatile(
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	553 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	554 "movq (%1, %0), %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	555 "movq %%mm0, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	556 "movq %%mm0, (%0, %2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	557 "movq %%mm0, (%0, %2, 2) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	558 "movq %%mm0, (%0, %3) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	559 "add $8, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	560 "cmp %4, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	561 " jb 1b \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	562 : "+r" (ptr)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	563 : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	564 );
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	565 }
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	566 }
994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	567
1719 4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	568 static void denoise_dct_mmx(MpegEncContext s, DCTELEM block){
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	569 const int intra= s->mb_intra;
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	570 int *sum= s->dct_error_sum[intra];
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	571 uint16_t *offset= s->dct_offset[intra];
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	572
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	573 s->dct_count[intra]++;
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	574
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	575 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	576 "pxor %%mm7, %%mm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	577 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	578 "pxor %%mm0, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	579 "pxor %%mm1, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	580 "movq (%0), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	581 "movq 8(%0), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	582 "pcmpgtw %%mm2, %%mm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	583 "pcmpgtw %%mm3, %%mm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	584 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	585 "pxor %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	586 "psubw %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	587 "psubw %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	588 "movq %%mm2, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	589 "movq %%mm3, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	590 "psubusw (%2), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	591 "psubusw 8(%2), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	592 "pxor %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	593 "pxor %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	594 "psubw %%mm0, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	595 "psubw %%mm1, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	596 "movq %%mm2, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	597 "movq %%mm3, 8(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	598 "movq %%mm4, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	599 "movq %%mm5, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	600 "punpcklwd %%mm7, %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	601 "punpckhwd %%mm7, %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	602 "punpcklwd %%mm7, %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	603 "punpckhwd %%mm7, %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	604 "paddd (%1), %%mm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	605 "paddd 8(%1), %%mm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	606 "paddd 16(%1), %%mm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	607 "paddd 24(%1), %%mm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	608 "movq %%mm4, (%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	609 "movq %%mm2, 8(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	610 "movq %%mm5, 16(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	611 "movq %%mm3, 24(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	612 "add $16, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	613 "add $32, %1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	614 "add $16, %2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	615 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	616 " jb 1b \n\t"
1719 4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	617 : "+r" (block), "+r" (sum), "+r" (offset)
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	618 : "r"(block+64)
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	619 );
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	620 }
4e72fb256b25 denoise_dct_mmx() michael parents: 1689 diff changeset	621
1720 96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	622 static void denoise_dct_sse2(MpegEncContext s, DCTELEM block){
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	623 const int intra= s->mb_intra;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	624 int *sum= s->dct_error_sum[intra];
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	625 uint16_t *offset= s->dct_offset[intra];
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	626
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	627 s->dct_count[intra]++;
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	628
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	629 asm volatile(
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	630 "pxor %%xmm7, %%xmm7 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	631 "1: \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	632 "pxor %%xmm0, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	633 "pxor %%xmm1, %%xmm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	634 "movdqa (%0), %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	635 "movdqa 16(%0), %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	636 "pcmpgtw %%xmm2, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	637 "pcmpgtw %%xmm3, %%xmm1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	638 "pxor %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	639 "pxor %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	640 "psubw %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	641 "psubw %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	642 "movdqa %%xmm2, %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	643 "movdqa %%xmm3, %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	644 "psubusw (%2), %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	645 "psubusw 16(%2), %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	646 "pxor %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	647 "pxor %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	648 "psubw %%xmm0, %%xmm2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	649 "psubw %%xmm1, %%xmm3 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	650 "movdqa %%xmm2, (%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	651 "movdqa %%xmm3, 16(%0) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	652 "movdqa %%xmm4, %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	653 "movdqa %%xmm5, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	654 "punpcklwd %%xmm7, %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	655 "punpckhwd %%xmm7, %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	656 "punpcklwd %%xmm7, %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	657 "punpckhwd %%xmm7, %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	658 "paddd (%1), %%xmm4 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	659 "paddd 16(%1), %%xmm6 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	660 "paddd 32(%1), %%xmm5 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	661 "paddd 48(%1), %%xmm0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	662 "movdqa %%xmm4, (%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	663 "movdqa %%xmm6, 16(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	664 "movdqa %%xmm5, 32(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	665 "movdqa %%xmm0, 48(%1) \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	666 "add $32, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	667 "add $64, %1 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	668 "add $32, %2 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	669 "cmp %3, %0 \n\t"
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	670 " jb 1b \n\t"
1720 96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	671 : "+r" (block), "+r" (sum), "+r" (offset)
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	672 : "r"(block+64)
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	673 );
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	674 }
96a86bd1e0d5 denoise_dct_sse2() patch by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1719 diff changeset	675
/*
 * Instantiate mpegvideo_mmx_template.c three times. Before each inclusion
 * RENAME()/RENAMEl() are redefined to paste a different suffix onto their
 * argument (_MMX/_mmx, then _MMX2/_mmx2, then _SSE2/_sse2), so each pass
 * yields its own set of symbol names. HAVE_MMX2 is undefined for the first
 * pass and stays defined for the second and third.
 * NOTE(review): the template is presumably where dct_quantize_MMX,
 * dct_quantize_MMX2 and dct_quantize_SSE2 (used by MPV_common_init_mmx)
 * come from - confirm against mpegvideo_mmx_template.c.
 */
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	676 #undef HAVE_MMX2
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	677 #define RENAME(a) a ## _MMX
1565 1a9a63f59849 minor mmx2 optimization if the dct michael parents: 1261 diff changeset	678 #define RENAMEl(a) a ## _mmx
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	679 #include "mpegvideo_mmx_template.c"
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	680
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	681 #define HAVE_MMX2
0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	682 #undef RENAME
1597 4c9165372ab3 noise reduction of dct coefficients michael parents: 1565 diff changeset	683 #undef RENAMEl
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	684 #define RENAME(a) a ## _MMX2
1565 1a9a63f59849 minor mmx2 optimization if the dct michael parents: 1261 diff changeset	685 #define RENAMEl(a) a ## _mmx2
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	686 #include "mpegvideo_mmx_template.c"
206 994aa8623443 (commit by michael) arpi_esp parents: 200 diff changeset	687
1765 e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	688 #undef RENAME
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	689 #undef RENAMEl
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	690 #define RENAME(a) a ## _SSE2
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	691 #define RENAMEl(a) a ## _sse2
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	692 #include "mpegvideo_mmx_template.c"
e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	693
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	694 void MPV_common_init_mmx(MpegEncContext *s)
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	695 {
4197 bbe0bc387a19 revert bad checkin mru parents: 4196 diff changeset	696 if (mm_flags & MM_MMX) {
706 e65798d228ea idct permutation cleanup, idct can be selected per context now michaelni parents: 687 diff changeset	697 const int dct_algo = s->avctx->dct_algo;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	698
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	699 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	700 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	701 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	702 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
3281 7fac25904a8b missmatch control for mpeg2 intra dequantization if bitexact=1 michael parents: 3036 diff changeset	703 if(!(s->flags & CODEC_FLAG_BITEXACT))
7fac25904a8b missmatch control for mpeg2 intra dequantization if bitexact=1 michael parents: 3036 diff changeset	704 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
1689 1a2db2073848 split intra / inter dequantization michael parents: 1661 diff changeset	705 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
312 8cf5507e6ca5 mpeg4 mpeg quantizer support michaelni parents: 252 diff changeset	706
350 6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now. pulento parents: 344 diff changeset	707 draw_edges = draw_edges_mmx;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2293 diff changeset	708
4197 bbe0bc387a19 revert bad checkin mru parents: 4196 diff changeset	709 if (mm_flags & MM_SSE2) {
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	710 s->denoise_dct= denoise_dct_sse2;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	711 } else {
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	712 s->denoise_dct= denoise_dct_mmx;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2967 diff changeset	713 }
220 0b234715e205 (commit by michael) arpi_esp parents: 206 diff changeset	714
625 bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	715 if(dct_algo==FF_DCT_AUTO \|\| dct_algo==FF_DCT_MMX){
4197 bbe0bc387a19 revert bad checkin mru parents: 4196 diff changeset	716 if(mm_flags & MM_SSE2){
1765 e31754bc5b65 SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>) michael parents: 1720 diff changeset	717 s->dct_quantize= dct_quantize_SSE2;
4197 bbe0bc387a19 revert bad checkin mru parents: 4196 diff changeset	718 } else if(mm_flags & MM_MMXEXT){
625 bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	719 s->dct_quantize= dct_quantize_MMX2;
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	720 } else {
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	721 s->dct_quantize= dct_quantize_MMX;
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) michaelni parents: 620 diff changeset	722 }
350 6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now. pulento parents: 344 diff changeset	723 }
14 8ceb13af9cb6 renamed - use of s->dct_unquantize function pointer - SHOULD add faster h263 mmx specific unquantization stuff glantau parents: 8 diff changeset	724 }
8 1b4461b5a7fb Sync with mplayer's stuff nickols_k parents: diff changeset	725 }

Mercurial > libavcodec.hg

annotate i386/mpegvideo_mmx.c @ 4449:cb49f6384eb5 libavcodec